Browse Source

Merge branch 'asoc-ab8500' into for-3.6

Mark Brown 13 years ago
parent
commit
66e61060d7
100 changed files with 2161 additions and 764 deletions
  1. 93 0
      Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt
  2. 9 0
      Documentation/kernel-parameters.txt
  3. 278 0
      Documentation/vm/frontswap.txt
  4. 33 17
      MAINTAINERS
  5. 1 1
      Makefile
  6. 0 1
      arch/arm/Kconfig
  7. 6 0
      arch/arm/mach-shmobile/Kconfig
  8. 14 0
      arch/arm/mach-ux500/board-mop500.c
  9. 4 6
      arch/arm/mm/dma-mapping.c
  10. 1 1
      arch/avr32/kernel/signal.c
  11. 1 1
      arch/blackfin/kernel/process.c
  12. 2 1
      arch/parisc/Makefile
  13. 1 0
      arch/parisc/include/asm/Kbuild
  14. 2 0
      arch/parisc/include/asm/bug.h
  15. 5 6
      arch/powerpc/kernel/module_32.c
  16. 11 3
      arch/powerpc/kernel/time.c
  17. 0 5
      arch/tile/include/asm/thread_info.h
  18. 0 14
      arch/tile/kernel/entry.S
  19. 1 0
      arch/tile/kernel/setup.c
  20. 31 11
      arch/x86/boot/header.S
  21. 109 63
      arch/x86/boot/tools/build.c
  22. 14 0
      arch/x86/include/asm/nmi.h
  23. 6 6
      arch/x86/include/asm/uaccess.h
  24. 0 1
      arch/x86/include/asm/uv/uv_bau.h
  25. 0 6
      arch/x86/kernel/aperture_64.c
  26. 2 2
      arch/x86/kernel/apic/io_apic.c
  27. 2 2
      arch/x86/kernel/cpu/mcheck/mce.c
  28. 9 2
      arch/x86/kernel/cpu/perf_event.c
  29. 2 0
      arch/x86/kernel/cpu/perf_event.h
  30. 108 37
      arch/x86/kernel/cpu/perf_event_intel.c
  31. 1 8
      arch/x86/kernel/cpu/perf_event_intel_ds.c
  32. 2 2
      arch/x86/kernel/nmi_selftest.c
  33. 4 2
      arch/x86/kernel/reboot.c
  34. 10 9
      arch/x86/kernel/smpboot.c
  35. 4 0
      arch/x86/lib/usercopy.c
  36. 4 4
      arch/x86/lib/x86-opcode-map.txt
  37. 2 1
      arch/x86/mm/init.c
  38. 2 0
      arch/x86/mm/srat.c
  39. 1 1
      arch/x86/platform/mrst/mrst.c
  40. 0 1
      arch/x86/platform/uv/tlb_uv.c
  41. 9 5
      arch/x86/tools/gen-insn-attr-x86.awk
  42. 2 2
      arch/xtensa/include/asm/syscall.h
  43. 1 1
      arch/xtensa/kernel/signal.c
  44. 1 1
      drivers/acpi/Kconfig
  45. 9 1
      drivers/acpi/battery.c
  46. 57 31
      drivers/acpi/bus.c
  47. 1 1
      drivers/acpi/power.c
  48. 25 5
      drivers/acpi/processor_perflib.c
  49. 1 0
      drivers/acpi/scan.c
  50. 45 4
      drivers/acpi/sleep.c
  51. 22 11
      drivers/acpi/video.c
  52. 1 0
      drivers/char/agp/intel-agp.c
  53. 1 0
      drivers/char/agp/intel-agp.h
  54. 1 0
      drivers/clocksource/Makefile
  55. 406 0
      drivers/clocksource/em_sti.c
  56. 1 1
      drivers/gpio/gpio-samsung.c
  57. 2 2
      drivers/gpu/drm/exynos/exynos_drm_drv.c
  58. 0 7
      drivers/gpu/drm/exynos/exynos_drm_encoder.c
  59. 14 5
      drivers/gpu/drm/exynos/exynos_drm_fb.c
  60. 2 2
      drivers/gpu/drm/exynos/exynos_drm_fb.h
  61. 3 6
      drivers/gpu/drm/exynos/exynos_drm_gem.c
  62. 7 5
      drivers/gpu/drm/exynos/exynos_mixer.c
  63. 9 4
      drivers/gpu/drm/i915/i915_drv.c
  64. 3 0
      drivers/gpu/drm/i915/i915_drv.h
  65. 35 3
      drivers/gpu/drm/i915/i915_irq.c
  66. 40 3
      drivers/gpu/drm/i915/i915_reg.h
  67. 18 1
      drivers/gpu/drm/i915/intel_display.c
  68. 18 3
      drivers/gpu/drm/i915/intel_ringbuffer.c
  69. 17 4
      drivers/gpu/drm/radeon/ni.c
  70. 6 9
      drivers/gpu/drm/radeon/r600.c
  71. 3 2
      drivers/gpu/drm/radeon/r600_audio.c
  72. 0 1
      drivers/gpu/drm/radeon/r600_hdmi.c
  73. 2 3
      drivers/gpu/drm/radeon/radeon.h
  74. 12 7
      drivers/gpu/drm/radeon/radeon_gart.c
  75. 1 1
      drivers/gpu/drm/radeon/radeon_kms.c
  76. 6 6
      drivers/gpu/drm/radeon/rs600.c
  77. 6 6
      drivers/gpu/drm/radeon/rs690.c
  78. 6 12
      drivers/gpu/drm/radeon/rv770.c
  79. 162 315
      drivers/gpu/drm/radeon/si.c
  80. 19 0
      drivers/gpu/drm/radeon/sid.h
  81. 12 0
      drivers/i2c/muxes/Kconfig
  82. 1 0
      drivers/i2c/muxes/Makefile
  83. 279 0
      drivers/i2c/muxes/i2c-mux-pinctrl.c
  84. 4 0
      drivers/infiniband/hw/cxgb4/cm.c
  85. 8 13
      drivers/infiniband/hw/mlx4/main.c
  86. 8 0
      drivers/infiniband/hw/mlx4/mlx4_ib.h
  87. 15 6
      drivers/infiniband/hw/mlx4/qp.c
  88. 0 1
      drivers/infiniband/hw/ocrdma/ocrdma.h
  89. 1 4
      drivers/infiniband/hw/ocrdma/ocrdma_abi.h
  90. 1 8
      drivers/infiniband/hw/ocrdma/ocrdma_hw.c
  91. 0 1
      drivers/infiniband/hw/ocrdma/ocrdma_main.c
  92. 0 5
      drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
  93. 0 1
      drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
  94. 44 27
      drivers/iommu/amd_iommu.c
  95. 5 8
      drivers/iommu/amd_iommu_init.c
  96. 3 0
      drivers/iommu/amd_iommu_types.h
  97. 4 0
      drivers/md/raid1.c
  98. 4 0
      drivers/md/raid10.c
  99. 10 2
      drivers/mtd/ubi/debug.c
  100. 13 4
      drivers/mtd/ubi/wl.c

+ 93 - 0
Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt

@@ -0,0 +1,93 @@
+Pinctrl-based I2C Bus Mux
+
+This binding describes an I2C bus multiplexer that uses pin multiplexing to
+route the I2C signals, and represents the pin multiplexing configuration
+using the pinctrl device tree bindings.
+
+                                 +-----+  +-----+
+                                 | dev |  | dev |
+    +------------------------+   +-----+  +-----+
+    | SoC                    |      |        |
+    |                   /----|------+--------+
+    |   +---+   +------+     | child bus A, on first set of pins
+    |   |I2C|---|Pinmux|     |
+    |   +---+   +------+     | child bus B, on second set of pins
+    |                   \----|------+--------+--------+
+    |                        |      |        |        |
+    +------------------------+  +-----+  +-----+  +-----+
+                                | dev |  | dev |  | dev |
+                                +-----+  +-----+  +-----+
+
+Required properties:
+- compatible: i2c-mux-pinctrl
+- i2c-parent: The phandle of the I2C bus that this multiplexer's master-side
+  port is connected to.
+
+Also required are:
+
+* Standard pinctrl properties that specify the pin mux state for each child
+  bus. See ../pinctrl/pinctrl-bindings.txt.
+
+* Standard I2C mux properties. See mux.txt in this directory.
+
+* I2C child bus nodes. See mux.txt in this directory.
+
+For each named state defined in the pinctrl-names property, an I2C child bus
+will be created. I2C child bus numbers are assigned based on the index into
+the pinctrl-names property.
+
+The only exception is that no bus will be created for a state named "idle". If
+such a state is defined, it must be the last entry in pinctrl-names. For
+example:
+
+	pinctrl-names = "ddc", "pta", "idle"  ->  ddc = bus 0, pta = bus 1
+	pinctrl-names = "ddc", "idle", "pta"  ->  Invalid ("idle" not last)
+	pinctrl-names = "idle", "ddc", "pta"  ->  Invalid ("idle" not last)
+
+Whenever an access is made to a device on a child bus, the relevant pinctrl
+state will be programmed into hardware.
+
+If an idle state is defined, whenever an access is not being made to a device
+on a child bus, the idle pinctrl state will be programmed into hardware.
+
+If an idle state is not defined, the most recently used pinctrl state will be
+left programmed into hardware whenever no access is being made of a device on
+a child bus.
+
+Example:
+
+	i2cmux {
+		compatible = "i2c-mux-pinctrl";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		i2c-parent = <&i2c1>;
+
+		pinctrl-names = "ddc", "pta", "idle";
+		pinctrl-0 = <&state_i2cmux_ddc>;
+		pinctrl-1 = <&state_i2cmux_pta>;
+		pinctrl-2 = <&state_i2cmux_idle>;
+
+		i2c@0 {
+			reg = <0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			eeprom {
+				compatible = "eeprom";
+				reg = <0x50>;
+			};
+		};
+
+		i2c@1 {
+			reg = <1>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			eeprom {
+				compatible = "eeprom";
+				reg = <0x50>;
+			};
+		};
+	};
+

+ 9 - 0
Documentation/kernel-parameters.txt

@@ -2543,6 +2543,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	sched_debug	[KNL] Enables verbose scheduler debug messages.
 
+	skew_tick=	[KNL] Offset the periodic timer tick per cpu to mitigate
+			xtime_lock contention on larger systems, and/or RCU lock
+			contention on all systems with CONFIG_MAXSMP set.
+			Format: { "0" | "1" }
+			0 -- disable. (may be 1 via CONFIG_CMDLINE="skew_tick=1"
+			1 -- enable.
+			Note: increases power consumption, thus should only be
+			enabled if running jitter sensitive (HPC/RT) workloads.
+
 	security=	[SECURITY] Choose a security module to enable at boot.
 			If this boot parameter is not specified, only the first
 			security module asking for security registration will be

+ 278 - 0
Documentation/vm/frontswap.txt

@@ -0,0 +1,278 @@
+Frontswap provides a "transcendent memory" interface for swap pages.
+In some environments, dramatic performance savings may be obtained because
+swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk.
+
+(Note, frontswap -- and cleancache (merged at 3.0) -- are the "frontends"
+and the only necessary changes to the core kernel for transcendent memory;
+all other supporting code -- the "backends" -- is implemented as drivers.
+See the LWN.net article "Transcendent memory in a nutshell" for a detailed
+overview of frontswap and related kernel parts:
+https://lwn.net/Articles/454795/ )
+
+Frontswap is so named because it can be thought of as the opposite of
+a "backing" store for a swap device.  The storage is assumed to be
+a synchronous concurrency-safe page-oriented "pseudo-RAM device" conforming
+to the requirements of transcendent memory (such as Xen's "tmem", or
+in-kernel compressed memory, aka "zcache", or future RAM-like devices);
+this pseudo-RAM device is not directly accessible or addressable by the
+kernel and is of unknown and possibly time-varying size.  The driver
+links itself to frontswap by calling frontswap_register_ops to set the
+frontswap_ops funcs appropriately and the functions it provides must
+conform to certain policies as follows:
+
+An "init" prepares the device to receive frontswap pages associated
+with the specified swap device number (aka "type").  A "store" will
+copy the page to transcendent memory and associate it with the type and
+offset associated with the page. A "load" will copy the page, if found,
+from transcendent memory into kernel memory, but will NOT remove the page
+from from transcendent memory.  An "invalidate_page" will remove the page
+from transcendent memory and an "invalidate_area" will remove ALL pages
+associated with the swap type (e.g., like swapoff) and notify the "device"
+to refuse further stores with that swap type.
+
+Once a page is successfully stored, a matching load on the page will normally
+succeed.  So when the kernel finds itself in a situation where it needs
+to swap out a page, it first attempts to use frontswap.  If the store returns
+success, the data has been successfully saved to transcendent memory and
+a disk write and, if the data is later read back, a disk read are avoided.
+If a store returns failure, transcendent memory has rejected the data, and the
+page can be written to swap as usual.
+
+If a backend chooses, frontswap can be configured as a "writethrough
+cache" by calling frontswap_writethrough().  In this mode, the reduction
+in swap device writes is lost (and also a non-trivial performance advantage)
+in order to allow the backend to arbitrarily "reclaim" space used to
+store frontswap pages to more completely manage its memory usage.
+
+Note that if a page is stored and the page already exists in transcendent memory
+(a "duplicate" store), either the store succeeds and the data is overwritten,
+or the store fails AND the page is invalidated.  This ensures stale data may
+never be obtained from frontswap.
+
+If properly configured, monitoring of frontswap is done via debugfs in
+the /sys/kernel/debug/frontswap directory.  The effectiveness of
+frontswap can be measured (across all swap devices) with:
+
+failed_stores	- how many store attempts have failed
+loads		- how many loads were attempted (all should succeed)
+succ_stores	- how many store attempts have succeeded
+invalidates	- how many invalidates were attempted
+
+A backend implementation may provide additional metrics.
+
+FAQ
+
+1) Where's the value?
+
+When a workload starts swapping, performance falls through the floor.
+Frontswap significantly increases performance in many such workloads by
+providing a clean, dynamic interface to read and write swap pages to
+"transcendent memory" that is otherwise not directly addressable to the kernel.
+This interface is ideal when data is transformed to a different form
+and size (such as with compression) or secretly moved (as might be
+useful for write-balancing for some RAM-like devices).  Swap pages (and
+evicted page-cache pages) are a great use for this kind of slower-than-RAM-
+but-much-faster-than-disk "pseudo-RAM device" and the frontswap (and
+cleancache) interface to transcendent memory provides a nice way to read
+and write -- and indirectly "name" -- the pages.
+
+Frontswap -- and cleancache -- with a fairly small impact on the kernel,
+provides a huge amount of flexibility for more dynamic, flexible RAM
+utilization in various system configurations:
+
+In the single kernel case, aka "zcache", pages are compressed and
+stored in local memory, thus increasing the total anonymous pages
+that can be safely kept in RAM.  Zcache essentially trades off CPU
+cycles used in compression/decompression for better memory utilization.
+Benchmarks have shown little or no impact when memory pressure is
+low while providing a significant performance improvement (25%+)
+on some workloads under high memory pressure.
+
+"RAMster" builds on zcache by adding "peer-to-peer" transcendent memory
+support for clustered systems.  Frontswap pages are locally compressed
+as in zcache, but then "remotified" to another system's RAM.  This
+allows RAM to be dynamically load-balanced back-and-forth as needed,
+i.e. when system A is overcommitted, it can swap to system B, and
+vice versa.  RAMster can also be configured as a memory server so
+many servers in a cluster can swap, dynamically as needed, to a single
+server configured with a large amount of RAM... without pre-configuring
+how much of the RAM is available for each of the clients!
+
+In the virtual case, the whole point of virtualization is to statistically
+multiplex physical resources acrosst the varying demands of multiple
+virtual machines.  This is really hard to do with RAM and efforts to do
+it well with no kernel changes have essentially failed (except in some
+well-publicized special-case workloads).
+Specifically, the Xen Transcendent Memory backend allows otherwise
+"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple
+virtual machines, but the pages can be compressed and deduplicated to
+optimize RAM utilization.  And when guest OS's are induced to surrender
+underutilized RAM (e.g. with "selfballooning"), sudden unexpected
+memory pressure may result in swapping; frontswap allows those pages
+to be swapped to and from hypervisor RAM (if overall host system memory
+conditions allow), thus mitigating the potentially awful performance impact
+of unplanned swapping.
+
+A KVM implementation is underway and has been RFC'ed to lkml.  And,
+using frontswap, investigation is also underway on the use of NVM as
+a memory extension technology.
+
+2) Sure there may be performance advantages in some situations, but
+   what's the space/time overhead of frontswap?
+
+If CONFIG_FRONTSWAP is disabled, every frontswap hook compiles into
+nothingness and the only overhead is a few extra bytes per swapon'ed
+swap device.  If CONFIG_FRONTSWAP is enabled but no frontswap "backend"
+registers, there is one extra global variable compared to zero for
+every swap page read or written.  If CONFIG_FRONTSWAP is enabled
+AND a frontswap backend registers AND the backend fails every "store"
+request (i.e. provides no memory despite claiming it might),
+CPU overhead is still negligible -- and since every frontswap fail
+precedes a swap page write-to-disk, the system is highly likely
+to be I/O bound and using a small fraction of a percent of a CPU
+will be irrelevant anyway.
+
+As for space, if CONFIG_FRONTSWAP is enabled AND a frontswap backend
+registers, one bit is allocated for every swap page for every swap
+device that is swapon'd.  This is added to the EIGHT bits (which
+was sixteen until about 2.6.34) that the kernel already allocates
+for every swap page for every swap device that is swapon'd.  (Hugh
+Dickins has observed that frontswap could probably steal one of
+the existing eight bits, but let's worry about that minor optimization
+later.)  For very large swap disks (which are rare) on a standard
+4K pagesize, this is 1MB per 32GB swap.
+
+When swap pages are stored in transcendent memory instead of written
+out to disk, there is a side effect that this may create more memory
+pressure that can potentially outweigh the other advantages.  A
+backend, such as zcache, must implement policies to carefully (but
+dynamically) manage memory limits to ensure this doesn't happen.
+
+3) OK, how about a quick overview of what this frontswap patch does
+   in terms that a kernel hacker can grok?
+
+Let's assume that a frontswap "backend" has registered during
+kernel initialization; this registration indicates that this
+frontswap backend has access to some "memory" that is not directly
+accessible by the kernel.  Exactly how much memory it provides is
+entirely dynamic and random.
+
+Whenever a swap-device is swapon'd frontswap_init() is called,
+passing the swap device number (aka "type") as a parameter.
+This notifies frontswap to expect attempts to "store" swap pages
+associated with that number.
+
+Whenever the swap subsystem is readying a page to write to a swap
+device (c.f swap_writepage()), frontswap_store is called.  Frontswap
+consults with the frontswap backend and if the backend says it does NOT
+have room, frontswap_store returns -1 and the kernel swaps the page
+to the swap device as normal.  Note that the response from the frontswap
+backend is unpredictable to the kernel; it may choose to never accept a
+page, it could accept every ninth page, or it might accept every
+page.  But if the backend does accept a page, the data from the page
+has already been copied and associated with the type and offset,
+and the backend guarantees the persistence of the data.  In this case,
+frontswap sets a bit in the "frontswap_map" for the swap device
+corresponding to the page offset on the swap device to which it would
+otherwise have written the data.
+
+When the swap subsystem needs to swap-in a page (swap_readpage()),
+it first calls frontswap_load() which checks the frontswap_map to
+see if the page was earlier accepted by the frontswap backend.  If
+it was, the page of data is filled from the frontswap backend and
+the swap-in is complete.  If not, the normal swap-in code is
+executed to obtain the page of data from the real swap device.
+
+So every time the frontswap backend accepts a page, a swap device read
+and (potentially) a swap device write are replaced by a "frontswap backend
+store" and (possibly) a "frontswap backend loads", which are presumably much
+faster.
+
+4) Can't frontswap be configured as a "special" swap device that is
+   just higher priority than any real swap device (e.g. like zswap,
+   or maybe swap-over-nbd/NFS)?
+
+No.  First, the existing swap subsystem doesn't allow for any kind of
+swap hierarchy.  Perhaps it could be rewritten to accomodate a hierarchy,
+but this would require fairly drastic changes.  Even if it were
+rewritten, the existing swap subsystem uses the block I/O layer which
+assumes a swap device is fixed size and any page in it is linearly
+addressable.  Frontswap barely touches the existing swap subsystem,
+and works around the constraints of the block I/O subsystem to provide
+a great deal of flexibility and dynamicity.
+
+For example, the acceptance of any swap page by the frontswap backend is
+entirely unpredictable. This is critical to the definition of frontswap
+backends because it grants completely dynamic discretion to the
+backend.  In zcache, one cannot know a priori how compressible a page is.
+"Poorly" compressible pages can be rejected, and "poorly" can itself be
+defined dynamically depending on current memory constraints.
+
+Further, frontswap is entirely synchronous whereas a real swap
+device is, by definition, asynchronous and uses block I/O.  The
+block I/O layer is not only unnecessary, but may perform "optimizations"
+that are inappropriate for a RAM-oriented device including delaying
+the write of some pages for a significant amount of time.  Synchrony is
+required to ensure the dynamicity of the backend and to avoid thorny race
+conditions that would unnecessarily and greatly complicate frontswap
+and/or the block I/O subsystem.  That said, only the initial "store"
+and "load" operations need be synchronous.  A separate asynchronous thread
+is free to manipulate the pages stored by frontswap.  For example,
+the "remotification" thread in RAMster uses standard asynchronous
+kernel sockets to move compressed frontswap pages to a remote machine.
+Similarly, a KVM guest-side implementation could do in-guest compression
+and use "batched" hypercalls.
+
+In a virtualized environment, the dynamicity allows the hypervisor
+(or host OS) to do "intelligent overcommit".  For example, it can
+choose to accept pages only until host-swapping might be imminent,
+then force guests to do their own swapping.
+
+There is a downside to the transcendent memory specifications for
+frontswap:  Since any "store" might fail, there must always be a real
+slot on a real swap device to swap the page.  Thus frontswap must be
+implemented as a "shadow" to every swapon'd device with the potential
+capability of holding every page that the swap device might have held
+and the possibility that it might hold no pages at all.  This means
+that frontswap cannot contain more pages than the total of swapon'd
+swap devices.  For example, if NO swap device is configured on some
+installation, frontswap is useless.  Swapless portable devices
+can still use frontswap but a backend for such devices must configure
+some kind of "ghost" swap device and ensure that it is never used.
+
+5) Why this weird definition about "duplicate stores"?  If a page
+   has been previously successfully stored, can't it always be
+   successfully overwritten?
+
+Nearly always it can, but no, sometimes it cannot.  Consider an example
+where data is compressed and the original 4K page has been compressed
+to 1K.  Now an attempt is made to overwrite the page with data that
+is non-compressible and so would take the entire 4K.  But the backend
+has no more space.  In this case, the store must be rejected.  Whenever
+frontswap rejects a store that would overwrite, it also must invalidate
+the old data and ensure that it is no longer accessible.  Since the
+swap subsystem then writes the new data to the read swap device,
+this is the correct course of action to ensure coherency.
+
+6) What is frontswap_shrink for?
+
+When the (non-frontswap) swap subsystem swaps out a page to a real
+swap device, that page is only taking up low-value pre-allocated disk
+space.  But if frontswap has placed a page in transcendent memory, that
+page may be taking up valuable real estate.  The frontswap_shrink
+routine allows code outside of the swap subsystem to force pages out
+of the memory managed by frontswap and back into kernel-addressable memory.
+For example, in RAMster, a "suction driver" thread will attempt
+to "repatriate" pages sent to a remote machine back to the local machine;
+this is driven using the frontswap_shrink mechanism when memory pressure
+subsides.
+
+7) Why does the frontswap patch create the new include file swapfile.h?
+
+The frontswap code depends on some swap-subsystem-internal data
+structures that have, over the years, moved back and forth between
+static and global.  This seemed a reasonable compromise:  Define
+them as global but declare them in a new include file that isn't
+included by the large number of source files that include swap.h.
+
+Dan Magenheimer, last updated April 9, 2012

+ 33 - 17
MAINTAINERS

@@ -1077,7 +1077,7 @@ F:	drivers/media/video/s5p-fimc/
 ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT
 M:	Kyungmin Park <kyungmin.park@samsung.com>
 M:	Kamil Debski <k.debski@samsung.com>
-M:     Jeongtae Park <jtp.park@samsung.com>
+M:	Jeongtae Park <jtp.park@samsung.com>
 L:	linux-arm-kernel@lists.infradead.org
 L:	linux-media@vger.kernel.org
 S:	Maintained
@@ -1743,10 +1743,10 @@ F:	include/linux/can/platform/
 CAPABILITIES
 M:	Serge Hallyn <serge.hallyn@canonical.com>
 L:	linux-security-module@vger.kernel.org
-S:	Supported	
+S:	Supported
 F:	include/linux/capability.h
 F:	security/capability.c
-F:	security/commoncap.c 
+F:	security/commoncap.c
 F:	kernel/capability.c
 
 CELL BROADBAND ENGINE ARCHITECTURE
@@ -2146,11 +2146,11 @@ S:	Orphan
 F:	drivers/net/wan/pc300*
 
 CYTTSP TOUCHSCREEN DRIVER
-M:      Javier Martinez Canillas <javier@dowhile0.org>
-L:      linux-input@vger.kernel.org
-S:      Maintained
-F:      drivers/input/touchscreen/cyttsp*
-F:      include/linux/input/cyttsp.h
+M:	Javier Martinez Canillas <javier@dowhile0.org>
+L:	linux-input@vger.kernel.org
+S:	Maintained
+F:	drivers/input/touchscreen/cyttsp*
+F:	include/linux/input/cyttsp.h
 
 DAMA SLAVE for AX.25
 M:	Joerg Reuter <jreuter@yaina.de>
@@ -2270,7 +2270,7 @@ F:	include/linux/device-mapper.h
 F:	include/linux/dm-*.h
 
 DIOLAN U2C-12 I2C DRIVER
-M:	Guenter Roeck <guenter.roeck@ericsson.com>
+M:	Guenter Roeck <linux@roeck-us.net>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/i2c/busses/i2c-diolan-u2c.c
@@ -2930,6 +2930,13 @@ F:	Documentation/power/freezing-of-tasks.txt
 F:	include/linux/freezer.h
 F:	kernel/freezer.c
 
+FRONTSWAP API
+M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	mm/frontswap.c
+F:	include/linux/frontswap.h
+
 FS-CACHE: LOCAL CACHING FOR NETWORK FILESYSTEMS
 M:	David Howells <dhowells@redhat.com>
 L:	linux-cachefs@redhat.com
@@ -3138,7 +3145,7 @@ F:	drivers/tty/hvc/
 
 HARDWARE MONITORING
 M:	Jean Delvare <khali@linux-fr.org>
-M:	Guenter Roeck <guenter.roeck@ericsson.com>
+M:	Guenter Roeck <linux@roeck-us.net>
 L:	lm-sensors@lm-sensors.org
 W:	http://www.lm-sensors.org/
 T:	quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-hwmon/
@@ -4096,6 +4103,8 @@ F:	drivers/scsi/53c700*
 LED SUBSYSTEM
 M:	Bryan Wu <bryan.wu@canonical.com>
 M:	Richard Purdie <rpurdie@rpsys.net>
+L:	linux-leds@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/linux-leds.git
 S:	Maintained
 F:	drivers/leds/
 F:	include/linux/leds.h
@@ -4411,6 +4420,13 @@ S:	Orphan
 F:	drivers/video/matrox/matroxfb_*
 F:	include/linux/matroxfb.h
 
+MAX16065 HARDWARE MONITOR DRIVER
+M:	Guenter Roeck <linux@roeck-us.net>
+L:	lm-sensors@lm-sensors.org
+S:	Maintained
+F:	Documentation/hwmon/max16065
+F:	drivers/hwmon/max16065.c
+
 MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER
 M:	"Hans J. Koch" <hjk@hansjkoch.de>
 L:	lm-sensors@lm-sensors.org
@@ -5149,7 +5165,7 @@ F:	drivers/leds/leds-pca9532.c
 F:	include/linux/leds-pca9532.h
 
 PCA9541 I2C BUS MASTER SELECTOR DRIVER
-M:	Guenter Roeck <guenter.roeck@ericsson.com>
+M:	Guenter Roeck <linux@roeck-us.net>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/i2c/muxes/i2c-mux-pca9541.c
@@ -5169,7 +5185,7 @@ S:	Maintained
 F:	drivers/firmware/pcdp.*
 
 PCI ERROR RECOVERY
-M:     Linas Vepstas <linasvepstas@gmail.com>
+M:	Linas Vepstas <linasvepstas@gmail.com>
 L:	linux-pci@vger.kernel.org
 S:	Supported
 F:	Documentation/PCI/pci-error-recovery.txt
@@ -5299,7 +5315,7 @@ F:	drivers/video/fb-puv3.c
 F:	drivers/rtc/rtc-puv3.c
 
 PMBUS HARDWARE MONITORING DRIVERS
-M:	Guenter Roeck <guenter.roeck@ericsson.com>
+M:	Guenter Roeck <linux@roeck-us.net>
 L:	lm-sensors@lm-sensors.org
 W:	http://www.lm-sensors.org/
 W:	http://www.roeck-us.net/linux/drivers/
@@ -7293,11 +7309,11 @@ F:	Documentation/DocBook/uio-howto.tmpl
 F:	drivers/uio/
 F:	include/linux/uio*.h
 
-UTIL-LINUX-NG PACKAGE
+UTIL-LINUX PACKAGE
 M:	Karel Zak <kzak@redhat.com>
-L:	util-linux-ng@vger.kernel.org
-W:	http://kernel.org/~kzak/util-linux-ng/
-T:	git git://git.kernel.org/pub/scm/utils/util-linux-ng/util-linux-ng.git
+L:	util-linux@vger.kernel.org
+W:	http://en.wikipedia.org/wiki/Util-linux
+T:	git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git
 S:	Maintained
 
 UVESAFB DRIVER

+ 1 - 1
Makefile

@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Saber-toothed Squirrel
 
 # *DOCUMENTATION*

+ 0 - 1
arch/arm/Kconfig

@@ -7,7 +7,6 @@ config ARM
 	select HAVE_IDE if PCI || ISA || PCMCIA
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7)
-	select CMA if (CPU_V6 || CPU_V6K || CPU_V7)
 	select HAVE_MEMBLOCK
 	select RTC_LIB
 	select SYS_SUPPORTS_APM_EMULATION

+ 6 - 0
arch/arm/mach-shmobile/Kconfig

@@ -186,6 +186,12 @@ config SH_TIMER_TMU
 	help
 	  This enables build of the TMU timer driver.
 
+config EM_TIMER_STI
+	bool "STI timer driver"
+	default y
+	help
+	  This enables build of the STI timer driver.
+
 endmenu
 
 config SH_CLK_CPG

+ 14 - 0
arch/arm/mach-ux500/board-mop500.c

@@ -25,6 +25,7 @@
 #include <linux/mfd/tc3589x.h>
 #include <linux/mfd/tps6105x.h>
 #include <linux/mfd/abx500/ab8500-gpio.h>
+#include <linux/mfd/abx500/ab8500-codec.h>
 #include <linux/leds-lp5521.h>
 #include <linux/input.h>
 #include <linux/smsc911x.h>
@@ -97,6 +98,18 @@ static struct ab8500_gpio_platform_data ab8500_gpio_pdata = {
 					0x7A, 0x00, 0x00},
 };
 
+/* ab8500-codec */
+static struct ab8500_codec_platform_data ab8500_codec_pdata = {
+	.amics =  {
+		.mic1_type = AMIC_TYPE_DIFFERENTIAL,
+		.mic2_type = AMIC_TYPE_DIFFERENTIAL,
+		.mic1a_micbias = AMIC_MICBIAS_VAMIC1,
+		.mic1b_micbias = AMIC_MICBIAS_VAMIC1,
+		.mic2_micbias = AMIC_MICBIAS_VAMIC2
+	},
+	.ear_cmv = EAR_CMV_0_95V
+};
+
 static struct gpio_keys_button snowball_key_array[] = {
 	{
 		.gpio           = 32,
@@ -195,6 +208,7 @@ static struct ab8500_platform_data ab8500_platdata = {
 	.regulator	= ab8500_regulators,
 	.num_regulator	= ARRAY_SIZE(ab8500_regulators),
 	.gpio		= &ab8500_gpio_pdata,
+	.codec		= &ab8500_codec_pdata,
 };
 
 static struct resource ab8500_resources[] = {

+ 4 - 6
arch/arm/mm/dma-mapping.c

@@ -268,10 +268,8 @@ static int __init consistent_init(void)
 	unsigned long base = consistent_base;
 	unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT;
 
-#ifndef CONFIG_ARM_DMA_USE_IOMMU
-	if (cpu_architecture() >= CPU_ARCH_ARMv6)
+	if (IS_ENABLED(CONFIG_CMA) && !IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU))
 		return 0;
-#endif
 
 	consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
 	if (!consistent_pte) {
@@ -342,7 +340,7 @@ static int __init coherent_init(void)
 	struct page *page;
 	void *ptr;
 
-	if (cpu_architecture() < CPU_ARCH_ARMv6)
+	if (!IS_ENABLED(CONFIG_CMA))
 		return 0;
 
 	ptr = __alloc_from_contiguous(NULL, size, prot, &page);
@@ -704,7 +702,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 
 	if (arch_is_coherent() || nommu())
 		addr = __alloc_simple_buffer(dev, size, gfp, &page);
-	else if (cpu_architecture() < CPU_ARCH_ARMv6)
+	else if (!IS_ENABLED(CONFIG_CMA))
 		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
 	else if (gfp & GFP_ATOMIC)
 		addr = __alloc_from_pool(dev, size, &page, caller);
@@ -773,7 +771,7 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 
 	if (arch_is_coherent() || nommu()) {
 		__dma_free_buffer(page, size);
-	} else if (cpu_architecture() < CPU_ARCH_ARMv6) {
+	} else if (!IS_ENABLED(CONFIG_CMA)) {
 		__dma_free_remap(cpu_addr, size);
 		__dma_free_buffer(page, size);
 	} else {

+ 1 - 1
arch/avr32/kernel/signal.c

@@ -300,7 +300,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti)
 	if ((sysreg_read(SR) & MODE_MASK) == MODE_SUPERVISOR)
 		syscall = 1;
 
-	if (ti->flags & _TIF_SIGPENDING))
+	if (ti->flags & _TIF_SIGPENDING)
 		do_signal(regs, syscall);
 
 	if (ti->flags & _TIF_NOTIFY_RESUME) {

+ 1 - 1
arch/blackfin/kernel/process.c

@@ -173,7 +173,7 @@ asmlinkage int bfin_clone(struct pt_regs *regs)
 	unsigned long newsp;
 
 #ifdef __ARCH_SYNC_CORE_DCACHE
-	if (current->rt.nr_cpus_allowed == num_possible_cpus())
+	if (current->nr_cpus_allowed == num_possible_cpus())
 		set_cpus_allowed_ptr(current, cpumask_of(smp_processor_id()));
 #endif
 

+ 2 - 1
arch/parisc/Makefile

@@ -21,6 +21,7 @@ KBUILD_DEFCONFIG := default_defconfig
 
 NM		= sh $(srctree)/arch/parisc/nm
 CHECKFLAGS	+= -D__hppa__=1
+LIBGCC		= $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
 
 MACHINE		:= $(shell uname -m)
 ifeq ($(MACHINE),parisc*)
@@ -79,7 +80,7 @@ kernel-y			:= mm/ kernel/ math-emu/
 kernel-$(CONFIG_HPUX)		+= hpux/
 
 core-y	+= $(addprefix arch/parisc/, $(kernel-y))
-libs-y	+= arch/parisc/lib/ `$(CC) -print-libgcc-file-name`
+libs-y	+= arch/parisc/lib/ $(LIBGCC)
 
 drivers-$(CONFIG_OPROFILE)		+= arch/parisc/oprofile/
 

+ 1 - 0
arch/parisc/include/asm/Kbuild

@@ -1,3 +1,4 @@
 include include/asm-generic/Kbuild.asm
 
 header-y += pdc.h
+generic-y += word-at-a-time.h

+ 2 - 0
arch/parisc/include/asm/bug.h

@@ -1,6 +1,8 @@
 #ifndef _PARISC_BUG_H
 #define _PARISC_BUG_H
 
+#include <linux/kernel.h>	/* for BUGFLAG_TAINT */
+
 /*
  * Tell the user there is some problem.
  * The offending file and line are encoded in the __bug_table section.

+ 5 - 6
arch/powerpc/kernel/module_32.c

@@ -176,8 +176,8 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr,
 
 static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
 {
-	if (entry->jump[0] == 0x3d600000 + ((val + 0x8000) >> 16)
-	    && entry->jump[1] == 0x396b0000 + (val & 0xffff))
+	if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16)
+	    && entry->jump[1] == 0x398c0000 + (val & 0xffff))
 		return 1;
 	return 0;
 }
@@ -204,10 +204,9 @@ static uint32_t do_plt_call(void *location,
 		entry++;
 	}
 
-	/* Stolen from Paul Mackerras as well... */
-	entry->jump[0] = 0x3d600000+((val+0x8000)>>16);	/* lis r11,sym@ha */
-	entry->jump[1] = 0x396b0000 + (val&0xffff);	/* addi r11,r11,sym@l*/
-	entry->jump[2] = 0x7d6903a6;			/* mtctr r11 */
+	entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */
+	entry->jump[1] = 0x398c0000 + (val&0xffff);     /* addi r12,r12,sym@l*/
+	entry->jump[2] = 0x7d8903a6;                    /* mtctr r12 */
 	entry->jump[3] = 0x4e800420;			/* bctr */
 
 	DEBUGP("Initialized plt for 0x%x at %p\n", val, entry);

+ 11 - 3
arch/powerpc/kernel/time.c

@@ -475,6 +475,7 @@ void timer_interrupt(struct pt_regs * regs)
 	struct pt_regs *old_regs;
 	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
 	struct clock_event_device *evt = &__get_cpu_var(decrementers);
+	u64 now;
 
 	/* Ensure a positive value is written to the decrementer, or else
 	 * some CPUs will continue to take decrementer exceptions.
@@ -509,9 +510,16 @@ void timer_interrupt(struct pt_regs * regs)
 		irq_work_run();
 	}
 
-	*next_tb = ~(u64)0;
-	if (evt->event_handler)
-		evt->event_handler(evt);
+	now = get_tb_or_rtc();
+	if (now >= *next_tb) {
+		*next_tb = ~(u64)0;
+		if (evt->event_handler)
+			evt->event_handler(evt);
+	} else {
+		now = *next_tb - now;
+		if (now <= DECREMENTER_MAX)
+			set_dec((int)now);
+	}
 
 #ifdef CONFIG_PPC64
 	/* collect purr register values often, for accurate calculations */

+ 0 - 5
arch/tile/include/asm/thread_info.h

@@ -91,11 +91,6 @@ extern void smp_nap(void);
 /* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */
 extern void _cpu_idle(void);
 
-/* Switch boot idle thread to a freshly-allocated stack and free old stack. */
-extern void cpu_idle_on_new_stack(struct thread_info *old_ti,
-				  unsigned long new_sp,
-				  unsigned long new_ss10);
-
 #else /* __ASSEMBLY__ */
 
 /*

+ 0 - 14
arch/tile/kernel/entry.S

@@ -68,20 +68,6 @@ STD_ENTRY(KBacktraceIterator_init_current)
 	jrp lr   /* keep backtracer happy */
 	STD_ENDPROC(KBacktraceIterator_init_current)
 
-/*
- * Reset our stack to r1/r2 (sp and ksp0+cpu respectively), then
- * free the old stack (passed in r0) and re-invoke cpu_idle().
- * We update sp and ksp0 simultaneously to avoid backtracer warnings.
- */
-STD_ENTRY(cpu_idle_on_new_stack)
-	{
-	 move sp, r1
-	 mtspr SPR_SYSTEM_SAVE_K_0, r2
-	}
-	jal free_thread_info
-	j cpu_idle
-	STD_ENDPROC(cpu_idle_on_new_stack)
-
 /* Loop forever on a nap during SMP boot. */
 STD_ENTRY(smp_nap)
 	nap

+ 1 - 0
arch/tile/kernel/setup.c

@@ -29,6 +29,7 @@
 #include <linux/smp.h>
 #include <linux/timex.h>
 #include <linux/hugetlb.h>
+#include <linux/start_kernel.h>
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/cacheflush.h>

+ 31 - 11
arch/x86/boot/header.S

@@ -94,10 +94,10 @@ bs_die:
 
 	.section ".bsdata", "a"
 bugger_off_msg:
-	.ascii	"Direct booting from floppy is no longer supported.\r\n"
-	.ascii	"Please use a boot loader program instead.\r\n"
+	.ascii	"Direct floppy boot is not supported. "
+	.ascii	"Use a boot loader program instead.\r\n"
 	.ascii	"\n"
-	.ascii	"Remove disk and press any key to reboot . . .\r\n"
+	.ascii	"Remove disk and press any key to reboot ...\r\n"
 	.byte	0
 
 #ifdef CONFIG_EFI_STUB
@@ -111,7 +111,7 @@ coff_header:
 #else
 	.word	0x8664				# x86-64
 #endif
-	.word	2				# nr_sections
+	.word	3				# nr_sections
 	.long	0 				# TimeDateStamp
 	.long	0				# PointerToSymbolTable
 	.long	1				# NumberOfSymbols
@@ -158,8 +158,8 @@ extra_header_fields:
 #else
 	.quad	0				# ImageBase
 #endif
-	.long	0x1000				# SectionAlignment
-	.long	0x200				# FileAlignment
+	.long	0x20				# SectionAlignment
+	.long	0x20				# FileAlignment
 	.word	0				# MajorOperatingSystemVersion
 	.word	0				# MinorOperatingSystemVersion
 	.word	0				# MajorImageVersion
@@ -200,8 +200,10 @@ extra_header_fields:
 
 	# Section table
 section_table:
-	.ascii	".text"
-	.byte	0
+	#
+	# The offset & size fields are filled in by build.c.
+	#
+	.ascii	".setup"
 	.byte	0
 	.byte	0
 	.long	0
@@ -217,9 +219,8 @@ section_table:
 
 	#
 	# The EFI application loader requires a relocation section
-	# because EFI applications must be relocatable. But since
-	# we don't need the loader to fixup any relocs for us, we
-	# just create an empty (zero-length) .reloc section header.
+	# because EFI applications must be relocatable. The .reloc
+	# offset & size fields are filled in by build.c.
 	#
 	.ascii	".reloc"
 	.byte	0
@@ -233,6 +234,25 @@ section_table:
 	.word	0				# NumberOfRelocations
 	.word	0				# NumberOfLineNumbers
 	.long	0x42100040			# Characteristics (section flags)
+
+	#
+	# The offset & size fields are filled in by build.c.
+	#
+	.ascii	".text"
+	.byte	0
+	.byte	0
+	.byte	0
+	.long	0
+	.long	0x0				# startup_{32,64}
+	.long	0				# Size of initialized data
+						# on disk
+	.long	0x0				# startup_{32,64}
+	.long	0				# PointerToRelocations
+	.long	0				# PointerToLineNumbers
+	.word	0				# NumberOfRelocations
+	.word	0				# NumberOfLineNumbers
+	.long	0x60500020			# Characteristics (section flags)
+
 #endif /* CONFIG_EFI_STUB */
 
 	# Kernel attributes; used by setup.  This is part 1 of the

+ 109 - 63
arch/x86/boot/tools/build.c

@@ -50,6 +50,8 @@ typedef unsigned int   u32;
 u8 buf[SETUP_SECT_MAX*512];
 int is_big_kernel;
 
+#define PECOFF_RELOC_RESERVE 0x20
+
 /*----------------------------------------------------------------------*/
 
 static const u32 crctab32[] = {
@@ -133,11 +135,103 @@ static void usage(void)
 	die("Usage: build setup system [> image]");
 }
 
-int main(int argc, char ** argv)
-{
 #ifdef CONFIG_EFI_STUB
-	unsigned int file_sz, pe_header;
+
+static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
+{
+	unsigned int pe_header;
+	unsigned short num_sections;
+	u8 *section;
+
+	pe_header = get_unaligned_le32(&buf[0x3c]);
+	num_sections = get_unaligned_le16(&buf[pe_header + 6]);
+
+#ifdef CONFIG_X86_32
+	section = &buf[pe_header + 0xa8];
+#else
+	section = &buf[pe_header + 0xb8];
 #endif
+
+	while (num_sections > 0) {
+		if (strncmp((char*)section, section_name, 8) == 0) {
+			/* section header size field */
+			put_unaligned_le32(size, section + 0x8);
+
+			/* section header vma field */
+			put_unaligned_le32(offset, section + 0xc);
+
+			/* section header 'size of initialised data' field */
+			put_unaligned_le32(size, section + 0x10);
+
+			/* section header 'file offset' field */
+			put_unaligned_le32(offset, section + 0x14);
+
+			break;
+		}
+		section += 0x28;
+		num_sections--;
+	}
+}
+
+static void update_pecoff_setup_and_reloc(unsigned int size)
+{
+	u32 setup_offset = 0x200;
+	u32 reloc_offset = size - PECOFF_RELOC_RESERVE;
+	u32 setup_size = reloc_offset - setup_offset;
+
+	update_pecoff_section_header(".setup", setup_offset, setup_size);
+	update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE);
+
+	/*
+	 * Modify .reloc section contents with a single entry. The
+	 * relocation is applied to offset 10 of the relocation section.
+	 */
+	put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]);
+	put_unaligned_le32(10, &buf[reloc_offset + 4]);
+}
+
+static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
+{
+	unsigned int pe_header;
+	unsigned int text_sz = file_sz - text_start;
+
+	pe_header = get_unaligned_le32(&buf[0x3c]);
+
+	/* Size of image */
+	put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
+
+	/*
+	 * Size of code: Subtract the size of the first sector (512 bytes)
+	 * which includes the header.
+	 */
+	put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]);
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Address of entry point.
+	 *
+	 * The EFI stub entry point is +16 bytes from the start of
+	 * the .text section.
+	 */
+	put_unaligned_le32(text_start + 16, &buf[pe_header + 0x28]);
+#else
+	/*
+	 * Address of entry point. startup_32 is at the beginning and
+	 * the 64-bit entry point (startup_64) is always 512 bytes
+	 * after. The EFI stub entry point is 16 bytes after that, as
+	 * the first instruction allows legacy loaders to jump over
+	 * the EFI stub initialisation
+	 */
+	put_unaligned_le32(text_start + 528, &buf[pe_header + 0x28]);
+#endif /* CONFIG_X86_32 */
+
+	update_pecoff_section_header(".text", text_start, text_sz);
+}
+
+#endif /* CONFIG_EFI_STUB */
+
+int main(int argc, char ** argv)
+{
 	unsigned int i, sz, setup_sectors;
 	int c;
 	u32 sys_size;
@@ -163,6 +257,12 @@ int main(int argc, char ** argv)
 		die("Boot block hasn't got boot flag (0xAA55)");
 	fclose(file);
 
+#ifdef CONFIG_EFI_STUB
+	/* Reserve 0x20 bytes for .reloc section */
+	memset(buf+c, 0, PECOFF_RELOC_RESERVE);
+	c += PECOFF_RELOC_RESERVE;
+#endif
+
 	/* Pad unused space with zeros */
 	setup_sectors = (c + 511) / 512;
 	if (setup_sectors < SETUP_SECT_MIN)
@@ -170,6 +270,10 @@ int main(int argc, char ** argv)
 	i = setup_sectors*512;
 	memset(buf+c, 0, i-c);
 
+#ifdef CONFIG_EFI_STUB
+	update_pecoff_setup_and_reloc(i);
+#endif
+
 	/* Set the default root device */
 	put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
 
@@ -194,66 +298,8 @@ int main(int argc, char ** argv)
 	put_unaligned_le32(sys_size, &buf[0x1f4]);
 
 #ifdef CONFIG_EFI_STUB
-	file_sz = sz + i + ((sys_size * 16) - sz);
-
-	pe_header = get_unaligned_le32(&buf[0x3c]);
-
-	/* Size of image */
-	put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
-
-	/*
-	 * Subtract the size of the first section (512 bytes) which
-	 * includes the header and .reloc section. The remaining size
-	 * is that of the .text section.
-	 */
-	file_sz -= 512;
-
-	/* Size of code */
-	put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]);
-
-#ifdef CONFIG_X86_32
-	/*
-	 * Address of entry point.
-	 *
-	 * The EFI stub entry point is +16 bytes from the start of
-	 * the .text section.
-	 */
-	put_unaligned_le32(i + 16, &buf[pe_header + 0x28]);
-
-	/* .text size */
-	put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]);
-
-	/* .text vma */
-	put_unaligned_le32(0x200, &buf[pe_header + 0xb4]);
-
-	/* .text size of initialised data */
-	put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]);
-
-	/* .text file offset */
-	put_unaligned_le32(0x200, &buf[pe_header + 0xbc]);
-#else
-	/*
-	 * Address of entry point. startup_32 is at the beginning and
-	 * the 64-bit entry point (startup_64) is always 512 bytes
-	 * after. The EFI stub entry point is 16 bytes after that, as
-	 * the first instruction allows legacy loaders to jump over
-	 * the EFI stub initialisation
-	 */
-	put_unaligned_le32(i + 528, &buf[pe_header + 0x28]);
-
-	/* .text size */
-	put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]);
-
-	/* .text vma */
-	put_unaligned_le32(0x200, &buf[pe_header + 0xc4]);
-
-	/* .text size of initialised data */
-	put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]);
-
-	/* .text file offset */
-	put_unaligned_le32(0x200, &buf[pe_header + 0xcc]);
-#endif /* CONFIG_X86_32 */
-#endif /* CONFIG_EFI_STUB */
+	update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz));
+#endif
 
 	crc = partial_crc32(buf, i, crc);
 	if (fwrite(buf, 1, i, stdout) != i)

+ 14 - 0
arch/x86/include/asm/nmi.h

@@ -54,6 +54,20 @@ struct nmiaction {
 	__register_nmi_handler((t), &fn##_na);	\
 })
 
+/*
+ * For special handlers that register/unregister in the
+ * init section only.  This should be considered rare.
+ */
+#define register_nmi_handler_initonly(t, fn, fg, n)		\
+({							\
+	static struct nmiaction fn##_na __initdata = {		\
+		.handler = (fn),			\
+		.name = (n),				\
+		.flags = (fg),				\
+	};						\
+	__register_nmi_handler((t), &fn##_na);	\
+})
+
 int __register_nmi_handler(unsigned int, struct nmiaction *);
 
 void unregister_nmi_handler(unsigned int, const char *);

+ 6 - 6
arch/x86/include/asm/uaccess.h

@@ -33,9 +33,8 @@
 #define segment_eq(a, b)	((a).seg == (b).seg)
 
 #define user_addr_max() (current_thread_info()->addr_limit.seg)
-#define __addr_ok(addr)					\
-	((unsigned long __force)(addr) <		\
-	 (current_thread_info()->addr_limit.seg))
+#define __addr_ok(addr) 	\
+	((unsigned long __force)(addr) < user_addr_max())
 
 /*
  * Test whether a block of memory is a valid user space address.
@@ -47,14 +46,14 @@
  * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
  */
 
-#define __range_not_ok(addr, size)					\
+#define __range_not_ok(addr, size, limit)				\
 ({									\
 	unsigned long flag, roksum;					\
 	__chk_user_ptr(addr);						\
 	asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0"		\
 	    : "=&r" (flag), "=r" (roksum)				\
 	    : "1" (addr), "g" ((long)(size)),				\
-	      "rm" (current_thread_info()->addr_limit.seg));		\
+	      "rm" (limit));						\
 	flag;								\
 })
 
@@ -77,7 +76,8 @@
  * checks that the pointer is in the user space range - after calling
  * this function, memory access functions may still return -EFAULT.
  */
-#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
+#define access_ok(type, addr, size) \
+	(likely(__range_not_ok(addr, size, user_addr_max()) == 0))
 
 /*
  * The exception table consists of pairs of addresses relative to the

+ 0 - 1
arch/x86/include/asm/uv/uv_bau.h

@@ -149,7 +149,6 @@
 /* 4 bits of software ack period */
 #define UV2_ACK_MASK			0x7UL
 #define UV2_ACK_UNITS_SHFT		3
-#define UV2_LEG_SHFT UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT
 #define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT
 
 /*

+ 0 - 6
arch/x86/kernel/aperture_64.c

@@ -20,7 +20,6 @@
 #include <linux/bitops.h>
 #include <linux/ioport.h>
 #include <linux/suspend.h>
-#include <linux/kmemleak.h>
 #include <asm/e820.h>
 #include <asm/io.h>
 #include <asm/iommu.h>
@@ -95,11 +94,6 @@ static u32 __init allocate_aperture(void)
 		return 0;
 	}
 	memblock_reserve(addr, aper_size);
-	/*
-	 * Kmemleak should not scan this block as it may not be mapped via the
-	 * kernel direct mapping.
-	 */
-	kmemleak_ignore(phys_to_virt(addr));
 	printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
 			aper_size >> 10, addr);
 	insert_aperture_resource((u32)addr, aper_size);

+ 2 - 2
arch/x86/kernel/apic/io_apic.c

@@ -1195,7 +1195,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
-	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
+	for_each_cpu(cpu, cfg->domain)
 		per_cpu(vector_irq, cpu)[vector] = -1;
 
 	cfg->vector = 0;
@@ -1203,7 +1203,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 
 	if (likely(!cfg->move_in_progress))
 		return;
-	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+	for_each_cpu(cpu, cfg->old_domain) {
 		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
 								vector++) {
 			if (per_cpu(vector_irq, cpu)[vector] != irq)

+ 2 - 2
arch/x86/kernel/cpu/mcheck/mce.c

@@ -1274,7 +1274,7 @@ static void mce_timer_fn(unsigned long data)
 	 */
 	iv = __this_cpu_read(mce_next_interval);
 	if (mce_notify_irq())
-		iv = max(iv, (unsigned long) HZ/100);
+		iv = max(iv / 2, (unsigned long) HZ/100);
 	else
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
 	__this_cpu_write(mce_next_interval, iv);
@@ -1557,7 +1557,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 static void __mcheck_cpu_init_timer(void)
 {
 	struct timer_list *t = &__get_cpu_var(mce_timer);
-	unsigned long iv = __this_cpu_read(mce_next_interval);
+	unsigned long iv = check_interval * HZ;
 
 	setup_timer(t, mce_timer_fn, smp_processor_id());
 

+ 9 - 2
arch/x86/kernel/cpu/perf_event.c

@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
 		if (!cpuc->shared_regs)
 			goto error;
 	}
+	cpuc->is_fake = 1;
 	return cpuc;
 error:
 	free_fake_cpuc(cpuc);
@@ -1756,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
 }
 
+static inline int
+valid_user_frame(const void __user *fp, unsigned long size)
+{
+	return (__range_not_ok(fp, size, TASK_SIZE) == 0);
+}
+
 #ifdef CONFIG_COMPAT
 
 #include <asm/compat.h>
@@ -1780,7 +1787,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		if (bytes != sizeof(frame))
 			break;
 
-		if (fp < compat_ptr(regs->sp))
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		perf_callchain_store(entry, frame.return_address);
@@ -1826,7 +1833,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		if (bytes != sizeof(frame))
 			break;
 
-		if ((unsigned long)fp < regs->sp)
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		perf_callchain_store(entry, frame.return_address);

+ 2 - 0
arch/x86/kernel/cpu/perf_event.h

@@ -117,6 +117,7 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 
 	unsigned int		group_flag;
+	int			is_fake;
 
 	/*
 	 * Intel DebugStore bits
@@ -364,6 +365,7 @@ struct x86_pmu {
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
+	void		(*pebs_aliases)(struct perf_event *event);
 
 	/*
 	 * Intel LBR

+ 108 - 37
arch/x86/kernel/cpu/perf_event_intel.c

@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
 {
 	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
-		return false;
+		return idx;
 
-	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
-		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01bb;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
-		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+	if (idx == EXTRA_REG_RSP_0)
+		return EXTRA_REG_RSP_1;
+
+	if (idx == EXTRA_REG_RSP_1)
+		return EXTRA_REG_RSP_0;
+
+	return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+	event->hw.extra_reg.idx = idx;
+
+	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01b7;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+	} else if (idx == EXTRA_REG_RSP_1) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01bb;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
 	}
-
-	if (event->hw.extra_reg.idx == orig_idx)
-		return false;
-
-	return true;
 }
 
 /*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
 	unsigned long flags;
-	int orig_idx = reg->idx;
+	int idx = reg->idx;
 
-	/* already allocated shared msr */
-	if (reg->alloc)
+	/*
+	 * reg->alloc can be set due to existing state, so for fake cpuc we
+	 * need to ignore this, otherwise we might fail to allocate proper fake
+	 * state for this extra reg constraint. Also see the comment below.
+	 */
+	if (reg->alloc && !cpuc->is_fake)
 		return NULL; /* call x86_get_event_constraint() */
 
 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &cpuc->shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1173,6 +1183,29 @@ again:
 
 	if (!atomic_read(&era->ref) || era->config == reg->config) {
 
+		/*
+		 * If its a fake cpuc -- as per validate_{group,event}() we
+		 * shouldn't touch event state and we can avoid doing so
+		 * since both will only call get_event_constraints() once
+		 * on each event, this avoids the need for reg->alloc.
+		 *
+		 * Not doing the ER fixup will only result in era->reg being
+		 * wrong, but since we won't actually try and program hardware
+		 * this isn't a problem either.
+		 */
+		if (!cpuc->is_fake) {
+			if (idx != reg->idx)
+				intel_fixup_er(event, idx);
+
+			/*
+			 * x86_schedule_events() can call get_event_constraints()
+			 * multiple times on events in the case of incremental
+			 * scheduling(). reg->alloc ensures we only do the ER
+			 * allocation once.
+			 */
+			reg->alloc = 1;
+		}
+
 		/* lock in msr value */
 		era->config = reg->config;
 		era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ again:
 		/* one more user */
 		atomic_inc(&era->ref);
 
-		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
-
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock_irqrestore(&era->lock, flags);
-		goto again;
+	} else {
+		idx = intel_alt_er(idx);
+		if (idx != reg->idx) {
+			raw_spin_unlock_irqrestore(&era->lock, flags);
+			goto again;
+		}
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
 
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
 	struct er_account *era;
 
 	/*
-	 * only put constraint if extra reg was actually
-	 * allocated. Also takes care of event which do
-	 * not use an extra shared reg
+	 * Only put constraint if extra reg was actually allocated. Also takes
+	 * care of event which do not use an extra shared reg.
+	 *
+	 * Also, if this is a fake cpuc we shouldn't touch any event state
+	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+	 * either since it'll be thrown out.
 	 */
-	if (!reg->alloc)
+	if (!reg->alloc || cpuc->is_fake)
 		return;
 
 	era = &cpuc->shared_regs->regs[reg->idx];
@@ -1300,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
-static int intel_pmu_hw_config(struct perf_event *event)
+static void intel_pebs_aliases_core2(struct perf_event *event)
 {
-	int ret = x86_pmu_hw_config(event);
-
-	if (ret)
-		return ret;
-
-	if (event->attr.precise_ip &&
-	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
 		/*
 		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
 		 * (0x003c) so that we can use it with PEBS.
@@ -1329,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		 */
 		u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
 
+		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+		event->hw.config = alt_config;
+	}
+}
+
+static void intel_pebs_aliases_snb(struct perf_event *event)
+{
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+		/*
+		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+		 * (0x003c) so that we can use it with PEBS.
+		 *
+		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+		 * PEBS capable. However we can use UOPS_RETIRED.ALL
+		 * (0x01c2), which is a PEBS capable event, to get the same
+		 * count.
+		 *
+		 * UOPS_RETIRED.ALL counts the number of cycles that retires
+		 * CNTMASK micro-ops. By setting CNTMASK to a value (16)
+		 * larger than the maximum number of micro-ops that can be
+		 * retired per cycle (4) and then inverting the condition, we
+		 * count all cycles that retire 16 or less micro-ops, which
+		 * is every cycle.
+		 *
+		 * Thereby we gain a PEBS capable cycle counter.
+		 */
+		u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
 
 		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
 		event->hw.config = alt_config;
 	}
+}
+
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
+		x86_pmu.pebs_aliases(event);
 
 	if (intel_pmu_needs_lbr_smpl(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
@@ -1607,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
 	.put_event_constraints	= intel_put_event_constraints,
+	.pebs_aliases		= intel_pebs_aliases_core2,
 
 	.format_attrs		= intel_arch3_formats_attr,
 
@@ -1840,8 +1909,9 @@ __init int intel_pmu_init(void)
 		break;
 
 	case 42: /* SandyBridge */
-		x86_add_quirk(intel_sandybridge_quirk);
 	case 45: /* SandyBridge, "Romely-EP" */
+		x86_add_quirk(intel_sandybridge_quirk);
+	case 58: /* IvyBridge */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
@@ -1849,6 +1919,7 @@ __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 		x86_pmu.extra_regs = intel_snb_extra_regs;
 		/* all extra regs are per-cpu when HT is on */
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;

+ 1 - 8
arch/x86/kernel/cpu/perf_event_intel_ds.c

@@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
-	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
+	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */

+ 2 - 2
arch/x86/kernel/nmi_selftest.c

@@ -42,7 +42,7 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)
 static void __init init_nmi_testsuite(void)
 {
 	/* trap all the unknown NMIs we may generate */
-	register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
+	register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
 }
 
 static void __init cleanup_nmi_testsuite(void)
@@ -64,7 +64,7 @@ static void __init test_nmi_ipi(struct cpumask *mask)
 {
 	unsigned long timeout;
 
-	if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
+	if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback,
 				 NMI_FLAG_FIRST, "nmi_selftest")) {
 		nmi_fail = FAILURE;
 		return;

+ 4 - 2
arch/x86/kernel/reboot.c

@@ -639,9 +639,11 @@ void native_machine_shutdown(void)
 	set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
 
 	/*
-	 * O.K Now that I'm on the appropriate processor,
-	 * stop all of the others.
+	 * O.K Now that I'm on the appropriate processor, stop all of the
+	 * others. Also disable the local irq to not receive the per-cpu
+	 * timer interrupt which may trigger scheduler's load balance.
 	 */
+	local_irq_disable();
 	stop_other_cpus();
 #endif
 

+ 10 - 9
arch/x86/kernel/smpboot.c

@@ -382,6 +382,15 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 		if ((i == cpu) || (has_mc && match_llc(c, o)))
 			link_mask(llc_shared, cpu, i);
 
+	}
+
+	/*
+	 * This needs a separate iteration over the cpus because we rely on all
+	 * cpu_sibling_mask links to be set-up.
+	 */
+	for_each_cpu(i, cpu_sibling_setup_mask) {
+		o = &cpu_data(i);
+
 		if ((i == cpu) || (has_mc && match_mc(c, o))) {
 			link_mask(core, cpu, i);
 
@@ -410,15 +419,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 /* maps the cpu to the sched domain representing multi-core */
 const struct cpumask *cpu_coregroup_mask(int cpu)
 {
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	/*
-	 * For perf, we return last level cache shared map.
-	 * And for power savings, we return cpu_core_map
-	 */
-	if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
-		return cpu_core_mask(cpu);
-	else
-		return cpu_llc_shared_mask(cpu);
+	return cpu_llc_shared_mask(cpu);
 }
 
 static void impress_friends(void)

+ 4 - 0
arch/x86/lib/usercopy.c

@@ -8,6 +8,7 @@
 #include <linux/module.h>
 
 #include <asm/word-at-a-time.h>
+#include <linux/sched.h>
 
 /*
  * best effort, GUP based copy_from_user() that is NMI-safe
@@ -21,6 +22,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	void *map;
 	int ret;
 
+	if (__range_not_ok(from, n, TASK_SIZE) == 0)
+		return len;
+
 	do {
 		ret = __get_user_pages_fast(addr, 1, 0, &page);
 		if (!ret)

+ 4 - 4
arch/x86/lib/x86-opcode-map.txt

@@ -28,7 +28,7 @@
 #  - (66): the last prefix is 0x66
 #  - (F3): the last prefix is 0xF3
 #  - (F2): the last prefix is 0xF2
-#
+#  - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
 
 Table: one byte opcode
 Referrer:
@@ -515,12 +515,12 @@ b4: LFS Gv,Mp
 b5: LGS Gv,Mp
 b6: MOVZX Gv,Eb
 b7: MOVZX Gv,Ew
-b8: JMPE | POPCNT Gv,Ev (F3)
+b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
 b9: Grp10 (1A)
 ba: Grp8 Ev,Ib (1A)
 bb: BTC Ev,Gv
-bc: BSF Gv,Ev | TZCNT Gv,Ev (F3)
-bd: BSR Gv,Ev | LZCNT Gv,Ev (F3)
+bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
+bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
 be: MOVSX Gv,Eb
 bf: MOVSX Gv,Ew
 # 0x0f 0xc0-0xcf

+ 2 - 1
arch/x86/mm/init.c

@@ -62,7 +62,8 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en
 		extra += PMD_SIZE;
 #endif
 		/* The first 2/4M doesn't use large pages. */
-		extra += mr->end - mr->start;
+		if (mr->start < PMD_SIZE)
+			extra += mr->end - mr->start;
 
 		ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	} else

+ 2 - 0
arch/x86/mm/srat.c

@@ -176,6 +176,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 		return;
 	}
 
+	node_set(node, numa_nodes_parsed);
+
 	printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
 	       node, pxm,
 	       (unsigned long long) start, (unsigned long long) end - 1);

+ 1 - 1
arch/x86/platform/mrst/mrst.c

@@ -782,7 +782,7 @@ BLOCKING_NOTIFIER_HEAD(intel_scu_notifier);
 EXPORT_SYMBOL_GPL(intel_scu_notifier);
 
 /* Called by IPC driver */
-void intel_scu_devices_create(void)
+void __devinit intel_scu_devices_create(void)
 {
 	int i;
 

+ 0 - 1
arch/x86/platform/uv/tlb_uv.c

@@ -1295,7 +1295,6 @@ static void __init enable_timeouts(void)
 		 */
 		mmr_image |= (1L << SOFTACK_MSHIFT);
 		if (is_uv2_hub()) {
-			mmr_image &= ~(1L << UV2_LEG_SHFT);
 			mmr_image |= (1L << UV2_EXT_SHFT);
 		}
 		write_mmr_misc_control(pnode, mmr_image);

+ 9 - 5
arch/x86/tools/gen-insn-attr-x86.awk

@@ -66,9 +66,10 @@ BEGIN {
 	rex_expr = "^REX(\\.[XRWB]+)*"
 	fpu_expr = "^ESC" # TODO
 
-	lprefix1_expr = "\\(66\\)"
+	lprefix1_expr = "\\((66|!F3)\\)"
 	lprefix2_expr = "\\(F3\\)"
-	lprefix3_expr = "\\(F2\\)"
+	lprefix3_expr = "\\((F2|!F3)\\)"
+	lprefix_expr = "\\((66|F2|F3)\\)"
 	max_lprefix = 4
 
 	# All opcodes starting with lower-case 'v' or with (v1) superscript
@@ -333,13 +334,16 @@ function convert_operands(count,opnd,       i,j,imm,mod)
 		if (match(ext, lprefix1_expr)) {
 			lptable1[idx] = add_flags(lptable1[idx],flags)
 			variant = "INAT_VARIANT"
-		} else if (match(ext, lprefix2_expr)) {
+		}
+		if (match(ext, lprefix2_expr)) {
 			lptable2[idx] = add_flags(lptable2[idx],flags)
 			variant = "INAT_VARIANT"
-		} else if (match(ext, lprefix3_expr)) {
+		}
+		if (match(ext, lprefix3_expr)) {
 			lptable3[idx] = add_flags(lptable3[idx],flags)
 			variant = "INAT_VARIANT"
-		} else {
+		}
+		if (!match(ext, lprefix_expr)){
 			table[idx] = add_flags(table[idx],flags)
 		}
 	}

+ 2 - 2
arch/xtensa/include/asm/syscall.h

@@ -31,5 +31,5 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
 asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
 	struct timespec __user *tsp, const sigset_t __user *sigmask,
 	size_t sigsetsize);
-
-
+asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset,
+		size_t sigsetsize);

+ 1 - 1
arch/xtensa/kernel/signal.c

@@ -493,7 +493,7 @@ static void do_signal(struct pt_regs *regs)
 		if (ret)
 			return;
 
-		signal_delivered(signr, info, ka, regs, 0);
+		signal_delivered(signr, &info, &ka, regs, 0);
 		if (current->ptrace & PT_SINGLESTEP)
 			task_pt_regs(current)->icountlevel = 1;
 

+ 1 - 1
drivers/acpi/Kconfig

@@ -208,7 +208,7 @@ config ACPI_IPMI
 
 config ACPI_HOTPLUG_CPU
 	bool
-	depends on ACPI_PROCESSOR && HOTPLUG_CPU
+	depends on EXPERIMENTAL && ACPI_PROCESSOR && HOTPLUG_CPU
 	select ACPI_CONTAINER
 	default y
 

+ 9 - 1
drivers/acpi/battery.c

@@ -643,11 +643,19 @@ static int acpi_battery_update(struct acpi_battery *battery)
 
 static void acpi_battery_refresh(struct acpi_battery *battery)
 {
+	int power_unit;
+
 	if (!battery->bat.dev)
 		return;
 
+	power_unit = battery->power_unit;
+
 	acpi_battery_get_info(battery);
-	/* The battery may have changed its reporting units. */
+
+	if (power_unit == battery->power_unit)
+		return;
+
+	/* The battery has changed its reporting units. */
 	sysfs_remove_battery(battery);
 	sysfs_add_battery(battery);
 }

+ 57 - 31
drivers/acpi/bus.c

@@ -182,41 +182,66 @@ EXPORT_SYMBOL(acpi_bus_get_private_data);
                                  Power Management
    -------------------------------------------------------------------------- */
 
+static const char *state_string(int state)
+{
+	switch (state) {
+	case ACPI_STATE_D0:
+		return "D0";
+	case ACPI_STATE_D1:
+		return "D1";
+	case ACPI_STATE_D2:
+		return "D2";
+	case ACPI_STATE_D3_HOT:
+		return "D3hot";
+	case ACPI_STATE_D3_COLD:
+		return "D3";
+	default:
+		return "(unknown)";
+	}
+}
+
 static int __acpi_bus_get_power(struct acpi_device *device, int *state)
 {
-	int result = 0;
-	acpi_status status = 0;
-	unsigned long long psc = 0;
+	int result = ACPI_STATE_UNKNOWN;
 
 	if (!device || !state)
 		return -EINVAL;
 
-	*state = ACPI_STATE_UNKNOWN;
-
-	if (device->flags.power_manageable) {
-		/*
-		 * Get the device's power state either directly (via _PSC) or
-		 * indirectly (via power resources).
-		 */
-		if (device->power.flags.power_resources) {
-			result = acpi_power_get_inferred_state(device, state);
-			if (result)
-				return result;
-		} else if (device->power.flags.explicit_get) {
-			status = acpi_evaluate_integer(device->handle, "_PSC",
-						       NULL, &psc);
-			if (ACPI_FAILURE(status))
-				return -ENODEV;
-			*state = (int)psc;
-		}
-	} else {
+	if (!device->flags.power_manageable) {
 		/* TBD: Non-recursive algorithm for walking up hierarchy. */
 		*state = device->parent ?
 			device->parent->power.state : ACPI_STATE_D0;
+		goto out;
+	}
+
+	/*
+	 * Get the device's power state either directly (via _PSC) or
+	 * indirectly (via power resources).
+	 */
+	if (device->power.flags.explicit_get) {
+		unsigned long long psc;
+		acpi_status status = acpi_evaluate_integer(device->handle,
+							   "_PSC", NULL, &psc);
+		if (ACPI_FAILURE(status))
+			return -ENODEV;
+
+		result = psc;
+	}
+	/* The test below covers ACPI_STATE_UNKNOWN too. */
+	if (result <= ACPI_STATE_D2) {
+	  ; /* Do nothing. */
+	} else if (device->power.flags.power_resources) {
+		int error = acpi_power_get_inferred_state(device, &result);
+		if (error)
+			return error;
+	} else if (result == ACPI_STATE_D3_HOT) {
+		result = ACPI_STATE_D3;
 	}
+	*state = result;
 
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] power state is D%d\n",
-			  device->pnp.bus_id, *state));
+ out:
+	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] power state is %s\n",
+			  device->pnp.bus_id, state_string(*state)));
 
 	return 0;
 }
@@ -234,13 +259,14 @@ static int __acpi_bus_set_power(struct acpi_device *device, int state)
 	/* Make sure this is a valid target state */
 
 	if (state == device->power.state) {
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device is already at D%d\n",
-				  state));
+		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device is already at %s\n",
+				  state_string(state)));
 		return 0;
 	}
 
 	if (!device->power.states[state].flags.valid) {
-		printk(KERN_WARNING PREFIX "Device does not support D%d\n", state);
+		printk(KERN_WARNING PREFIX "Device does not support %s\n",
+		       state_string(state));
 		return -ENODEV;
 	}
 	if (device->parent && (state < device->parent->power.state)) {
@@ -294,13 +320,13 @@ static int __acpi_bus_set_power(struct acpi_device *device, int state)
       end:
 	if (result)
 		printk(KERN_WARNING PREFIX
-			      "Device [%s] failed to transition to D%d\n",
-			      device->pnp.bus_id, state);
+			      "Device [%s] failed to transition to %s\n",
+			      device->pnp.bus_id, state_string(state));
 	else {
 		device->power.state = state;
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Device [%s] transitioned to D%d\n",
-				  device->pnp.bus_id, state));
+				  "Device [%s] transitioned to %s\n",
+				  device->pnp.bus_id, state_string(state)));
 	}
 
 	return result;

+ 1 - 1
drivers/acpi/power.c

@@ -631,7 +631,7 @@ int acpi_power_get_inferred_state(struct acpi_device *device, int *state)
 	 * We know a device's inferred power state when all the resources
 	 * required for a given D-state are 'on'.
 	 */
-	for (i = ACPI_STATE_D0; i < ACPI_STATE_D3_HOT; i++) {
+	for (i = ACPI_STATE_D0; i <= ACPI_STATE_D3_HOT; i++) {
 		list = &device->power.states[i].resources;
 		if (list->count < 1)
 			continue;

+ 25 - 5
drivers/acpi/processor_perflib.c

@@ -333,6 +333,7 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
 	struct acpi_buffer state = { 0, NULL };
 	union acpi_object *pss = NULL;
 	int i;
+	int last_invalid = -1;
 
 
 	status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
@@ -394,14 +395,33 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
 		    ((u32)(px->core_frequency * 1000) !=
 		     (px->core_frequency * 1000))) {
 			printk(KERN_ERR FW_BUG PREFIX
-			       "Invalid BIOS _PSS frequency: 0x%llx MHz\n",
-			       px->core_frequency);
-			result = -EFAULT;
-			kfree(pr->performance->states);
-			goto end;
+			       "Invalid BIOS _PSS frequency found for processor %d: 0x%llx MHz\n",
+			       pr->id, px->core_frequency);
+			if (last_invalid == -1)
+				last_invalid = i;
+		} else {
+			if (last_invalid != -1) {
+				/*
+				 * Copy this valid entry over last_invalid entry
+				 */
+				memcpy(&(pr->performance->states[last_invalid]),
+				       px, sizeof(struct acpi_processor_px));
+				++last_invalid;
+			}
 		}
 	}
 
+	if (last_invalid == 0) {
+		printk(KERN_ERR FW_BUG PREFIX
+		       "No valid BIOS _PSS frequency found for processor %d\n", pr->id);
+		result = -EFAULT;
+		kfree(pr->performance->states);
+		pr->performance->states = NULL;
+	}
+
+	if (last_invalid > 0)
+		pr->performance->state_count = last_invalid;
+
       end:
 	kfree(buffer.pointer);
 

+ 1 - 0
drivers/acpi/scan.c

@@ -1567,6 +1567,7 @@ static int acpi_bus_scan_fixed(void)
 						ACPI_BUS_TYPE_POWER_BUTTON,
 						ACPI_STA_DEFAULT,
 						&ops);
+		device_init_wakeup(&device->dev, true);
 	}
 
 	if ((acpi_gbl_FADT.flags & ACPI_FADT_SLEEP_BUTTON) == 0) {

+ 45 - 4
drivers/acpi/sleep.c

@@ -57,6 +57,7 @@ MODULE_PARM_DESC(gts, "Enable evaluation of _GTS on suspend.");
 MODULE_PARM_DESC(bfs, "Enable evaluation of _BFS on resume".);
 
 static u8 sleep_states[ACPI_S_STATE_COUNT];
+static bool pwr_btn_event_pending;
 
 static void acpi_sleep_tts_switch(u32 acpi_state)
 {
@@ -184,6 +185,14 @@ static int acpi_pm_prepare(void)
 	return error;
 }
 
+static int find_powerf_dev(struct device *dev, void *data)
+{
+	struct acpi_device *device = to_acpi_device(dev);
+	const char *hid = acpi_device_hid(device);
+
+	return !strcmp(hid, ACPI_BUTTON_HID_POWERF);
+}
+
 /**
  *	acpi_pm_finish - Instruct the platform to leave a sleep state.
  *
@@ -192,6 +201,7 @@ static int acpi_pm_prepare(void)
  */
 static void acpi_pm_finish(void)
 {
+	struct device *pwr_btn_dev;
 	u32 acpi_state = acpi_target_sleep_state;
 
 	acpi_ec_unblock_transactions();
@@ -209,6 +219,23 @@ static void acpi_pm_finish(void)
 	acpi_set_firmware_waking_vector((acpi_physical_address) 0);
 
 	acpi_target_sleep_state = ACPI_STATE_S0;
+
+	/* If we were woken with the fixed power button, provide a small
+	 * hint to userspace in the form of a wakeup event on the fixed power
+	 * button device (if it can be found).
+	 *
+	 * We delay the event generation til now, as the PM layer requires
+	 * timekeeping to be running before we generate events. */
+	if (!pwr_btn_event_pending)
+		return;
+
+	pwr_btn_event_pending = false;
+	pwr_btn_dev = bus_find_device(&acpi_bus_type, NULL, NULL,
+				      find_powerf_dev);
+	if (pwr_btn_dev) {
+		pm_wakeup_event(pwr_btn_dev, 0);
+		put_device(pwr_btn_dev);
+	}
 }
 
 /**
@@ -298,9 +325,23 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
 	/* ACPI 3.0 specs (P62) says that it's the responsibility
 	 * of the OSPM to clear the status bit [ implying that the
 	 * POWER_BUTTON event should not reach userspace ]
+	 *
+	 * However, we do generate a small hint for userspace in the form of
+	 * a wakeup event. We flag this condition for now and generate the
+	 * event later, as we're currently too early in resume to be able to
+	 * generate wakeup events.
 	 */
-	if (ACPI_SUCCESS(status) && (acpi_state == ACPI_STATE_S3))
-		acpi_clear_event(ACPI_EVENT_POWER_BUTTON);
+	if (ACPI_SUCCESS(status) && (acpi_state == ACPI_STATE_S3)) {
+		acpi_event_status pwr_btn_status;
+
+		acpi_get_event_status(ACPI_EVENT_POWER_BUTTON, &pwr_btn_status);
+
+		if (pwr_btn_status & ACPI_EVENT_FLAG_SET) {
+			acpi_clear_event(ACPI_EVENT_POWER_BUTTON);
+			/* Flag for later */
+			pwr_btn_event_pending = true;
+		}
+	}
 
 	/*
 	 * Disable and clear GPE status before interrupt is enabled. Some GPEs
@@ -730,8 +771,8 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p)
 	 * can wake the system.  _S0W may be valid, too.
 	 */
 	if (acpi_target_sleep_state == ACPI_STATE_S0 ||
-	    (device_may_wakeup(dev) &&
-	     adev->wakeup.sleep_state <= acpi_target_sleep_state)) {
+	    (device_may_wakeup(dev) && adev->wakeup.flags.valid &&
+	     adev->wakeup.sleep_state >= acpi_target_sleep_state)) {
 		acpi_status status;
 
 		acpi_method[3] = 'W';

+ 22 - 11
drivers/acpi/video.c

@@ -1687,10 +1687,6 @@ static int acpi_video_bus_add(struct acpi_device *device)
 	set_bit(KEY_BRIGHTNESS_ZERO, input->keybit);
 	set_bit(KEY_DISPLAY_OFF, input->keybit);
 
-	error = input_register_device(input);
-	if (error)
-		goto err_stop_video;
-
 	printk(KERN_INFO PREFIX "%s [%s] (multi-head: %s  rom: %s  post: %s)\n",
 	       ACPI_VIDEO_DEVICE_NAME, acpi_device_bid(device),
 	       video->flags.multihead ? "yes" : "no",
@@ -1701,12 +1697,16 @@ static int acpi_video_bus_add(struct acpi_device *device)
 	video->pm_nb.priority = 0;
 	error = register_pm_notifier(&video->pm_nb);
 	if (error)
-		goto err_unregister_input_dev;
+		goto err_stop_video;
+
+	error = input_register_device(input);
+	if (error)
+		goto err_unregister_pm_notifier;
 
 	return 0;
 
- err_unregister_input_dev:
-	input_unregister_device(input);
+ err_unregister_pm_notifier:
+	unregister_pm_notifier(&video->pm_nb);
  err_stop_video:
 	acpi_video_bus_stop_devices(video);
  err_free_input_dev:
@@ -1743,9 +1743,18 @@ static int acpi_video_bus_remove(struct acpi_device *device, int type)
 	return 0;
 }
 
+static int __init is_i740(struct pci_dev *dev)
+{
+	if (dev->device == 0x00D1)
+		return 1;
+	if (dev->device == 0x7000)
+		return 1;
+	return 0;
+}
+
 static int __init intel_opregion_present(void)
 {
-#if defined(CONFIG_DRM_I915) || defined(CONFIG_DRM_I915_MODULE)
+	int opregion = 0;
 	struct pci_dev *dev = NULL;
 	u32 address;
 
@@ -1754,13 +1763,15 @@ static int __init intel_opregion_present(void)
 			continue;
 		if (dev->vendor != PCI_VENDOR_ID_INTEL)
 			continue;
+		/* We don't want to poke around undefined i740 registers */
+		if (is_i740(dev))
+			continue;
 		pci_read_config_dword(dev, 0xfc, &address);
 		if (!address)
 			continue;
-		return 1;
+		opregion = 1;
 	}
-#endif
-	return 0;
+	return opregion;
 }
 
 int acpi_video_register(void)

+ 1 - 0
drivers/char/agp/intel-agp.c

@@ -898,6 +898,7 @@ static struct pci_device_id agp_intel_pci_table[] = {
 	ID(PCI_DEVICE_ID_INTEL_B43_HB),
 	ID(PCI_DEVICE_ID_INTEL_B43_1_HB),
 	ID(PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB),
+	ID(PCI_DEVICE_ID_INTEL_IRONLAKE_D2_HB),
 	ID(PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB),
 	ID(PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB),
 	ID(PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB),

+ 1 - 0
drivers/char/agp/intel-agp.h

@@ -212,6 +212,7 @@
 #define PCI_DEVICE_ID_INTEL_G41_HB          0x2E30
 #define PCI_DEVICE_ID_INTEL_G41_IG          0x2E32
 #define PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB	    0x0040
+#define PCI_DEVICE_ID_INTEL_IRONLAKE_D2_HB	    0x0069
 #define PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG	    0x0042
 #define PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB	    0x0044
 #define PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB	    0x0062

+ 1 - 0
drivers/clocksource/Makefile

@@ -6,6 +6,7 @@ obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC)	+= cs5535-clockevt.o
 obj-$(CONFIG_SH_TIMER_CMT)	+= sh_cmt.o
 obj-$(CONFIG_SH_TIMER_MTU2)	+= sh_mtu2.o
 obj-$(CONFIG_SH_TIMER_TMU)	+= sh_tmu.o
+obj-$(CONFIG_EM_TIMER_STI)	+= em_sti.o
 obj-$(CONFIG_CLKBLD_I8253)	+= i8253.o
 obj-$(CONFIG_CLKSRC_MMIO)	+= mmio.o
 obj-$(CONFIG_DW_APB_TIMER)	+= dw_apb_timer.o

+ 406 - 0
drivers/clocksource/em_sti.c

@@ -0,0 +1,406 @@
+/*
+ * Emma Mobile Timer Support - STI
+ *
+ *  Copyright (C) 2012 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+enum { USER_CLOCKSOURCE, USER_CLOCKEVENT, USER_NR };
+
+struct em_sti_priv {
+	void __iomem *base;
+	struct clk *clk;
+	struct platform_device *pdev;
+	unsigned int active[USER_NR];
+	unsigned long rate;
+	raw_spinlock_t lock;
+	struct clock_event_device ced;
+	struct clocksource cs;
+};
+
+#define STI_CONTROL 0x00
+#define STI_COMPA_H 0x10
+#define STI_COMPA_L 0x14
+#define STI_COMPB_H 0x18
+#define STI_COMPB_L 0x1c
+#define STI_COUNT_H 0x20
+#define STI_COUNT_L 0x24
+#define STI_COUNT_RAW_H 0x28
+#define STI_COUNT_RAW_L 0x2c
+#define STI_SET_H 0x30
+#define STI_SET_L 0x34
+#define STI_INTSTATUS 0x40
+#define STI_INTRAWSTATUS 0x44
+#define STI_INTENSET 0x48
+#define STI_INTENCLR 0x4c
+#define STI_INTFFCLR 0x50
+
+static inline unsigned long em_sti_read(struct em_sti_priv *p, int offs)
+{
+	return ioread32(p->base + offs);
+}
+
+static inline void em_sti_write(struct em_sti_priv *p, int offs,
+				unsigned long value)
+{
+	iowrite32(value, p->base + offs);
+}
+
+static int em_sti_enable(struct em_sti_priv *p)
+{
+	int ret;
+
+	/* enable clock */
+	ret = clk_enable(p->clk);
+	if (ret) {
+		dev_err(&p->pdev->dev, "cannot enable clock\n");
+		return ret;
+	}
+
+	/* configure channel, periodic mode and maximum timeout */
+	p->rate = clk_get_rate(p->clk);
+
+	/* reset the counter */
+	em_sti_write(p, STI_SET_H, 0x40000000);
+	em_sti_write(p, STI_SET_L, 0x00000000);
+
+	/* mask and clear pending interrupts */
+	em_sti_write(p, STI_INTENCLR, 3);
+	em_sti_write(p, STI_INTFFCLR, 3);
+
+	/* enable updates of counter registers */
+	em_sti_write(p, STI_CONTROL, 1);
+
+	return 0;
+}
+
+static void em_sti_disable(struct em_sti_priv *p)
+{
+	/* mask interrupts */
+	em_sti_write(p, STI_INTENCLR, 3);
+
+	/* stop clock */
+	clk_disable(p->clk);
+}
+
+static cycle_t em_sti_count(struct em_sti_priv *p)
+{
+	cycle_t ticks;
+	unsigned long flags;
+
+	/* the STI hardware buffers the 48-bit count, but to
+	 * break it out into two 32-bit access the registers
+	 * must be accessed in a certain order.
+	 * Always read STI_COUNT_H before STI_COUNT_L.
+	 */
+	raw_spin_lock_irqsave(&p->lock, flags);
+	ticks = (cycle_t)(em_sti_read(p, STI_COUNT_H) & 0xffff) << 32;
+	ticks |= em_sti_read(p, STI_COUNT_L);
+	raw_spin_unlock_irqrestore(&p->lock, flags);
+
+	return ticks;
+}
+
+static cycle_t em_sti_set_next(struct em_sti_priv *p, cycle_t next)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&p->lock, flags);
+
+	/* mask compare A interrupt */
+	em_sti_write(p, STI_INTENCLR, 1);
+
+	/* update compare A value */
+	em_sti_write(p, STI_COMPA_H, next >> 32);
+	em_sti_write(p, STI_COMPA_L, next & 0xffffffff);
+
+	/* clear compare A interrupt source */
+	em_sti_write(p, STI_INTFFCLR, 1);
+
+	/* unmask compare A interrupt */
+	em_sti_write(p, STI_INTENSET, 1);
+
+	raw_spin_unlock_irqrestore(&p->lock, flags);
+
+	return next;
+}
+
+static irqreturn_t em_sti_interrupt(int irq, void *dev_id)
+{
+	struct em_sti_priv *p = dev_id;
+
+	p->ced.event_handler(&p->ced);
+	return IRQ_HANDLED;
+}
+
+static int em_sti_start(struct em_sti_priv *p, unsigned int user)
+{
+	unsigned long flags;
+	int used_before;
+	int ret = 0;
+
+	raw_spin_lock_irqsave(&p->lock, flags);
+	used_before = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT];
+	if (!used_before)
+		ret = em_sti_enable(p);
+
+	if (!ret)
+		p->active[user] = 1;
+	raw_spin_unlock_irqrestore(&p->lock, flags);
+
+	return ret;
+}
+
+static void em_sti_stop(struct em_sti_priv *p, unsigned int user)
+{
+	unsigned long flags;
+	int used_before, used_after;
+
+	raw_spin_lock_irqsave(&p->lock, flags);
+	used_before = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT];
+	p->active[user] = 0;
+	used_after = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT];
+
+	if (used_before && !used_after)
+		em_sti_disable(p);
+	raw_spin_unlock_irqrestore(&p->lock, flags);
+}
+
+static struct em_sti_priv *cs_to_em_sti(struct clocksource *cs)
+{
+	return container_of(cs, struct em_sti_priv, cs);
+}
+
+static cycle_t em_sti_clocksource_read(struct clocksource *cs)
+{
+	return em_sti_count(cs_to_em_sti(cs));
+}
+
+static int em_sti_clocksource_enable(struct clocksource *cs)
+{
+	int ret;
+	struct em_sti_priv *p = cs_to_em_sti(cs);
+
+	ret = em_sti_start(p, USER_CLOCKSOURCE);
+	if (!ret)
+		__clocksource_updatefreq_hz(cs, p->rate);
+	return ret;
+}
+
+static void em_sti_clocksource_disable(struct clocksource *cs)
+{
+	em_sti_stop(cs_to_em_sti(cs), USER_CLOCKSOURCE);
+}
+
+static void em_sti_clocksource_resume(struct clocksource *cs)
+{
+	em_sti_clocksource_enable(cs);
+}
+
+static int em_sti_register_clocksource(struct em_sti_priv *p)
+{
+	struct clocksource *cs = &p->cs;
+
+	memset(cs, 0, sizeof(*cs));
+	cs->name = dev_name(&p->pdev->dev);
+	cs->rating = 200;
+	cs->read = em_sti_clocksource_read;
+	cs->enable = em_sti_clocksource_enable;
+	cs->disable = em_sti_clocksource_disable;
+	cs->suspend = em_sti_clocksource_disable;
+	cs->resume = em_sti_clocksource_resume;
+	cs->mask = CLOCKSOURCE_MASK(48);
+	cs->flags = CLOCK_SOURCE_IS_CONTINUOUS;
+
+	dev_info(&p->pdev->dev, "used as clock source\n");
+
+	/* Register with dummy 1 Hz value, gets updated in ->enable() */
+	clocksource_register_hz(cs, 1);
+	return 0;
+}
+
+static struct em_sti_priv *ced_to_em_sti(struct clock_event_device *ced)
+{
+	return container_of(ced, struct em_sti_priv, ced);
+}
+
+static void em_sti_clock_event_mode(enum clock_event_mode mode,
+				    struct clock_event_device *ced)
+{
+	struct em_sti_priv *p = ced_to_em_sti(ced);
+
+	/* deal with old setting first */
+	switch (ced->mode) {
+	case CLOCK_EVT_MODE_ONESHOT:
+		em_sti_stop(p, USER_CLOCKEVENT);
+		break;
+	default:
+		break;
+	}
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_ONESHOT:
+		dev_info(&p->pdev->dev, "used for oneshot clock events\n");
+		em_sti_start(p, USER_CLOCKEVENT);
+		clockevents_config(&p->ced, p->rate);
+		break;
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	case CLOCK_EVT_MODE_UNUSED:
+		em_sti_stop(p, USER_CLOCKEVENT);
+		break;
+	default:
+		break;
+	}
+}
+
+static int em_sti_clock_event_next(unsigned long delta,
+				   struct clock_event_device *ced)
+{
+	struct em_sti_priv *p = ced_to_em_sti(ced);
+	cycle_t next;
+	int safe;
+
+	next = em_sti_set_next(p, em_sti_count(p) + delta);
+	safe = em_sti_count(p) < (next - 1);
+
+	return !safe;
+}
+
+static void em_sti_register_clockevent(struct em_sti_priv *p)
+{
+	struct clock_event_device *ced = &p->ced;
+
+	memset(ced, 0, sizeof(*ced));
+	ced->name = dev_name(&p->pdev->dev);
+	ced->features = CLOCK_EVT_FEAT_ONESHOT;
+	ced->rating = 200;
+	ced->cpumask = cpumask_of(0);
+	ced->set_next_event = em_sti_clock_event_next;
+	ced->set_mode = em_sti_clock_event_mode;
+
+	dev_info(&p->pdev->dev, "used for clock events\n");
+
+	/* Register with dummy 1 Hz value, gets updated in ->set_mode() */
+	clockevents_config_and_register(ced, 1, 2, 0xffffffff);
+}
+
+static int __devinit em_sti_probe(struct platform_device *pdev)
+{
+	struct em_sti_priv *p;
+	struct resource *res;
+	int irq, ret;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL) {
+		dev_err(&pdev->dev, "failed to allocate driver data\n");
+		ret = -ENOMEM;
+		goto err0;
+	}
+
+	p->pdev = pdev;
+	platform_set_drvdata(pdev, p);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "failed to get I/O memory\n");
+		ret = -EINVAL;
+		goto err0;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "failed to get irq\n");
+		ret = -EINVAL;
+		goto err0;
+	}
+
+	/* map memory, let base point to the STI instance */
+	p->base = ioremap_nocache(res->start, resource_size(res));
+	if (p->base == NULL) {
+		dev_err(&pdev->dev, "failed to remap I/O memory\n");
+		ret = -ENXIO;
+		goto err0;
+	}
+
+	/* get hold of clock */
+	p->clk = clk_get(&pdev->dev, "sclk");
+	if (IS_ERR(p->clk)) {
+		dev_err(&pdev->dev, "cannot get clock\n");
+		ret = PTR_ERR(p->clk);
+		goto err1;
+	}
+
+	if (request_irq(irq, em_sti_interrupt,
+			IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING,
+			dev_name(&pdev->dev), p)) {
+		dev_err(&pdev->dev, "failed to request low IRQ\n");
+		ret = -ENOENT;
+		goto err2;
+	}
+
+	raw_spin_lock_init(&p->lock);
+	em_sti_register_clockevent(p);
+	em_sti_register_clocksource(p);
+	return 0;
+
+err2:
+	clk_put(p->clk);
+err1:
+	iounmap(p->base);
+err0:
+	kfree(p);
+	return ret;
+}
+
+static int __devexit em_sti_remove(struct platform_device *pdev)
+{
+	return -EBUSY; /* cannot unregister clockevent and clocksource */
+}
+
+static const struct of_device_id em_sti_dt_ids[] __devinitconst = {
+	{ .compatible = "renesas,em-sti", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, em_sti_dt_ids);
+
+static struct platform_driver em_sti_device_driver = {
+	.probe		= em_sti_probe,
+	.remove		= __devexit_p(em_sti_remove),
+	.driver		= {
+		.name	= "em_sti",
+		.of_match_table = em_sti_dt_ids,
+	}
+};
+
+module_platform_driver(em_sti_device_driver);
+
+MODULE_AUTHOR("Magnus Damm");
+MODULE_DESCRIPTION("Renesas Emma Mobile STI Timer Driver");
+MODULE_LICENSE("GPL v2");

+ 1 - 1
drivers/gpio/gpio-samsung.c

@@ -2833,7 +2833,7 @@ static __init void exynos5_gpiolib_init(void)
 	}
 
 	/* need to set base address for gpc4 */
-	exonys5_gpios_1[11].base = gpio_base1 + 0x2E0;
+	exynos5_gpios_1[11].base = gpio_base1 + 0x2E0;
 
 	/* need to set base address for gpx */
 	chip = &exynos5_gpios_1[21];

+ 2 - 2
drivers/gpu/drm/exynos/exynos_drm_drv.c

@@ -244,8 +244,8 @@ static const struct file_operations exynos_drm_driver_fops = {
 };
 
 static struct drm_driver exynos_drm_driver = {
-	.driver_features	= DRIVER_HAVE_IRQ | DRIVER_BUS_PLATFORM |
-				  DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME,
+	.driver_features	= DRIVER_HAVE_IRQ | DRIVER_MODESET |
+					DRIVER_GEM | DRIVER_PRIME,
 	.load			= exynos_drm_load,
 	.unload			= exynos_drm_unload,
 	.open			= exynos_drm_open,

+ 0 - 7
drivers/gpu/drm/exynos/exynos_drm_encoder.c

@@ -172,19 +172,12 @@ static void exynos_drm_encoder_commit(struct drm_encoder *encoder)
 		manager_ops->commit(manager->dev);
 }
 
-static struct drm_crtc *
-exynos_drm_encoder_get_crtc(struct drm_encoder *encoder)
-{
-	return encoder->crtc;
-}
-
 static struct drm_encoder_helper_funcs exynos_encoder_helper_funcs = {
 	.dpms		= exynos_drm_encoder_dpms,
 	.mode_fixup	= exynos_drm_encoder_mode_fixup,
 	.mode_set	= exynos_drm_encoder_mode_set,
 	.prepare	= exynos_drm_encoder_prepare,
 	.commit		= exynos_drm_encoder_commit,
-	.get_crtc	= exynos_drm_encoder_get_crtc,
 };
 
 static void exynos_drm_encoder_destroy(struct drm_encoder *encoder)

+ 14 - 5
drivers/gpu/drm/exynos/exynos_drm_fb.c

@@ -51,11 +51,22 @@ struct exynos_drm_fb {
 static void exynos_drm_fb_destroy(struct drm_framebuffer *fb)
 {
 	struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
+	unsigned int i;
 
 	DRM_DEBUG_KMS("%s\n", __FILE__);
 
 	drm_framebuffer_cleanup(fb);
 
+	for (i = 0; i < ARRAY_SIZE(exynos_fb->exynos_gem_obj); i++) {
+		struct drm_gem_object *obj;
+
+		if (exynos_fb->exynos_gem_obj[i] == NULL)
+			continue;
+
+		obj = &exynos_fb->exynos_gem_obj[i]->base;
+		drm_gem_object_unreference_unlocked(obj);
+	}
+
 	kfree(exynos_fb);
 	exynos_fb = NULL;
 }
@@ -134,11 +145,11 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 		return ERR_PTR(-ENOENT);
 	}
 
-	drm_gem_object_unreference_unlocked(obj);
-
 	fb = exynos_drm_framebuffer_init(dev, mode_cmd, obj);
-	if (IS_ERR(fb))
+	if (IS_ERR(fb)) {
+		drm_gem_object_unreference_unlocked(obj);
 		return fb;
+	}
 
 	exynos_fb = to_exynos_fb(fb);
 	nr = exynos_drm_format_num_buffers(fb->pixel_format);
@@ -152,8 +163,6 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 			return ERR_PTR(-ENOENT);
 		}
 
-		drm_gem_object_unreference_unlocked(obj);
-
 		exynos_fb->exynos_gem_obj[i] = to_exynos_gem_obj(obj);
 	}
 

+ 2 - 2
drivers/gpu/drm/exynos/exynos_drm_fb.h

@@ -31,10 +31,10 @@
 static inline int exynos_drm_format_num_buffers(uint32_t format)
 {
 	switch (format) {
-	case DRM_FORMAT_NV12M:
+	case DRM_FORMAT_NV12:
 	case DRM_FORMAT_NV12MT:
 		return 2;
-	case DRM_FORMAT_YUV420M:
+	case DRM_FORMAT_YUV420:
 		return 3;
 	default:
 		return 1;

+ 3 - 6
drivers/gpu/drm/exynos/exynos_drm_gem.c

@@ -689,7 +689,6 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
 				   struct drm_device *dev, uint32_t handle,
 				   uint64_t *offset)
 {
-	struct exynos_drm_gem_obj *exynos_gem_obj;
 	struct drm_gem_object *obj;
 	int ret = 0;
 
@@ -710,15 +709,13 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
 		goto unlock;
 	}
 
-	exynos_gem_obj = to_exynos_gem_obj(obj);
-
-	if (!exynos_gem_obj->base.map_list.map) {
-		ret = drm_gem_create_mmap_offset(&exynos_gem_obj->base);
+	if (!obj->map_list.map) {
+		ret = drm_gem_create_mmap_offset(obj);
 		if (ret)
 			goto out;
 	}
 
-	*offset = (u64)exynos_gem_obj->base.map_list.hash.key << PAGE_SHIFT;
+	*offset = (u64)obj->map_list.hash.key << PAGE_SHIFT;
 	DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
 
 out:

+ 7 - 5
drivers/gpu/drm/exynos/exynos_mixer.c

@@ -365,7 +365,7 @@ static void vp_video_buffer(struct mixer_context *ctx, int win)
 	switch (win_data->pixel_format) {
 	case DRM_FORMAT_NV12MT:
 		tiled_mode = true;
-	case DRM_FORMAT_NV12M:
+	case DRM_FORMAT_NV12:
 		crcb_mode = false;
 		buf_num = 2;
 		break;
@@ -601,18 +601,20 @@ static void mixer_win_reset(struct mixer_context *ctx)
 	mixer_reg_write(res, MXR_BG_COLOR2, 0x008080);
 
 	/* setting graphical layers */
-
 	val  = MXR_GRP_CFG_COLOR_KEY_DISABLE; /* no blank key */
 	val |= MXR_GRP_CFG_WIN_BLEND_EN;
+	val |= MXR_GRP_CFG_BLEND_PRE_MUL;
+	val |= MXR_GRP_CFG_PIXEL_BLEND_EN;
 	val |= MXR_GRP_CFG_ALPHA_VAL(0xff); /* non-transparent alpha */
 
 	/* the same configuration for both layers */
 	mixer_reg_write(res, MXR_GRAPHIC_CFG(0), val);
-
-	val |= MXR_GRP_CFG_BLEND_PRE_MUL;
-	val |= MXR_GRP_CFG_PIXEL_BLEND_EN;
 	mixer_reg_write(res, MXR_GRAPHIC_CFG(1), val);
 
+	/* setting video layers */
+	val = MXR_GRP_CFG_ALPHA_VAL(0);
+	mixer_reg_write(res, MXR_VIDEO_CFG, val);
+
 	/* configuration of Video Processor Registers */
 	vp_win_reset(ctx);
 	vp_default_filter(res);

+ 9 - 4
drivers/gpu/drm/i915/i915_drv.c

@@ -233,6 +233,7 @@ static const struct intel_device_info intel_sandybridge_d_info = {
 	.has_blt_ring = 1,
 	.has_llc = 1,
 	.has_pch_split = 1,
+	.has_force_wake = 1,
 };
 
 static const struct intel_device_info intel_sandybridge_m_info = {
@@ -243,6 +244,7 @@ static const struct intel_device_info intel_sandybridge_m_info = {
 	.has_blt_ring = 1,
 	.has_llc = 1,
 	.has_pch_split = 1,
+	.has_force_wake = 1,
 };
 
 static const struct intel_device_info intel_ivybridge_d_info = {
@@ -252,6 +254,7 @@ static const struct intel_device_info intel_ivybridge_d_info = {
 	.has_blt_ring = 1,
 	.has_llc = 1,
 	.has_pch_split = 1,
+	.has_force_wake = 1,
 };
 
 static const struct intel_device_info intel_ivybridge_m_info = {
@@ -262,6 +265,7 @@ static const struct intel_device_info intel_ivybridge_m_info = {
 	.has_blt_ring = 1,
 	.has_llc = 1,
 	.has_pch_split = 1,
+	.has_force_wake = 1,
 };
 
 static const struct intel_device_info intel_valleyview_m_info = {
@@ -289,6 +293,7 @@ static const struct intel_device_info intel_haswell_d_info = {
 	.has_blt_ring = 1,
 	.has_llc = 1,
 	.has_pch_split = 1,
+	.has_force_wake = 1,
 };
 
 static const struct intel_device_info intel_haswell_m_info = {
@@ -298,6 +303,7 @@ static const struct intel_device_info intel_haswell_m_info = {
 	.has_blt_ring = 1,
 	.has_llc = 1,
 	.has_pch_split = 1,
+	.has_force_wake = 1,
 };
 
 static const struct pci_device_id pciidlist[] = {		/* aka */
@@ -1139,10 +1145,9 @@ MODULE_LICENSE("GPL and additional rights");
 
 /* We give fast paths for the really cool registers */
 #define NEEDS_FORCE_WAKE(dev_priv, reg) \
-       (((dev_priv)->info->gen >= 6) && \
-        ((reg) < 0x40000) &&            \
-        ((reg) != FORCEWAKE)) && \
-       (!IS_VALLEYVIEW((dev_priv)->dev))
+	((HAS_FORCE_WAKE((dev_priv)->dev)) && \
+	 ((reg) < 0x40000) &&            \
+	 ((reg) != FORCEWAKE))
 
 #define __i915_read(x, y) \
 u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \

+ 3 - 0
drivers/gpu/drm/i915/i915_drv.h

@@ -285,6 +285,7 @@ struct intel_device_info {
 	u8 is_ivybridge:1;
 	u8 is_valleyview:1;
 	u8 has_pch_split:1;
+	u8 has_force_wake:1;
 	u8 is_haswell:1;
 	u8 has_fbc:1;
 	u8 has_pipe_cxsr:1;
@@ -1101,6 +1102,8 @@ struct drm_i915_file_private {
 #define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT)
 #define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX)
 
+#define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake)
+
 #include "i915_trace.h"
 
 /**

+ 35 - 3
drivers/gpu/drm/i915/i915_irq.c

@@ -510,7 +510,7 @@ out:
 	return ret;
 }
 
-static void pch_irq_handler(struct drm_device *dev, u32 pch_iir)
+static void ibx_irq_handler(struct drm_device *dev, u32 pch_iir)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	int pipe;
@@ -550,6 +550,35 @@ static void pch_irq_handler(struct drm_device *dev, u32 pch_iir)
 		DRM_DEBUG_DRIVER("PCH transcoder A underrun interrupt\n");
 }
 
+static void cpt_irq_handler(struct drm_device *dev, u32 pch_iir)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	int pipe;
+
+	if (pch_iir & SDE_AUDIO_POWER_MASK_CPT)
+		DRM_DEBUG_DRIVER("PCH audio power change on port %d\n",
+				 (pch_iir & SDE_AUDIO_POWER_MASK_CPT) >>
+				 SDE_AUDIO_POWER_SHIFT_CPT);
+
+	if (pch_iir & SDE_AUX_MASK_CPT)
+		DRM_DEBUG_DRIVER("AUX channel interrupt\n");
+
+	if (pch_iir & SDE_GMBUS_CPT)
+		DRM_DEBUG_DRIVER("PCH GMBUS interrupt\n");
+
+	if (pch_iir & SDE_AUDIO_CP_REQ_CPT)
+		DRM_DEBUG_DRIVER("Audio CP request interrupt\n");
+
+	if (pch_iir & SDE_AUDIO_CP_CHG_CPT)
+		DRM_DEBUG_DRIVER("Audio CP change interrupt\n");
+
+	if (pch_iir & SDE_FDI_MASK_CPT)
+		for_each_pipe(pipe)
+			DRM_DEBUG_DRIVER("  pipe %c FDI IIR: 0x%08x\n",
+					 pipe_name(pipe),
+					 I915_READ(FDI_RX_IIR(pipe)));
+}
+
 static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
@@ -591,7 +620,7 @@ static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS)
 
 			if (pch_iir & SDE_HOTPLUG_MASK_CPT)
 				queue_work(dev_priv->wq, &dev_priv->hotplug_work);
-			pch_irq_handler(dev, pch_iir);
+			cpt_irq_handler(dev, pch_iir);
 
 			/* clear PCH hotplug event before clear CPU irq */
 			I915_WRITE(SDEIIR, pch_iir);
@@ -684,7 +713,10 @@ static irqreturn_t ironlake_irq_handler(DRM_IRQ_ARGS)
 	if (de_iir & DE_PCH_EVENT) {
 		if (pch_iir & hotplug_mask)
 			queue_work(dev_priv->wq, &dev_priv->hotplug_work);
-		pch_irq_handler(dev, pch_iir);
+		if (HAS_PCH_CPT(dev))
+			cpt_irq_handler(dev, pch_iir);
+		else
+			ibx_irq_handler(dev, pch_iir);
 	}
 
 	if (de_iir & DE_PCU_EVENT) {

+ 40 - 3
drivers/gpu/drm/i915/i915_reg.h

@@ -210,6 +210,14 @@
 #define MI_DISPLAY_FLIP		MI_INSTR(0x14, 2)
 #define MI_DISPLAY_FLIP_I915	MI_INSTR(0x14, 1)
 #define   MI_DISPLAY_FLIP_PLANE(n) ((n) << 20)
+/* IVB has funny definitions for which plane to flip. */
+#define   MI_DISPLAY_FLIP_IVB_PLANE_A  (0 << 19)
+#define   MI_DISPLAY_FLIP_IVB_PLANE_B  (1 << 19)
+#define   MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19)
+#define   MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19)
+#define   MI_DISPLAY_FLIP_IVB_PLANE_C  (4 << 19)
+#define   MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19)
+
 #define MI_SET_CONTEXT		MI_INSTR(0x18, 0)
 #define   MI_MM_SPACE_GTT		(1<<8)
 #define   MI_MM_SPACE_PHYSICAL		(0<<8)
@@ -3313,7 +3321,7 @@
 
 /* PCH */
 
-/* south display engine interrupt */
+/* south display engine interrupt: IBX */
 #define SDE_AUDIO_POWER_D	(1 << 27)
 #define SDE_AUDIO_POWER_C	(1 << 26)
 #define SDE_AUDIO_POWER_B	(1 << 25)
@@ -3349,15 +3357,44 @@
 #define SDE_TRANSA_CRC_ERR	(1 << 1)
 #define SDE_TRANSA_FIFO_UNDER	(1 << 0)
 #define SDE_TRANS_MASK		(0x3f)
-/* CPT */
-#define SDE_CRT_HOTPLUG_CPT	(1 << 19)
+
+/* south display engine interrupt: CPT/PPT */
+#define SDE_AUDIO_POWER_D_CPT	(1 << 31)
+#define SDE_AUDIO_POWER_C_CPT	(1 << 30)
+#define SDE_AUDIO_POWER_B_CPT	(1 << 29)
+#define SDE_AUDIO_POWER_SHIFT_CPT   29
+#define SDE_AUDIO_POWER_MASK_CPT    (7 << 29)
+#define SDE_AUXD_CPT		(1 << 27)
+#define SDE_AUXC_CPT		(1 << 26)
+#define SDE_AUXB_CPT		(1 << 25)
+#define SDE_AUX_MASK_CPT	(7 << 25)
 #define SDE_PORTD_HOTPLUG_CPT	(1 << 23)
 #define SDE_PORTC_HOTPLUG_CPT	(1 << 22)
 #define SDE_PORTB_HOTPLUG_CPT	(1 << 21)
+#define SDE_CRT_HOTPLUG_CPT	(1 << 19)
 #define SDE_HOTPLUG_MASK_CPT	(SDE_CRT_HOTPLUG_CPT |		\
 				 SDE_PORTD_HOTPLUG_CPT |	\
 				 SDE_PORTC_HOTPLUG_CPT |	\
 				 SDE_PORTB_HOTPLUG_CPT)
+#define SDE_GMBUS_CPT		(1 << 17)
+#define SDE_AUDIO_CP_REQ_C_CPT	(1 << 10)
+#define SDE_AUDIO_CP_CHG_C_CPT	(1 << 9)
+#define SDE_FDI_RXC_CPT		(1 << 8)
+#define SDE_AUDIO_CP_REQ_B_CPT	(1 << 6)
+#define SDE_AUDIO_CP_CHG_B_CPT	(1 << 5)
+#define SDE_FDI_RXB_CPT		(1 << 4)
+#define SDE_AUDIO_CP_REQ_A_CPT	(1 << 2)
+#define SDE_AUDIO_CP_CHG_A_CPT	(1 << 1)
+#define SDE_FDI_RXA_CPT		(1 << 0)
+#define SDE_AUDIO_CP_REQ_CPT	(SDE_AUDIO_CP_REQ_C_CPT | \
+				 SDE_AUDIO_CP_REQ_B_CPT | \
+				 SDE_AUDIO_CP_REQ_A_CPT)
+#define SDE_AUDIO_CP_CHG_CPT	(SDE_AUDIO_CP_CHG_C_CPT | \
+				 SDE_AUDIO_CP_CHG_B_CPT | \
+				 SDE_AUDIO_CP_CHG_A_CPT)
+#define SDE_FDI_MASK_CPT	(SDE_FDI_RXC_CPT | \
+				 SDE_FDI_RXB_CPT | \
+				 SDE_FDI_RXA_CPT)
 
 #define SDEISR  0xc4000
 #define SDEIMR  0xc4004

+ 18 - 1
drivers/gpu/drm/i915/intel_display.c

@@ -6158,17 +6158,34 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
+	uint32_t plane_bit = 0;
 	int ret;
 
 	ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
 	if (ret)
 		goto err;
 
+	switch(intel_crtc->plane) {
+	case PLANE_A:
+		plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_A;
+		break;
+	case PLANE_B:
+		plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_B;
+		break;
+	case PLANE_C:
+		plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_C;
+		break;
+	default:
+		WARN_ONCE(1, "unknown plane in flip command\n");
+		ret = -ENODEV;
+		goto err;
+	}
+
 	ret = intel_ring_begin(ring, 4);
 	if (ret)
 		goto err_unpin;
 
-	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | (intel_crtc->plane << 19));
+	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
 	intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
 	intel_ring_emit(ring, (obj->gtt_offset));
 	intel_ring_emit(ring, (MI_NOOP));

+ 18 - 3
drivers/gpu/drm/i915/intel_ringbuffer.c

@@ -266,10 +266,15 @@ u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
 
 static int init_ring_common(struct intel_ring_buffer *ring)
 {
-	drm_i915_private_t *dev_priv = ring->dev->dev_private;
+	struct drm_device *dev = ring->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj = ring->obj;
+	int ret = 0;
 	u32 head;
 
+	if (HAS_FORCE_WAKE(dev))
+		gen6_gt_force_wake_get(dev_priv);
+
 	/* Stop the ring if it's running. */
 	I915_WRITE_CTL(ring, 0);
 	I915_WRITE_HEAD(ring, 0);
@@ -317,7 +322,8 @@ static int init_ring_common(struct intel_ring_buffer *ring)
 				I915_READ_HEAD(ring),
 				I915_READ_TAIL(ring),
 				I915_READ_START(ring));
-		return -EIO;
+		ret = -EIO;
+		goto out;
 	}
 
 	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
@@ -326,9 +332,14 @@ static int init_ring_common(struct intel_ring_buffer *ring)
 		ring->head = I915_READ_HEAD(ring);
 		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
 		ring->space = ring_space(ring);
+		ring->last_retired_head = -1;
 	}
 
-	return 0;
+out:
+	if (HAS_FORCE_WAKE(dev))
+		gen6_gt_force_wake_put(dev_priv);
+
+	return ret;
 }
 
 static int
@@ -987,6 +998,10 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	if (ret)
 		goto err_unref;
 
+	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	if (ret)
+		goto err_unpin;
+
 	ring->virtual_start = ioremap_wc(dev->agp->base + obj->gtt_offset,
 					 ring->size);
 	if (ring->virtual_start == NULL) {

+ 17 - 4
drivers/gpu/drm/radeon/ni.c

@@ -460,15 +460,28 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 		rdev->config.cayman.max_pipes_per_simd = 4;
 		rdev->config.cayman.max_tile_pipes = 2;
 		if ((rdev->pdev->device == 0x9900) ||
-		    (rdev->pdev->device == 0x9901)) {
+		    (rdev->pdev->device == 0x9901) ||
+		    (rdev->pdev->device == 0x9905) ||
+		    (rdev->pdev->device == 0x9906) ||
+		    (rdev->pdev->device == 0x9907) ||
+		    (rdev->pdev->device == 0x9908) ||
+		    (rdev->pdev->device == 0x9909) ||
+		    (rdev->pdev->device == 0x9910) ||
+		    (rdev->pdev->device == 0x9917)) {
 			rdev->config.cayman.max_simds_per_se = 6;
 			rdev->config.cayman.max_backends_per_se = 2;
 		} else if ((rdev->pdev->device == 0x9903) ||
-			   (rdev->pdev->device == 0x9904)) {
+			   (rdev->pdev->device == 0x9904) ||
+			   (rdev->pdev->device == 0x990A) ||
+			   (rdev->pdev->device == 0x9913) ||
+			   (rdev->pdev->device == 0x9918)) {
 			rdev->config.cayman.max_simds_per_se = 4;
 			rdev->config.cayman.max_backends_per_se = 2;
-		} else if ((rdev->pdev->device == 0x9990) ||
-			   (rdev->pdev->device == 0x9991)) {
+		} else if ((rdev->pdev->device == 0x9919) ||
+			   (rdev->pdev->device == 0x9990) ||
+			   (rdev->pdev->device == 0x9991) ||
+			   (rdev->pdev->device == 0x9994) ||
+			   (rdev->pdev->device == 0x99A0)) {
 			rdev->config.cayman.max_simds_per_se = 3;
 			rdev->config.cayman.max_backends_per_se = 1;
 		} else {

+ 6 - 9
drivers/gpu/drm/radeon/r600.c

@@ -2426,6 +2426,12 @@ int r600_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = r600_audio_init(rdev);
+	if (r) {
+		DRM_ERROR("radeon: audio init failed\n");
+		return r;
+	}
+
 	return 0;
 }
 
@@ -2462,12 +2468,6 @@ int r600_resume(struct radeon_device *rdev)
 		return r;
 	}
 
-	r = r600_audio_init(rdev);
-	if (r) {
-		DRM_ERROR("radeon: audio resume failed\n");
-		return r;
-	}
-
 	return r;
 }
 
@@ -2577,9 +2577,6 @@ int r600_init(struct radeon_device *rdev)
 		rdev->accel_working = false;
 	}
 
-	r = r600_audio_init(rdev);
-	if (r)
-		return r; /* TODO error handling */
 	return 0;
 }
 

+ 3 - 2
drivers/gpu/drm/radeon/r600_audio.c

@@ -192,6 +192,7 @@ void r600_audio_set_clock(struct drm_encoder *encoder, int clock)
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
 	int base_rate = 48000;
 
 	switch (radeon_encoder->encoder_id) {
@@ -217,8 +218,8 @@ void r600_audio_set_clock(struct drm_encoder *encoder, int clock)
 		WREG32(EVERGREEN_AUDIO_PLL1_DIV, clock * 10);
 		WREG32(EVERGREEN_AUDIO_PLL1_UNK, 0x00000071);
 
-		/* Some magic trigger or src sel? */
-		WREG32_P(0x5ac, 0x01, ~0x77);
+		/* Select DTO source */
+		WREG32(0x5ac, radeon_crtc->crtc_id);
 	} else {
 		switch (dig->dig_encoder) {
 		case 0:

+ 0 - 1
drivers/gpu/drm/radeon/r600_hdmi.c

@@ -348,7 +348,6 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod
 		WREG32(HDMI0_AUDIO_PACKET_CONTROL + offset,
 		       HDMI0_AUDIO_SAMPLE_SEND | /* send audio packets */
 		       HDMI0_AUDIO_DELAY_EN(1) | /* default audio delay */
-		       HDMI0_AUDIO_SEND_MAX_PACKETS | /* send NULL packets if no audio is available */
 		       HDMI0_AUDIO_PACKETS_PER_LINE(3) | /* should be suffient for all audio modes and small enough for all hblanks */
 		       HDMI0_60958_CS_UPDATE); /* allow 60958 channel status fields to be updated */
 	}

+ 2 - 3
drivers/gpu/drm/radeon/radeon.h

@@ -1374,9 +1374,9 @@ struct cayman_asic {
 
 struct si_asic {
 	unsigned max_shader_engines;
-	unsigned max_pipes_per_simd;
 	unsigned max_tile_pipes;
-	unsigned max_simds_per_se;
+	unsigned max_cu_per_sh;
+	unsigned max_sh_per_se;
 	unsigned max_backends_per_se;
 	unsigned max_texture_channel_caches;
 	unsigned max_gprs;
@@ -1387,7 +1387,6 @@ struct si_asic {
 	unsigned sc_hiz_tile_fifo_size;
 	unsigned sc_earlyz_tile_fifo_size;
 
-	unsigned num_shader_engines;
 	unsigned num_tile_pipes;
 	unsigned num_backends_per_se;
 	unsigned backend_disable_mask_per_asic;

+ 12 - 7
drivers/gpu/drm/radeon/radeon_gart.c

@@ -476,12 +476,18 @@ int radeon_vm_bo_add(struct radeon_device *rdev,
 
 	mutex_lock(&vm->mutex);
 	if (last_pfn > vm->last_pfn) {
-		/* grow va space 32M by 32M */
-		unsigned align = ((32 << 20) >> 12) - 1;
+		/* release mutex and lock in right order */
+		mutex_unlock(&vm->mutex);
 		radeon_mutex_lock(&rdev->cs_mutex);
-		radeon_vm_unbind_locked(rdev, vm);
+		mutex_lock(&vm->mutex);
+		/* and check again */
+		if (last_pfn > vm->last_pfn) {
+			/* grow va space 32M by 32M */
+			unsigned align = ((32 << 20) >> 12) - 1;
+			radeon_vm_unbind_locked(rdev, vm);
+			vm->last_pfn = (last_pfn + align) & ~align;
+		}
 		radeon_mutex_unlock(&rdev->cs_mutex);
-		vm->last_pfn = (last_pfn + align) & ~align;
 	}
 	head = &vm->va;
 	last_offset = 0;
@@ -595,8 +601,8 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
 	if (bo_va == NULL)
 		return 0;
 
-	mutex_lock(&vm->mutex);
 	radeon_mutex_lock(&rdev->cs_mutex);
+	mutex_lock(&vm->mutex);
 	radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
 	radeon_mutex_unlock(&rdev->cs_mutex);
 	list_del(&bo_va->vm_list);
@@ -641,9 +647,8 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 	struct radeon_bo_va *bo_va, *tmp;
 	int r;
 
-	mutex_lock(&vm->mutex);
-
 	radeon_mutex_lock(&rdev->cs_mutex);
+	mutex_lock(&vm->mutex);
 	radeon_vm_unbind_locked(rdev, vm);
 	radeon_mutex_unlock(&rdev->cs_mutex);
 

+ 1 - 1
drivers/gpu/drm/radeon/radeon_kms.c

@@ -273,7 +273,7 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		break;
 	case RADEON_INFO_MAX_PIPES:
 		if (rdev->family >= CHIP_TAHITI)
-			value = rdev->config.si.max_pipes_per_simd;
+			value = rdev->config.si.max_cu_per_sh;
 		else if (rdev->family >= CHIP_CAYMAN)
 			value = rdev->config.cayman.max_pipes_per_simd;
 		else if (rdev->family >= CHIP_CEDAR)

+ 6 - 6
drivers/gpu/drm/radeon/rs600.c

@@ -908,12 +908,6 @@ static int rs600_startup(struct radeon_device *rdev)
 		return r;
 	}
 
-	r = r600_audio_init(rdev);
-	if (r) {
-		dev_err(rdev->dev, "failed initializing audio\n");
-		return r;
-	}
-
 	r = radeon_ib_pool_start(rdev);
 	if (r)
 		return r;
@@ -922,6 +916,12 @@ static int rs600_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = r600_audio_init(rdev);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing audio\n");
+		return r;
+	}
+
 	return 0;
 }
 

+ 6 - 6
drivers/gpu/drm/radeon/rs690.c

@@ -637,12 +637,6 @@ static int rs690_startup(struct radeon_device *rdev)
 		return r;
 	}
 
-	r = r600_audio_init(rdev);
-	if (r) {
-		dev_err(rdev->dev, "failed initializing audio\n");
-		return r;
-	}
-
 	r = radeon_ib_pool_start(rdev);
 	if (r)
 		return r;
@@ -651,6 +645,12 @@ static int rs690_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = r600_audio_init(rdev);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing audio\n");
+		return r;
+	}
+
 	return 0;
 }
 

+ 6 - 12
drivers/gpu/drm/radeon/rv770.c

@@ -956,6 +956,12 @@ static int rv770_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = r600_audio_init(rdev);
+	if (r) {
+		DRM_ERROR("radeon: audio init failed\n");
+		return r;
+	}
+
 	return 0;
 }
 
@@ -978,12 +984,6 @@ int rv770_resume(struct radeon_device *rdev)
 		return r;
 	}
 
-	r = r600_audio_init(rdev);
-	if (r) {
-		dev_err(rdev->dev, "radeon: audio init failed\n");
-		return r;
-	}
-
 	return r;
 
 }
@@ -1092,12 +1092,6 @@ int rv770_init(struct radeon_device *rdev)
 		rdev->accel_working = false;
 	}
 
-	r = r600_audio_init(rdev);
-	if (r) {
-		dev_err(rdev->dev, "radeon: audio init failed\n");
-		return r;
-	}
-
 	return 0;
 }
 

+ 162 - 315
drivers/gpu/drm/radeon/si.c

@@ -867,200 +867,6 @@ void dce6_bandwidth_update(struct radeon_device *rdev)
 /*
  * Core functions
  */
-static u32 si_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
-					   u32 num_tile_pipes,
-					   u32 num_backends_per_asic,
-					   u32 *backend_disable_mask_per_asic,
-					   u32 num_shader_engines)
-{
-	u32 backend_map = 0;
-	u32 enabled_backends_mask = 0;
-	u32 enabled_backends_count = 0;
-	u32 num_backends_per_se;
-	u32 cur_pipe;
-	u32 swizzle_pipe[SI_MAX_PIPES];
-	u32 cur_backend = 0;
-	u32 i;
-	bool force_no_swizzle;
-
-	/* force legal values */
-	if (num_tile_pipes < 1)
-		num_tile_pipes = 1;
-	if (num_tile_pipes > rdev->config.si.max_tile_pipes)
-		num_tile_pipes = rdev->config.si.max_tile_pipes;
-	if (num_shader_engines < 1)
-		num_shader_engines = 1;
-	if (num_shader_engines > rdev->config.si.max_shader_engines)
-		num_shader_engines = rdev->config.si.max_shader_engines;
-	if (num_backends_per_asic < num_shader_engines)
-		num_backends_per_asic = num_shader_engines;
-	if (num_backends_per_asic > (rdev->config.si.max_backends_per_se * num_shader_engines))
-		num_backends_per_asic = rdev->config.si.max_backends_per_se * num_shader_engines;
-
-	/* make sure we have the same number of backends per se */
-	num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines);
-	/* set up the number of backends per se */
-	num_backends_per_se = num_backends_per_asic / num_shader_engines;
-	if (num_backends_per_se > rdev->config.si.max_backends_per_se) {
-		num_backends_per_se = rdev->config.si.max_backends_per_se;
-		num_backends_per_asic = num_backends_per_se * num_shader_engines;
-	}
-
-	/* create enable mask and count for enabled backends */
-	for (i = 0; i < SI_MAX_BACKENDS; ++i) {
-		if (((*backend_disable_mask_per_asic >> i) & 1) == 0) {
-			enabled_backends_mask |= (1 << i);
-			++enabled_backends_count;
-		}
-		if (enabled_backends_count == num_backends_per_asic)
-			break;
-	}
-
-	/* force the backends mask to match the current number of backends */
-	if (enabled_backends_count != num_backends_per_asic) {
-		u32 this_backend_enabled;
-		u32 shader_engine;
-		u32 backend_per_se;
-
-		enabled_backends_mask = 0;
-		enabled_backends_count = 0;
-		*backend_disable_mask_per_asic = SI_MAX_BACKENDS_MASK;
-		for (i = 0; i < SI_MAX_BACKENDS; ++i) {
-			/* calc the current se */
-			shader_engine = i / rdev->config.si.max_backends_per_se;
-			/* calc the backend per se */
-			backend_per_se = i % rdev->config.si.max_backends_per_se;
-			/* default to not enabled */
-			this_backend_enabled = 0;
-			if ((shader_engine < num_shader_engines) &&
-			    (backend_per_se < num_backends_per_se))
-				this_backend_enabled = 1;
-			if (this_backend_enabled) {
-				enabled_backends_mask |= (1 << i);
-				*backend_disable_mask_per_asic &= ~(1 << i);
-				++enabled_backends_count;
-			}
-		}
-	}
-
-
-	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * SI_MAX_PIPES);
-	switch (rdev->family) {
-	case CHIP_TAHITI:
-	case CHIP_PITCAIRN:
-	case CHIP_VERDE:
-		force_no_swizzle = true;
-		break;
-	default:
-		force_no_swizzle = false;
-		break;
-	}
-	if (force_no_swizzle) {
-		bool last_backend_enabled = false;
-
-		force_no_swizzle = false;
-		for (i = 0; i < SI_MAX_BACKENDS; ++i) {
-			if (((enabled_backends_mask >> i) & 1) == 1) {
-				if (last_backend_enabled)
-					force_no_swizzle = true;
-				last_backend_enabled = true;
-			} else
-				last_backend_enabled = false;
-		}
-	}
-
-	switch (num_tile_pipes) {
-	case 1:
-	case 3:
-	case 5:
-	case 7:
-		DRM_ERROR("odd number of pipes!\n");
-		break;
-	case 2:
-		swizzle_pipe[0] = 0;
-		swizzle_pipe[1] = 1;
-		break;
-	case 4:
-		if (force_no_swizzle) {
-			swizzle_pipe[0] = 0;
-			swizzle_pipe[1] = 1;
-			swizzle_pipe[2] = 2;
-			swizzle_pipe[3] = 3;
-		} else {
-			swizzle_pipe[0] = 0;
-			swizzle_pipe[1] = 2;
-			swizzle_pipe[2] = 1;
-			swizzle_pipe[3] = 3;
-		}
-		break;
-	case 6:
-		if (force_no_swizzle) {
-			swizzle_pipe[0] = 0;
-			swizzle_pipe[1] = 1;
-			swizzle_pipe[2] = 2;
-			swizzle_pipe[3] = 3;
-			swizzle_pipe[4] = 4;
-			swizzle_pipe[5] = 5;
-		} else {
-			swizzle_pipe[0] = 0;
-			swizzle_pipe[1] = 2;
-			swizzle_pipe[2] = 4;
-			swizzle_pipe[3] = 1;
-			swizzle_pipe[4] = 3;
-			swizzle_pipe[5] = 5;
-		}
-		break;
-	case 8:
-		if (force_no_swizzle) {
-			swizzle_pipe[0] = 0;
-			swizzle_pipe[1] = 1;
-			swizzle_pipe[2] = 2;
-			swizzle_pipe[3] = 3;
-			swizzle_pipe[4] = 4;
-			swizzle_pipe[5] = 5;
-			swizzle_pipe[6] = 6;
-			swizzle_pipe[7] = 7;
-		} else {
-			swizzle_pipe[0] = 0;
-			swizzle_pipe[1] = 2;
-			swizzle_pipe[2] = 4;
-			swizzle_pipe[3] = 6;
-			swizzle_pipe[4] = 1;
-			swizzle_pipe[5] = 3;
-			swizzle_pipe[6] = 5;
-			swizzle_pipe[7] = 7;
-		}
-		break;
-	}
-
-	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
-		while (((1 << cur_backend) & enabled_backends_mask) == 0)
-			cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
-
-		backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4)));
-
-		cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS;
-	}
-
-	return backend_map;
-}
-
-static u32 si_get_disable_mask_per_asic(struct radeon_device *rdev,
-					u32 disable_mask_per_se,
-					u32 max_disable_mask_per_se,
-					u32 num_shader_engines)
-{
-	u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se);
-	u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se;
-
-	if (num_shader_engines == 1)
-		return disable_mask_per_asic;
-	else if (num_shader_engines == 2)
-		return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se);
-	else
-		return 0xffffffff;
-}
-
 static void si_tiling_mode_table_init(struct radeon_device *rdev)
 {
 	const u32 num_tile_mode_states = 32;
@@ -1562,18 +1368,151 @@ static void si_tiling_mode_table_init(struct radeon_device *rdev)
 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
 }
 
+static void si_select_se_sh(struct radeon_device *rdev,
+			    u32 se_num, u32 sh_num)
+{
+	u32 data = INSTANCE_BROADCAST_WRITES;
+
+	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
+		data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
+	else if (se_num == 0xffffffff)
+		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
+	else if (sh_num == 0xffffffff)
+		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
+	else
+		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
+	WREG32(GRBM_GFX_INDEX, data);
+}
+
+static u32 si_create_bitmask(u32 bit_width)
+{
+	u32 i, mask = 0;
+
+	for (i = 0; i < bit_width; i++) {
+		mask <<= 1;
+		mask |= 1;
+	}
+	return mask;
+}
+
+static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
+{
+	u32 data, mask;
+
+	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
+	if (data & 1)
+		data &= INACTIVE_CUS_MASK;
+	else
+		data = 0;
+	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
+
+	data >>= INACTIVE_CUS_SHIFT;
+
+	mask = si_create_bitmask(cu_per_sh);
+
+	return ~data & mask;
+}
+
+static void si_setup_spi(struct radeon_device *rdev,
+			 u32 se_num, u32 sh_per_se,
+			 u32 cu_per_sh)
+{
+	int i, j, k;
+	u32 data, mask, active_cu;
+
+	for (i = 0; i < se_num; i++) {
+		for (j = 0; j < sh_per_se; j++) {
+			si_select_se_sh(rdev, i, j);
+			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
+			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
+
+			mask = 1;
+			for (k = 0; k < 16; k++) {
+				mask <<= k;
+				if (active_cu & mask) {
+					data &= ~mask;
+					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
+					break;
+				}
+			}
+		}
+	}
+	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+}
+
+static u32 si_get_rb_disabled(struct radeon_device *rdev,
+			      u32 max_rb_num, u32 se_num,
+			      u32 sh_per_se)
+{
+	u32 data, mask;
+
+	data = RREG32(CC_RB_BACKEND_DISABLE);
+	if (data & 1)
+		data &= BACKEND_DISABLE_MASK;
+	else
+		data = 0;
+	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
+
+	data >>= BACKEND_DISABLE_SHIFT;
+
+	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
+
+	return data & mask;
+}
+
+static void si_setup_rb(struct radeon_device *rdev,
+			u32 se_num, u32 sh_per_se,
+			u32 max_rb_num)
+{
+	int i, j;
+	u32 data, mask;
+	u32 disabled_rbs = 0;
+	u32 enabled_rbs = 0;
+
+	for (i = 0; i < se_num; i++) {
+		for (j = 0; j < sh_per_se; j++) {
+			si_select_se_sh(rdev, i, j);
+			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
+			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
+		}
+	}
+	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+
+	mask = 1;
+	for (i = 0; i < max_rb_num; i++) {
+		if (!(disabled_rbs & mask))
+			enabled_rbs |= mask;
+		mask <<= 1;
+	}
+
+	for (i = 0; i < se_num; i++) {
+		si_select_se_sh(rdev, i, 0xffffffff);
+		data = 0;
+		for (j = 0; j < sh_per_se; j++) {
+			switch (enabled_rbs & 3) {
+			case 1:
+				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
+				break;
+			case 2:
+				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
+				break;
+			case 3:
+			default:
+				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
+				break;
+			}
+			enabled_rbs >>= 2;
+		}
+		WREG32(PA_SC_RASTER_CONFIG, data);
+	}
+	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+}
+
 static void si_gpu_init(struct radeon_device *rdev)
 {
-	u32 cc_rb_backend_disable = 0;
-	u32 cc_gc_shader_array_config;
 	u32 gb_addr_config = 0;
 	u32 mc_shared_chmap, mc_arb_ramcfg;
-	u32 gb_backend_map;
-	u32 cgts_tcc_disable;
 	u32 sx_debug_1;
-	u32 gc_user_shader_array_config;
-	u32 gc_user_rb_backend_disable;
-	u32 cgts_user_tcc_disable;
 	u32 hdp_host_path_cntl;
 	u32 tmp;
 	int i, j;
@@ -1581,9 +1520,9 @@ static void si_gpu_init(struct radeon_device *rdev)
 	switch (rdev->family) {
 	case CHIP_TAHITI:
 		rdev->config.si.max_shader_engines = 2;
-		rdev->config.si.max_pipes_per_simd = 4;
 		rdev->config.si.max_tile_pipes = 12;
-		rdev->config.si.max_simds_per_se = 8;
+		rdev->config.si.max_cu_per_sh = 8;
+		rdev->config.si.max_sh_per_se = 2;
 		rdev->config.si.max_backends_per_se = 4;
 		rdev->config.si.max_texture_channel_caches = 12;
 		rdev->config.si.max_gprs = 256;
@@ -1594,12 +1533,13 @@ static void si_gpu_init(struct radeon_device *rdev)
 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
+		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
 		break;
 	case CHIP_PITCAIRN:
 		rdev->config.si.max_shader_engines = 2;
-		rdev->config.si.max_pipes_per_simd = 4;
 		rdev->config.si.max_tile_pipes = 8;
-		rdev->config.si.max_simds_per_se = 5;
+		rdev->config.si.max_cu_per_sh = 5;
+		rdev->config.si.max_sh_per_se = 2;
 		rdev->config.si.max_backends_per_se = 4;
 		rdev->config.si.max_texture_channel_caches = 8;
 		rdev->config.si.max_gprs = 256;
@@ -1610,13 +1550,14 @@ static void si_gpu_init(struct radeon_device *rdev)
 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
+		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
 		break;
 	case CHIP_VERDE:
 	default:
 		rdev->config.si.max_shader_engines = 1;
-		rdev->config.si.max_pipes_per_simd = 4;
 		rdev->config.si.max_tile_pipes = 4;
-		rdev->config.si.max_simds_per_se = 2;
+		rdev->config.si.max_cu_per_sh = 2;
+		rdev->config.si.max_sh_per_se = 2;
 		rdev->config.si.max_backends_per_se = 4;
 		rdev->config.si.max_texture_channel_caches = 4;
 		rdev->config.si.max_gprs = 256;
@@ -1627,6 +1568,7 @@ static void si_gpu_init(struct radeon_device *rdev)
 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
+		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
 		break;
 	}
 
@@ -1648,31 +1590,7 @@ static void si_gpu_init(struct radeon_device *rdev)
 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
 
-	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE);
-	cc_gc_shader_array_config = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
-	cgts_tcc_disable = 0xffff0000;
-	for (i = 0; i < rdev->config.si.max_texture_channel_caches; i++)
-		cgts_tcc_disable &= ~(1 << (16 + i));
-	gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE);
-	gc_user_shader_array_config = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
-	cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE);
-
-	rdev->config.si.num_shader_engines = rdev->config.si.max_shader_engines;
 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
-	tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
-	rdev->config.si.num_backends_per_se = r600_count_pipe_bits(tmp);
-	tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
-	rdev->config.si.backend_disable_mask_per_asic =
-		si_get_disable_mask_per_asic(rdev, tmp, SI_MAX_BACKENDS_PER_SE_MASK,
-					     rdev->config.si.num_shader_engines);
-	rdev->config.si.backend_map =
-		si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
-						rdev->config.si.num_backends_per_se *
-						rdev->config.si.num_shader_engines,
-						&rdev->config.si.backend_disable_mask_per_asic,
-						rdev->config.si.num_shader_engines);
-	tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT;
-	rdev->config.si.num_texture_channel_caches = r600_count_pipe_bits(tmp);
 	rdev->config.si.mem_max_burst_length_bytes = 256;
 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
@@ -1683,55 +1601,8 @@ static void si_gpu_init(struct radeon_device *rdev)
 	rdev->config.si.num_gpus = 1;
 	rdev->config.si.multi_gpu_tile_size = 64;
 
-	gb_addr_config = 0;
-	switch (rdev->config.si.num_tile_pipes) {
-	case 1:
-		gb_addr_config |= NUM_PIPES(0);
-		break;
-	case 2:
-		gb_addr_config |= NUM_PIPES(1);
-		break;
-	case 4:
-		gb_addr_config |= NUM_PIPES(2);
-		break;
-	case 8:
-	default:
-		gb_addr_config |= NUM_PIPES(3);
-		break;
-	}
-
-	tmp = (rdev->config.si.mem_max_burst_length_bytes / 256) - 1;
-	gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp);
-	gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.si.num_shader_engines - 1);
-	tmp = (rdev->config.si.shader_engine_tile_size / 16) - 1;
-	gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp);
-	switch (rdev->config.si.num_gpus) {
-	case 1:
-	default:
-		gb_addr_config |= NUM_GPUS(0);
-		break;
-	case 2:
-		gb_addr_config |= NUM_GPUS(1);
-		break;
-	case 4:
-		gb_addr_config |= NUM_GPUS(2);
-		break;
-	}
-	switch (rdev->config.si.multi_gpu_tile_size) {
-	case 16:
-		gb_addr_config |= MULTI_GPU_TILE_SIZE(0);
-		break;
-	case 32:
-	default:
-		gb_addr_config |= MULTI_GPU_TILE_SIZE(1);
-		break;
-	case 64:
-		gb_addr_config |= MULTI_GPU_TILE_SIZE(2);
-		break;
-	case 128:
-		gb_addr_config |= MULTI_GPU_TILE_SIZE(3);
-		break;
-	}
+	/* fix up row size */
+	gb_addr_config &= ~ROW_SIZE_MASK;
 	switch (rdev->config.si.mem_row_size_in_kb) {
 	case 1:
 	default:
@@ -1745,26 +1616,6 @@ static void si_gpu_init(struct radeon_device *rdev)
 		break;
 	}
 
-	tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
-	rdev->config.si.num_tile_pipes = (1 << tmp);
-	tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
-	rdev->config.si.mem_max_burst_length_bytes = (tmp + 1) * 256;
-	tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
-	rdev->config.si.num_shader_engines = tmp + 1;
-	tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
-	rdev->config.si.num_gpus = tmp + 1;
-	tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
-	rdev->config.si.multi_gpu_tile_size = 1 << tmp;
-	tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
-	rdev->config.si.mem_row_size_in_kb = 1 << tmp;
-
-	gb_backend_map =
-		si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes,
-						rdev->config.si.num_backends_per_se *
-						rdev->config.si.num_shader_engines,
-						&rdev->config.si.backend_disable_mask_per_asic,
-						rdev->config.si.num_shader_engines);
-
 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
 	 * not have bank info, so create a custom tiling dword.
 	 * bits 3:0   num_pipes
@@ -1789,33 +1640,29 @@ static void si_gpu_init(struct radeon_device *rdev)
 		rdev->config.si.tile_config |= (3 << 0);
 		break;
 	}
-	rdev->config.si.tile_config |=
-		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
+	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
+		rdev->config.si.tile_config |= 1 << 4;
+	else
+		rdev->config.si.tile_config |= 0 << 4;
 	rdev->config.si.tile_config |=
 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
 	rdev->config.si.tile_config |=
 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
 
-	rdev->config.si.backend_map = gb_backend_map;
 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
 
-	/* primary versions */
-	WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
-	WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
-	WREG32(CC_GC_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
-
-	WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
+	si_tiling_mode_table_init(rdev);
 
-	/* user versions */
-	WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable);
-	WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
-	WREG32(GC_USER_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config);
+	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
+		    rdev->config.si.max_sh_per_se,
+		    rdev->config.si.max_backends_per_se);
 
-	WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
+	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
+		     rdev->config.si.max_sh_per_se,
+		     rdev->config.si.max_cu_per_sh);
 
-	si_tiling_mode_table_init(rdev);
 
 	/* set HW defaults for 3D engine */
 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |

+ 19 - 0
drivers/gpu/drm/radeon/sid.h

@@ -24,6 +24,11 @@
 #ifndef SI_H
 #define SI_H
 
+#define TAHITI_RB_BITMAP_WIDTH_PER_SH  2
+
+#define TAHITI_GB_ADDR_CONFIG_GOLDEN        0x12011003
+#define VERDE_GB_ADDR_CONFIG_GOLDEN         0x12010002
+
 #define	CG_MULT_THERMAL_STATUS					0x714
 #define		ASIC_MAX_TEMP(x)				((x) << 0)
 #define		ASIC_MAX_TEMP_MASK				0x000001ff
@@ -408,6 +413,12 @@
 #define		SOFT_RESET_IA					(1 << 15)
 
 #define GRBM_GFX_INDEX          			0x802C
+#define		INSTANCE_INDEX(x)			((x) << 0)
+#define		SH_INDEX(x)     			((x) << 8)
+#define		SE_INDEX(x)     			((x) << 16)
+#define		SH_BROADCAST_WRITES      		(1 << 29)
+#define		INSTANCE_BROADCAST_WRITES      		(1 << 30)
+#define		SE_BROADCAST_WRITES      		(1 << 31)
 
 #define GRBM_INT_CNTL                                   0x8060
 #       define RDERR_INT_ENABLE                         (1 << 0)
@@ -480,6 +491,8 @@
 #define	VGT_TF_MEMORY_BASE				0x89B8
 
 #define CC_GC_SHADER_ARRAY_CONFIG			0x89bc
+#define		INACTIVE_CUS_MASK			0xFFFF0000
+#define		INACTIVE_CUS_SHIFT			16
 #define GC_USER_SHADER_ARRAY_CONFIG			0x89c0
 
 #define	PA_CL_ENHANCE					0x8A14
@@ -688,6 +701,12 @@
 #define RLC_MC_CNTL                                       0xC344
 #define RLC_UCODE_CNTL                                    0xC348
 
+#define PA_SC_RASTER_CONFIG                             0x28350
+#       define RASTER_CONFIG_RB_MAP_0                   0
+#       define RASTER_CONFIG_RB_MAP_1                   1
+#       define RASTER_CONFIG_RB_MAP_2                   2
+#       define RASTER_CONFIG_RB_MAP_3                   3
+
 #define VGT_EVENT_INITIATOR                             0x28a90
 #       define SAMPLE_STREAMOUTSTATS1                   (1 << 0)
 #       define SAMPLE_STREAMOUTSTATS2                   (2 << 0)

+ 12 - 0
drivers/i2c/muxes/Kconfig

@@ -37,4 +37,16 @@ config I2C_MUX_PCA954x
 	  This driver can also be built as a module.  If so, the module
 	  will be called i2c-mux-pca954x.
 
+config I2C_MUX_PINCTRL
+	tristate "pinctrl-based I2C multiplexer"
+	depends on PINCTRL
+	help
+	  If you say yes to this option, support will be included for an I2C
+	  multiplexer that uses the pinctrl subsystem, i.e. pin multiplexing.
+	  This is useful for SoCs whose I2C module's signals can be routed to
+	  different sets of pins at run-time.
+
+	  This driver can also be built as a module. If so, the module will be
+	  called pinctrl-i2cmux.
+
 endmenu

+ 1 - 0
drivers/i2c/muxes/Makefile

@@ -4,5 +4,6 @@
 obj-$(CONFIG_I2C_MUX_GPIO)	+= i2c-mux-gpio.o
 obj-$(CONFIG_I2C_MUX_PCA9541)	+= i2c-mux-pca9541.o
 obj-$(CONFIG_I2C_MUX_PCA954x)	+= i2c-mux-pca954x.o
+obj-$(CONFIG_I2C_MUX_PINCTRL)	+= i2c-mux-pinctrl.o
 
 ccflags-$(CONFIG_I2C_DEBUG_BUS) := -DDEBUG

+ 279 - 0
drivers/i2c/muxes/i2c-mux-pinctrl.c

@@ -0,0 +1,279 @@
+/*
+ * I2C multiplexer using pinctrl API
+ *
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/i2c.h>
+#include <linux/i2c-mux.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of_i2c.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/i2c-mux-pinctrl.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+struct i2c_mux_pinctrl {
+	struct device *dev;
+	struct i2c_mux_pinctrl_platform_data *pdata;
+	struct pinctrl *pinctrl;
+	struct pinctrl_state **states;
+	struct pinctrl_state *state_idle;
+	struct i2c_adapter *parent;
+	struct i2c_adapter **busses;
+};
+
+static int i2c_mux_pinctrl_select(struct i2c_adapter *adap, void *data,
+				  u32 chan)
+{
+	struct i2c_mux_pinctrl *mux = data;
+
+	return pinctrl_select_state(mux->pinctrl, mux->states[chan]);
+}
+
+static int i2c_mux_pinctrl_deselect(struct i2c_adapter *adap, void *data,
+				    u32 chan)
+{
+	struct i2c_mux_pinctrl *mux = data;
+
+	return pinctrl_select_state(mux->pinctrl, mux->state_idle);
+}
+
+#ifdef CONFIG_OF
+static int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux,
+				struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	int num_names, i, ret;
+	struct device_node *adapter_np;
+	struct i2c_adapter *adapter;
+
+	if (!np)
+		return 0;
+
+	mux->pdata = devm_kzalloc(&pdev->dev, sizeof(*mux->pdata), GFP_KERNEL);
+	if (!mux->pdata) {
+		dev_err(mux->dev,
+			"Cannot allocate i2c_mux_pinctrl_platform_data\n");
+		return -ENOMEM;
+	}
+
+	num_names = of_property_count_strings(np, "pinctrl-names");
+	if (num_names < 0) {
+		dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n",
+			num_names);
+		return num_names;
+	}
+
+	mux->pdata->pinctrl_states = devm_kzalloc(&pdev->dev,
+		sizeof(*mux->pdata->pinctrl_states) * num_names,
+		GFP_KERNEL);
+	if (!mux->pdata->pinctrl_states) {
+		dev_err(mux->dev, "Cannot allocate pinctrl_states\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < num_names; i++) {
+		ret = of_property_read_string_index(np, "pinctrl-names", i,
+			&mux->pdata->pinctrl_states[mux->pdata->bus_count]);
+		if (ret < 0) {
+			dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n",
+				ret);
+			return ret;
+		}
+		if (!strcmp(mux->pdata->pinctrl_states[mux->pdata->bus_count],
+			    "idle")) {
+			if (i != num_names - 1) {
+				dev_err(mux->dev, "idle state must be last\n");
+				return -EINVAL;
+			}
+			mux->pdata->pinctrl_state_idle = "idle";
+		} else {
+			mux->pdata->bus_count++;
+		}
+	}
+
+	adapter_np = of_parse_phandle(np, "i2c-parent", 0);
+	if (!adapter_np) {
+		dev_err(mux->dev, "Cannot parse i2c-parent\n");
+		return -ENODEV;
+	}
+	adapter = of_find_i2c_adapter_by_node(adapter_np);
+	if (!adapter) {
+		dev_err(mux->dev, "Cannot find parent bus\n");
+		return -ENODEV;
+	}
+	mux->pdata->parent_bus_num = i2c_adapter_id(adapter);
+	put_device(&adapter->dev);
+
+	return 0;
+}
+#else
+static inline int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux,
+					   struct platform_device *pdev)
+{
+	return 0;
+}
+#endif
+
+static int __devinit i2c_mux_pinctrl_probe(struct platform_device *pdev)
+{
+	struct i2c_mux_pinctrl *mux;
+	int (*deselect)(struct i2c_adapter *, void *, u32);
+	int i, ret;
+
+	mux = devm_kzalloc(&pdev->dev, sizeof(*mux), GFP_KERNEL);
+	if (!mux) {
+		dev_err(&pdev->dev, "Cannot allocate i2c_mux_pinctrl\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+	platform_set_drvdata(pdev, mux);
+
+	mux->dev = &pdev->dev;
+
+	mux->pdata = pdev->dev.platform_data;
+	if (!mux->pdata) {
+		ret = i2c_mux_pinctrl_parse_dt(mux, pdev);
+		if (ret < 0)
+			goto err;
+	}
+	if (!mux->pdata) {
+		dev_err(&pdev->dev, "Missing platform data\n");
+		ret = -ENODEV;
+		goto err;
+	}
+
+	mux->states = devm_kzalloc(&pdev->dev,
+				   sizeof(*mux->states) * mux->pdata->bus_count,
+				   GFP_KERNEL);
+	if (!mux->states) {
+		dev_err(&pdev->dev, "Cannot allocate states\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	mux->busses = devm_kzalloc(&pdev->dev,
+				   sizeof(mux->busses) * mux->pdata->bus_count,
+				   GFP_KERNEL);
+	if (!mux->states) {
+		dev_err(&pdev->dev, "Cannot allocate busses\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	mux->pinctrl = devm_pinctrl_get(&pdev->dev);
+	if (IS_ERR(mux->pinctrl)) {
+		ret = PTR_ERR(mux->pinctrl);
+		dev_err(&pdev->dev, "Cannot get pinctrl: %d\n", ret);
+		goto err;
+	}
+	for (i = 0; i < mux->pdata->bus_count; i++) {
+		mux->states[i] = pinctrl_lookup_state(mux->pinctrl,
+						mux->pdata->pinctrl_states[i]);
+			if (IS_ERR(mux->states[i])) {
+				ret = PTR_ERR(mux->states[i]);
+				dev_err(&pdev->dev,
+					"Cannot look up pinctrl state %s: %d\n",
+					mux->pdata->pinctrl_states[i], ret);
+				goto err;
+			}
+	}
+	if (mux->pdata->pinctrl_state_idle) {
+		mux->state_idle = pinctrl_lookup_state(mux->pinctrl,
+						mux->pdata->pinctrl_state_idle);
+		if (IS_ERR(mux->state_idle)) {
+			ret = PTR_ERR(mux->state_idle);
+			dev_err(&pdev->dev,
+				"Cannot look up pinctrl state %s: %d\n",
+				mux->pdata->pinctrl_state_idle, ret);
+			goto err;
+		}
+
+		deselect = i2c_mux_pinctrl_deselect;
+	} else {
+		deselect = NULL;
+	}
+
+	mux->parent = i2c_get_adapter(mux->pdata->parent_bus_num);
+	if (!mux->parent) {
+		dev_err(&pdev->dev, "Parent adapter (%d) not found\n",
+			mux->pdata->parent_bus_num);
+		ret = -ENODEV;
+		goto err;
+	}
+
+	for (i = 0; i < mux->pdata->bus_count; i++) {
+		u32 bus = mux->pdata->base_bus_num ?
+				(mux->pdata->base_bus_num + i) : 0;
+
+		mux->busses[i] = i2c_add_mux_adapter(mux->parent, &pdev->dev,
+						     mux, bus, i,
+						     i2c_mux_pinctrl_select,
+						     deselect);
+		if (!mux->busses[i]) {
+			ret = -ENODEV;
+			dev_err(&pdev->dev, "Failed to add adapter %d\n", i);
+			goto err_del_adapter;
+		}
+	}
+
+	return 0;
+
+err_del_adapter:
+	for (; i > 0; i--)
+		i2c_del_mux_adapter(mux->busses[i - 1]);
+	i2c_put_adapter(mux->parent);
+err:
+	return ret;
+}
+
+static int __devexit i2c_mux_pinctrl_remove(struct platform_device *pdev)
+{
+	struct i2c_mux_pinctrl *mux = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < mux->pdata->bus_count; i++)
+		i2c_del_mux_adapter(mux->busses[i]);
+
+	i2c_put_adapter(mux->parent);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id i2c_mux_pinctrl_of_match[] __devinitconst = {
+	{ .compatible = "i2c-mux-pinctrl", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, i2c_mux_pinctrl_of_match);
+#endif
+
+static struct platform_driver i2c_mux_pinctrl_driver = {
+	.driver	= {
+		.name	= "i2c-mux-pinctrl",
+		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(i2c_mux_pinctrl_of_match),
+	},
+	.probe	= i2c_mux_pinctrl_probe,
+	.remove	= __devexit_p(i2c_mux_pinctrl_remove),
+};
+module_platform_driver(i2c_mux_pinctrl_driver);
+
+MODULE_DESCRIPTION("pinctrl-based I2C multiplexer driver");
+MODULE_AUTHOR("Stephen Warren <swarren@nvidia.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:i2c-mux-pinctrl");

+ 4 - 0
drivers/infiniband/hw/cxgb4/cm.c

@@ -1593,6 +1593,10 @@ static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst,
 		struct net_device *pdev;
 
 		pdev = ip_dev_find(&init_net, peer_ip);
+		if (!pdev) {
+			err = -ENODEV;
+			goto out;
+		}
 		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
 					n, pdev, 0);
 		if (!ep->l2t)

+ 8 - 13
drivers/infiniband/hw/mlx4/main.c

@@ -140,7 +140,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	props->max_mr_size	   = ~0ull;
 	props->page_size_cap	   = dev->dev->caps.page_size_cap;
 	props->max_qp		   = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
-	props->max_qp_wr	   = dev->dev->caps.max_wqes;
+	props->max_qp_wr	   = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
 	props->max_sge		   = min(dev->dev->caps.max_sq_sg,
 					 dev->dev->caps.max_rq_sg);
 	props->max_cq		   = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
@@ -1084,12 +1084,9 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 	int total_eqs = 0;
 	int i, j, eq;
 
-	/* Init eq table */
-	ibdev->eq_table = NULL;
-	ibdev->eq_added = 0;
-
-	/* Legacy mode? */
-	if (dev->caps.comp_pool == 0)
+	/* Legacy mode or comp_pool is not large enough */
+	if (dev->caps.comp_pool == 0 ||
+	    dev->caps.num_ports > dev->caps.comp_pool)
 		return;
 
 	eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/
@@ -1135,7 +1132,10 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 {
 	int i;
-	int total_eqs;
+
+	/* no additional eqs were added */
+	if (!ibdev->eq_table)
+		return;
 
 	/* Reset the advertised EQ number */
 	ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
@@ -1148,12 +1148,7 @@ static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 		mlx4_release_eq(dev, ibdev->eq_table[i]);
 	}
 
-	total_eqs = dev->caps.num_comp_vectors + ibdev->eq_added;
-	memset(ibdev->eq_table, 0, total_eqs * sizeof(int));
 	kfree(ibdev->eq_table);
-
-	ibdev->eq_table = NULL;
-	ibdev->eq_added = 0;
 }
 
 static void *mlx4_ib_add(struct mlx4_dev *dev)

+ 8 - 0
drivers/infiniband/hw/mlx4/mlx4_ib.h

@@ -44,6 +44,14 @@
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/doorbell.h>
 
+enum {
+	MLX4_IB_SQ_MIN_WQE_SHIFT = 6,
+	MLX4_IB_MAX_HEADROOM	 = 2048
+};
+
+#define MLX4_IB_SQ_HEADROOM(shift)	((MLX4_IB_MAX_HEADROOM >> (shift)) + 1)
+#define MLX4_IB_SQ_MAX_SPARE		(MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT))
+
 struct mlx4_ib_ucontext {
 	struct ib_ucontext	ibucontext;
 	struct mlx4_uar		uar;

+ 15 - 6
drivers/infiniband/hw/mlx4/qp.c

@@ -310,8 +310,8 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 		       int is_user, int has_rq, struct mlx4_ib_qp *qp)
 {
 	/* Sanity check RQ size before proceeding */
-	if (cap->max_recv_wr  > dev->dev->caps.max_wqes  ||
-	    cap->max_recv_sge > dev->dev->caps.max_rq_sg)
+	if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
+	    cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))
 		return -EINVAL;
 
 	if (!has_rq) {
@@ -329,8 +329,17 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 		qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
 	}
 
-	cap->max_recv_wr  = qp->rq.max_post = qp->rq.wqe_cnt;
-	cap->max_recv_sge = qp->rq.max_gs;
+	/* leave userspace return values as they were, so as not to break ABI */
+	if (is_user) {
+		cap->max_recv_wr  = qp->rq.max_post = qp->rq.wqe_cnt;
+		cap->max_recv_sge = qp->rq.max_gs;
+	} else {
+		cap->max_recv_wr  = qp->rq.max_post =
+			min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
+		cap->max_recv_sge = min(qp->rq.max_gs,
+					min(dev->dev->caps.max_sq_sg,
+					    dev->dev->caps.max_rq_sg));
+	}
 
 	return 0;
 }
@@ -341,8 +350,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 	int s;
 
 	/* Sanity check SQ size before proceeding */
-	if (cap->max_send_wr	 > dev->dev->caps.max_wqes  ||
-	    cap->max_send_sge	 > dev->dev->caps.max_sq_sg ||
+	if (cap->max_send_wr  > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) ||
+	    cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
 	    cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
 	    sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
 		return -EINVAL;

+ 0 - 1
drivers/infiniband/hw/ocrdma/ocrdma.h

@@ -231,7 +231,6 @@ struct ocrdma_qp_hwq_info {
 	u32 entry_size;
 	u32 max_cnt;
 	u32 max_wqe_idx;
-	u32 free_delta;
 	u16 dbid;		/* qid, where to ring the doorbell. */
 	u32 len;
 	dma_addr_t pa;

+ 1 - 4
drivers/infiniband/hw/ocrdma/ocrdma_abi.h

@@ -101,8 +101,6 @@ struct ocrdma_create_qp_uresp {
 	u32 rsvd1;
 	u32 num_wqe_allocated;
 	u32 num_rqe_allocated;
-	u32 free_wqe_delta;
-	u32 free_rqe_delta;
 	u32 db_sq_offset;
 	u32 db_rq_offset;
 	u32 db_shift;
@@ -126,8 +124,7 @@ struct ocrdma_create_srq_uresp {
 	u32 db_rq_offset;
 	u32 db_shift;
 
-	u32 free_rqe_delta;
-	u32 rsvd2;
+	u64 rsvd2;
 	u64 rsvd3;
 } __packed;
 

+ 1 - 8
drivers/infiniband/hw/ocrdma/ocrdma_hw.c

@@ -732,7 +732,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
 		break;
 	case OCRDMA_SRQ_LIMIT_EVENT:
 		ib_evt.element.srq = &qp->srq->ibsrq;
-		ib_evt.event = IB_EVENT_QP_LAST_WQE_REACHED;
+		ib_evt.event = IB_EVENT_SRQ_LIMIT_REACHED;
 		srq_event = 1;
 		qp_event = 0;
 		break;
@@ -1990,19 +1990,12 @@ static void ocrdma_get_create_qp_rsp(struct ocrdma_create_qp_rsp *rsp,
 	max_wqe_allocated = 1 << max_wqe_allocated;
 	max_rqe_allocated = 1 << ((u16)rsp->max_wqe_rqe);
 
-	if (qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
-		qp->sq.free_delta = 0;
-		qp->rq.free_delta = 1;
-	} else
-		qp->sq.free_delta = 1;
-
 	qp->sq.max_cnt = max_wqe_allocated;
 	qp->sq.max_wqe_idx = max_wqe_allocated - 1;
 
 	if (!attrs->srq) {
 		qp->rq.max_cnt = max_rqe_allocated;
 		qp->rq.max_wqe_idx = max_rqe_allocated - 1;
-		qp->rq.free_delta = 1;
 	}
 }
 

+ 0 - 1
drivers/infiniband/hw/ocrdma/ocrdma_main.c

@@ -26,7 +26,6 @@
  *******************************************************************/
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/idr.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_user_verbs.h>

+ 0 - 5
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c

@@ -940,8 +940,6 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
 		uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
 		uresp.db_shift = 16;
 	}
-	uresp.free_wqe_delta = qp->sq.free_delta;
-	uresp.free_rqe_delta = qp->rq.free_delta;
 
 	if (qp->dpp_enabled) {
 		uresp.dpp_credit = dpp_credit_lmt;
@@ -1307,8 +1305,6 @@ static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
 		free_cnt = (q->max_cnt - q->head) + q->tail;
 	else
 		free_cnt = q->tail - q->head;
-	if (q->free_delta)
-		free_cnt -= q->free_delta;
 	return free_cnt;
 }
 
@@ -1501,7 +1497,6 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_srq *srq, struct ib_udata *udata)
 	    (srq->pd->id * srq->dev->nic_info.db_page_size);
 	uresp.db_page_size = srq->dev->nic_info.db_page_size;
 	uresp.num_rqe_allocated = srq->rq.max_cnt;
-	uresp.free_rqe_delta = 1;
 	if (srq->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
 		uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ1_OFFSET;
 		uresp.db_shift = 24;

+ 0 - 1
drivers/infiniband/hw/ocrdma/ocrdma_verbs.h

@@ -28,7 +28,6 @@
 #ifndef __OCRDMA_VERBS_H__
 #define __OCRDMA_VERBS_H__
 
-#include <linux/version.h>
 int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *,
 		     struct ib_send_wr **bad_wr);
 int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *,

+ 44 - 27
drivers/iommu/amd_iommu.c

@@ -547,26 +547,12 @@ static void iommu_poll_events(struct amd_iommu *iommu)
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
-static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head)
+static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
 {
 	struct amd_iommu_fault fault;
-	volatile u64 *raw;
-	int i;
 
 	INC_STATS_COUNTER(pri_requests);
 
-	raw = (u64 *)(iommu->ppr_log + head);
-
-	/*
-	 * Hardware bug: Interrupt may arrive before the entry is written to
-	 * memory. If this happens we need to wait for the entry to arrive.
-	 */
-	for (i = 0; i < LOOP_TIMEOUT; ++i) {
-		if (PPR_REQ_TYPE(raw[0]) != 0)
-			break;
-		udelay(1);
-	}
-
 	if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
 		pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n");
 		return;
@@ -578,12 +564,6 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head)
 	fault.tag       = PPR_TAG(raw[0]);
 	fault.flags     = PPR_FLAGS(raw[0]);
 
-	/*
-	 * To detect the hardware bug we need to clear the entry
-	 * to back to zero.
-	 */
-	raw[0] = raw[1] = 0;
-
 	atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
 }
 
@@ -595,25 +575,62 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu)
 	if (iommu->ppr_log == NULL)
 		return;
 
+	/* enable ppr interrupts again */
+	writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
 	spin_lock_irqsave(&iommu->lock, flags);
 
 	head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 	tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
 
 	while (head != tail) {
+		volatile u64 *raw;
+		u64 entry[2];
+		int i;
 
-		/* Handle PPR entry */
-		iommu_handle_ppr_entry(iommu, head);
+		raw = (u64 *)(iommu->ppr_log + head);
+
+		/*
+		 * Hardware bug: Interrupt may arrive before the entry is
+		 * written to memory. If this happens we need to wait for the
+		 * entry to arrive.
+		 */
+		for (i = 0; i < LOOP_TIMEOUT; ++i) {
+			if (PPR_REQ_TYPE(raw[0]) != 0)
+				break;
+			udelay(1);
+		}
+
+		/* Avoid memcpy function-call overhead */
+		entry[0] = raw[0];
+		entry[1] = raw[1];
 
-		/* Update and refresh ring-buffer state*/
+		/*
+		 * To detect the hardware bug we need to clear the entry
+		 * back to zero.
+		 */
+		raw[0] = raw[1] = 0UL;
+
+		/* Update head pointer of hardware ring-buffer */
 		head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
 		writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+
+		/*
+		 * Release iommu->lock because ppr-handling might need to
+		 * re-aquire it
+		 */
+		spin_unlock_irqrestore(&iommu->lock, flags);
+
+		/* Handle PPR entry */
+		iommu_handle_ppr_entry(iommu, entry);
+
+		spin_lock_irqsave(&iommu->lock, flags);
+
+		/* Refresh ring-buffer information */
+		head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 		tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
 	}
 
-	/* enable ppr interrupts again */
-	writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 

+ 5 - 8
drivers/iommu/amd_iommu_init.c

@@ -1029,6 +1029,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 	if (!iommu->dev)
 		return 1;
 
+	iommu->root_pdev = pci_get_bus_and_slot(iommu->dev->bus->number,
+						PCI_DEVFN(0, 0));
+
 	iommu->cap_ptr = h->cap_ptr;
 	iommu->pci_seg = h->pci_seg;
 	iommu->mmio_phys = h->mmio_phys;
@@ -1323,20 +1326,16 @@ static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
 {
 	int i, j;
 	u32 ioc_feature_control;
-	struct pci_dev *pdev = NULL;
+	struct pci_dev *pdev = iommu->root_pdev;
 
 	/* RD890 BIOSes may not have completely reconfigured the iommu */
-	if (!is_rd890_iommu(iommu->dev))
+	if (!is_rd890_iommu(iommu->dev) || !pdev)
 		return;
 
 	/*
 	 * First, we need to ensure that the iommu is enabled. This is
 	 * controlled by a register in the northbridge
 	 */
-	pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0));
-
-	if (!pdev)
-		return;
 
 	/* Select Northbridge indirect register 0x75 and enable writing */
 	pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
@@ -1346,8 +1345,6 @@ static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
 	if (!(ioc_feature_control & 0x1))
 		pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
 
-	pci_dev_put(pdev);
-
 	/* Restore the iommu BAR */
 	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
 			       iommu->stored_addr_lo);

+ 3 - 0
drivers/iommu/amd_iommu_types.h

@@ -481,6 +481,9 @@ struct amd_iommu {
 	/* Pointer to PCI device of this IOMMU */
 	struct pci_dev *dev;
 
+	/* Cache pdev to root device for resume quirks */
+	struct pci_dev *root_pdev;
+
 	/* physical address of MMIO space */
 	u64 mmio_phys;
 	/* virtual address of MMIO space */

+ 4 - 0
drivers/md/raid1.c

@@ -2550,6 +2550,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	err = -EINVAL;
 	spin_lock_init(&conf->device_lock);
 	rdev_for_each(rdev, mddev) {
+		struct request_queue *q;
 		int disk_idx = rdev->raid_disk;
 		if (disk_idx >= mddev->raid_disks
 		    || disk_idx < 0)
@@ -2562,6 +2563,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 		if (disk->rdev)
 			goto abort;
 		disk->rdev = rdev;
+		q = bdev_get_queue(rdev->bdev);
+		if (q->merge_bvec_fn)
+			mddev->merge_check_needed = 1;
 
 		disk->head_position = 0;
 	}

+ 4 - 0
drivers/md/raid10.c

@@ -3475,6 +3475,7 @@ static int run(struct mddev *mddev)
 
 	rdev_for_each(rdev, mddev) {
 		long long diff;
+		struct request_queue *q;
 
 		disk_idx = rdev->raid_disk;
 		if (disk_idx < 0)
@@ -3493,6 +3494,9 @@ static int run(struct mddev *mddev)
 				goto out_free_conf;
 			disk->rdev = rdev;
 		}
+		q = bdev_get_queue(rdev->bdev);
+		if (q->merge_bvec_fn)
+			mddev->merge_check_needed = 1;
 		diff = (rdev->new_data_offset - rdev->data_offset);
 		if (!mddev->reshape_backwards)
 			diff = -diff;

+ 10 - 2
drivers/mtd/ubi/debug.c

@@ -264,6 +264,9 @@ static struct dentry *dfs_rootdir;
  */
 int ubi_debugfs_init(void)
 {
+	if (!IS_ENABLED(DEBUG_FS))
+		return 0;
+
 	dfs_rootdir = debugfs_create_dir("ubi", NULL);
 	if (IS_ERR_OR_NULL(dfs_rootdir)) {
 		int err = dfs_rootdir ? -ENODEV : PTR_ERR(dfs_rootdir);
@@ -281,7 +284,8 @@ int ubi_debugfs_init(void)
  */
 void ubi_debugfs_exit(void)
 {
-	debugfs_remove(dfs_rootdir);
+	if (IS_ENABLED(DEBUG_FS))
+		debugfs_remove(dfs_rootdir);
 }
 
 /* Read an UBI debugfs file */
@@ -403,6 +407,9 @@ int ubi_debugfs_init_dev(struct ubi_device *ubi)
 	struct dentry *dent;
 	struct ubi_debug_info *d = ubi->dbg;
 
+	if (!IS_ENABLED(DEBUG_FS))
+		return 0;
+
 	n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN + 1, UBI_DFS_DIR_NAME,
 		     ubi->ubi_num);
 	if (n == UBI_DFS_DIR_LEN) {
@@ -470,5 +477,6 @@ out:
  */
 void ubi_debugfs_exit_dev(struct ubi_device *ubi)
 {
-	debugfs_remove_recursive(ubi->dbg->dfs_dir);
+	if (IS_ENABLED(DEBUG_FS))
+		debugfs_remove_recursive(ubi->dbg->dfs_dir);
 }

+ 13 - 4
drivers/mtd/ubi/wl.c

@@ -1262,11 +1262,11 @@ int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum)
 	dbg_wl("flush pending work for LEB %d:%d (%d pending works)",
 	       vol_id, lnum, ubi->works_count);
 
-	down_write(&ubi->work_sem);
 	while (found) {
 		struct ubi_work *wrk;
 		found = 0;
 
+		down_read(&ubi->work_sem);
 		spin_lock(&ubi->wl_lock);
 		list_for_each_entry(wrk, &ubi->works, list) {
 			if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) &&
@@ -1277,18 +1277,27 @@ int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum)
 				spin_unlock(&ubi->wl_lock);
 
 				err = wrk->func(ubi, wrk, 0);
-				if (err)
-					goto out;
+				if (err) {
+					up_read(&ubi->work_sem);
+					return err;
+				}
+
 				spin_lock(&ubi->wl_lock);
 				found = 1;
 				break;
 			}
 		}
 		spin_unlock(&ubi->wl_lock);
+		up_read(&ubi->work_sem);
 	}
 
-out:
+	/*
+	 * Make sure all the works which have been done in parallel are
+	 * finished.
+	 */
+	down_write(&ubi->work_sem);
 	up_write(&ubi->work_sem);
+
 	return err;
 }
 

Some files were not shown because too many files changed in this diff