Merge tag 'drm-intel-next-2013-07-26-fixed' of git://people.freedesktop.org/~danvet/drm-intel into drm-next

Neat that QA (and Ben) keeps on humming along while I'm on vacation, so
you already get the next feature pull request:
- proper eLLC support for HSW from Ben
- more interrupt refactoring
- add w/a tags where we implement them already (Damien)
- hangcheck fixes (Chris) + hangcheck stats (Mika)
- flesh out the new vm structs for ppgtt and ggtt (Ben)
- PSR for Haswell, still disabled by default (Rodrigo et al.)
- pc8+ refclock sequence code from Paulo
- more interrupt refactoring from Paulo, unifying ilk/snb with the ivb/hsw
  interrupt code
- full solution for the Haswell concurrent reg access issues (Chris)
- fix racy object accounting, used by some new leak tests
- fix sync polarity settings on ch7xxx dvo encoder
- random bits&pieces, little fixes and better debug output all over

[airlied: fix conflict with drm_mm cleanups]

* tag 'drm-intel-next-2013-07-26-fixed' of git://people.freedesktop.org/~danvet/drm-intel: (289 commits)
  drm/i915: Do not dereference NULL crtc or fb until after checking
  drm/i915: fix pnv display core clock readout out
  drm/i915: Replace open-coded offset_in_page()
  drm/i915: Retry DP aux_ch communications with a different clock after failure
  drm/i915: Add messages useful for HPD storm detection debugging (v2)
  drm/i915: dvo_ch7xxx: fix vsync polarity setting
  drm/i915: fix the racy object accounting
  drm/i915: Convert the register access tracepoint to be conditional
  drm/i915: Squash gen lookup through multiple indirections inside GT access
  drm/i915: Use the common register access functions for NOTRACE variants
  drm/i915: Use a private interface for register access within GT
  drm/i915: Colocate all GT access routines in the same file
  drm/i915: fix reference counting in i915_gem_create
  drm/i915: Use Graphics Base of Stolen Memory on all gen3+
  drm/i915: disable stolen mem for OVERLAY_NEEDS_PHYSICAL
  drm/i915: add functions to disable and restore LCPLL
  drm/i915: disable CLKOUT_DP when it's not needed
  drm/i915: extend lpt_enable_clkout_dp
  drm/i915: fix up error cleanup in i915_gem_object_bind_to_gtt
  drm/i915: Add some debug breadcrumbs to connector detection
  ...
Dave Airlie
commit 32c913e436
100 changed files with 4393 additions and 2069 deletions
  1. Documentation/ABI/testing/sysfs-driver-xen-blkback (+17 -0)
  2. Documentation/ABI/testing/sysfs-driver-xen-blkfront (+10 -0)
  3. Documentation/bcache.txt (+24 -13)
  4. MAINTAINERS (+2 -2)
  5. Makefile (+1 -1)
  6. arch/alpha/Kconfig (+1 -0)
  7. arch/alpha/include/asm/atomic.h (+65 -23)
  8. arch/alpha/include/asm/param.h (+5 -3)
  9. arch/alpha/include/asm/spinlock.h (+0 -4)
  10. arch/alpha/include/asm/unistd.h (+1 -2)
  11. arch/alpha/include/uapi/asm/param.h (+0 -7)
  12. arch/alpha/include/uapi/asm/unistd.h (+2 -0)
  13. arch/alpha/kernel/entry.S (+288 -111)
  14. arch/alpha/kernel/irq_alpha.c (+1 -1)
  15. arch/alpha/kernel/smp.c (+3 -2)
  16. arch/alpha/kernel/sys_dp264.c (+0 -8)
  17. arch/alpha/kernel/sys_marvel.c (+2 -1)
  18. arch/alpha/kernel/systbls.S (+2 -0)
  19. arch/alpha/kernel/time.c (+1 -3)
  20. arch/alpha/kernel/traps.c (+3 -5)
  21. arch/arm/mach-s3c24xx/Kconfig (+1 -1)
  22. arch/arm64/include/asm/debug-monitors.h (+0 -7)
  23. arch/arm64/include/asm/system_misc.h (+2 -1)
  24. arch/arm64/kernel/process.c (+1 -1)
  25. arch/arm64/kernel/smp.c (+8 -7)
  26. arch/arm64/mm/fault.c (+20 -26)
  27. arch/mips/Kconfig (+1 -0)
  28. arch/mips/cavium-octeon/octeon-platform.c (+3 -2)
  29. arch/mips/kernel/smp-bmips.c (+1 -1)
  30. arch/mips/kernel/traps.c (+0 -1)
  31. arch/mips/kvm/Kconfig (+0 -1)
  32. arch/mips/mm/tlbex.c (+2 -2)
  33. arch/mips/netlogic/common/irq.c (+57 -11)
  34. arch/mips/netlogic/dts/xlp_evp.dts (+2 -1)
  35. arch/mips/netlogic/dts/xlp_svp.dts (+2 -1)
  36. arch/mips/netlogic/xlp/usb-init.c (+1 -1)
  37. arch/s390/include/asm/processor.h (+9 -1)
  38. arch/s390/include/asm/switch_to.h (+2 -2)
  39. arch/s390/include/uapi/asm/ptrace.h (+1 -0)
  40. arch/s390/kernel/crash_dump.c (+47 -4)
  41. arch/s390/kernel/ptrace.c (+39 -11)
  42. arch/s390/net/bpf_jit_comp.c (+88 -25)
  43. arch/um/include/shared/frame_kern.h (+4 -4)
  44. arch/um/kernel/signal.c (+2 -2)
  45. arch/um/kernel/skas/mmu.c (+1 -1)
  46. arch/um/kernel/skas/uaccess.c (+1 -1)
  47. arch/um/os-Linux/mem.c (+189 -41)
  48. arch/um/os-Linux/signal.c (+4 -4)
  49. arch/um/os-Linux/skas/process.c (+12 -7)
  50. arch/x86/kvm/mmu.c (+7 -0)
  51. arch/x86/um/signal.c (+0 -1)
  52. drivers/acpi/acpi_memhotplug.c (+1 -0)
  53. drivers/acpi/acpica/aclocal.h (+0 -13)
  54. drivers/acpi/internal.h (+11 -0)
  55. drivers/acpi/scan.c (+9 -4)
  56. drivers/acpi/video.c (+85 -13)
  57. drivers/acpi/video_detect.c (+21 -0)
  58. drivers/block/Kconfig (+2 -2)
  59. drivers/block/drbd/drbd_actlog.c (+21 -0)
  60. drivers/block/drbd/drbd_int.h (+13 -2)
  61. drivers/block/drbd/drbd_main.c (+34 -27)
  62. drivers/block/drbd/drbd_nl.c (+143 -42)
  63. drivers/block/drbd/drbd_receiver.c (+6 -6)
  64. drivers/block/drbd/drbd_state.c (+3 -1)
  65. drivers/block/rsxx/core.c (+348 -11)
  66. drivers/block/rsxx/cregs.c (+14 -0)
  67. drivers/block/rsxx/dev.c (+12 -21)
  68. drivers/block/rsxx/dma.c (+96 -89)
  69. drivers/block/rsxx/rsxx_priv.h (+9 -1)
  70. drivers/block/xen-blkback/blkback.c (+501 -244)
  71. drivers/block/xen-blkback/common.h (+145 -2)
  72. drivers/block/xen-blkback/xenbus.c (+85 -0)
  73. drivers/block/xen-blkfront.c (+432 -100)
  74. drivers/cpufreq/cpufreq.c (+3 -1)
  75. drivers/cpufreq/cpufreq_governor.c (+0 -3)
  76. drivers/cpufreq/cpufreq_stats.c (+2 -4)
  77. drivers/cpufreq/s3c24xx-cpufreq.c (+2 -2)
  78. drivers/gpio/gpio-msm-v2.c (+1 -1)
  79. drivers/gpio/gpio-omap.c (+70 -14)
  80. drivers/gpu/drm/drm_crtc_helper.c (+11 -16)
  81. drivers/gpu/drm/i915/Makefile (+1 -0)
  82. drivers/gpu/drm/i915/dvo_ch7xxx.c (+1 -1)
  83. drivers/gpu/drm/i915/i915_debugfs.c (+163 -15)
  84. drivers/gpu/drm/i915/i915_dma.c (+34 -31)
  85. drivers/gpu/drm/i915/i915_drv.c (+11 -266)
  86. drivers/gpu/drm/i915/i915_drv.h (+158 -103)
  87. drivers/gpu/drm/i915/i915_gem.c (+117 -56)
  88. drivers/gpu/drm/i915/i915_gem_context.c (+8 -15)
  89. drivers/gpu/drm/i915/i915_gem_debug.c (+1 -1)
  90. drivers/gpu/drm/i915/i915_gem_evict.c (+19 -15)
  91. drivers/gpu/drm/i915/i915_gem_execbuffer.c (+9 -7)
  92. drivers/gpu/drm/i915/i915_gem_gtt.c (+129 -84)
  93. drivers/gpu/drm/i915/i915_gem_stolen.c (+30 -38)
  94. drivers/gpu/drm/i915/i915_gpu_error.c (+5 -3)
  95. drivers/gpu/drm/i915/i915_irq.c (+224 -328)
  96. drivers/gpu/drm/i915/i915_reg.h (+103 -1)
  97. drivers/gpu/drm/i915/i915_trace.h (+5 -3)
  98. drivers/gpu/drm/i915/intel_crt.c (+4 -0)
  99. drivers/gpu/drm/i915/intel_ddi.c (+8 -4)
  100. drivers/gpu/drm/i915/intel_display.c (+348 -125)

+ 17 - 0
Documentation/ABI/testing/sysfs-driver-xen-blkback

@@ -0,0 +1,17 @@
+What:           /sys/module/xen_blkback/parameters/max_buffer_pages
+Date:           March 2013
+KernelVersion:  3.11
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                Maximum number of free pages to keep in each block
+                backend buffer.
+
+What:           /sys/module/xen_blkback/parameters/max_persistent_grants
+Date:           March 2013
+KernelVersion:  3.11
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                Maximum number of grants to map persistently in
+                blkback. If the frontend tries to use more than
+                max_persistent_grants, the LRU kicks in and starts
+                removing 5% of max_persistent_grants every 100ms.

+ 10 - 0
Documentation/ABI/testing/sysfs-driver-xen-blkfront

@@ -0,0 +1,10 @@
+What:           /sys/module/xen_blkfront/parameters/max
+Date:           June 2013
+KernelVersion:  3.11
+Contact:        Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                Maximum number of segments that the frontend will negotiate
+                with the backend for indirect descriptors. The default value
+                is 32 - higher value means more potential throughput but more
+                memory usage. The backend picks the minimum of the frontend
+                and its default backend value.
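
For readers who want to check the negotiated value at runtime, here is a minimal
userspace sketch (not part of this patch; it assumes only the sysfs path
documented above and that the module is loaded):

  #include <stdio.h>

  int main(void)
  {
  	unsigned int max;
  	FILE *f = fopen("/sys/module/xen_blkfront/parameters/max", "r");

  	if (!f || fscanf(f, "%u", &max) != 1) {
  		perror("xen_blkfront/parameters/max");
  		return 1;
  	}
  	fclose(f);
  	/* "max" is the segment count the frontend will negotiate */
  	printf("max indirect segments: %u\n", max);
  	return 0;
  }

The same pattern works for the xen_blkback parameters documented in the
preceding file.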

+ 24 - 13
Documentation/bcache.txt

@@ -46,29 +46,33 @@ you format your backing devices and cache device at the same time, you won't
 have to manually attach:
   make-bcache -B /dev/sda /dev/sdb -C /dev/sdc
 
-To make bcache devices known to the kernel, echo them to /sys/fs/bcache/register:
+bcache-tools now ships udev rules, and bcache devices are known to the kernel
+immediately.  Without udev, you can manually register devices like this:
 
   echo /dev/sdb > /sys/fs/bcache/register
   echo /dev/sdc > /sys/fs/bcache/register
 
-To register your bcache devices automatically, you could add something like
-this to an init script:
+Registering the backing device makes the bcache device show up in /dev; you can
+now format it and use it as normal. But the first time using a new bcache
+device, it'll be running in passthrough mode until you attach it to a cache.
+See the section on attaching.
 
-  echo /dev/sd* > /sys/fs/bcache/register_quiet
+The devices show up as:
 
-It'll look for bcache superblocks and ignore everything that doesn't have one.
+  /dev/bcache<N>
 
-Registering the backing device makes the bcache show up in /dev; you can now
-format it and use it as normal. But the first time using a new bcache device,
-it'll be running in passthrough mode until you attach it to a cache. See the
-section on attaching.
+As well as (with udev):
 
-The devices show up at /dev/bcacheN, and can be controlled via sysfs from
-/sys/block/bcacheN/bcache:
+  /dev/bcache/by-uuid/<uuid>
+  /dev/bcache/by-label/<label>
+
+To get started:
 
   mkfs.ext4 /dev/bcache0
   mount /dev/bcache0 /mnt
 
+You can control bcache devices through sysfs at /sys/block/bcache<N>/bcache .
+
 Cache devices are managed as sets; multiple caches per set isn't supported yet
 but will allow for mirroring of metadata and dirty data in the future. Your new
 cache set shows up as /sys/fs/bcache/<UUID>
@@ -80,11 +84,11 @@ must be attached to your cache set to enable caching. Attaching a backing
 device to a cache set is done thusly, with the UUID of the cache set in
 /sys/fs/bcache:
 
-  echo <UUID> > /sys/block/bcache0/bcache/attach
+  echo <CSET-UUID> > /sys/block/bcache0/bcache/attach
 
 This only has to be done once. The next time you reboot, just reregister all
 your bcache devices. If a backing device has data in a cache somewhere, the
-/dev/bcache# device won't be created until the cache shows up - particularly
+/dev/bcache<N> device won't be created until the cache shows up - particularly
 important if you have writeback caching turned on.
 
 If you're booting up and your cache device is gone and never coming back, you
@@ -191,6 +195,9 @@ want for getting the best possible numbers when benchmarking.
 
 SYSFS - BACKING DEVICE:
 
+Available at /sys/block/<bdev>/bcache, /sys/block/bcache*/bcache and
+(if attached) /sys/fs/bcache/<cset-uuid>/bdev*
+
 attach
   Echo the UUID of a cache set to this file to enable caching.
 
@@ -300,6 +307,8 @@ cache_readaheads
 
 SYSFS - CACHE SET:
 
+Available at /sys/fs/bcache/<cset-uuid>
+
 average_key_size
   Average data per key in the btree.
 
@@ -390,6 +399,8 @@ trigger_gc
 
 SYSFS - CACHE DEVICE:
 
+Available at /sys/block/<cdev>/bcache
+
 block_size
   Minimum granularity of writes - should match hardware sector size.
 

+ 2 - 2
MAINTAINERS

@@ -1642,7 +1642,7 @@ S:	Maintained
 F:	drivers/net/hamradio/baycom*
 
 BCACHE (BLOCK LAYER CACHE)
-M:	Kent Overstreet <koverstreet@google.com>
+M:	Kent Overstreet <kmo@daterainc.com>
 L:	linux-bcache@vger.kernel.org
 W:	http://bcache.evilpiepirate.org
 S:	Maintained
@@ -3346,7 +3346,7 @@ F:	Documentation/firmware_class/
 F:	drivers/base/firmware*.c
 F:	include/linux/firmware.h
 
-FLASHSYSTEM DRIVER (IBM FlashSystem 70/80 PCI SSD Flash Card)
+FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash Card)
 M:	Joshua Morris <josh.h.morris@us.ibm.com>
 M:	Philip Kelleher <pjk1939@linux.vnet.ibm.com>
 S:	Maintained

+ 1 - 1
Makefile

@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Linux for Workgroups
 
 # *DOCUMENTATION*

+ 1 - 0
arch/alpha/Kconfig

@@ -15,6 +15,7 @@ config ALPHA
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_STRNCPY_FROM_USER

+ 65 - 23
arch/alpha/include/asm/atomic.h

@@ -186,17 +186,24 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v)
  */
 static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 {
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
+	int c, new, old;
+	smp_mb();
+	__asm__ __volatile__(
+	"1:	ldl_l	%[old],%[mem]\n"
+	"	cmpeq	%[old],%[u],%[c]\n"
+	"	addl	%[old],%[a],%[new]\n"
+	"	bne	%[c],2f\n"
+	"	stl_c	%[new],%[mem]\n"
+	"	beq	%[new],3f\n"
+	"2:\n"
+	".subsection 2\n"
+	"3:	br	1b\n"
+	".previous"
+	: [old] "=&r"(old), [new] "=&r"(new), [c] "=&r"(c)
+	: [mem] "m"(*v), [a] "rI"(a), [u] "rI"((long)u)
+	: "memory");
+	smp_mb();
+	return old;
 }
 
 
@@ -207,21 +214,56 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
  * @u: ...unless v is equal to u.
  *
  * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
+ * Returns true iff @v was not @u.
  */
 static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 {
-	long c, old;
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
+	long c, tmp;
+	smp_mb();
+	__asm__ __volatile__(
+	"1:	ldq_l	%[tmp],%[mem]\n"
+	"	cmpeq	%[tmp],%[u],%[c]\n"
+	"	addq	%[tmp],%[a],%[tmp]\n"
+	"	bne	%[c],2f\n"
+	"	stq_c	%[tmp],%[mem]\n"
+	"	beq	%[tmp],3f\n"
+	"2:\n"
+	".subsection 2\n"
+	"3:	br	1b\n"
+	".previous"
+	: [tmp] "=&r"(tmp), [c] "=&r"(c)
+	: [mem] "m"(*v), [a] "rI"(a), [u] "rI"(u)
+	: "memory");
+	smp_mb();
+	return !c;
+}
+
+/*
+ * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+static inline long atomic64_dec_if_positive(atomic64_t *v)
+{
+	long old, tmp;
+	smp_mb();
+	__asm__ __volatile__(
+	"1:	ldq_l	%[old],%[mem]\n"
+	"	subq	%[old],1,%[tmp]\n"
+	"	ble	%[old],2f\n"
+	"	stq_c	%[tmp],%[mem]\n"
+	"	beq	%[tmp],3f\n"
+	"2:\n"
+	".subsection 2\n"
+	"3:	br	1b\n"
+	".previous"
+	: [old] "=&r"(old), [tmp] "=&r"(tmp)
+	: [mem] "m"(*v)
+	: "memory");
+	smp_mb();
+	return old - 1;
 }
 
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
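
The ll/sc loops above are easiest to check against the generic cmpxchg
formulation of the same operation. For reference, a sketch of the equivalent
semantics of atomic64_dec_if_positive() (this is not the alpha code, just the
portable cmpxchg-loop reading of it, built on atomic64_read()/atomic64_cmpxchg()):

  /* Return the old value minus 1, decrementing only when the old
   * value was strictly positive. */
  static inline long atomic64_dec_if_positive_ref(atomic64_t *v)
  {
  	long c = atomic64_read(v);
  	long old;

  	while (c > 0) {
  		old = atomic64_cmpxchg(v, c, c - 1);
  		if (old == c)
  			break;		/* decremented; c holds the old value */
  		c = old;		/* lost the race, retry */
  	}
  	return c - 1;
  }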

+ 5 - 3
arch/alpha/include/asm/param.h

@@ -3,7 +3,9 @@
 
 #include <uapi/asm/param.h>
 
-#define HZ		CONFIG_HZ
-#define USER_HZ		HZ
-# define CLOCKS_PER_SEC	HZ	/* frequency at which times() counts */
+# undef HZ
+# define HZ		CONFIG_HZ
+# define USER_HZ	1024
+# define CLOCKS_PER_SEC	USER_HZ	/* frequency at which times() counts */
+
 #endif /* _ASM_ALPHA_PARAM_H */
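
The point of this change is that the kernel tick rate (HZ = CONFIG_HZ) may now
differ from the tick rate userspace was compiled against, so USER_HZ stays
pinned at the historical 1024. A small POSIX sketch (nothing alpha-specific
assumed beyond that 1024 value) of where the constant surfaces in userspace:

  #include <stdio.h>
  #include <unistd.h>
  #include <sys/times.h>

  int main(void)
  {
  	long user_hz = sysconf(_SC_CLK_TCK);	/* 1024 on alpha */
  	struct tms t;
  	clock_t ticks = times(&t);		/* counts at USER_HZ */

  	printf("%.2f seconds since an arbitrary epoch\n",
  	       (double)ticks / user_hz);
  	return 0;
  }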

+ 0 - 4
arch/alpha/include/asm/spinlock.h

@@ -168,8 +168,4 @@ static inline void arch_write_unlock(arch_rwlock_t * lock)
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* _ALPHA_SPINLOCK_H */

+ 1 - 2
arch/alpha/include/asm/unistd.h

@@ -3,8 +3,7 @@
 
 #include <uapi/asm/unistd.h>
 
-
-#define NR_SYSCALLS			506
+#define NR_SYSCALLS			508
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64

+ 0 - 7
arch/alpha/include/uapi/asm/param.h

@@ -1,13 +1,7 @@
 #ifndef _UAPI_ASM_ALPHA_PARAM_H
 #define _UAPI_ASM_ALPHA_PARAM_H
 
-/* ??? Gross.  I don't want to parameterize this, and supposedly the
-   hardware ignores reprogramming.  We also need userland buy-in to the 
-   change in HZ, since this is visible in the wait4 resources etc.  */
-
-#ifndef __KERNEL__
 #define HZ		1024
-#endif
 
 #define EXEC_PAGESIZE	8192
 
@@ -17,5 +11,4 @@
 
 #define MAXHOSTNAMELEN	64	/* max length of hostname */
 
-
 #endif /* _UAPI_ASM_ALPHA_PARAM_H */

+ 2 - 0
arch/alpha/include/uapi/asm/unistd.h

@@ -467,5 +467,7 @@
 #define __NR_sendmmsg			503
 #define __NR_process_vm_readv		504
 #define __NR_process_vm_writev		505
+#define __NR_kcmp			506
+#define __NR_finit_module		507
 
 #endif /* _UAPI_ALPHA_UNISTD_H */

+ 288 - 111
arch/alpha/kernel/entry.S

@@ -12,11 +12,32 @@
 
 	.text
 	.set noat
+	.cfi_sections	.debug_frame
 
 /* Stack offsets.  */
 #define SP_OFF			184
 #define SWITCH_STACK_SIZE	320
 
+.macro	CFI_START_OSF_FRAME	func
+	.align	4
+	.globl	\func
+	.type	\func,@function
+\func:
+	.cfi_startproc simple
+	.cfi_return_column 64
+	.cfi_def_cfa	$sp, 48
+	.cfi_rel_offset	64, 8
+	.cfi_rel_offset	$gp, 16
+	.cfi_rel_offset	$16, 24
+	.cfi_rel_offset	$17, 32
+	.cfi_rel_offset	$18, 40
+.endm
+
+.macro	CFI_END_OSF_FRAME	func
+	.cfi_endproc
+	.size	\func, . - \func
+.endm
+
 /*
  * This defines the normal kernel pt-regs layout.
  *
@@ -27,100 +48,158 @@
  * the palcode-provided values are available to the signal handler.
  */
 
-#define SAVE_ALL			\
-	subq	$sp, SP_OFF, $sp;	\
-	stq	$0, 0($sp);		\
-	stq	$1, 8($sp);		\
-	stq	$2, 16($sp);		\
-	stq	$3, 24($sp);		\
-	stq	$4, 32($sp);		\
-	stq	$28, 144($sp);		\
-	lda	$2, alpha_mv;		\
-	stq	$5, 40($sp);		\
-	stq	$6, 48($sp);		\
-	stq	$7, 56($sp);		\
-	stq	$8, 64($sp);		\
-	stq	$19, 72($sp);		\
-	stq	$20, 80($sp);		\
-	stq	$21, 88($sp);		\
-	ldq	$2, HAE_CACHE($2);	\
-	stq	$22, 96($sp);		\
-	stq	$23, 104($sp);		\
-	stq	$24, 112($sp);		\
-	stq	$25, 120($sp);		\
-	stq	$26, 128($sp);		\
-	stq	$27, 136($sp);		\
-	stq	$2, 152($sp);		\
-	stq	$16, 160($sp);		\
-	stq	$17, 168($sp);		\
+.macro	SAVE_ALL
+	subq	$sp, SP_OFF, $sp
+	.cfi_adjust_cfa_offset	SP_OFF
+	stq	$0, 0($sp)
+	stq	$1, 8($sp)
+	stq	$2, 16($sp)
+	stq	$3, 24($sp)
+	stq	$4, 32($sp)
+	stq	$28, 144($sp)
+	.cfi_rel_offset	$0, 0
+	.cfi_rel_offset $1, 8
+	.cfi_rel_offset	$2, 16
+	.cfi_rel_offset	$3, 24
+	.cfi_rel_offset	$4, 32
+	.cfi_rel_offset	$28, 144
+	lda	$2, alpha_mv
+	stq	$5, 40($sp)
+	stq	$6, 48($sp)
+	stq	$7, 56($sp)
+	stq	$8, 64($sp)
+	stq	$19, 72($sp)
+	stq	$20, 80($sp)
+	stq	$21, 88($sp)
+	ldq	$2, HAE_CACHE($2)
+	stq	$22, 96($sp)
+	stq	$23, 104($sp)
+	stq	$24, 112($sp)
+	stq	$25, 120($sp)
+	stq	$26, 128($sp)
+	stq	$27, 136($sp)
+	stq	$2, 152($sp)
+	stq	$16, 160($sp)
+	stq	$17, 168($sp)
 	stq	$18, 176($sp)
+	.cfi_rel_offset	$5, 40
+	.cfi_rel_offset	$6, 48
+	.cfi_rel_offset	$7, 56
+	.cfi_rel_offset	$8, 64
+	.cfi_rel_offset $19, 72
+	.cfi_rel_offset	$20, 80
+	.cfi_rel_offset	$21, 88
+	.cfi_rel_offset $22, 96
+	.cfi_rel_offset	$23, 104
+	.cfi_rel_offset	$24, 112
+	.cfi_rel_offset	$25, 120
+	.cfi_rel_offset	$26, 128
+	.cfi_rel_offset	$27, 136
+.endm
 
-#define RESTORE_ALL			\
-	lda	$19, alpha_mv;		\
-	ldq	$0, 0($sp);		\
-	ldq	$1, 8($sp);		\
-	ldq	$2, 16($sp);		\
-	ldq	$3, 24($sp);		\
-	ldq	$21, 152($sp);		\
-	ldq	$20, HAE_CACHE($19);	\
-	ldq	$4, 32($sp);		\
-	ldq	$5, 40($sp);		\
-	ldq	$6, 48($sp);		\
-	ldq	$7, 56($sp);		\
-	subq	$20, $21, $20;		\
-	ldq	$8, 64($sp);		\
-	beq	$20, 99f;		\
-	ldq	$20, HAE_REG($19);	\
-	stq	$21, HAE_CACHE($19);	\
-	stq	$21, 0($20);		\
-99:;					\
-	ldq	$19, 72($sp);		\
-	ldq	$20, 80($sp);		\
-	ldq	$21, 88($sp);		\
-	ldq	$22, 96($sp);		\
-	ldq	$23, 104($sp);		\
-	ldq	$24, 112($sp);		\
-	ldq	$25, 120($sp);		\
-	ldq	$26, 128($sp);		\
-	ldq	$27, 136($sp);		\
-	ldq	$28, 144($sp);		\
+.macro	RESTORE_ALL
+	lda	$19, alpha_mv
+	ldq	$0, 0($sp)
+	ldq	$1, 8($sp)
+	ldq	$2, 16($sp)
+	ldq	$3, 24($sp)
+	ldq	$21, 152($sp)
+	ldq	$20, HAE_CACHE($19)
+	ldq	$4, 32($sp)
+	ldq	$5, 40($sp)
+	ldq	$6, 48($sp)
+	ldq	$7, 56($sp)
+	subq	$20, $21, $20
+	ldq	$8, 64($sp)
+	beq	$20, 99f
+	ldq	$20, HAE_REG($19)
+	stq	$21, HAE_CACHE($19)
+	stq	$21, 0($20)
+99:	ldq	$19, 72($sp)
+	ldq	$20, 80($sp)
+	ldq	$21, 88($sp)
+	ldq	$22, 96($sp)
+	ldq	$23, 104($sp)
+	ldq	$24, 112($sp)
+	ldq	$25, 120($sp)
+	ldq	$26, 128($sp)
+	ldq	$27, 136($sp)
+	ldq	$28, 144($sp)
 	addq	$sp, SP_OFF, $sp
+	.cfi_restore	$0
+	.cfi_restore	$1
+	.cfi_restore	$2
+	.cfi_restore	$3
+	.cfi_restore	$4
+	.cfi_restore	$5
+	.cfi_restore	$6
+	.cfi_restore	$7
+	.cfi_restore	$8
+	.cfi_restore	$19
+	.cfi_restore	$20
+	.cfi_restore	$21
+	.cfi_restore	$22
+	.cfi_restore	$23
+	.cfi_restore	$24
+	.cfi_restore	$25
+	.cfi_restore	$26
+	.cfi_restore	$27
+	.cfi_restore	$28
+	.cfi_adjust_cfa_offset	-SP_OFF
+.endm
+
+.macro	DO_SWITCH_STACK
+	bsr	$1, do_switch_stack
+	.cfi_adjust_cfa_offset	SWITCH_STACK_SIZE
+	.cfi_rel_offset	$9, 0
+	.cfi_rel_offset	$10, 8
+	.cfi_rel_offset	$11, 16
+	.cfi_rel_offset	$12, 24
+	.cfi_rel_offset	$13, 32
+	.cfi_rel_offset	$14, 40
+	.cfi_rel_offset	$15, 48
+	/* We don't really care about the FP registers for debugging.  */
+.endm
+
+.macro	UNDO_SWITCH_STACK
+	bsr	$1, undo_switch_stack
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_adjust_cfa_offset	-SWITCH_STACK_SIZE
+.endm
 
 /*
  * Non-syscall kernel entry points.
  */
 
-	.align	4
-	.globl	entInt
-	.ent	entInt
-entInt:
+CFI_START_OSF_FRAME entInt
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $19
 	jsr	$31, do_entInt
-.end entInt
+CFI_END_OSF_FRAME entInt
 
-	.align	4
-	.globl	entArith
-	.ent	entArith
-entArith:
+CFI_START_OSF_FRAME entArith
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $18
 	jsr	$31, do_entArith
-.end entArith
+CFI_END_OSF_FRAME entArith
 
-	.align	4
-	.globl	entMM
-	.ent	entMM
-entMM:
+CFI_START_OSF_FRAME entMM
 	SAVE_ALL
 /* save $9 - $15 so the inline exception code can manipulate them.  */
 	subq	$sp, 56, $sp
+	.cfi_adjust_cfa_offset	56
 	stq	$9, 0($sp)
 	stq	$10, 8($sp)
 	stq	$11, 16($sp)
@@ -128,6 +207,13 @@ entMM:
 	stq	$13, 32($sp)
 	stq	$14, 40($sp)
 	stq	$15, 48($sp)
+	.cfi_rel_offset	$9, 0
+	.cfi_rel_offset	$10, 8
+	.cfi_rel_offset	$11, 16
+	.cfi_rel_offset	$12, 24
+	.cfi_rel_offset	$13, 32
+	.cfi_rel_offset	$14, 40
+	.cfi_rel_offset	$15, 48
 	addq	$sp, 56, $19
 /* handle the fault */
 	lda	$8, 0x3fff
@@ -142,28 +228,33 @@ entMM:
 	ldq	$14, 40($sp)
 	ldq	$15, 48($sp)
 	addq	$sp, 56, $sp
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_adjust_cfa_offset	-56
 /* finish up the syscall as normal.  */
 	br	ret_from_sys_call
-.end entMM
+CFI_END_OSF_FRAME entMM
 
-	.align	4
-	.globl	entIF
-	.ent	entIF
-entIF:
+CFI_START_OSF_FRAME entIF
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $17
 	jsr	$31, do_entIF
-.end entIF
+CFI_END_OSF_FRAME entIF
 
-	.align	4
-	.globl	entUna
-	.ent	entUna
-entUna:
+CFI_START_OSF_FRAME entUna
 	lda	$sp, -256($sp)
+	.cfi_adjust_cfa_offset	256
 	stq	$0, 0($sp)
+	.cfi_rel_offset	$0, 0
+	.cfi_remember_state
 	ldq	$0, 256($sp)	/* get PS */
 	stq	$1, 8($sp)
 	stq	$2, 16($sp)
@@ -195,6 +286,32 @@ entUna:
 	stq	$28, 224($sp)
 	mov	$sp, $19
 	stq	$gp, 232($sp)
+	.cfi_rel_offset	$1, 1*8
+	.cfi_rel_offset	$2, 2*8
+	.cfi_rel_offset	$3, 3*8
+	.cfi_rel_offset	$4, 4*8
+	.cfi_rel_offset	$5, 5*8
+	.cfi_rel_offset	$6, 6*8
+	.cfi_rel_offset	$7, 7*8
+	.cfi_rel_offset	$8, 8*8
+	.cfi_rel_offset	$9, 9*8
+	.cfi_rel_offset	$10, 10*8
+	.cfi_rel_offset	$11, 11*8
+	.cfi_rel_offset	$12, 12*8
+	.cfi_rel_offset	$13, 13*8
+	.cfi_rel_offset	$14, 14*8
+	.cfi_rel_offset	$15, 15*8
+	.cfi_rel_offset	$19, 19*8
+	.cfi_rel_offset	$20, 20*8
+	.cfi_rel_offset	$21, 21*8
+	.cfi_rel_offset	$22, 22*8
+	.cfi_rel_offset	$23, 23*8
+	.cfi_rel_offset	$24, 24*8
+	.cfi_rel_offset	$25, 25*8
+	.cfi_rel_offset	$26, 26*8
+	.cfi_rel_offset	$27, 27*8
+	.cfi_rel_offset	$28, 28*8
+	.cfi_rel_offset	$29, 29*8
 	lda	$8, 0x3fff
 	stq	$31, 248($sp)
 	bic	$sp, $8, $8
@@ -228,16 +345,45 @@ entUna:
 	ldq	$28, 224($sp)
 	ldq	$gp, 232($sp)
 	lda	$sp, 256($sp)
+	.cfi_restore	$1
+	.cfi_restore	$2
+	.cfi_restore	$3
+	.cfi_restore	$4
+	.cfi_restore	$5
+	.cfi_restore	$6
+	.cfi_restore	$7
+	.cfi_restore	$8
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_restore	$19
+	.cfi_restore	$20
+	.cfi_restore	$21
+	.cfi_restore	$22
+	.cfi_restore	$23
+	.cfi_restore	$24
+	.cfi_restore	$25
+	.cfi_restore	$26
+	.cfi_restore	$27
+	.cfi_restore	$28
+	.cfi_restore	$29
+	.cfi_adjust_cfa_offset	-256
 	call_pal PAL_rti
-.end entUna
 
 	.align	4
-	.ent	entUnaUser
 entUnaUser:
+	.cfi_restore_state
 	ldq	$0, 0($sp)	/* restore original $0 */
 	lda	$sp, 256($sp)	/* pop entUna's stack frame */
+	.cfi_restore	$0
+	.cfi_adjust_cfa_offset	-256
 	SAVE_ALL		/* setup normal kernel stack */
 	lda	$sp, -56($sp)
+	.cfi_adjust_cfa_offset	56
 	stq	$9, 0($sp)
 	stq	$10, 8($sp)
 	stq	$11, 16($sp)
@@ -245,6 +391,13 @@ entUnaUser:
 	stq	$13, 32($sp)
 	stq	$14, 40($sp)
 	stq	$15, 48($sp)
+	.cfi_rel_offset	$9, 0
+	.cfi_rel_offset	$10, 8
+	.cfi_rel_offset	$11, 16
+	.cfi_rel_offset	$12, 24
+	.cfi_rel_offset	$13, 32
+	.cfi_rel_offset	$14, 40
+	.cfi_rel_offset	$15, 48
 	lda	$8, 0x3fff
 	addq	$sp, 56, $19
 	bic	$sp, $8, $8
@@ -257,20 +410,25 @@ entUnaUser:
 	ldq	$14, 40($sp)
 	ldq	$15, 48($sp)
 	lda	$sp, 56($sp)
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_adjust_cfa_offset	-56
 	br	ret_from_sys_call
-.end entUnaUser
+CFI_END_OSF_FRAME entUna
 
-	.align	4
-	.globl	entDbg
-	.ent	entDbg
-entDbg:
+CFI_START_OSF_FRAME entDbg
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $16
 	jsr	$31, do_entDbg
-.end entDbg
+CFI_END_OSF_FRAME entDbg
 
 /*
  * The system call entry point is special.  Most importantly, it looks
@@ -285,8 +443,12 @@ entDbg:
 
 	.align	4
 	.globl	entSys
-	.globl	ret_from_sys_call
-	.ent	entSys
+	.type	entSys, @function
+	.cfi_startproc simple
+	.cfi_return_column 64
+	.cfi_def_cfa	$sp, 48
+	.cfi_rel_offset	64, 8
+	.cfi_rel_offset	$gp, 16
 entSys:
 	SAVE_ALL
 	lda	$8, 0x3fff
@@ -300,6 +462,9 @@ entSys:
 	stq	$17, SP_OFF+32($sp)
 	s8addq	$0, $5, $5
 	stq	$18, SP_OFF+40($sp)
+	.cfi_rel_offset	$16, SP_OFF+24
+	.cfi_rel_offset	$17, SP_OFF+32
+	.cfi_rel_offset	$18, SP_OFF+40
 	blbs	$3, strace
 	beq	$4, 1f
 	ldq	$27, 0($5)
@@ -310,6 +475,7 @@ entSys:
 	stq	$31, 72($sp)		/* a3=0 => no error */
 
 	.align	4
+	.globl	ret_from_sys_call
 ret_from_sys_call:
 	cmovne	$26, 0, $18		/* $18 = 0 => non-restartable */
 	ldq	$0, SP_OFF($sp)
@@ -324,10 +490,12 @@ ret_to_user:
 	and	$17, _TIF_WORK_MASK, $2
 	bne	$2, work_pending
 restore_all:
+	.cfi_remember_state
 	RESTORE_ALL
 	call_pal PAL_rti
 
 ret_to_kernel:
+	.cfi_restore_state
 	lda	$16, 7
 	call_pal PAL_swpipl
 	br restore_all
@@ -356,7 +524,6 @@ $ret_success:
 	stq	$0, 0($sp)
 	stq	$31, 72($sp)	/* a3=0 => no error */
 	br	ret_from_sys_call
-.end entSys
 
 /*
  * Do all cleanup when returning from all interrupts and system calls.
@@ -370,7 +537,7 @@ $ret_success:
  */
 
 	.align	4
-	.ent	work_pending
+	.type	work_pending, @function
 work_pending:
 	and	$17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2
 	bne	$2, $work_notifysig
@@ -387,23 +554,22 @@ $work_resched:
 
 $work_notifysig:
 	mov	$sp, $16
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	jsr	$26, do_work_pending
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 	br	restore_all
-.end work_pending
 
 /*
  * PTRACE syscall handler
  */
 
 	.align	4
-	.ent	strace
+	.type	strace, @function
 strace:
 	/* set up signal stack, call syscall_trace */
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	jsr	$26, syscall_trace_enter /* returns the syscall number */
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 
 	/* get the arguments back.. */
 	ldq	$16, SP_OFF+24($sp)
@@ -431,9 +597,9 @@ ret_from_straced:
 $strace_success:
 	stq	$0, 0($sp)		/* save return value */
 
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	jsr	$26, syscall_trace_leave
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 	br	$31, ret_from_sys_call
 
 	.align	3
@@ -447,26 +613,31 @@ $strace_error:
 	stq	$0, 0($sp)
 	stq	$1, 72($sp)	/* a3 for return */
 
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	mov	$18, $9		/* save old syscall number */
 	mov	$19, $10	/* save old a3 */
 	jsr	$26, syscall_trace_leave
 	mov	$9, $18
 	mov	$10, $19
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 
 	mov	$31, $26	/* tell "ret_from_sys_call" we can restart */
 	br	ret_from_sys_call
-.end strace
+CFI_END_OSF_FRAME entSys
 
 /*
  * Save and restore the switch stack -- aka the balance of the user context.
  */
 
 	.align	4
-	.ent	do_switch_stack
+	.type	do_switch_stack, @function
+	.cfi_startproc simple
+	.cfi_return_column 64
+	.cfi_def_cfa $sp, 0
+	.cfi_register 64, $1
 do_switch_stack:
 	lda	$sp, -SWITCH_STACK_SIZE($sp)
+	.cfi_adjust_cfa_offset	SWITCH_STACK_SIZE
 	stq	$9, 0($sp)
 	stq	$10, 8($sp)
 	stq	$11, 16($sp)
@@ -510,10 +681,14 @@ do_switch_stack:
 	stt	$f0, 312($sp)	# save fpcr in slot of $f31
 	ldt	$f0, 64($sp)	# dont let "do_switch_stack" change fp state.
 	ret	$31, ($1), 1
-.end do_switch_stack
+	.cfi_endproc
+	.size	do_switch_stack, .-do_switch_stack
 
 	.align	4
-	.ent	undo_switch_stack
+	.type	undo_switch_stack, @function
+	.cfi_startproc simple
+	.cfi_def_cfa $sp, 0
+	.cfi_register 64, $1
 undo_switch_stack:
 	ldq	$9, 0($sp)
 	ldq	$10, 8($sp)
@@ -558,7 +733,8 @@ undo_switch_stack:
 	ldt	$f30, 304($sp)
 	lda	$sp, SWITCH_STACK_SIZE($sp)
 	ret	$31, ($1), 1
-.end undo_switch_stack
+	.cfi_endproc
+	.size	undo_switch_stack, .-undo_switch_stack
 
 /*
  * The meat of the context switch code.
@@ -566,17 +742,18 @@ undo_switch_stack:
 
 	.align	4
 	.globl	alpha_switch_to
-	.ent	alpha_switch_to
+	.type	alpha_switch_to, @function
+	.cfi_startproc
 alpha_switch_to:
-	.prologue 0
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	call_pal PAL_swpctx
 	lda	$8, 0x3fff
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 	bic	$sp, $8, $8
 	mov	$17, $0
 	ret
-.end alpha_switch_to
+	.cfi_endproc
+	.size	alpha_switch_to, .-alpha_switch_to
 
 /*
  * New processes begin life here.

+ 1 - 1
arch/alpha/kernel/irq_alpha.c

@@ -236,7 +236,7 @@ void __init
 init_rtc_irq(void)
 {
 	irq_set_chip_and_handler_name(RTC_IRQ, &dummy_irq_chip,
-				      handle_simple_irq, "RTC");
+				      handle_percpu_irq, "RTC");
 	setup_irq(RTC_IRQ, &timer_irqaction);
 }
 

+ 3 - 2
arch/alpha/kernel/smp.c

@@ -264,9 +264,10 @@ recv_secondary_console_msg(void)
 		if (cnt <= 0 || cnt >= 80)
 			strcpy(buf, "<<< BOGUS MSG >>>");
 		else {
-			cp1 = (char *) &cpu->ipc_buffer[11];
+			cp1 = (char *) &cpu->ipc_buffer[1];
 			cp2 = buf;
-			strcpy(cp2, cp1);
+			memcpy(cp2, cp1, cnt);
+			cp2[cnt] = '\0';
 			
 			while ((cp2 = strchr(cp2, '\r')) != 0) {
 				*cp2 = ' ';

+ 0 - 8
arch/alpha/kernel/sys_dp264.c

@@ -190,9 +190,6 @@ static struct irq_chip clipper_irq_type = {
 static void
 dp264_device_interrupt(unsigned long vector)
 {
-#if 1
-	printk("dp264_device_interrupt: NOT IMPLEMENTED YET!!\n");
-#else
 	unsigned long pld;
 	unsigned int i;
 
@@ -210,12 +207,7 @@ dp264_device_interrupt(unsigned long vector)
 			isa_device_interrupt(vector);
 		else
 			handle_irq(16 + i);
-#if 0
-		TSUNAMI_cchip->dir0.csr = 1UL << i; mb();
-		tmp = TSUNAMI_cchip->dir0.csr;
-#endif
 	}
-#endif
 }
 
 static void 

+ 2 - 1
arch/alpha/kernel/sys_marvel.c

@@ -317,8 +317,9 @@ marvel_init_irq(void)
 }
 
 static int 
-marvel_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
+marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin)
 {
+	struct pci_dev *dev = (struct pci_dev *)cdev;
 	struct pci_controller *hose = dev->sysdata;
 	struct io7_port *io7_port = hose->sysdata;
 	struct io7 *io7 = io7_port->io7;

+ 2 - 0
arch/alpha/kernel/systbls.S

@@ -524,6 +524,8 @@ sys_call_table:
 	.quad sys_sendmmsg
 	.quad sys_process_vm_readv
 	.quad sys_process_vm_writev		/* 505 */
+	.quad sys_kcmp
+	.quad sys_finit_module
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object

+ 1 - 3
arch/alpha/kernel/time.c

@@ -105,9 +105,7 @@ void arch_irq_work_raise(void)
 
 static inline __u32 rpcc(void)
 {
-    __u32 result;
-    asm volatile ("rpcc %0" : "=r"(result));
-    return result;
+	return __builtin_alpha_rpcc();
 }
 
 int update_persistent_clock(struct timespec now)

+ 3 - 5
arch/alpha/kernel/traps.c

@@ -66,8 +66,8 @@ dik_show_regs(struct pt_regs *regs, unsigned long *r9_15)
 {
 	printk("pc = [<%016lx>]  ra = [<%016lx>]  ps = %04lx    %s\n",
 	       regs->pc, regs->r26, regs->ps, print_tainted());
-	print_symbol("pc is at %s\n", regs->pc);
-	print_symbol("ra is at %s\n", regs->r26 );
+	printk("pc is at %pSR\n", (void *)regs->pc);
+	printk("ra is at %pSR\n", (void *)regs->r26);
 	printk("v0 = %016lx  t0 = %016lx  t1 = %016lx\n",
 	       regs->r0, regs->r1, regs->r2);
 	printk("t2 = %016lx  t3 = %016lx  t4 = %016lx\n",
@@ -132,9 +132,7 @@ dik_show_trace(unsigned long *sp)
 			continue;
 		if (tmp >= (unsigned long) &_etext)
 			continue;
-		printk("[<%lx>]", tmp);
-		print_symbol(" %s", tmp);
-		printk("\n");
+		printk("[<%lx>] %pSR\n", tmp, (void *)tmp);
 		if (i > 40) {
 			printk(" ...");
 			break;

+ 1 - 1
arch/arm/mach-s3c24xx/Kconfig

@@ -208,7 +208,7 @@ config S3C24XX_GPIO_EXTRA128
 
 config S3C24XX_PLL
 	bool "Support CPUfreq changing of PLL frequency (EXPERIMENTAL)"
-	depends on ARM_S3C24XX
+	depends on ARM_S3C24XX_CPUFREQ
 	help
 	  Compile in support for changing the PLL frequency from the
 	  S3C24XX series CPUfreq driver. The PLL takes time to settle

+ 0 - 7
arch/arm64/include/asm/debug-monitors.h

@@ -83,14 +83,7 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs)
 }
 #endif
 
-#ifdef CONFIG_COMPAT
 int aarch32_break_handler(struct pt_regs *regs);
-#else
-static int aarch32_break_handler(struct pt_regs *regs)
-{
-	return -EFAULT;
-}
-#endif
 
 #endif	/* __ASSEMBLY */
 #endif	/* __KERNEL__ */

+ 2 - 1
arch/arm64/include/asm/system_misc.h

@@ -23,6 +23,7 @@
 #include <linux/compiler.h>
 #include <linux/linkage.h>
 #include <linux/irqflags.h>
+#include <linux/reboot.h>
 
 struct pt_regs;
 
@@ -41,7 +42,7 @@ extern void show_pte(struct mm_struct *mm, unsigned long addr);
 extern void __show_regs(struct pt_regs *);
 
 void soft_restart(unsigned long);
-extern void (*arm_pm_restart)(char str, const char *cmd);
+extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
 
 #define UDBG_UNDEFINED	(1 << 0)
 #define UDBG_SYSCALL	(1 << 1)

+ 1 - 1
arch/arm64/kernel/process.c

@@ -132,7 +132,7 @@ void machine_restart(char *cmd)
 
 	/* Now call the architecture specific reboot code. */
 	if (arm_pm_restart)
-		arm_pm_restart('h', cmd);
+		arm_pm_restart(reboot_mode, cmd);
 
 	/*
 	 * Whoops - the architecture was unable to reboot.

+ 8 - 7
arch/arm64/kernel/smp.c

@@ -199,13 +199,6 @@ asmlinkage void secondary_start_kernel(void)
 	raw_spin_lock(&boot_lock);
 	raw_spin_unlock(&boot_lock);
 
-	/*
-	 * Enable local interrupts.
-	 */
-	notify_cpu_starting(cpu);
-	local_irq_enable();
-	local_fiq_enable();
-
 	/*
 	 * OK, now it's safe to let the boot CPU continue.  Wait for
 	 * the CPU migration code to notice that the CPU is online
@@ -214,6 +207,14 @@ asmlinkage void secondary_start_kernel(void)
 	set_cpu_online(cpu, true);
 	complete(&cpu_running);
 
+	/*
+	 * Enable GIC and timers.
+	 */
+	notify_cpu_starting(cpu);
+
+	local_irq_enable();
+	local_fiq_enable();
+
 	/*
 	 * OK, it's off to the idle thread for us
 	 */

+ 20 - 26
arch/arm64/mm/fault.c

@@ -152,25 +152,8 @@ void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 #define ESR_CM			(1 << 8)
 #define ESR_LNX_EXEC		(1 << 24)
 
-/*
- * Check that the permissions on the VMA allow for the fault which occurred.
- * If we encountered a write fault, we must have write permission, otherwise
- * we allow any permission.
- */
-static inline bool access_error(unsigned int esr, struct vm_area_struct *vma)
-{
-	unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
-
-	if (esr & ESR_WRITE)
-		mask = VM_WRITE;
-	if (esr & ESR_LNX_EXEC)
-		mask = VM_EXEC;
-
-	return vma->vm_flags & mask ? false : true;
-}
-
 static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
-			   unsigned int esr, unsigned int flags,
+			   unsigned int mm_flags, unsigned long vm_flags,
 			   struct task_struct *tsk)
 {
 	struct vm_area_struct *vma;
@@ -188,12 +171,17 @@ static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
 	 * it.
 	 */
 good_area:
-	if (access_error(esr, vma)) {
+	/*
+	 * Check that the permissions on the VMA allow for the fault which
+	 * occurred. If we encountered a write or exec fault, we must have
+	 * appropriate permissions, otherwise we allow any permission.
+	 */
+	if (!(vma->vm_flags & vm_flags)) {
 		fault = VM_FAULT_BADACCESS;
 		goto out;
 	}
 
-	return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags);
+	return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags);
 
 check_stack:
 	if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
@@ -208,9 +196,15 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	int fault, sig, code;
-	bool write = (esr & ESR_WRITE) && !(esr & ESR_CM);
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-		(write ? FAULT_FLAG_WRITE : 0);
+	unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
+	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+
+	if (esr & ESR_LNX_EXEC) {
+		vm_flags = VM_EXEC;
+	} else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
+		vm_flags = VM_WRITE;
+		mm_flags |= FAULT_FLAG_WRITE;
+	}
 
 	tsk = current;
 	mm  = tsk->mm;
@@ -248,7 +242,7 @@ retry:
 #endif
 	}
 
-	fault = __do_page_fault(mm, addr, esr, flags, tsk);
+	fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
 
 	/*
 	 * If we need to retry but a fatal signal is pending, handle the
@@ -265,7 +259,7 @@ retry:
 	 */
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
-	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+	if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
 		if (fault & VM_FAULT_MAJOR) {
 			tsk->maj_flt++;
 			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
@@ -280,7 +274,7 @@ retry:
 			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
 			 * starvation.
 			 */
-			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
 			goto retry;
 		}
 	}

+ 1 - 0
arch/mips/Kconfig

@@ -1702,6 +1702,7 @@ endchoice
 
 config KVM_GUEST
 	bool "KVM Guest Kernel"
+	depends on BROKEN_ON_SMP
 	help
 	  Select this option if building a guest kernel for KVM (Trap & Emulate) mode
 

+ 3 - 2
arch/mips/cavium-octeon/octeon-platform.c

@@ -334,9 +334,10 @@ static void __init octeon_fdt_pip_iface(int pip, int idx, u64 *pmac)
 	char name_buffer[20];
 	int iface;
 	int p;
-	int count;
+	int count = 0;
 
-	count = cvmx_helper_interface_enumerate(idx);
+	if (cvmx_helper_interface_enumerate(idx) == 0)
+		count = cvmx_helper_ports_on_interface(idx);
 
 	snprintf(name_buffer, sizeof(name_buffer), "interface@%d", idx);
 	iface = fdt_subnode_offset(initial_boot_params, pip, name_buffer);

+ 1 - 1
arch/mips/kernel/smp-bmips.c

@@ -173,7 +173,7 @@ static void bmips_boot_secondary(int cpu, struct task_struct *idle)
 	else {
 #if defined(CONFIG_CPU_BMIPS4350) || defined(CONFIG_CPU_BMIPS4380)
 		/* Reset slave TP1 if booting from TP0 */
-		if (cpu_logical_map(cpu) == 0)
+		if (cpu_logical_map(cpu) == 1)
 			set_c0_brcm_cmt_ctrl(0x01);
 #elif defined(CONFIG_CPU_BMIPS5000)
 		if (cpu & 0x01)

+ 0 - 1
arch/mips/kernel/traps.c

@@ -1242,7 +1242,6 @@ asmlinkage void do_mcheck(struct pt_regs *regs)
 	panic("Caught Machine Check exception - %scaused by multiple "
 	      "matching entries in the TLB.",
 	      (multi_match) ? "" : "not ");
-	exception_exit(prev_state);
 }
 
 asmlinkage void do_mt(struct pt_regs *regs)

+ 0 - 1
arch/mips/kvm/Kconfig

@@ -5,7 +5,6 @@ source "virt/kvm/Kconfig"
 
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
-	depends on HAVE_KVM
 	---help---
 	  Say Y here to get to see options for using your Linux host to run
 	  other operating systems inside virtual machines (guests).

+ 2 - 2
arch/mips/mm/tlbex.c

@@ -1456,7 +1456,7 @@ static void build_r4000_setup_pgd(void)
 {
 	const int a0 = 4;
 	const int a1 = 5;
-	u32 *p = tlbmiss_handler_setup_pgd_array;
+	u32 *p = tlbmiss_handler_setup_pgd;
 	const int tlbmiss_handler_setup_pgd_size =
 		tlbmiss_handler_setup_pgd_end - tlbmiss_handler_setup_pgd;
 	struct uasm_label *l = labels;
@@ -1793,7 +1793,7 @@ static void build_r3000_tlb_store_handler(void)
 	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
 	uasm_i_nop(&p);
 
-	if (p >= handle_tlbs)
+	if (p >= handle_tlbs_end)
 		panic("TLB store handler fastpath space exceeded");
 
 	uasm_resolve_relocs(relocs, labels);

+ 57 - 11
arch/mips/netlogic/common/irq.c

@@ -40,6 +40,10 @@
 #include <linux/slab.h>
 #include <linux/irq.h>
 
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
 #include <asm/errno.h>
 #include <asm/signal.h>
 #include <asm/ptrace.h>
@@ -223,17 +227,6 @@ static void nlm_init_node_irqs(int node)
 	nodep->irqmask = irqmask;
 }
 
-void __init arch_init_irq(void)
-{
-	/* Initialize the irq descriptors */
-	nlm_init_percpu_irqs();
-	nlm_init_node_irqs(0);
-	write_c0_eimr(nlm_current_node()->irqmask);
-#if defined(CONFIG_CPU_XLR)
-	nlm_setup_fmn_irq();
-#endif
-}
-
 void nlm_smp_irq_init(int hwcpuid)
 {
 	int node, cpu;
@@ -266,3 +259,56 @@ asmlinkage void plat_irq_dispatch(void)
 	/* top level irq handling */
 	do_IRQ(nlm_irq_to_xirq(node, i));
 }
+
+#ifdef CONFIG_OF
+static struct irq_domain *xlp_pic_domain;
+
+static const struct irq_domain_ops xlp_pic_irq_domain_ops = {
+	.xlate = irq_domain_xlate_onetwocell,
+};
+
+static int __init xlp_of_pic_init(struct device_node *node,
+					struct device_node *parent)
+{
+	const int n_picirqs = PIC_IRT_LAST_IRQ - PIC_IRQ_BASE + 1;
+	struct resource res;
+	int socid, ret;
+
+	/* we need a hack to get the PIC's SoC chip id */
+	ret = of_address_to_resource(node, 0, &res);
+	if (ret < 0) {
+		pr_err("PIC %s: reg property not found!\n", node->name);
+		return -EINVAL;
+	}
+	socid = (res.start >> 18) & 0x3;
+	xlp_pic_domain = irq_domain_add_legacy(node, n_picirqs,
+		nlm_irq_to_xirq(socid, PIC_IRQ_BASE), PIC_IRQ_BASE,
+		&xlp_pic_irq_domain_ops, NULL);
+	if (xlp_pic_domain == NULL) {
+		pr_err("PIC %s: Creating legacy domain failed!\n", node->name);
+		return -EINVAL;
+	}
+	pr_info("Node %d: IRQ domain created for PIC@%pa\n", socid,
+							&res.start);
+	return 0;
+}
+
+static struct of_device_id __initdata xlp_pic_irq_ids[] = {
+	{ .compatible = "netlogic,xlp-pic", .data = xlp_of_pic_init },
+	{},
+};
+#endif
+
+void __init arch_init_irq(void)
+{
+	/* Initialize the irq descriptors */
+	nlm_init_percpu_irqs();
+	nlm_init_node_irqs(0);
+	write_c0_eimr(nlm_current_node()->irqmask);
+#if defined(CONFIG_CPU_XLR)
+	nlm_setup_fmn_irq();
+#endif
+#if defined(CONFIG_OF)
+	of_irq_init(xlp_pic_irq_ids);
+#endif
+}

+ 2 - 1
arch/mips/netlogic/dts/xlp_evp.dts

@@ -76,10 +76,11 @@
 			};
 		};
 		pic: pic@4000 {
-			interrupt-controller;
+			compatible = "netlogic,xlp-pic";
 			#address-cells = <0>;
 			#interrupt-cells = <1>;
 			reg = <0 0x4000 0x200>;
+			interrupt-controller;
 		};
 
 		nor_flash@1,0 {

+ 2 - 1
arch/mips/netlogic/dts/xlp_svp.dts

@@ -76,10 +76,11 @@
 			};
 		};
 		pic: pic@4000 {
-			interrupt-controller;
+			compatible = "netlogic,xlp-pic";
 			#address-cells = <0>;
 			#interrupt-cells = <1>;
 			reg = <0 0x4000 0x200>;
+			interrupt-controller;
 		};
 
 		nor_flash@1,0 {

+ 1 - 1
arch/mips/netlogic/xlp/usb-init.c

@@ -119,7 +119,7 @@ static u64 xlp_usb_dmamask = ~(u32)0;
 static void nlm_usb_fixup_final(struct pci_dev *dev)
 {
 	dev->dev.dma_mask		= &xlp_usb_dmamask;
-	dev->dev.coherent_dma_mask	= DMA_BIT_MASK(64);
+	dev->dev.coherent_dma_mask	= DMA_BIT_MASK(32);
 	switch (dev->devfn) {
 	case 0x10:
 		dev->irq = PIC_EHCI_0_IRQ;

+ 9 - 1
arch/s390/include/asm/processor.h

@@ -91,7 +91,15 @@ struct thread_struct {
 #endif
 };
 
-#define PER_FLAG_NO_TE		1UL	/* Flag to disable transactions. */
+/* Flag to disable transactions. */
+#define PER_FLAG_NO_TE			1UL
+/* Flag to enable random transaction aborts. */
+#define PER_FLAG_TE_ABORT_RAND		2UL
+/* Flag to specify random transaction abort mode:
+ * - abort each transaction at a random instruction before TEND if set.
+ * - abort random transactions at a random instruction if cleared.
+ */
+#define PER_FLAG_TE_ABORT_RAND_TEND	4UL
 
 typedef struct thread_struct thread_struct;
 

+ 2 - 2
arch/s390/include/asm/switch_to.h

@@ -10,7 +10,7 @@
 #include <linux/thread_info.h>
 
 extern struct task_struct *__switch_to(void *, void *);
-extern void update_per_regs(struct task_struct *task);
+extern void update_cr_regs(struct task_struct *task);
 
 static inline void save_fp_regs(s390_fp_regs *fpregs)
 {
@@ -86,7 +86,7 @@ static inline void restore_access_regs(unsigned int *acrs)
 		restore_fp_regs(&next->thread.fp_regs);			\
 		restore_access_regs(&next->thread.acrs[0]);		\
 		restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);	\
-		update_per_regs(next);					\
+		update_cr_regs(next);					\
 	}								\
 	prev = __switch_to(prev,next);					\
 } while (0)

+ 1 - 0
arch/s390/include/uapi/asm/ptrace.h

@@ -400,6 +400,7 @@ typedef struct
 #define PTRACE_POKE_SYSTEM_CALL	      0x5008
 #define PTRACE_ENABLE_TE	      0x5009
 #define PTRACE_DISABLE_TE	      0x5010
+#define PTRACE_TE_ABORT_RAND	      0x5011
 
 /*
  * PT_PROT definition is loosely based on hppa bsd definition in

+ 47 - 4
arch/s390/kernel/crash_dump.c

@@ -21,6 +21,48 @@
 #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
 #define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y))))
 
+
+/*
+ * Return physical address for virtual address
+ */
+static inline void *load_real_addr(void *addr)
+{
+	unsigned long real_addr;
+
+	asm volatile(
+		   "	lra     %0,0(%1)\n"
+		   "	jz	0f\n"
+		   "	la	%0,0\n"
+		   "0:"
+		   : "=a" (real_addr) : "a" (addr) : "cc");
+	return (void *)real_addr;
+}
+
+/*
+ * Copy up to one page to vmalloc or real memory
+ */
+static ssize_t copy_page_real(void *buf, void *src, size_t csize)
+{
+	size_t size;
+
+	if (is_vmalloc_addr(buf)) {
+		BUG_ON(csize >= PAGE_SIZE);
+		/* If buf is not page aligned, copy first part */
+		size = min(roundup(__pa(buf), PAGE_SIZE) - __pa(buf), csize);
+		if (size) {
+			if (memcpy_real(load_real_addr(buf), src, size))
+				return -EFAULT;
+			buf += size;
+			src += size;
+		}
+		/* Copy second part */
+		size = csize - size;
+		return (size) ? memcpy_real(load_real_addr(buf), src, size) : 0;
+	} else {
+		return memcpy_real(buf, src, csize);
+	}
+}
+
 /*
  * Copy one page from "oldmem"
  *
@@ -32,6 +74,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
 			 size_t csize, unsigned long offset, int userbuf)
 {
 	unsigned long src;
+	int rc;
 
 	if (!csize)
 		return 0;
@@ -43,11 +86,11 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
 		 src < OLDMEM_BASE + OLDMEM_SIZE)
 		src -= OLDMEM_BASE;
 	if (userbuf)
-		copy_to_user_real((void __force __user *) buf, (void *) src,
-				  csize);
+		rc = copy_to_user_real((void __force __user *) buf,
+				       (void *) src, csize);
 	else
-		memcpy_real(buf, (void *) src, csize);
-	return csize;
+		rc = copy_page_real(buf, (void *) src, csize);
+	return (rc == 0) ? csize : rc;
 }
 
 /*

+ 39 - 11
arch/s390/kernel/ptrace.c

@@ -47,7 +47,7 @@ enum s390_regset {
 	REGSET_GENERAL_EXTENDED,
 };
 
-void update_per_regs(struct task_struct *task)
+void update_cr_regs(struct task_struct *task)
 {
 	struct pt_regs *regs = task_pt_regs(task);
 	struct thread_struct *thread = &task->thread;
@@ -56,17 +56,25 @@ void update_per_regs(struct task_struct *task)
 #ifdef CONFIG_64BIT
 	/* Take care of the enable/disable of transactional execution. */
 	if (MACHINE_HAS_TE) {
-		unsigned long cr0, cr0_new;
+		unsigned long cr[3], cr_new[3];
 
-		__ctl_store(cr0, 0, 0);
-		/* set or clear transaction execution bits 8 and 9. */
+		__ctl_store(cr, 0, 2);
+		cr_new[1] = cr[1];
+		/* Set or clear transaction execution TXC/PIFO bits 8 and 9. */
 		if (task->thread.per_flags & PER_FLAG_NO_TE)
-			cr0_new = cr0 & ~(3UL << 54);
+			cr_new[0] = cr[0] & ~(3UL << 54);
 		else
-			cr0_new = cr0 | (3UL << 54);
-		/* Only load control register 0 if necessary. */
-		if (cr0 != cr0_new)
-			__ctl_load(cr0_new, 0, 0);
+			cr_new[0] = cr[0] | (3UL << 54);
+		/* Set or clear transaction execution TDC bits 62 and 63. */
+		cr_new[2] = cr[2] & ~3UL;
+		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+				cr_new[2] |= 1UL;
+			else
+				cr_new[2] |= 2UL;
+		}
+		if (memcmp(&cr_new, &cr, sizeof(cr)))
+			__ctl_load(cr_new, 0, 2);
 	}
 #endif
 	/* Copy user specified PER registers */
@@ -100,14 +108,14 @@ void user_enable_single_step(struct task_struct *task)
 {
 	set_tsk_thread_flag(task, TIF_SINGLE_STEP);
 	if (task == current)
-		update_per_regs(task);
+		update_cr_regs(task);
 }
 
 void user_disable_single_step(struct task_struct *task)
 {
 	clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
 	if (task == current)
-		update_per_regs(task);
+		update_cr_regs(task);
 }
 
 /*
@@ -447,6 +455,26 @@ long arch_ptrace(struct task_struct *child, long request,
 		if (!MACHINE_HAS_TE)
 			return -EIO;
 		child->thread.per_flags |= PER_FLAG_NO_TE;
+		child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+		return 0;
+	case PTRACE_TE_ABORT_RAND:
+		if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE))
+			return -EIO;
+		switch (data) {
+		case 0UL:
+			child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+			break;
+		case 1UL:
+			child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+			child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND;
+			break;
+		case 2UL:
+			child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+			child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND;
+			break;
+		default:
+			return -EINVAL;
+		}
 		return 0;
 	default:
 		/* Removing high order bit from addr (only for 31 bit). */
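
The new request is driven entirely by the data argument, mirroring the switch
above. A hypothetical tracer snippet (the 0x5011 value comes from the
uapi/asm/ptrace.h hunk earlier in this pull; the rest is ordinary glibc ptrace
usage, with the tracee already stopped):

  #include <stdio.h>
  #include <sys/ptrace.h>
  #include <sys/types.h>

  #ifndef PTRACE_TE_ABORT_RAND
  #define PTRACE_TE_ABORT_RAND	0x5011
  #endif

  /* mode 0: off
   * mode 1: abort each transaction at a random insn before TEND
   * mode 2: abort random transactions at random instructions */
  static int set_te_abort_rand(pid_t pid, unsigned long mode)
  {
  	if (ptrace((enum __ptrace_request)PTRACE_TE_ABORT_RAND,
  		   pid, 0, (void *)mode) != 0) {
  		perror("PTRACE_TE_ABORT_RAND");
  		return -1;
  	}
  	return 0;
  }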

+ 88 - 25
arch/s390/net/bpf_jit_comp.c

@@ -9,6 +9,8 @@
 #include <linux/netdevice.h>
 #include <linux/if_vlan.h>
 #include <linux/filter.h>
+#include <linux/random.h>
+#include <linux/init.h>
 #include <asm/cacheflush.h>
 #include <asm/processor.h>
 #include <asm/facility.h>
@@ -221,6 +223,37 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
 	EMIT2(0x07fe);
 }
 
+/* Helper to find the offset of pkt_type in sk_buff
+ * Make sure its still a 3bit field starting at the MSBs within a byte.
+ */
+#define PKT_TYPE_MAX 0xe0
+static int pkt_type_offset;
+
+static int __init bpf_pkt_type_offset_init(void)
+{
+	struct sk_buff skb_probe = {
+		.pkt_type = ~0,
+	};
+	char *ct = (char *)&skb_probe;
+	int off;
+
+	pkt_type_offset = -1;
+	for (off = 0; off < sizeof(struct sk_buff); off++) {
+		if (!ct[off])
+			continue;
+		if (ct[off] == PKT_TYPE_MAX)
+			pkt_type_offset = off;
+		else {
+			/* Found non matching bit pattern, fix needed. */
+			WARN_ON_ONCE(1);
+			pkt_type_offset = -1;
+			return -1;
+		}
+	}
+	return 0;
+}
+device_initcall(bpf_pkt_type_offset_init);
+
 /*
  * make sure we dont leak kernel information to user
  */
@@ -720,6 +753,16 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 			EMIT4_DISP(0x88500000, 12);
 		}
 		break;
+	case BPF_S_ANC_PKTTYPE:
+		if (pkt_type_offset < 0)
+			goto out;
+		/* lhi %r5,0 */
+		EMIT4(0xa7580000);
+		/* ic %r5,<d(pkt_type_offset)>(%r2) */
+		EMIT4_DISP(0x43502000, pkt_type_offset);
+		/* srl %r5,5 */
+		EMIT4_DISP(0x88500000, 5);
+		break;
 	case BPF_S_ANC_CPU: /* A = smp_processor_id() */
 #ifdef CONFIG_SMP
 		/* l %r5,<d(cpu_nr)> */
@@ -738,8 +781,41 @@ out:
 	return -1;
 }
 
+/*
+ * Note: for security reasons, bpf code will follow a randomly
+ *	 sized amount of illegal instructions.
+ */
+struct bpf_binary_header {
+	unsigned int pages;
+	u8 image[];
+};
+
+static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize,
+						  u8 **image_ptr)
+{
+	struct bpf_binary_header *header;
+	unsigned int sz, hole;
+
+	/* Most BPF filters are really small, but if some of them fill a page,
+	 * allow at least 128 extra bytes for illegal instructions.
+	 */
+	sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE);
+	header = module_alloc(sz);
+	if (!header)
+		return NULL;
+	memset(header, 0, sz);
+	header->pages = sz / PAGE_SIZE;
+	hole = sz - bpfsize + sizeof(*header);
+	/* Insert random number of illegal instructions before BPF code
+	 * and make sure the first instruction starts at an even address.
+	 */
+	*image_ptr = &header->image[(prandom_u32() % hole) & -2];
+	return header;
+}
+
 void bpf_jit_compile(struct sk_filter *fp)
 {
+	struct bpf_binary_header *header = NULL;
 	unsigned long size, prg_len, lit_len;
 	struct bpf_jit jit, cjit;
 	unsigned int *addrs;
@@ -772,12 +848,11 @@ void bpf_jit_compile(struct sk_filter *fp)
 		} else if (jit.prg == cjit.prg && jit.lit == cjit.lit) {
 			prg_len = jit.prg - jit.start;
 			lit_len = jit.lit - jit.mid;
-			size = max_t(unsigned long, prg_len + lit_len,
-				     sizeof(struct work_struct));
+			size = prg_len + lit_len;
 			if (size >= BPF_SIZE_MAX)
 				goto out;
-			jit.start = module_alloc(size);
-			if (!jit.start)
+			header = bpf_alloc_binary(size, &jit.start);
+			if (!header)
 				goto out;
 			jit.prg = jit.mid = jit.start + prg_len;
 			jit.lit = jit.end = jit.start + prg_len + lit_len;
@@ -788,37 +863,25 @@ void bpf_jit_compile(struct sk_filter *fp)
 		cjit = jit;
 	}
 	if (bpf_jit_enable > 1) {
-		pr_err("flen=%d proglen=%lu pass=%d image=%p\n",
-		       fp->len, jit.end - jit.start, pass, jit.start);
-		if (jit.start) {
-			printk(KERN_ERR "JIT code:\n");
+		bpf_jit_dump(fp->len, jit.end - jit.start, pass, jit.start);
+		if (jit.start)
 			print_fn_code(jit.start, jit.mid - jit.start);
-			print_hex_dump(KERN_ERR, "JIT literals:\n",
-				       DUMP_PREFIX_ADDRESS, 16, 1,
-				       jit.mid, jit.end - jit.mid, false);
-		}
 	}
-	if (jit.start)
+	if (jit.start) {
+		set_memory_ro((unsigned long)header, header->pages);
 		fp->bpf_func = (void *) jit.start;
+	}
 out:
 	kfree(addrs);
 }
 
-static void jit_free_defer(struct work_struct *arg)
-{
-	module_free(NULL, arg);
-}
-
-/* run from softirq, we must use a work_struct to call
- * module_free() from process context
- */
 void bpf_jit_free(struct sk_filter *fp)
 {
-	struct work_struct *work;
+	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
+	struct bpf_binary_header *header = (void *)addr;
 
 	if (fp->bpf_func == sk_run_filter)
 		return;
-	work = (struct work_struct *)fp->bpf_func;
-	INIT_WORK(work, jit_free_defer);
-	schedule_work(work);
+	set_memory_rw(addr, header->pages);
+	module_free(NULL, header);
 }

+ 4 - 4
arch/um/include/shared/frame_kern.h

@@ -6,13 +6,13 @@
 #ifndef __FRAME_KERN_H_
 #define __FRAME_KERN_H_
 
-extern int setup_signal_stack_sc(unsigned long stack_top, int sig, 
+extern int setup_signal_stack_sc(unsigned long stack_top, int sig,
 				 struct k_sigaction *ka,
-				 struct pt_regs *regs, 
+				 struct pt_regs *regs,
 				 sigset_t *mask);
-extern int setup_signal_stack_si(unsigned long stack_top, int sig, 
+extern int setup_signal_stack_si(unsigned long stack_top, int sig,
 				 struct k_sigaction *ka,
-				 struct pt_regs *regs, siginfo_t *info, 
+				 struct pt_regs *regs, struct siginfo *info,
 				 sigset_t *mask);
 
 #endif

+ 2 - 2
arch/um/kernel/signal.c

@@ -19,7 +19,7 @@ EXPORT_SYMBOL(unblock_signals);
  * OK, we're invoking a handler
  */
 static void handle_signal(struct pt_regs *regs, unsigned long signr,
-			 struct k_sigaction *ka, siginfo_t *info)
+			 struct k_sigaction *ka, struct siginfo *info)
 {
 	sigset_t *oldset = sigmask_to_save();
 	int singlestep = 0;
@@ -71,7 +71,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
 static int kern_do_signal(struct pt_regs *regs)
 {
 	struct k_sigaction ka_copy;
-	siginfo_t info;
+	struct siginfo info;
 	int sig, handled_sig = 0;
 
 	while ((sig = get_signal_to_deliver(&info, &ka_copy, regs, NULL)) > 0) {

+ 1 - 1
arch/um/kernel/skas/mmu.c

@@ -123,7 +123,7 @@ void uml_setup_stubs(struct mm_struct *mm)
 	/* dup_mmap already holds mmap_sem */
 	err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START,
 				      VM_READ | VM_MAYREAD | VM_EXEC |
-				      VM_MAYEXEC | VM_DONTCOPY,
+				      VM_MAYEXEC | VM_DONTCOPY | VM_PFNMAP,
 				      mm->context.stub_pages);
 	if (err) {
 		printk(KERN_ERR "install_special_mapping returned %d\n", err);

+ 1 - 1
arch/um/kernel/skas/uaccess.c

@@ -254,6 +254,6 @@ int strnlen_user(const void __user *str, int len)
 	n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count);
 	if (n == 0)
 		return count + 1;
-	return -EFAULT;
+	return 0;
 }
 EXPORT_SYMBOL(strnlen_user);

+ 189 - 41
arch/um/os-Linux/mem.c

@@ -52,6 +52,25 @@ static void __init find_tempdir(void)
 	strcat(tempdir, "/");
 }
 
+/*
+ * Remove bytes from the front of the buffer and refill it so that if there's a
+ * partial string that we care about, it will be completed, and we can recognize
+ * it.
+ */
+static int pop(int fd, char *buf, size_t size, size_t npop)
+{
+	ssize_t n;
+	size_t len = strlen(&buf[npop]);
+
+	memmove(buf, &buf[npop], len + 1);
+	n = read(fd, &buf[len], size - len - 1);
+	if (n < 0)
+		return -errno;
+
+	buf[len + n] = '\0';
+	return 1;
+}
+
 /*
  * This will return 1, with the first character in buf being the
  * character following the next instance of c in the file.  This will
@@ -61,7 +80,6 @@ static void __init find_tempdir(void)
 static int next(int fd, char *buf, size_t size, char c)
 {
 	ssize_t n;
-	size_t len;
 	char *ptr;
 
 	while ((ptr = strchr(buf, c)) == NULL) {
@@ -74,20 +92,129 @@ static int next(int fd, char *buf, size_t size, char c)
 		buf[n] = '\0';
 	}
 
-	ptr++;
-	len = strlen(ptr);
-	memmove(buf, ptr, len + 1);
+	return pop(fd, buf, size, ptr - buf + 1);
+}
+
+/*
+ * Decode an octal-escaped and space-terminated path of the form used by
+ * /proc/mounts. May be used to decode a path in-place. "out" must be at least
+ * as large as the input. The output is always null-terminated. "len" gets the
+ * length of the output, excluding the trailing null. Returns 0 if a full path
+ * was successfully decoded, otherwise an error.
+ */
+static int decode_path(const char *in, char *out, size_t *len)
+{
+	char *first = out;
+	int c;
+	int i;
+	int ret = -EINVAL;
+	while (1) {
+		switch (*in) {
+		case '\0':
+			goto out;
+
+		case ' ':
+			ret = 0;
+			goto out;
+
+		case '\\':
+			in++;
+			c = 0;
+			for (i = 0; i < 3; i++) {
+				if (*in < '0' || *in > '7')
+					goto out;
+				c = (c << 3) | (*in++ - '0');
+			}
+			*(unsigned char *)out++ = (unsigned char) c;
+			break;
+
+		default:
+			*out++ = *in++;
+			break;
+		}
+	}
+
+out:
+	*out = '\0';
+	*len = out - first;
+	return ret;
+}
+
+/*
+ * Computes the length of s when encoded with three-digit octal escape sequences
+ * for the characters in chars.
+ */
+static size_t octal_encoded_length(const char *s, const char *chars)
+{
+	size_t len = strlen(s);
+	while ((s = strpbrk(s, chars)) != NULL) {
+		len += 3;
+		s++;
+	}
+
+	return len;
+}
+
+enum {
+	OUTCOME_NOTHING_MOUNTED,
+	OUTCOME_TMPFS_MOUNT,
+	OUTCOME_NON_TMPFS_MOUNT,
+};
+
+/* Read a line of /proc/mounts data looking for a tmpfs mount at "path". */
+static int read_mount(int fd, char *buf, size_t bufsize, const char *path,
+		      int *outcome)
+{
+	int found;
+	int match;
+	char *space;
+	size_t len;
+
+	enum {
+		MATCH_NONE,
+		MATCH_EXACT,
+		MATCH_PARENT,
+	};
+
+	found = next(fd, buf, bufsize, ' ');
+	if (found != 1)
+		return found;
 
 	/*
-	 * Refill the buffer so that if there's a partial string that we care
-	 * about, it will be completed, and we can recognize it.
+	 * If there's no following space in the buffer, then this path is
+	 * truncated, so it can't be the one we're looking for.
 	 */
-	n = read(fd, &buf[len], size - len - 1);
-	if (n < 0)
-		return -errno;
+	space = strchr(buf, ' ');
+	if (space) {
+		match = MATCH_NONE;
+		if (!decode_path(buf, buf, &len)) {
+			if (!strcmp(buf, path))
+				match = MATCH_EXACT;
+			else if (!strncmp(buf, path, len)
+				 && (path[len] == '/' || !strcmp(buf, "/")))
+				match = MATCH_PARENT;
+		}
+
+		found = pop(fd, buf, bufsize, space - buf + 1);
+		if (found != 1)
+			return found;
+
+		switch (match) {
+		case MATCH_EXACT:
+			if (!strncmp(buf, "tmpfs", strlen("tmpfs")))
+				*outcome = OUTCOME_TMPFS_MOUNT;
+			else
+				*outcome = OUTCOME_NON_TMPFS_MOUNT;
+			break;
 
-	buf[len + n] = '\0';
-	return 1;
+		case MATCH_PARENT:
+			/* This mount obscures any previous ones. */
+			*outcome = OUTCOME_NOTHING_MOUNTED;
+			break;
+		}
+	}
+
+	return next(fd, buf, bufsize, '\n');
 }
 
 /* which_tmpdir is called only during early boot */
@@ -106,8 +233,12 @@ static int checked_tmpdir = 0;
  */
 static void which_tmpdir(void)
 {
-	int fd, found;
-	char buf[128] = { '\0' };
+	int fd;
+	int found;
+	int outcome;
+	char *path;
+	char *buf;
+	size_t bufsize;
 
 	if (checked_tmpdir)
 		return;
@@ -116,49 +247,66 @@ static void which_tmpdir(void)
 
 	printf("Checking for tmpfs mount on /dev/shm...");
 
+	path = realpath("/dev/shm", NULL);
+	if (!path) {
+		printf("failed to check real path, errno = %d\n", errno);
+		return;
+	}
+	printf("%s...", path);
+
+	/*
+	 * The buffer needs to be able to fit the full octal-escaped path, a
+	 * space, and a trailing null in order to successfully decode it.
+	 */
+	bufsize = octal_encoded_length(path, " \t\n\\") + 2;
+
+	if (bufsize < 128)
+		bufsize = 128;
+
+	buf = malloc(bufsize);
+	if (!buf) {
+		printf("malloc failed, errno = %d\n", errno);
+		goto out;
+	}
+	buf[0] = '\0';
+
 	fd = open("/proc/mounts", O_RDONLY);
 	if (fd < 0) {
 		printf("failed to open /proc/mounts, errno = %d\n", errno);
-		return;
+		goto out1;
 	}
 
+	outcome = OUTCOME_NOTHING_MOUNTED;
 	while (1) {
-		found = next(fd, buf, ARRAY_SIZE(buf), ' ');
-		if (found != 1)
-			break;
-
-		if (!strncmp(buf, "/dev/shm", strlen("/dev/shm")))
-			goto found;
-
-		found = next(fd, buf, ARRAY_SIZE(buf), '\n');
+		found = read_mount(fd, buf, bufsize, path, &outcome);
 		if (found != 1)
 			break;
 	}
 
-err:
-	if (found == 0)
-		printf("nothing mounted on /dev/shm\n");
-	else if (found < 0)
+	if (found < 0) {
 		printf("read returned errno %d\n", -found);
+	} else {
+		switch (outcome) {
+		case OUTCOME_TMPFS_MOUNT:
+			printf("OK\n");
+			default_tmpdir = "/dev/shm";
+			break;
 
-out:
-	close(fd);
-
-	return;
-
-found:
-	found = next(fd, buf, ARRAY_SIZE(buf), ' ');
-	if (found != 1)
-		goto err;
+		case OUTCOME_NON_TMPFS_MOUNT:
+			printf("not tmpfs\n");
+			break;
 
-	if (strncmp(buf, "tmpfs", strlen("tmpfs"))) {
-		printf("not tmpfs\n");
-		goto out;
+		default:
+			printf("nothing mounted on /dev/shm\n");
+			break;
+		}
 	}
 
-	printf("OK\n");
-	default_tmpdir = "/dev/shm";
-	goto out;
+	close(fd);
+out1:
+	free(buf);
+out:
+	free(path);
 }
 
 static int __init make_tempfile(const char *template, char **out_tempname,
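
decode_path() above undoes the three-digit octal escapes that /proc/mounts applies to space, tab, newline and backslash, decoding in place and reporting the decoded length. A self-contained sketch of the same logic as a standalone program (not the static helper itself); "/dev/my\040shm" should come back as "/dev/my shm":

#include <stdio.h>
#include <string.h>

/* Decode a space-terminated, octal-escaped /proc/mounts path in place.
 * Returns 0 when a full path (ending in ' ') was decoded, -1 otherwise. */
static int decode(char *s, size_t *len)
{
	char *out = s, *start = s;

	for (;;) {
		if (*s == ' ') {
			*out = '\0';
			*len = out - start;
			return 0;
		}
		if (*s == '\0')
			break;
		if (*s == '\\') {
			int c = 0, i;

			s++;
			for (i = 0; i < 3; i++) {
				if (*s < '0' || *s > '7')
					goto bad;
				c = (c << 3) | (*s++ - '0');
			}
			*out++ = (char)c;
		} else {
			*out++ = *s++;
		}
	}
bad:
	*out = '\0';
	*len = out - start;
	return -1;
}

int main(void)
{
	char buf[] = "/dev/my\\040shm tmpfs rw 0 0";
	size_t len;

	if (decode(buf, &len) == 0)
		printf("%s (%zu bytes)\n", buf, len);	/* "/dev/my shm (11 bytes)" */
	return 0;
}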

+ 4 - 4
arch/um/os-Linux/signal.c

@@ -25,7 +25,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
 	[SIGIO]		= sigio_handler,
 	[SIGVTALRM]	= timer_handler };
 
-static void sig_handler_common(int sig, siginfo_t *si, mcontext_t *mc)
+static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
 {
 	struct uml_pt_regs r;
 	int save_errno = errno;
@@ -61,7 +61,7 @@ static void sig_handler_common(int sig, siginfo_t *si, mcontext_t *mc)
 static int signals_enabled;
 static unsigned int signals_pending;
 
-void sig_handler(int sig, siginfo_t *si, mcontext_t *mc)
+void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
 {
 	int enabled;
 
@@ -120,7 +120,7 @@ void set_sigstack(void *sig_stack, int size)
 		panic("enabling signal stack failed, errno = %d\n", errno);
 }
 
-static void (*handlers[_NSIG])(int sig, siginfo_t *si, mcontext_t *mc) = {
+static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
 	[SIGSEGV] = sig_handler,
 	[SIGBUS] = sig_handler,
 	[SIGILL] = sig_handler,
@@ -162,7 +162,7 @@ static void hard_handler(int sig, siginfo_t *si, void *p)
 		while ((sig = ffs(pending)) != 0){
 			sig--;
 			pending &= ~(1 << sig);
-			(*handlers[sig])(sig, si, mc);
+			(*handlers[sig])(sig, (struct siginfo *)si, mc);
 		}
 
 		/*

+ 12 - 7
arch/um/os-Linux/skas/process.c

@@ -54,7 +54,7 @@ static int ptrace_dump_regs(int pid)
 
 void wait_stub_done(int pid)
 {
-	int n, status, err;
+	int n, status, err, bad_stop = 0;
 
 	while (1) {
 		CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
@@ -74,6 +74,8 @@ void wait_stub_done(int pid)
 
 	if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0)
 		return;
+	else
+		bad_stop = 1;
 
 bad_wait:
 	err = ptrace_dump_regs(pid);
@@ -83,7 +85,10 @@ bad_wait:
 	printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, "
 	       "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno,
 	       status);
-	fatal_sigsegv();
+	if (bad_stop)
+		kill(pid, SIGKILL);
+	else
+		fatal_sigsegv();
 }
 
 extern unsigned long current_stub_stack(void);
@@ -409,7 +414,7 @@ void userspace(struct uml_pt_regs *regs)
 		if (WIFSTOPPED(status)) {
 			int sig = WSTOPSIG(status);
 
-			ptrace(PTRACE_GETSIGINFO, pid, 0, &si);
+			ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
 
 			switch (sig) {
 			case SIGSEGV:
@@ -417,7 +422,7 @@ void userspace(struct uml_pt_regs *regs)
 				    !ptrace_faultinfo) {
 					get_skas_faultinfo(pid,
 							   &regs->faultinfo);
-					(*sig_info[SIGSEGV])(SIGSEGV, &si,
+					(*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
 							     regs);
 				}
 				else handle_segv(pid, regs);
@@ -426,14 +431,14 @@ void userspace(struct uml_pt_regs *regs)
 			        handle_trap(pid, regs, local_using_sysemu);
 				break;
 			case SIGTRAP:
-				relay_signal(SIGTRAP, &si, regs);
+				relay_signal(SIGTRAP, (struct siginfo *)&si, regs);
 				break;
 			case SIGVTALRM:
 				now = os_nsecs();
 				if (now < nsecs)
 					break;
 				block_signals();
-				(*sig_info[sig])(sig, &si, regs);
+				(*sig_info[sig])(sig, (struct siginfo *)&si, regs);
 				unblock_signals();
 				nsecs = timer.it_value.tv_sec *
 					UM_NSEC_PER_SEC +
@@ -447,7 +452,7 @@ void userspace(struct uml_pt_regs *regs)
 			case SIGFPE:
 			case SIGWINCH:
 				block_signals();
-				(*sig_info[sig])(sig, &si, regs);
+				(*sig_info[sig])(sig, (struct siginfo *)&si, regs);
 				unblock_signals();
 				break;
 			default:

+ 7 - 0
arch/x86/kvm/mmu.c

@@ -2810,6 +2810,13 @@ exit:
 
 static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code)
 {
+	/*
+	 * Do not fix an mmio spte with an invalid generation number; it
+	 * needs to be updated by the slow page fault path.
+	 */
+	if (unlikely(error_code & PFERR_RSVD_MASK))
+		return false;
+
 	/*
 	 * #PF can be fast only if the shadow page table is present and it
 	 * is caused by write-protect, that means we just need change the
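
The new guard keys off the RSVD bit of the x86 page-fault error code: a reserved-bit fault here means the MMIO spte carries a stale generation number that only the slow path can refresh. A hedged sketch of the check (mask values follow the architectural error-code layout; the helper name is invented):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* x86 page-fault error code bits (architectural) */
#define PFERR_WRITE_MASK	(1u << 1)	/* fault was a write */
#define PFERR_RSVD_MASK		(1u << 3)	/* reserved bit set in a paging entry */

static bool fault_can_be_fast(uint32_t error_code)
{
	/* Stale-generation MMIO sptes show up as reserved-bit faults and
	 * must be rebuilt by the slow path. */
	if (error_code & PFERR_RSVD_MASK)
		return false;
	/* Per the comment above, the fast path only fixes write-protect faults. */
	return error_code & PFERR_WRITE_MASK;
}

int main(void)
{
	printf("%d %d\n", fault_can_be_fast(PFERR_WRITE_MASK),
	       fault_can_be_fast(PFERR_RSVD_MASK));	/* 1 0 */
	return 0;
}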

+ 0 - 1
arch/x86/um/signal.c

@@ -508,7 +508,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 {
 	struct rt_sigframe __user *frame;
 	int err = 0;
-	struct task_struct *me = current;
 
 	frame = (struct rt_sigframe __user *)
 		round_down(stack_top - sizeof(struct rt_sigframe), 16);

+ 1 - 0
drivers/acpi/acpi_memhotplug.c

@@ -323,6 +323,7 @@ static int acpi_memory_device_add(struct acpi_device *device,
 	/* Get the range from the _CRS */
 	result = acpi_memory_get_device_resources(mem_device);
 	if (result) {
+		device->driver_data = NULL;
 		kfree(mem_device);
 		return result;
 	}

+ 0 - 13
drivers/acpi/acpica/aclocal.h

@@ -931,19 +931,6 @@ struct acpi_bit_register_info {
 
 /* Structs and definitions for _OSI support and I/O port validation */
 
-#define ACPI_OSI_WIN_2000               0x01
-#define ACPI_OSI_WIN_XP                 0x02
-#define ACPI_OSI_WIN_XP_SP1             0x03
-#define ACPI_OSI_WINSRV_2003            0x04
-#define ACPI_OSI_WIN_XP_SP2             0x05
-#define ACPI_OSI_WINSRV_2003_SP1        0x06
-#define ACPI_OSI_WIN_VISTA              0x07
-#define ACPI_OSI_WINSRV_2008            0x08
-#define ACPI_OSI_WIN_VISTA_SP1          0x09
-#define ACPI_OSI_WIN_VISTA_SP2          0x0A
-#define ACPI_OSI_WIN_7                  0x0B
-#define ACPI_OSI_WIN_8                  0x0C
-
 #define ACPI_ALWAYS_ILLEGAL             0x00
 
 struct acpi_interface_info {

+ 11 - 0
drivers/acpi/internal.h

@@ -164,4 +164,15 @@ struct platform_device;
 int acpi_create_platform_device(struct acpi_device *adev,
 				const struct acpi_device_id *id);
 
+/*--------------------------------------------------------------------------
+					Video
+  -------------------------------------------------------------------------- */
+#if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
+bool acpi_video_backlight_quirks(void);
+bool acpi_video_verify_backlight_support(void);
+#else
+static inline bool acpi_video_backlight_quirks(void) { return false; }
+static inline bool acpi_video_verify_backlight_support(void) { return false; }
+#endif
+
 #endif /* _ACPI_INTERNAL_H_ */
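
The declarations above follow the usual kernel pattern for optional features: a real prototype when the config option (or its module form) is enabled, and a static inline stub returning a safe default otherwise, so call sites need no #ifdefs of their own. A generic sketch of the idiom, with CONFIG_MYDRV_FOO and both functions made-up names:

#include <stdbool.h>

#if defined(CONFIG_MYDRV_FOO) || defined(CONFIG_MYDRV_FOO_MODULE)
bool mydrv_foo_active(void);		/* implemented in foo.c */
#else
static inline bool mydrv_foo_active(void) { return false; }
#endif

/* Call sites compile unchanged either way: */
static int mydrv_init(void)
{
	if (mydrv_foo_active())
		return 1;
	return 0;
}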

+ 9 - 4
drivers/acpi/scan.c

@@ -352,10 +352,12 @@ static void acpi_scan_bus_device_check(acpi_handle handle, u32 ost_source)
 	mutex_lock(&acpi_scan_lock);
 	lock_device_hotplug();
 
-	acpi_bus_get_device(handle, &device);
-	if (device) {
-		dev_warn(&device->dev, "Attempt to re-insert\n");
-		goto out;
+	if (ost_source != ACPI_NOTIFY_BUS_CHECK) {
+		acpi_bus_get_device(handle, &device);
+		if (device) {
+			dev_warn(&device->dev, "Attempt to re-insert\n");
+			goto out;
+		}
 	}
 	acpi_evaluate_hotplug_ost(handle, ost_source,
 				  ACPI_OST_SC_INSERT_IN_PROGRESS, NULL);
@@ -1981,6 +1983,9 @@ static acpi_status acpi_bus_device_attach(acpi_handle handle, u32 lvl_not_used,
 	if (acpi_bus_get_device(handle, &device))
 		return AE_CTRL_DEPTH;
 
+	if (device->handler)
+		return AE_OK;
+
 	ret = acpi_scan_attach_handler(device);
 	if (ret)
 		return ret > 0 ? AE_OK : AE_CTRL_DEPTH;

+ 85 - 13
drivers/acpi/video.c

@@ -44,6 +44,8 @@
 #include <linux/suspend.h>
 #include <acpi/video.h>
 
+#include "internal.h"
+
 #define PREFIX "ACPI: "
 
 #define ACPI_VIDEO_BUS_NAME		"Video Bus"
@@ -448,6 +450,14 @@ static struct dmi_system_id video_dmi_table[] __initdata = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "HP Folio 13 - 2000 Notebook PC"),
 		},
 	},
+	{
+	 .callback = video_ignore_initial_backlight,
+	 .ident = "Fujitsu E753",
+	 .matches = {
+		DMI_MATCH(DMI_BOARD_VENDOR, "FUJITSU"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E753"),
+		},
+	},
 	{
 	 .callback = video_ignore_initial_backlight,
 	 .ident = "HP Pavilion dm4",
@@ -898,7 +908,10 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 		device->cap._DDC = 1;
 	}
 
-	if (acpi_video_backlight_support()) {
+	if (acpi_video_init_brightness(device))
+		return;
+
+	if (acpi_video_verify_backlight_support()) {
 		struct backlight_properties props;
 		struct pci_dev *pdev;
 		acpi_handle acpi_parent;
@@ -907,9 +920,6 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 		static int count = 0;
 		char *name;
 
-		result = acpi_video_init_brightness(device);
-		if (result)
-			return;
 		name = kasprintf(GFP_KERNEL, "acpi_video%d", count);
 		if (!name)
 			return;
@@ -969,6 +979,11 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 		if (result)
 			printk(KERN_ERR PREFIX "Create sysfs link\n");
 
+	} else {
+		/* Remove the brightness object. */
+		kfree(device->brightness->levels);
+		kfree(device->brightness);
+		device->brightness = NULL;
 	}
 }
 
@@ -1351,8 +1366,8 @@ acpi_video_switch_brightness(struct acpi_video_device *device, int event)
 	unsigned long long level_current, level_next;
 	int result = -EINVAL;
 
-	/* no warning message if acpi_backlight=vendor is used */
-	if (!acpi_video_backlight_support())
+	/* no warning message if acpi_backlight=vendor or a quirk is used */
+	if (!acpi_video_verify_backlight_support())
 		return 0;
 
 	if (!device->brightness)
@@ -1532,14 +1547,20 @@ static int acpi_video_bus_put_devices(struct acpi_video_bus *video)
 
 /* acpi_video interface */
 
+/*
+ * Win8 requires setting bit 2 of _DOS to let the firmware know it shouldn't
+ * perform any automatic brightness change on receiving a notification.
+ */
 static int acpi_video_bus_start_devices(struct acpi_video_bus *video)
 {
-	return acpi_video_bus_DOS(video, 0, 0);
+	return acpi_video_bus_DOS(video, 0,
+				  acpi_video_backlight_quirks() ? 1 : 0);
 }
 
 static int acpi_video_bus_stop_devices(struct acpi_video_bus *video)
 {
-	return acpi_video_bus_DOS(video, 0, 1);
+	return acpi_video_bus_DOS(video, 0,
+				  acpi_video_backlight_quirks() ? 0 : 1);
 }
 
 static void acpi_video_bus_notify(struct acpi_device *device, u32 event)
@@ -1854,6 +1875,46 @@ static int acpi_video_bus_remove(struct acpi_device *device)
 	return 0;
 }
 
+static acpi_status video_unregister_backlight(acpi_handle handle, u32 lvl,
+					      void *context, void **rv)
+{
+	struct acpi_device *acpi_dev;
+	struct acpi_video_bus *video;
+	struct acpi_video_device *dev, *next;
+
+	if (acpi_bus_get_device(handle, &acpi_dev))
+		return AE_OK;
+
+	if (acpi_match_device_ids(acpi_dev, video_device_ids))
+		return AE_OK;
+
+	video = acpi_driver_data(acpi_dev);
+	if (!video)
+		return AE_OK;
+
+	acpi_video_bus_stop_devices(video);
+	mutex_lock(&video->device_list_lock);
+	list_for_each_entry_safe(dev, next, &video->video_device_list, entry) {
+		if (dev->backlight) {
+			backlight_device_unregister(dev->backlight);
+			dev->backlight = NULL;
+			kfree(dev->brightness->levels);
+			kfree(dev->brightness);
+		}
+		if (dev->cooling_dev) {
+			sysfs_remove_link(&dev->dev->dev.kobj,
+					  "thermal_cooling");
+			sysfs_remove_link(&dev->cooling_dev->device.kobj,
+					  "device");
+			thermal_cooling_device_unregister(dev->cooling_dev);
+			dev->cooling_dev = NULL;
+		}
+	}
+	mutex_unlock(&video->device_list_lock);
+	acpi_video_bus_start_devices(video);
+	return AE_OK;
+}
+
 static int __init is_i740(struct pci_dev *dev)
 {
 	if (dev->device == 0x00D1)
@@ -1885,14 +1946,25 @@ static int __init intel_opregion_present(void)
 	return opregion;
 }
 
-int acpi_video_register(void)
+int __acpi_video_register(bool backlight_quirks)
 {
-	int result = 0;
+	bool no_backlight;
+	int result;
+
+	no_backlight = backlight_quirks ? acpi_video_backlight_quirks() : false;
+
 	if (register_count) {
 		/*
-		 * if the function of acpi_video_register is already called,
-		 * don't register the acpi_vide_bus again and return no error.
+		 * If acpi_video_register() has been called already, don't try
+		 * to register acpi_video_bus, but unregister backlight devices
+		 * if no backlight support is requested.
 		 */
+		if (no_backlight)
+			acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
+					    ACPI_UINT32_MAX,
+					    video_unregister_backlight,
+					    NULL, NULL, NULL);
+
 		return 0;
 	}
 
@@ -1908,7 +1980,7 @@ int acpi_video_register(void)
 
 	return 0;
 }
-EXPORT_SYMBOL(acpi_video_register);
+EXPORT_SYMBOL(__acpi_video_register);
 
 void acpi_video_unregister(void)
 {

+ 21 - 0
drivers/acpi/video_detect.c

@@ -38,6 +38,8 @@
 #include <linux/dmi.h>
 #include <linux/pci.h>
 
+#include "internal.h"
+
 #define PREFIX "ACPI: "
 
 ACPI_MODULE_NAME("video");
@@ -234,6 +236,17 @@ static void acpi_video_caps_check(void)
 		acpi_video_get_capabilities(NULL);
 }
 
+bool acpi_video_backlight_quirks(void)
+{
+	if (acpi_gbl_osi_data >= ACPI_OSI_WIN_8) {
+		acpi_video_caps_check();
+		acpi_video_support |= ACPI_VIDEO_SKIP_BACKLIGHT;
+		return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL(acpi_video_backlight_quirks);
+
 /* Promote the vendor interface instead of the generic video module.
  * This function allows DMI blacklists to be implemented by external
  * platform drivers instead of putting a big blacklist in video_detect.c
@@ -278,6 +291,14 @@ int acpi_video_backlight_support(void)
 }
 EXPORT_SYMBOL(acpi_video_backlight_support);
 
+/* For the ACPI video driver use only. */
+bool acpi_video_verify_backlight_support(void)
+{
+	return (acpi_video_support & ACPI_VIDEO_SKIP_BACKLIGHT) ?
+		false : acpi_video_backlight_support();
+}
+EXPORT_SYMBOL(acpi_video_verify_backlight_support);
+
 /*
  * Use acpi_backlight=vendor/video to force that backlight switching
  * is processed by vendor specific acpi drivers or video.ko driver.

+ 2 - 2
drivers/block/Kconfig

@@ -532,11 +532,11 @@ config BLK_DEV_RBD
 	  If unsure, say N.
 
 config BLK_DEV_RSXX
-	tristate "IBM FlashSystem 70/80 PCIe SSD Device Driver"
+	tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver"
 	depends on PCI
 	help
 	  Device driver for IBM's high speed PCIe SSD
-	  storage devices: FlashSystem-70 and FlashSystem-80.
+	  storage device: Flash Adapter 900GB Full Height.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called rsxx.

+ 21 - 0
drivers/block/drbd/drbd_actlog.c

@@ -659,6 +659,27 @@ void drbd_al_shrink(struct drbd_conf *mdev)
 	wake_up(&mdev->al_wait);
 }
 
+int drbd_initialize_al(struct drbd_conf *mdev, void *buffer)
+{
+	struct al_transaction_on_disk *al = buffer;
+	struct drbd_md *md = &mdev->ldev->md;
+	sector_t al_base = md->md_offset + md->al_offset;
+	int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
+	int i;
+
+	memset(al, 0, 4096);
+	al->magic = cpu_to_be32(DRBD_AL_MAGIC);
+	al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
+	al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
+
+	for (i = 0; i < al_size_4k; i++) {
+		int err = drbd_md_sync_page_io(mdev, mdev->ldev, al_base + i * 8, WRITE);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 static int w_update_odbm(struct drbd_work *w, int unused)
 {
 	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
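
drbd_initialize_al() stamps each 4096-byte activity-log transaction with a magic, a type and a crc32c that is computed while the checksum field itself is still zero, so a verifier has to clear the field before recomputing. A self-contained sketch of that seal-then-verify convention, using a plain bitwise CRC-32C in place of the kernel's lib/libcrc32c (field names and the magic value are illustrative, not DRBD's):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Bitwise CRC-32C (Castagnoli), reflected polynomial 0x82F63B78 */
static uint32_t crc32c(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;

	crc = ~crc;
	while (len--) {
		crc ^= *p++;
		for (int k = 0; k < 8; k++)
			crc = (crc >> 1) ^ (0x82F63B78u & -(crc & 1));
	}
	return ~crc;
}

struct al_block {			/* 4096 bytes, like the AL transaction */
	uint32_t magic;
	uint32_t type;
	uint32_t crc;
	uint8_t payload[4096 - 12];
};

static void seal(struct al_block *b)
{
	b->crc = 0;			/* checksum computed over a zeroed field */
	b->crc = crc32c(0, b, sizeof(*b));
}

static int verify(struct al_block *b)
{
	uint32_t want = b->crc, got;

	b->crc = 0;
	got = crc32c(0, b, sizeof(*b));
	b->crc = want;
	return got == want;
}

int main(void)
{
	static struct al_block b;

	b.magic = 0x901A23E5;		/* placeholder magic */
	seal(&b);
	printf("verify: %d\n", verify(&b));	/* 1 */
	return 0;
}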

+ 13 - 2
drivers/block/drbd/drbd_int.h

@@ -832,6 +832,7 @@ struct drbd_tconn {			/* is a resource from the config file */
 	unsigned susp_nod:1;		/* IO suspended because no data */
 	unsigned susp_fen:1;		/* IO suspended because fence peer handler runs */
 	struct mutex cstate_mutex;	/* Protects graceful disconnects */
+	unsigned int connect_cnt;	/* Inc each time a connection is established */
 
 	unsigned long flags;
 	struct net_conf *net_conf;	/* content protected by rcu */
@@ -1132,6 +1133,7 @@ extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
 void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
 
 extern void conn_md_sync(struct drbd_tconn *tconn);
+extern void drbd_md_write(struct drbd_conf *mdev, void *buffer);
 extern void drbd_md_sync(struct drbd_conf *mdev);
 extern int  drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
 extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
@@ -1466,8 +1468,16 @@ extern void drbd_suspend_io(struct drbd_conf *mdev);
 extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
 extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int);
-enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
-extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
+enum determine_dev_size {
+	DS_ERROR_SHRINK = -3,
+	DS_ERROR_SPACE_MD = -2,
+	DS_ERROR = -1,
+	DS_UNCHANGED = 0,
+	DS_SHRUNK = 1,
+	DS_GREW = 2
+};
+extern enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
@@ -1633,6 +1643,7 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
 #define drbd_set_out_of_sync(mdev, sector, size) \
 	__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
 extern void drbd_al_shrink(struct drbd_conf *mdev);
+extern int drbd_initialize_al(struct drbd_conf *, void *);
 
 /* drbd_nl.c */
 /* state info broadcast */

+ 34 - 27
drivers/block/drbd/drbd_main.c

@@ -2762,8 +2762,6 @@ int __init drbd_init(void)
 	/*
 	 * allocate all necessary structs
 	 */
-	err = -ENOMEM;
-
 	init_waitqueue_head(&drbd_pp_wait);
 
 	drbd_proc = NULL; /* play safe for drbd_cleanup */
@@ -2773,6 +2771,7 @@ int __init drbd_init(void)
 	if (err)
 		goto fail;
 
+	err = -ENOMEM;
 	drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
 	if (!drbd_proc)	{
 		printk(KERN_ERR "drbd: unable to register proc file\n");
@@ -2803,7 +2802,6 @@ int __init drbd_init(void)
 fail:
 	drbd_cleanup();
 	if (err == -ENOMEM)
-		/* currently always the case */
 		printk(KERN_ERR "drbd: ran out of memory\n");
 	else
 		printk(KERN_ERR "drbd: initialization failure\n");
@@ -2881,34 +2879,14 @@ struct meta_data_on_disk {
 	u8 reserved_u8[4096 - (7*8 + 10*4)];
 } __packed;
 
-/**
- * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
- * @mdev:	DRBD device.
- */
-void drbd_md_sync(struct drbd_conf *mdev)
+
+
+void drbd_md_write(struct drbd_conf *mdev, void *b)
 {
-	struct meta_data_on_disk *buffer;
+	struct meta_data_on_disk *buffer = b;
 	sector_t sector;
 	int i;
 
-	/* Don't accidentally change the DRBD meta data layout. */
-	BUILD_BUG_ON(UI_SIZE != 4);
-	BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
-
-	del_timer(&mdev->md_sync_timer);
-	/* timer may be rearmed by drbd_md_mark_dirty() now. */
-	if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
-		return;
-
-	/* We use here D_FAILED and not D_ATTACHING because we try to write
-	 * metadata even if we detach due to a disk failure! */
-	if (!get_ldev_if_state(mdev, D_FAILED))
-		return;
-
-	buffer = drbd_md_get_buffer(mdev);
-	if (!buffer)
-		goto out;
-
 	memset(buffer, 0, sizeof(*buffer));
 
 	buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
@@ -2937,6 +2915,35 @@ void drbd_md_sync(struct drbd_conf *mdev)
 		dev_err(DEV, "meta data update failed!\n");
 		drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
 	}
+}
+
+/**
+ * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
+ * @mdev:	DRBD device.
+ */
+void drbd_md_sync(struct drbd_conf *mdev)
+{
+	struct meta_data_on_disk *buffer;
+
+	/* Don't accidentally change the DRBD meta data layout. */
+	BUILD_BUG_ON(UI_SIZE != 4);
+	BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
+
+	del_timer(&mdev->md_sync_timer);
+	/* timer may be rearmed by drbd_md_mark_dirty() now. */
+	if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
+		return;
+
+	/* We use here D_FAILED and not D_ATTACHING because we try to write
+	 * metadata even if we detach due to a disk failure! */
+	if (!get_ldev_if_state(mdev, D_FAILED))
+		return;
+
+	buffer = drbd_md_get_buffer(mdev);
+	if (!buffer)
+		goto out;
+
+	drbd_md_write(mdev, buffer);
 
 	/* Update mdev->ldev->md.la_size_sect,
 	 * since we updated it on metadata. */

+ 143 - 42
drivers/block/drbd/drbd_nl.c

@@ -417,6 +417,7 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
 
 bool conn_try_outdate_peer(struct drbd_tconn *tconn)
 {
+	unsigned int connect_cnt;
 	union drbd_state mask = { };
 	union drbd_state val = { };
 	enum drbd_fencing_p fp;
@@ -428,6 +429,10 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
 		return false;
 	}
 
+	spin_lock_irq(&tconn->req_lock);
+	connect_cnt = tconn->connect_cnt;
+	spin_unlock_irq(&tconn->req_lock);
+
 	fp = highest_fencing_policy(tconn);
 	switch (fp) {
 	case FP_NOT_AVAIL:
@@ -492,8 +497,14 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
 	   here, because we might have been able to re-establish the connection
 	   in the meantime. */
 	spin_lock_irq(&tconn->req_lock);
-	if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags))
-		_conn_request_state(tconn, mask, val, CS_VERBOSE);
+	if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags)) {
+		if (tconn->connect_cnt != connect_cnt)
+			/* In case the connection was established and dropped
+			   while the fence-peer handler was running, ignore it */
+			conn_info(tconn, "Ignoring fence-peer exit code\n");
+		else
+			_conn_request_state(tconn, mask, val, CS_VERBOSE);
+	}
 	spin_unlock_irq(&tconn->req_lock);
 
 	return conn_highest_pdsk(tconn) <= D_OUTDATED;
@@ -816,15 +827,20 @@ void drbd_resume_io(struct drbd_conf *mdev)
  * Returns 0 on success, negative return values indicate errors.
  * You should call drbd_md_sync() after calling this function.
  */
-enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
+enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
 {
 	sector_t prev_first_sect, prev_size; /* previous meta location */
 	sector_t la_size_sect, u_size;
+	struct drbd_md *md = &mdev->ldev->md;
+	u32 prev_al_stripe_size_4k;
+	u32 prev_al_stripes;
 	sector_t size;
 	char ppb[10];
+	void *buffer;
 
 	int md_moved, la_size_changed;
-	enum determine_dev_size rv = unchanged;
+	enum determine_dev_size rv = DS_UNCHANGED;
 
 	/* race:
 	 * application request passes inc_ap_bio,
@@ -836,6 +852,11 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 	 * still lock the act_log to not trigger ASSERTs there.
 	 */
 	drbd_suspend_io(mdev);
+	buffer = drbd_md_get_buffer(mdev); /* Lock meta-data IO */
+	if (!buffer) {
+		drbd_resume_io(mdev);
+		return DS_ERROR;
+	}
 
 	/* no wait necessary anymore, actually we could assert that */
 	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
@@ -844,7 +865,17 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 	prev_size = mdev->ldev->md.md_size_sect;
 	la_size_sect = mdev->ldev->md.la_size_sect;
 
-	/* TODO: should only be some assert here, not (re)init... */
+	if (rs) {
+		/* rs is non NULL if we should change the AL layout only */
+
+		prev_al_stripes = md->al_stripes;
+		prev_al_stripe_size_4k = md->al_stripe_size_4k;
+
+		md->al_stripes = rs->al_stripes;
+		md->al_stripe_size_4k = rs->al_stripe_size / 4;
+		md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
+	}
+
 	drbd_md_set_sector_offsets(mdev, mdev->ldev);
 
 	rcu_read_lock();
@@ -852,6 +883,21 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 	rcu_read_unlock();
 	size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED);
 
+	if (size < la_size_sect) {
+		if (rs && u_size == 0) {
+			/* Remove "rs &&" later. This check should always be active, but
+			   right now the receiver expects the permissive behavior */
+			dev_warn(DEV, "Implicit shrink not allowed. "
+				 "Use --size=%llus for explicit shrink.\n",
+				 (unsigned long long)size);
+			rv = DS_ERROR_SHRINK;
+		}
+		if (u_size > size)
+			rv = DS_ERROR_SPACE_MD;
+		if (rv != DS_UNCHANGED)
+			goto err_out;
+	}
+
 	if (drbd_get_capacity(mdev->this_bdev) != size ||
 	    drbd_bm_capacity(mdev) != size) {
 		int err;
@@ -867,7 +913,7 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 				    "Leaving size unchanged at size = %lu KB\n",
 				    (unsigned long)size);
 			}
-			rv = dev_size_error;
+			rv = DS_ERROR;
 		}
 		/* racy, see comments above. */
 		drbd_set_my_capacity(mdev, size);
@@ -875,38 +921,57 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 		dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
 		     (unsigned long long)size>>1);
 	}
-	if (rv == dev_size_error)
-		goto out;
+	if (rv <= DS_ERROR)
+		goto err_out;
 
 	la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect);
 
 	md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
 		|| prev_size	   != mdev->ldev->md.md_size_sect;
 
-	if (la_size_changed || md_moved) {
-		int err;
+	if (la_size_changed || md_moved || rs) {
+		u32 prev_flags;
 
 		drbd_al_shrink(mdev); /* All extents inactive. */
+
+		prev_flags = md->flags;
+		md->flags &= ~MDF_PRIMARY_IND;
+		drbd_md_write(mdev, buffer);
+
 		dev_info(DEV, "Writing the whole bitmap, %s\n",
 			 la_size_changed && md_moved ? "size changed and md moved" :
 			 la_size_changed ? "size changed" : "md moved");
 		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
-		err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
-				     "size changed", BM_LOCKED_MASK);
-		if (err) {
-			rv = dev_size_error;
-			goto out;
-		}
-		drbd_md_mark_dirty(mdev);
+		drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
+			       "size changed", BM_LOCKED_MASK);
+		drbd_initialize_al(mdev, buffer);
+
+		md->flags = prev_flags;
+		drbd_md_write(mdev, buffer);
+
+		if (rs)
+			dev_info(DEV, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
+				 md->al_stripes, md->al_stripe_size_4k * 4);
 	}
 
 	if (size > la_size_sect)
-		rv = grew;
+		rv = DS_GREW;
 	if (size < la_size_sect)
-		rv = shrunk;
-out:
+		rv = DS_SHRUNK;
+
+	if (0) {
+	err_out:
+		if (rs) {
+			md->al_stripes = prev_al_stripes;
+			md->al_stripe_size_4k = prev_al_stripe_size_4k;
+			md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
+
+			drbd_md_set_sector_offsets(mdev, mdev->ldev);
+		}
+	}
 	lc_unlock(mdev->act_log);
 	wake_up(&mdev->al_wait);
+	drbd_md_put_buffer(mdev);
 	drbd_resume_io(mdev);
 
 	return rv;
@@ -1607,11 +1672,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	    !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
 		set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
-	dd = drbd_determine_dev_size(mdev, 0);
-	if (dd == dev_size_error) {
+	dd = drbd_determine_dev_size(mdev, 0, NULL);
+	if (dd <= DS_ERROR) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto force_diskless_dec;
-	} else if (dd == grew)
+	} else if (dd == DS_GREW)
 		set_bit(RESYNC_AFTER_NEG, &mdev->flags);
 
 	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) ||
@@ -2305,6 +2370,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
+	bool change_al_layout = false;
 	enum dds_flags ddsf;
 	sector_t u_size;
 	int err;
@@ -2315,31 +2381,33 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto fail;
 
+	mdev = adm_ctx.mdev;
+	if (!get_ldev(mdev)) {
+		retcode = ERR_NO_DISK;
+		goto fail;
+	}
+
 	memset(&rs, 0, sizeof(struct resize_parms));
+	rs.al_stripes = mdev->ldev->md.al_stripes;
+	rs.al_stripe_size = mdev->ldev->md.al_stripe_size_4k * 4;
 	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
 		err = resize_parms_from_attrs(&rs, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
-			goto fail;
+			goto fail_ldev;
 		}
 	}
 
-	mdev = adm_ctx.mdev;
 	if (mdev->state.conn > C_CONNECTED) {
 		retcode = ERR_RESIZE_RESYNC;
-		goto fail;
+		goto fail_ldev;
 	}
 
 	if (mdev->state.role == R_SECONDARY &&
 	    mdev->state.peer == R_SECONDARY) {
 		retcode = ERR_NO_PRIMARY;
-		goto fail;
-	}
-
-	if (!get_ldev(mdev)) {
-		retcode = ERR_NO_DISK;
-		goto fail;
+		goto fail_ldev;
 	}
 
 	if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
@@ -2358,6 +2426,28 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
+	if (mdev->ldev->md.al_stripes != rs.al_stripes ||
+	    mdev->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
+		u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
+
+		if (al_size_k > (16 * 1024 * 1024)) {
+			retcode = ERR_MD_LAYOUT_TOO_BIG;
+			goto fail_ldev;
+		}
+
+		if (al_size_k < MD_32kB_SECT/2) {
+			retcode = ERR_MD_LAYOUT_TOO_SMALL;
+			goto fail_ldev;
+		}
+
+		if (mdev->state.conn != C_CONNECTED) {
+			retcode = ERR_MD_LAYOUT_CONNECTED;
+			goto fail_ldev;
+		}
+
+		change_al_layout = true;
+	}
+
 	if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
 		mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
 
@@ -2373,16 +2463,22 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
-	dd = drbd_determine_dev_size(mdev, ddsf);
+	dd = drbd_determine_dev_size(mdev, ddsf, change_al_layout ? &rs : NULL);
 	drbd_md_sync(mdev);
 	put_ldev(mdev);
-	if (dd == dev_size_error) {
+	if (dd == DS_ERROR) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto fail;
+	} else if (dd == DS_ERROR_SPACE_MD) {
+		retcode = ERR_MD_LAYOUT_NO_FIT;
+		goto fail;
+	} else if (dd == DS_ERROR_SHRINK) {
+		retcode = ERR_IMPLICIT_SHRINK;
+		goto fail;
 	}
 
 	if (mdev->state.conn == C_CONNECTED) {
-		if (dd == grew)
+		if (dd == DS_GREW)
 			set_bit(RESIZE_PENDING, &mdev->flags);
 
 		drbd_send_uuids(mdev);
@@ -2658,7 +2754,6 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 		const struct sib_info *sib)
 {
 	struct state_info *si = NULL; /* for sizeof(si->member); */
-	struct net_conf *nc;
 	struct nlattr *nla;
 	int got_ldev;
 	int err = 0;
@@ -2688,13 +2783,19 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 		goto nla_put_failure;
 
 	rcu_read_lock();
-	if (got_ldev)
-		if (disk_conf_to_skb(skb, rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive))
-			goto nla_put_failure;
+	if (got_ldev) {
+		struct disk_conf *disk_conf;
 
-	nc = rcu_dereference(mdev->tconn->net_conf);
-	if (nc)
-		err = net_conf_to_skb(skb, nc, exclude_sensitive);
+		disk_conf = rcu_dereference(mdev->ldev->disk_conf);
+		err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
+	}
+	if (!err) {
+		struct net_conf *nc;
+
+		nc = rcu_dereference(mdev->tconn->net_conf);
+		if (nc)
+			err = net_conf_to_skb(skb, nc, exclude_sensitive);
+	}
 	rcu_read_unlock();
 	if (err)
 		goto nla_put_failure;
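
One detail worth noting in the drbd_determine_dev_size() rework above is the "if (0) { err_out: ... }" construct: the rollback block is skipped on the straight-line path and reached only via goto, which lets the lc_unlock()/drbd_resume_io() cleanup after it run on success and failure alike. A tiny runnable sketch of the idiom:

#include <stdio.h>

static int do_resize(int fail)
{
	int rv = 0;

	/* ... take locks, snapshot the old layout ... */
	if (fail) {
		rv = -1;
		goto err_out;
	}
	puts("resized");

	if (0) {
err_out:
		/* rollback, reached only via goto */
		puts("restored old layout");
	}
	/* common cleanup, runs on both paths */
	puts("unlocked");
	return rv;
}

int main(void)
{
	do_resize(0);
	do_resize(1);
	return 0;
}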

+ 6 - 6
drivers/block/drbd/drbd_receiver.c

@@ -1039,6 +1039,8 @@ randomize:
 	rcu_read_lock();
 	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 		kref_get(&mdev->kref);
+		rcu_read_unlock();
+
 		/* Prevent a race between resync-handshake and
 		 * being promoted to Primary.
 		 *
@@ -1049,8 +1051,6 @@ randomize:
 		mutex_lock(mdev->state_mutex);
 		mutex_unlock(mdev->state_mutex);
 
-		rcu_read_unlock();
-
 		if (discard_my_data)
 			set_bit(DISCARD_MY_DATA, &mdev->flags);
 		else
@@ -3545,7 +3545,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 {
 	struct drbd_conf *mdev;
 	struct p_sizes *p = pi->data;
-	enum determine_dev_size dd = unchanged;
+	enum determine_dev_size dd = DS_UNCHANGED;
 	sector_t p_size, p_usize, my_usize;
 	int ldsc = 0; /* local disk size changed */
 	enum dds_flags ddsf;
@@ -3617,9 +3617,9 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 
 	ddsf = be16_to_cpu(p->dds_flags);
 	if (get_ldev(mdev)) {
-		dd = drbd_determine_dev_size(mdev, ddsf);
+		dd = drbd_determine_dev_size(mdev, ddsf, NULL);
 		put_ldev(mdev);
-		if (dd == dev_size_error)
+		if (dd == DS_ERROR)
 			return -EIO;
 		drbd_md_sync(mdev);
 	} else {
@@ -3647,7 +3647,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 			drbd_send_sizes(mdev, 0, ddsf);
 		}
 		if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
-		    (dd == grew && mdev->state.conn == C_CONNECTED)) {
+		    (dd == DS_GREW && mdev->state.conn == C_CONNECTED)) {
 			if (mdev->state.pdsk >= D_INCONSISTENT &&
 			    mdev->state.disk >= D_INCONSISTENT) {
 				if (ddsf & DDSF_NO_RESYNC)

+ 3 - 1
drivers/block/drbd/drbd_state.c

@@ -1115,8 +1115,10 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 		drbd_thread_restart_nowait(&mdev->tconn->receiver);
 
 	/* Resume AL writing if we get a connection */
-	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
 		drbd_resume_al(mdev);
+		mdev->tconn->connect_cnt++;
+	}
 
 	/* remember last attach time so request_timer_fn() won't
 	 * kill newly established sessions while we are still trying to thaw
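
The connect_cnt increment above pairs with the snapshot-and-compare in conn_try_outdate_peer(): the counter is sampled before the slow fence-peer handler runs, and the handler's verdict is discarded if a reconnect bumped the counter in the meantime. A hedged userspace sketch of the pattern, with the spinlock reduced to a mutex and all names invented:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int connect_cnt;	/* bumped on every (re)connect */

static void connection_established(void)
{
	pthread_mutex_lock(&lock);
	connect_cnt++;
	pthread_mutex_unlock(&lock);
}

static void fence_peer(void)
{
	unsigned int snap;

	pthread_mutex_lock(&lock);
	snap = connect_cnt;
	pthread_mutex_unlock(&lock);

	/* ... long-running fence-peer handler runs unlocked ... */
	connection_established();	/* simulate a racing reconnect */

	pthread_mutex_lock(&lock);
	if (connect_cnt != snap)
		puts("Ignoring fence-peer exit code");	/* stale verdict */
	else
		puts("applying fence-peer verdict");
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	connection_established();
	fence_peer();		/* prints "Ignoring fence-peer exit code" */
	return 0;
}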

+ 348 - 11
drivers/block/rsxx/core.c

@@ -31,6 +31,8 @@
 #include <linux/slab.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 
 #include <linux/genhd.h>
 #include <linux/idr.h>
@@ -39,8 +41,9 @@
 #include "rsxx_cfg.h"
 
 #define NO_LEGACY 0
+#define SYNC_START_TIMEOUT (10 * 60) /* 10 minutes */
 
-MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver");
+MODULE_DESCRIPTION("IBM Flash Adapter 900GB Full Height Device Driver");
 MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRIVER_VERSION);
@@ -49,9 +52,282 @@ static unsigned int force_legacy = NO_LEGACY;
 module_param(force_legacy, uint, 0444);
 MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts");
 
+static unsigned int sync_start = 1;
+module_param(sync_start, uint, 0444);
+MODULE_PARM_DESC(sync_start, "On by Default: Driver load will not complete "
+			     "until the card startup has completed.");
+
 static DEFINE_IDA(rsxx_disk_ida);
 static DEFINE_SPINLOCK(rsxx_ida_lock);
 
+/* --------------------Debugfs Setup ------------------- */
+
+struct rsxx_cram {
+	u32 f_pos;
+	u32 offset;
+	void *i_private;
+};
+
+static int rsxx_attr_pci_regs_show(struct seq_file *m, void *p)
+{
+	struct rsxx_cardinfo *card = m->private;
+
+	seq_printf(m, "HWID		0x%08x\n",
+					ioread32(card->regmap + HWID));
+	seq_printf(m, "SCRATCH		0x%08x\n",
+					ioread32(card->regmap + SCRATCH));
+	seq_printf(m, "IER		0x%08x\n",
+					ioread32(card->regmap + IER));
+	seq_printf(m, "IPR		0x%08x\n",
+					ioread32(card->regmap + IPR));
+	seq_printf(m, "CREG_CMD		0x%08x\n",
+					ioread32(card->regmap + CREG_CMD));
+	seq_printf(m, "CREG_ADD		0x%08x\n",
+					ioread32(card->regmap + CREG_ADD));
+	seq_printf(m, "CREG_CNT		0x%08x\n",
+					ioread32(card->regmap + CREG_CNT));
+	seq_printf(m, "CREG_STAT	0x%08x\n",
+					ioread32(card->regmap + CREG_STAT));
+	seq_printf(m, "CREG_DATA0	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA0));
+	seq_printf(m, "CREG_DATA1	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA1));
+	seq_printf(m, "CREG_DATA2	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA2));
+	seq_printf(m, "CREG_DATA3	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA3));
+	seq_printf(m, "CREG_DATA4	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA4));
+	seq_printf(m, "CREG_DATA5	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA5));
+	seq_printf(m, "CREG_DATA6	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA6));
+	seq_printf(m, "CREG_DATA7	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA7));
+	seq_printf(m, "INTR_COAL	0x%08x\n",
+					ioread32(card->regmap + INTR_COAL));
+	seq_printf(m, "HW_ERROR		0x%08x\n",
+					ioread32(card->regmap + HW_ERROR));
+	seq_printf(m, "DEBUG0		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG0));
+	seq_printf(m, "DEBUG1		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG1));
+	seq_printf(m, "DEBUG2		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG2));
+	seq_printf(m, "DEBUG3		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG3));
+	seq_printf(m, "DEBUG4		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG4));
+	seq_printf(m, "DEBUG5		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG5));
+	seq_printf(m, "DEBUG6		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG6));
+	seq_printf(m, "DEBUG7		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG7));
+	seq_printf(m, "RECONFIG		0x%08x\n",
+					ioread32(card->regmap + PCI_RECONFIG));
+
+	return 0;
+}
+
+static int rsxx_attr_stats_show(struct seq_file *m, void *p)
+{
+	struct rsxx_cardinfo *card = m->private;
+	int i;
+
+	for (i = 0; i < card->n_targets; i++) {
+		seq_printf(m, "Ctrl %d CRC Errors	= %d\n",
+				i, card->ctrl[i].stats.crc_errors);
+		seq_printf(m, "Ctrl %d Hard Errors	= %d\n",
+				i, card->ctrl[i].stats.hard_errors);
+		seq_printf(m, "Ctrl %d Soft Errors	= %d\n",
+				i, card->ctrl[i].stats.soft_errors);
+		seq_printf(m, "Ctrl %d Writes Issued	= %d\n",
+				i, card->ctrl[i].stats.writes_issued);
+		seq_printf(m, "Ctrl %d Writes Failed	= %d\n",
+				i, card->ctrl[i].stats.writes_failed);
+		seq_printf(m, "Ctrl %d Reads Issued	= %d\n",
+				i, card->ctrl[i].stats.reads_issued);
+		seq_printf(m, "Ctrl %d Reads Failed	= %d\n",
+				i, card->ctrl[i].stats.reads_failed);
+		seq_printf(m, "Ctrl %d Reads Retried	= %d\n",
+				i, card->ctrl[i].stats.reads_retried);
+		seq_printf(m, "Ctrl %d Discards Issued	= %d\n",
+				i, card->ctrl[i].stats.discards_issued);
+		seq_printf(m, "Ctrl %d Discards Failed	= %d\n",
+				i, card->ctrl[i].stats.discards_failed);
+		seq_printf(m, "Ctrl %d DMA SW Errors	= %d\n",
+				i, card->ctrl[i].stats.dma_sw_err);
+		seq_printf(m, "Ctrl %d DMA HW Faults	= %d\n",
+				i, card->ctrl[i].stats.dma_hw_fault);
+		seq_printf(m, "Ctrl %d DMAs Cancelled	= %d\n",
+				i, card->ctrl[i].stats.dma_cancelled);
+		seq_printf(m, "Ctrl %d SW Queue Depth	= %d\n",
+				i, card->ctrl[i].stats.sw_q_depth);
+		seq_printf(m, "Ctrl %d HW Queue Depth	= %d\n",
+			i, atomic_read(&card->ctrl[i].stats.hw_q_depth));
+	}
+
+	return 0;
+}
+
+static int rsxx_attr_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, rsxx_attr_stats_show, inode->i_private);
+}
+
+static int rsxx_attr_pci_regs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, rsxx_attr_pci_regs_show, inode->i_private);
+}
+
+static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf,
+			      size_t cnt, loff_t *ppos)
+{
+	struct rsxx_cram *info = fp->private_data;
+	struct rsxx_cardinfo *card = info->i_private;
+	char *buf;
+	int st;
+
+	buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	info->f_pos = (u32)*ppos + info->offset;
+
+	st = rsxx_creg_read(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
+	if (st) {
+		kfree(buf);
+		return st;
+	}
+
+	st = copy_to_user(ubuf, buf, cnt);
+	if (st) {
+		kfree(buf);
+		return st;
+	}
+
+	info->offset += cnt;
+
+	kfree(buf);
+
+	return cnt;
+}
+
+static ssize_t rsxx_cram_write(struct file *fp, const char __user *ubuf,
+			       size_t cnt, loff_t *ppos)
+{
+	struct rsxx_cram *info = fp->private_data;
+	struct rsxx_cardinfo *card = info->i_private;
+	char *buf;
+	int st;
+
+	buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	st = copy_from_user(buf, ubuf, cnt);
+	if (st) {
+		kfree(buf);
+		return st;
+	}
+
+	info->f_pos = (u32)*ppos + info->offset;
+
+	st = rsxx_creg_write(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
+	if (st) {
+		kfree(buf);
+		return st;
+	}
+
+	info->offset += cnt;
+
+	kfree(buf);
+
+	return cnt;
+}
+
+static int rsxx_cram_open(struct inode *inode, struct file *file)
+{
+	struct rsxx_cram *info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	info->i_private = inode->i_private;
+	info->f_pos = file->f_pos;
+	file->private_data = info;
+
+	return 0;
+}
+
+static int rsxx_cram_release(struct inode *inode, struct file *file)
+{
+	struct rsxx_cram *info = file->private_data;
+
+	if (!info)
+		return 0;
+
+	kfree(info);
+	file->private_data = NULL;
+
+	return 0;
+}
+
+static const struct file_operations debugfs_cram_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rsxx_cram_open,
+	.read		= rsxx_cram_read,
+	.write		= rsxx_cram_write,
+	.release	= rsxx_cram_release,
+};
+
+static const struct file_operations debugfs_stats_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rsxx_attr_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations debugfs_pci_regs_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rsxx_attr_pci_regs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card)
+{
+	struct dentry *debugfs_stats;
+	struct dentry *debugfs_pci_regs;
+	struct dentry *debugfs_cram;
+
+	card->debugfs_dir = debugfs_create_dir(card->gendisk->disk_name, NULL);
+	if (IS_ERR_OR_NULL(card->debugfs_dir))
+		goto failed_debugfs_dir;
+
+	debugfs_stats = debugfs_create_file("stats", S_IRUGO,
+					    card->debugfs_dir, card,
+					    &debugfs_stats_fops);
+	if (IS_ERR_OR_NULL(debugfs_stats))
+		goto failed_debugfs_stats;
+
+	debugfs_pci_regs = debugfs_create_file("pci_regs", S_IRUGO,
+					       card->debugfs_dir, card,
+					       &debugfs_pci_regs_fops);
+	if (IS_ERR_OR_NULL(debugfs_pci_regs))
+		goto failed_debugfs_pci_regs;
+
+	debugfs_cram = debugfs_create_file("cram", S_IRUGO | S_IWUSR,
+					   card->debugfs_dir, card,
+					   &debugfs_cram_fops);
+	if (IS_ERR_OR_NULL(debugfs_cram))
+		goto failed_debugfs_cram;
+
+	return;
+failed_debugfs_cram:
+	debugfs_remove(debugfs_pci_regs);
+failed_debugfs_pci_regs:
+	debugfs_remove(debugfs_stats);
+failed_debugfs_stats:
+	debugfs_remove(card->debugfs_dir);
+failed_debugfs_dir:
+	card->debugfs_dir = NULL;
+}
+
 /*----------------- Interrupt Control & Handling -------------------*/
 
 static void rsxx_mask_interrupts(struct rsxx_cardinfo *card)
@@ -163,12 +439,13 @@ static irqreturn_t rsxx_isr(int irq, void *pdata)
 		}
 
 		if (isr & CR_INTR_CREG) {
-			schedule_work(&card->creg_ctrl.done_work);
+			queue_work(card->creg_ctrl.creg_wq,
+				   &card->creg_ctrl.done_work);
 			handled++;
 		}
 
 		if (isr & CR_INTR_EVENT) {
-			schedule_work(&card->event_work);
+			queue_work(card->event_wq, &card->event_work);
 			rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
 			handled++;
 		}
@@ -329,7 +606,7 @@ static int rsxx_eeh_frozen(struct pci_dev *dev)
 	int i;
 	int st;
 
-	dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n");
+	dev_warn(&dev->dev, "IBM Flash Adapter PCI: preparing for slot reset.\n");
 
 	card->eeh_state = 1;
 	rsxx_mask_interrupts(card);
@@ -367,15 +644,26 @@ static void rsxx_eeh_failure(struct pci_dev *dev)
 {
 	struct rsxx_cardinfo *card = pci_get_drvdata(dev);
 	int i;
+	int cnt = 0;
 
-	dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n");
+	dev_err(&dev->dev, "IBM Flash Adapter PCI: disabling failed card.\n");
 
 	card->eeh_state = 1;
+	card->halt = 1;
 
-	for (i = 0; i < card->n_targets; i++)
-		del_timer_sync(&card->ctrl[i].activity_timer);
+	for (i = 0; i < card->n_targets; i++) {
+		spin_lock_bh(&card->ctrl[i].queue_lock);
+		cnt = rsxx_cleanup_dma_queue(&card->ctrl[i],
+					     &card->ctrl[i].queue);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
+
+		cnt += rsxx_dma_cancel(&card->ctrl[i]);
 
-	rsxx_eeh_cancel_dmas(card);
+		if (cnt)
+			dev_info(CARD_TO_DEV(card),
+				"Freed %d queued DMAs on channel %d\n",
+				cnt, card->ctrl[i].id);
+	}
 }
 
 static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card)
@@ -432,7 +720,7 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
 	int st;
 
 	dev_warn(&dev->dev,
-		"IBM FlashSystem PCI: recovering from slot reset.\n");
+		"IBM Flash Adapter PCI: recovering from slot reset.\n");
 
 	st = pci_enable_device(dev);
 	if (st)
@@ -485,7 +773,7 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
 				&card->ctrl[i].issue_dma_work);
 	}
 
-	dev_info(&dev->dev, "IBM FlashSystem PCI: recovery complete.\n");
+	dev_info(&dev->dev, "IBM Flash Adapter PCI: recovery complete.\n");
 
 	return PCI_ERS_RESULT_RECOVERED;
 
@@ -528,6 +816,7 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 {
 	struct rsxx_cardinfo *card;
 	int st;
+	unsigned int sync_timeout;
 
 	dev_info(&dev->dev, "PCI-Flash SSD discovered\n");
 
@@ -610,7 +899,11 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 	}
 
 	/************* Setup Processor Command Interface *************/
-	rsxx_creg_setup(card);
+	st = rsxx_creg_setup(card);
+	if (st) {
+		dev_err(CARD_TO_DEV(card), "Failed to setup creg interface.\n");
+		goto failed_creg_setup;
+	}
 
 	spin_lock_irq(&card->irq_lock);
 	rsxx_enable_ier_and_isr(card, CR_INTR_CREG);
@@ -650,6 +943,12 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 	}
 
 	/************* Setup Card Event Handler *************/
+	card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event");
+	if (!card->event_wq) {
+		dev_err(CARD_TO_DEV(card), "Failed card event setup.\n");
+		goto failed_event_handler;
+	}
+
 	INIT_WORK(&card->event_work, card_event_handler);
 
 	st = rsxx_setup_dev(card);
@@ -676,6 +975,33 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 		if (st)
 			dev_crit(CARD_TO_DEV(card),
 				"Failed issuing card startup\n");
+		if (sync_start) {
+			sync_timeout = SYNC_START_TIMEOUT;
+
+			dev_info(CARD_TO_DEV(card),
+				 "Waiting for card to startup\n");
+
+			do {
+				ssleep(1);
+				sync_timeout--;
+
+				rsxx_get_card_state(card, &card->state);
+			} while (sync_timeout &&
+				(card->state == CARD_STATE_STARTING));
+
+			if (card->state == CARD_STATE_STARTING) {
+				dev_warn(CARD_TO_DEV(card),
+					 "Card startup timed out\n");
+				card->size8 = 0;
+			} else {
+				dev_info(CARD_TO_DEV(card),
+					"card state: %s\n",
+					rsxx_card_state_to_str(card->state));
+				st = rsxx_get_card_size8(card, &card->size8);
+				if (st)
+					card->size8 = 0;
+			}
+		}
 	} else if (card->state == CARD_STATE_GOOD ||
 		   card->state == CARD_STATE_RD_ONLY_FAULT) {
 		st = rsxx_get_card_size8(card, &card->size8);
@@ -685,12 +1011,21 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 
 	rsxx_attach_dev(card);
 
+	/************* Setup Debugfs *************/
+	rsxx_debugfs_dev_new(card);
+
 	return 0;
 
 failed_create_dev:
+	destroy_workqueue(card->event_wq);
+	card->event_wq = NULL;
+failed_event_handler:
 	rsxx_dma_destroy(card);
 failed_dma_setup:
 failed_compatiblity_check:
+	destroy_workqueue(card->creg_ctrl.creg_wq);
+	card->creg_ctrl.creg_wq = NULL;
+failed_creg_setup:
 	spin_lock_irq(&card->irq_lock);
 	rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
 	spin_unlock_irq(&card->irq_lock);
@@ -756,6 +1091,8 @@ static void rsxx_pci_remove(struct pci_dev *dev)
 	/* Prevent work_structs from re-queuing themselves. */
 	card->halt = 1;
 
+	debugfs_remove_recursive(card->debugfs_dir);
+
 	free_irq(dev->irq, card);
 
 	if (!force_legacy)
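
rsxx_debugfs_dev_new() above, like the reworked probe error path, uses the standard unwind ladder: each failure label tears down exactly the resources acquired before the failing step, in reverse order. A generic runnable sketch:

#include <stdio.h>
#include <stdlib.h>

static int setup(void)
{
	char *a, *b, *c;

	a = malloc(16);
	if (!a)
		goto failed_a;
	b = malloc(16);
	if (!b)
		goto failed_b;
	c = malloc(16);
	if (!c)
		goto failed_c;

	puts("all three acquired");
	free(c);
	free(b);
	free(a);
	return 0;

failed_c:
	free(b);
failed_b:
	free(a);
failed_a:
	return -1;
}

int main(void)
{
	return setup();
}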

+ 14 - 0
drivers/block/rsxx/cregs.c

@@ -431,6 +431,15 @@ static int __issue_creg_rw(struct rsxx_cardinfo *card,
 	*hw_stat = completion.creg_status;
 
 	if (completion.st) {
+		/*
+		* This read is needed to verify that there has not been any
+		 * This read is needed to verify that no extreme errors (such
+		 * as EEH) have occurred. The function iowrite32 will not
+		 * detect EEH errors, so it is necessary that we recover if
+		 * such an error is the reason for the timeout. This is a
+		 * dummy read.
+		 */
+
 		dev_warn(CARD_TO_DEV(card),
 			"creg command failed(%d x%08x)\n",
 			completion.st, addr);
@@ -727,6 +736,11 @@ int rsxx_creg_setup(struct rsxx_cardinfo *card)
 {
 	card->creg_ctrl.active_cmd = NULL;
 
+	card->creg_ctrl.creg_wq =
+			create_singlethread_workqueue(DRIVER_NAME"_creg");
+	if (!card->creg_ctrl.creg_wq)
+		return -ENOMEM;
+
 	INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done);
 	mutex_init(&card->creg_ctrl.reset_lock);
 	INIT_LIST_HEAD(&card->creg_ctrl.queue);

+ 12 - 21
drivers/block/rsxx/dev.c

@@ -155,7 +155,8 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card,
 		atomic_set(&meta->error, 1);
 
 	if (atomic_dec_and_test(&meta->pending_dmas)) {
-		disk_stats_complete(card, meta->bio, meta->start_time);
+		if (!card->eeh_state && card->gendisk)
+			disk_stats_complete(card, meta->bio, meta->start_time);
 
 		bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0);
 		kmem_cache_free(bio_meta_pool, meta);
@@ -170,6 +171,12 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
 
 	might_sleep();
 
+	if (!card)
+		goto req_err;
+
+	if (bio->bi_sector + (bio->bi_size >> 9) > get_capacity(card->gendisk))
+		goto req_err;
+
 	if (unlikely(card->halt)) {
 		st = -EFAULT;
 		goto req_err;
@@ -196,7 +203,8 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
 	atomic_set(&bio_meta->pending_dmas, 0);
 	bio_meta->start_time = jiffies;
 
-	disk_stats_start(card, bio);
+	if (!unlikely(card->halt))
+		disk_stats_start(card, bio);
 
 	dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
 		 bio_data_dir(bio) ? 'W' : 'R', bio_meta,
@@ -225,24 +233,6 @@ static bool rsxx_discard_supported(struct rsxx_cardinfo *card)
 	return (pci_rev >= RSXX_DISCARD_SUPPORT);
 }
 
-static unsigned short rsxx_get_logical_block_size(
-					struct rsxx_cardinfo *card)
-{
-	u32 capabilities = 0;
-	int st;
-
-	st = rsxx_get_card_capabilities(card, &capabilities);
-	if (st)
-		dev_warn(CARD_TO_DEV(card),
-			"Failed reading card capabilities register\n");
-
-	/* Earlier firmware did not have support for 512 byte accesses */
-	if (capabilities & CARD_CAP_SUBPAGE_WRITES)
-		return 512;
-	else
-		return RSXX_HW_BLK_SIZE;
-}
-
 int rsxx_attach_dev(struct rsxx_cardinfo *card)
 {
 	mutex_lock(&card->dev_lock);
@@ -305,7 +295,7 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
 		return -ENOMEM;
 	}
 
-	blk_size = rsxx_get_logical_block_size(card);
+	blk_size = card->config.data.block_size;
 
 	blk_queue_make_request(card->queue, rsxx_make_request);
 	blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
@@ -347,6 +337,7 @@ void rsxx_destroy_dev(struct rsxx_cardinfo *card)
 	card->gendisk = NULL;
 
 	blk_cleanup_queue(card->queue);
+	card->queue->queuedata = NULL;
 	unregister_blkdev(card->major, DRIVER_NAME);
 }
 

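rsxx_make_request() now rejects bios that would run past the disk: the end sector is bi_sector plus bi_size converted from bytes to 512-byte sectors (the >> 9). The arithmetic, checked in isolation (values are made up):

#include <stdio.h>

/* Mirrors the new guard in rsxx_make_request(); parameter names follow
 * the bio fields, the capacity is in sectors. */
static int out_of_range(unsigned long long bi_sector, unsigned int bi_size,
			unsigned long long capacity)
{
	return bi_sector + (bi_size >> 9) > capacity;
}

int main(void)
{
	unsigned long long capacity = 1000;	/* sectors */

	printf("%d\n", out_of_range(992, 4096, capacity));	/* 0: ends exactly at 1000 */
	printf("%d\n", out_of_range(993, 4096, capacity));	/* 1: would end at 1001 */
	return 0;
}
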
+ 96 - 89
drivers/block/rsxx/dma.c

@@ -245,6 +245,22 @@ static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl,
 	kmem_cache_free(rsxx_dma_pool, dma);
 }
 
+int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
+			   struct list_head *q)
+{
+	struct rsxx_dma *dma;
+	struct rsxx_dma *tmp;
+	int cnt = 0;
+
+	list_for_each_entry_safe(dma, tmp, q, list) {
+		list_del(&dma->list);
+		rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
+		cnt++;
+	}
+
+	return cnt;
+}
+
 static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
 				 struct rsxx_dma *dma)
 {
@@ -252,9 +268,10 @@ static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
 	 * Requeued DMAs go to the front of the queue so they are issued
 	 * first.
 	 */
-	spin_lock(&ctrl->queue_lock);
+	spin_lock_bh(&ctrl->queue_lock);
+	ctrl->stats.sw_q_depth++;
 	list_add(&dma->list, &ctrl->queue);
-	spin_unlock(&ctrl->queue_lock);
+	spin_unlock_bh(&ctrl->queue_lock);
 }
 
 static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
@@ -329,6 +346,7 @@ static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
 static void dma_engine_stalled(unsigned long data)
 {
 	struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data;
+	int cnt;
 
 	if (atomic_read(&ctrl->stats.hw_q_depth) == 0 ||
 	    unlikely(ctrl->card->eeh_state))
@@ -349,18 +367,28 @@ static void dma_engine_stalled(unsigned long data)
 			"DMA channel %d has stalled, faulting interface.\n",
 			ctrl->id);
 		ctrl->card->dma_fault = 1;
+
+		/* Clean up the DMA queue */
+		spin_lock(&ctrl->queue_lock);
+		cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
+		spin_unlock(&ctrl->queue_lock);
+
+		cnt += rsxx_dma_cancel(ctrl);
+
+		if (cnt)
+			dev_info(CARD_TO_DEV(ctrl->card),
+				"Freed %d queued DMAs on channel %d\n",
+				cnt, ctrl->id);
 	}
 }
 
-static void rsxx_issue_dmas(struct work_struct *work)
+static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl)
 {
-	struct rsxx_dma_ctrl *ctrl;
 	struct rsxx_dma *dma;
 	int tag;
 	int cmds_pending = 0;
 	struct hw_cmd *hw_cmd_buf;
 
-	ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
 	hw_cmd_buf = ctrl->cmd.buf;
 
 	if (unlikely(ctrl->card->halt) ||
@@ -368,22 +396,22 @@ static void rsxx_issue_dmas(struct work_struct *work)
 		return;
 
 	while (1) {
-		spin_lock(&ctrl->queue_lock);
+		spin_lock_bh(&ctrl->queue_lock);
 		if (list_empty(&ctrl->queue)) {
-			spin_unlock(&ctrl->queue_lock);
+			spin_unlock_bh(&ctrl->queue_lock);
 			break;
 		}
-		spin_unlock(&ctrl->queue_lock);
+		spin_unlock_bh(&ctrl->queue_lock);
 
 		tag = pop_tracker(ctrl->trackers);
 		if (tag == -1)
 			break;
 
-		spin_lock(&ctrl->queue_lock);
+		spin_lock_bh(&ctrl->queue_lock);
 		dma = list_entry(ctrl->queue.next, struct rsxx_dma, list);
 		list_del(&dma->list);
 		ctrl->stats.sw_q_depth--;
-		spin_unlock(&ctrl->queue_lock);
+		spin_unlock_bh(&ctrl->queue_lock);
 
 		/*
 		 * This will catch any DMAs that slipped in right before the
@@ -440,9 +468,8 @@ static void rsxx_issue_dmas(struct work_struct *work)
 	}
 }
 
-static void rsxx_dma_done(struct work_struct *work)
+static void rsxx_dma_done(struct rsxx_dma_ctrl *ctrl)
 {
-	struct rsxx_dma_ctrl *ctrl;
 	struct rsxx_dma *dma;
 	unsigned long flags;
 	u16 count;
@@ -450,7 +477,6 @@ static void rsxx_dma_done(struct work_struct *work)
 	u8 tag;
 	struct hw_status *hw_st_buf;
 
-	ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
 	hw_st_buf = ctrl->status.buf;
 
 	if (unlikely(ctrl->card->halt) ||
@@ -520,33 +546,32 @@ static void rsxx_dma_done(struct work_struct *work)
 	rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id));
 	spin_unlock_irqrestore(&ctrl->card->irq_lock, flags);
 
-	spin_lock(&ctrl->queue_lock);
+	spin_lock_bh(&ctrl->queue_lock);
 	if (ctrl->stats.sw_q_depth)
 		queue_work(ctrl->issue_wq, &ctrl->issue_dma_work);
-	spin_unlock(&ctrl->queue_lock);
+	spin_unlock_bh(&ctrl->queue_lock);
 }
 
-static int rsxx_cleanup_dma_queue(struct rsxx_cardinfo *card,
-				      struct list_head *q)
+static void rsxx_schedule_issue(struct work_struct *work)
 {
-	struct rsxx_dma *dma;
-	struct rsxx_dma *tmp;
-	int cnt = 0;
+	struct rsxx_dma_ctrl *ctrl;
 
-	list_for_each_entry_safe(dma, tmp, q, list) {
-		list_del(&dma->list);
+	ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
 
-		if (dma->dma_addr)
-			pci_unmap_page(card->dev, dma->dma_addr,
-				       get_dma_size(dma),
-				       (dma->cmd == HW_CMD_BLK_WRITE) ?
-				       PCI_DMA_TODEVICE :
-				       PCI_DMA_FROMDEVICE);
-		kmem_cache_free(rsxx_dma_pool, dma);
-		cnt++;
-	}
+	mutex_lock(&ctrl->work_lock);
+	rsxx_issue_dmas(ctrl);
+	mutex_unlock(&ctrl->work_lock);
+}
 
-	return cnt;
+static void rsxx_schedule_done(struct work_struct *work)
+{
+	struct rsxx_dma_ctrl *ctrl;
+
+	ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
+
+	mutex_lock(&ctrl->work_lock);
+	rsxx_dma_done(ctrl);
+	mutex_unlock(&ctrl->work_lock);
 }
 
 static int rsxx_queue_discard(struct rsxx_cardinfo *card,
@@ -698,10 +723,10 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 
 	for (i = 0; i < card->n_targets; i++) {
 		if (!list_empty(&dma_list[i])) {
-			spin_lock(&card->ctrl[i].queue_lock);
+			spin_lock_bh(&card->ctrl[i].queue_lock);
 			card->ctrl[i].stats.sw_q_depth += dma_cnt[i];
 			list_splice_tail(&dma_list[i], &card->ctrl[i].queue);
-			spin_unlock(&card->ctrl[i].queue_lock);
+			spin_unlock_bh(&card->ctrl[i].queue_lock);
 
 			queue_work(card->ctrl[i].issue_wq,
 				   &card->ctrl[i].issue_dma_work);
@@ -711,8 +736,11 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 	return 0;
 
 bvec_err:
-	for (i = 0; i < card->n_targets; i++)
-		rsxx_cleanup_dma_queue(card, &dma_list[i]);
+	for (i = 0; i < card->n_targets; i++) {
+		spin_lock_bh(&card->ctrl[i].queue_lock);
+		rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i]);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
+	}
 
 	return st;
 }
@@ -780,6 +808,7 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev,
 	spin_lock_init(&ctrl->trackers->lock);
 
 	spin_lock_init(&ctrl->queue_lock);
+	mutex_init(&ctrl->work_lock);
 	INIT_LIST_HEAD(&ctrl->queue);
 
 	setup_timer(&ctrl->activity_timer, dma_engine_stalled,
@@ -793,8 +822,8 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev,
 	if (!ctrl->done_wq)
 		return -ENOMEM;
 
-	INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas);
-	INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done);
+	INIT_WORK(&ctrl->issue_dma_work, rsxx_schedule_issue);
+	INIT_WORK(&ctrl->dma_done_work, rsxx_schedule_done);
 
 	st = rsxx_hw_buffers_init(dev, ctrl);
 	if (st)
@@ -918,13 +947,30 @@ failed_dma_setup:
 	return st;
 }
 
+int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl)
+{
+	struct rsxx_dma *dma;
+	int i;
+	int cnt = 0;
+
+	/* Clean up issued DMAs */
+	for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) {
+		dma = get_tracker_dma(ctrl->trackers, i);
+		if (dma) {
+			atomic_dec(&ctrl->stats.hw_q_depth);
+			rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
+			push_tracker(ctrl->trackers, i);
+			cnt++;
+		}
+	}
+
+	return cnt;
+}
 
 void rsxx_dma_destroy(struct rsxx_cardinfo *card)
 {
 	struct rsxx_dma_ctrl *ctrl;
-	struct rsxx_dma *dma;
-	int i, j;
-	int cnt = 0;
+	int i;
 
 	for (i = 0; i < card->n_targets; i++) {
 		ctrl = &card->ctrl[i];
@@ -943,33 +989,11 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card)
 			del_timer_sync(&ctrl->activity_timer);
 
 		/* Clean up the DMA queue */
-		spin_lock(&ctrl->queue_lock);
-		cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue);
-		spin_unlock(&ctrl->queue_lock);
-
-		if (cnt)
-			dev_info(CARD_TO_DEV(card),
-				"Freed %d queued DMAs on channel %d\n",
-				cnt, i);
-
-		/* Clean up issued DMAs */
-		for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) {
-			dma = get_tracker_dma(ctrl->trackers, j);
-			if (dma) {
-				pci_unmap_page(card->dev, dma->dma_addr,
-					       get_dma_size(dma),
-					       (dma->cmd == HW_CMD_BLK_WRITE) ?
-					       PCI_DMA_TODEVICE :
-					       PCI_DMA_FROMDEVICE);
-				kmem_cache_free(rsxx_dma_pool, dma);
-				cnt++;
-			}
-		}
+		spin_lock_bh(&ctrl->queue_lock);
+		rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
+		spin_unlock_bh(&ctrl->queue_lock);
 
-		if (cnt)
-			dev_info(CARD_TO_DEV(card),
-				"Freed %d pending DMAs on channel %d\n",
-				cnt, i);
+		rsxx_dma_cancel(ctrl);
 
 		vfree(ctrl->trackers);
 
@@ -1013,7 +1037,7 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
 			cnt++;
 		}
 
-		spin_lock(&card->ctrl[i].queue_lock);
+		spin_lock_bh(&card->ctrl[i].queue_lock);
 		list_splice(&issued_dmas[i], &card->ctrl[i].queue);
 
 		atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth);
@@ -1028,7 +1052,7 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
 					       PCI_DMA_TODEVICE :
 					       PCI_DMA_FROMDEVICE);
 		}
-		spin_unlock(&card->ctrl[i].queue_lock);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
 	}
 
 	kfree(issued_dmas);
@@ -1036,30 +1060,13 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
 	return 0;
 }
 
-void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card)
-{
-	struct rsxx_dma *dma;
-	struct rsxx_dma *tmp;
-	int i;
-
-	for (i = 0; i < card->n_targets; i++) {
-		spin_lock(&card->ctrl[i].queue_lock);
-		list_for_each_entry_safe(dma, tmp, &card->ctrl[i].queue, list) {
-			list_del(&dma->list);
-
-			rsxx_complete_dma(&card->ctrl[i], dma, DMA_CANCELLED);
-		}
-		spin_unlock(&card->ctrl[i].queue_lock);
-	}
-}
-
 int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
 {
 	struct rsxx_dma *dma;
 	int i;
 
 	for (i = 0; i < card->n_targets; i++) {
-		spin_lock(&card->ctrl[i].queue_lock);
+		spin_lock_bh(&card->ctrl[i].queue_lock);
 		list_for_each_entry(dma, &card->ctrl[i].queue, list) {
 			dma->dma_addr = pci_map_page(card->dev, dma->page,
 					dma->pg_off, get_dma_size(dma),
@@ -1067,12 +1074,12 @@ int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
 					PCI_DMA_TODEVICE :
 					PCI_DMA_FROMDEVICE);
 			if (!dma->dma_addr) {
-				spin_unlock(&card->ctrl[i].queue_lock);
+				spin_unlock_bh(&card->ctrl[i].queue_lock);
 				kmem_cache_free(rsxx_dma_pool, dma);
 				return -ENOMEM;
 			}
 		}
-		spin_unlock(&card->ctrl[i].queue_lock);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
 	}
 
 	return 0;

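Two patterns run through this dma.c rework. The queue_lock acquisitions in process context become spin_lock_bh() because dma_engine_stalled() is a timer callback (softirq context) that now takes the same lock; and the cleanup paths converge on rsxx_cleanup_dma_queue(), whose list_for_each_entry_safe() walk caches the next entry before the current one is freed. The safe-iteration rule, reduced to a self-contained sketch:

#include <stdio.h>
#include <stdlib.h>

struct node {
	int val;
	struct node *next;
};

/* Cancel every queued entry; returns how many were freed, mirroring
 * the cnt bookkeeping in rsxx_cleanup_dma_queue(). */
static int cleanup_queue(struct node **head)
{
	struct node *cur, *tmp;
	int cnt = 0;

	/* "safe" iteration: read ->next before freeing cur */
	for (cur = *head; cur; cur = tmp) {
		tmp = cur->next;
		free(cur);
		cnt++;
	}
	*head = NULL;
	return cnt;
}

int main(void)
{
	struct node *head = NULL;
	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));
		n->val = i;
		n->next = head;
		head = n;
	}
	printf("freed %d queued entries\n", cleanup_queue(&head));
	return 0;
}
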
+ 9 - 1
drivers/block/rsxx/rsxx_priv.h

@@ -39,6 +39,7 @@
 #include <linux/vmalloc.h>
 #include <linux/timer.h>
 #include <linux/ioctl.h>
+#include <linux/delay.h>
 
 #include "rsxx.h"
 #include "rsxx_cfg.h"
@@ -114,6 +115,7 @@ struct rsxx_dma_ctrl {
 	struct timer_list		activity_timer;
 	struct dma_tracker_list		*trackers;
 	struct rsxx_dma_stats		stats;
+	struct mutex			work_lock;
 };
 
 struct rsxx_cardinfo {
@@ -134,6 +136,7 @@ struct rsxx_cardinfo {
 		spinlock_t		lock;
 		bool			active;
 		struct creg_cmd		*active_cmd;
+		struct workqueue_struct	*creg_wq;
 		struct work_struct	done_work;
 		struct list_head	queue;
 		unsigned int		q_depth;
@@ -154,6 +157,7 @@ struct rsxx_cardinfo {
 		int buf_len;
 	} log;
 
+	struct workqueue_struct	*event_wq;
 	struct work_struct	event_work;
 	unsigned int		state;
 	u64			size8;
@@ -181,6 +185,8 @@ struct rsxx_cardinfo {
 
 	int			n_targets;
 	struct rsxx_dma_ctrl	*ctrl;
+
+	struct dentry		*debugfs_dir;
 };
 
 enum rsxx_pci_regmap {
@@ -283,6 +289,7 @@ enum rsxx_creg_addr {
 	CREG_ADD_CAPABILITIES		= 0x80001050,
 	CREG_ADD_LOG			= 0x80002000,
 	CREG_ADD_NUM_TARGETS		= 0x80003000,
+	CREG_ADD_CRAM			= 0xA0000000,
 	CREG_ADD_CONFIG			= 0xB0000000,
 };
 
@@ -372,6 +379,8 @@ typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card,
 int rsxx_dma_setup(struct rsxx_cardinfo *card);
 void rsxx_dma_destroy(struct rsxx_cardinfo *card);
 int rsxx_dma_init(void);
+int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, struct list_head *q);
+int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
 void rsxx_dma_cleanup(void);
 void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
 int rsxx_dma_configure(struct rsxx_cardinfo *card);
@@ -382,7 +391,6 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 			   void *cb_data);
 int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl);
 int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card);
-void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card);
 int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card);
 
 /***** cregs.c *****/

File diff suppressed because it is too large
+ 501 - 244
drivers/block/xen-blkback/blkback.c


+ 145 - 2
drivers/block/xen-blkback/common.h

@@ -50,6 +50,19 @@
 		 __func__, __LINE__, ##args)
 
 
+/*
+ * This is the maximum number of segments that would be allowed in indirect
+ * requests. This value will also be passed to the frontend.
+ */
+#define MAX_INDIRECT_SEGMENTS 256
+
+#define SEGS_PER_INDIRECT_FRAME \
+	(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+#define MAX_INDIRECT_PAGES \
+	((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+#define INDIRECT_PAGES(_segs) \
+	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+
 /* Not a real protocol.  Used to generate ring structs which contain
  * the elements common to all protocols only.  This way we get a
  * compiler-checkable way to use common struct elements, so we can
@@ -83,12 +96,31 @@ struct blkif_x86_32_request_other {
 	uint64_t       id;           /* private guest value, echoed in resp  */
 } __attribute__((__packed__));
 
+struct blkif_x86_32_request_indirect {
+	uint8_t        indirect_op;
+	uint16_t       nr_segments;
+	uint64_t       id;
+	blkif_sector_t sector_number;
+	blkif_vdev_t   handle;
+	uint16_t       _pad1;
+	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+	/*
+	 * The maximum number of indirect segments (and pages) that will
+	 * be used is determined by MAX_INDIRECT_SEGMENTS; this value
+	 * is also exported to the guest (via xenstore
+	 * feature-max-indirect-segments entry), so the frontend knows how
+	 * many indirect segments the backend supports.
+	 */
+	uint64_t       _pad2;        /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
 struct blkif_x86_32_request {
 	uint8_t        operation;    /* BLKIF_OP_???                         */
 	union {
 		struct blkif_x86_32_request_rw rw;
 		struct blkif_x86_32_request_discard discard;
 		struct blkif_x86_32_request_other other;
+		struct blkif_x86_32_request_indirect indirect;
 	} u;
 } __attribute__((__packed__));
 
@@ -127,12 +159,32 @@ struct blkif_x86_64_request_other {
 	uint64_t       id;           /* private guest value, echoed in resp  */
 } __attribute__((__packed__));
 
+struct blkif_x86_64_request_indirect {
+	uint8_t        indirect_op;
+	uint16_t       nr_segments;
+	uint32_t       _pad1;        /* offsetof(blkif_..,u.indirect.id)==8   */
+	uint64_t       id;
+	blkif_sector_t sector_number;
+	blkif_vdev_t   handle;
+	uint16_t       _pad2;
+	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+	/*
+	 * The maximum number of indirect segments (and pages) that will
+	 * be used is determined by MAX_INDIRECT_SEGMENTS; this value
+	 * is also exported to the guest (via xenstore
+	 * feature-max-indirect-segments entry), so the frontend knows how
+	 * many indirect segments the backend supports.
+	 */
+	uint32_t       _pad3;        /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
 struct blkif_x86_64_request {
 	uint8_t        operation;    /* BLKIF_OP_???                         */
 	union {
 		struct blkif_x86_64_request_rw rw;
 		struct blkif_x86_64_request_discard discard;
 		struct blkif_x86_64_request_other other;
+		struct blkif_x86_64_request_indirect indirect;
 	} u;
 } __attribute__((__packed__));
 
@@ -182,12 +234,26 @@ struct xen_vbd {
 
 struct backend_info;
 
+/* Number of available flags */
+#define PERSISTENT_GNT_FLAGS_SIZE	2
+/* This persistent grant is currently in use */
+#define PERSISTENT_GNT_ACTIVE		0
+/*
+ * This persistent grant has been used; the flag is set when we clear
+ * PERSISTENT_GNT_ACTIVE, to note that the grant has been used recently.
+ */
+#define PERSISTENT_GNT_WAS_ACTIVE	1
+
+/* Number of requests that we can fit in a ring */
+#define XEN_BLKIF_REQS			32
 
 struct persistent_gnt {
 	struct page *page;
 	grant_ref_t gnt;
 	grant_handle_t handle;
+	DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
 	struct rb_node node;
+	struct list_head remove_node;
 };
 
 struct xen_blkif {
@@ -219,6 +285,23 @@ struct xen_blkif {
 	/* tree to store persistent grants */
 	struct rb_root		persistent_gnts;
 	unsigned int		persistent_gnt_c;
+	atomic_t		persistent_gnt_in_use;
+	unsigned long           next_lru;
+
+	/* used by the kworker that offloads work from the persistent purge */
+	struct list_head	persistent_purge_list;
+	struct work_struct	persistent_purge_work;
+
+	/* buffer of free pages to map grant refs */
+	spinlock_t		free_pages_lock;
+	int			free_pages_num;
+	struct list_head	free_pages;
+
+	/* List of all 'pending_req' available */
+	struct list_head	pending_free;
+	/* And its spinlock. */
+	spinlock_t		pending_free_lock;
+	wait_queue_head_t	pending_free_wq;
 
 	/* statistics */
 	unsigned long		st_print;
@@ -231,6 +314,41 @@ struct xen_blkif {
 	unsigned long long			st_wr_sect;
 
 	wait_queue_head_t	waiting_to_free;
+	/* Thread shutdown wait queue. */
+	wait_queue_head_t	shutdown_wq;
+};
+
+struct seg_buf {
+	unsigned long offset;
+	unsigned int nsec;
+};
+
+struct grant_page {
+	struct page 		*page;
+	struct persistent_gnt	*persistent_gnt;
+	grant_handle_t		handle;
+	grant_ref_t		gref;
+};
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
+ * response queued for it, with the saved 'id' passed back.
+ */
+struct pending_req {
+	struct xen_blkif	*blkif;
+	u64			id;
+	int			nr_pages;
+	atomic_t		pendcnt;
+	unsigned short		operation;
+	int			status;
+	struct list_head	free_list;
+	struct grant_page	*segments[MAX_INDIRECT_SEGMENTS];
+	/* Indirect descriptors */
+	struct grant_page	*indirect_pages[MAX_INDIRECT_PAGES];
+	struct seg_buf		seg[MAX_INDIRECT_SEGMENTS];
+	struct bio		*biolist[MAX_INDIRECT_SEGMENTS];
 };
 
 
@@ -257,6 +375,7 @@ int xen_blkif_xenbus_init(void);
 
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
 int xen_blkif_schedule(void *arg);
+int xen_blkif_purge_persistent(void *arg);
 
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 			      struct backend_info *be, int state);
@@ -268,7 +387,7 @@ struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
 static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 					struct blkif_x86_32_request *src)
 {
-	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
 	dst->operation = src->operation;
 	switch (src->operation) {
 	case BLKIF_OP_READ:
@@ -291,6 +410,18 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 		dst->u.discard.sector_number = src->u.discard.sector_number;
 		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
 		break;
+	case BLKIF_OP_INDIRECT:
+		dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+		dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
+		dst->u.indirect.handle = src->u.indirect.handle;
+		dst->u.indirect.id = src->u.indirect.id;
+		dst->u.indirect.sector_number = src->u.indirect.sector_number;
+		barrier();
+		j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
+		for (i = 0; i < j; i++)
+			dst->u.indirect.indirect_grefs[i] =
+				src->u.indirect.indirect_grefs[i];
+		break;
 	default:
 		/*
 		 * Don't know how to translate this op. Only get the
@@ -304,7 +435,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 static inline void blkif_get_x86_64_req(struct blkif_request *dst,
 					struct blkif_x86_64_request *src)
 {
-	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
 	dst->operation = src->operation;
 	switch (src->operation) {
 	case BLKIF_OP_READ:
@@ -327,6 +458,18 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
 		dst->u.discard.sector_number = src->u.discard.sector_number;
 		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
 		break;
+	case BLKIF_OP_INDIRECT:
+		dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+		dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
+		dst->u.indirect.handle = src->u.indirect.handle;
+		dst->u.indirect.id = src->u.indirect.id;
+		dst->u.indirect.sector_number = src->u.indirect.sector_number;
+		barrier();
+		j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
+		for (i = 0; i < j; i++)
+			dst->u.indirect.indirect_grefs[i] =
+				src->u.indirect.indirect_grefs[i];
+		break;
 	default:
 		/*
 		 * Don't know how to translate this op. Only get the

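SEGS_PER_INDIRECT_FRAME and INDIRECT_PAGES above are plain ceiling division: segment descriptors per page, and pages needed for a given segment count. A standalone check, assuming a 4096-byte page and an 8-byte blkif_request_segment_aligned (both assumed here, not stated in this diff):

#include <stdio.h>

#define PAGE_SIZE 4096
#define SEG_ALIGNED_SIZE 8	/* assumed sizeof(struct blkif_request_segment_aligned) */

#define SEGS_PER_INDIRECT_FRAME (PAGE_SIZE / SEG_ALIGNED_SIZE)
#define INDIRECT_PAGES(segs) \
	(((segs) + SEGS_PER_INDIRECT_FRAME - 1) / SEGS_PER_INDIRECT_FRAME)

int main(void)
{
	printf("segs per frame: %d\n", SEGS_PER_INDIRECT_FRAME);	/* 512 */
	printf("pages for 256 segs: %d\n", INDIRECT_PAGES(256));	/* 1 */
	printf("pages for 513 segs: %d\n", INDIRECT_PAGES(513));	/* 2 */
	return 0;
}
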
+ 85 - 0
drivers/block/xen-blkback/xenbus.c

@@ -98,12 +98,17 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
 		err = PTR_ERR(blkif->xenblkd);
 		blkif->xenblkd = NULL;
 		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
+		return;
 	}
 }
 
 static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 {
 	struct xen_blkif *blkif;
+	struct pending_req *req, *n;
+	int i, j;
+
+	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
 
 	blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
 	if (!blkif)
@@ -118,8 +123,57 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	blkif->st_print = jiffies;
 	init_waitqueue_head(&blkif->waiting_to_free);
 	blkif->persistent_gnts.rb_node = NULL;
+	spin_lock_init(&blkif->free_pages_lock);
+	INIT_LIST_HEAD(&blkif->free_pages);
+	blkif->free_pages_num = 0;
+	atomic_set(&blkif->persistent_gnt_in_use, 0);
+
+	INIT_LIST_HEAD(&blkif->pending_free);
+
+	for (i = 0; i < XEN_BLKIF_REQS; i++) {
+		req = kzalloc(sizeof(*req), GFP_KERNEL);
+		if (!req)
+			goto fail;
+		list_add_tail(&req->free_list,
+		              &blkif->pending_free);
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+			req->segments[j] = kzalloc(sizeof(*req->segments[0]),
+			                           GFP_KERNEL);
+			if (!req->segments[j])
+				goto fail;
+		}
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+			req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
+			                                 GFP_KERNEL);
+			if (!req->indirect_pages[j])
+				goto fail;
+		}
+	}
+	spin_lock_init(&blkif->pending_free_lock);
+	init_waitqueue_head(&blkif->pending_free_wq);
+	init_waitqueue_head(&blkif->shutdown_wq);
 
 	return blkif;
+
+fail:
+	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
+		list_del(&req->free_list);
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+			if (!req->segments[j])
+				break;
+			kfree(req->segments[j]);
+		}
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+			if (!req->indirect_pages[j])
+				break;
+			kfree(req->indirect_pages[j]);
+		}
+		kfree(req);
+	}
+
+	kmem_cache_free(xen_blkif_cachep, blkif);
+
+	return ERR_PTR(-ENOMEM);
 }
 
 static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
@@ -178,6 +232,7 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 {
 	if (blkif->xenblkd) {
 		kthread_stop(blkif->xenblkd);
+		wake_up(&blkif->shutdown_wq);
 		blkif->xenblkd = NULL;
 	}
 
@@ -198,8 +253,28 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 
 static void xen_blkif_free(struct xen_blkif *blkif)
 {
+	struct pending_req *req, *n;
+	int i = 0, j;
+
 	if (!atomic_dec_and_test(&blkif->refcnt))
 		BUG();
+
+	/* Check that there is no request in use */
+	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
+		list_del(&req->free_list);
+
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
+			kfree(req->segments[j]);
+
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
+			kfree(req->indirect_pages[j]);
+
+		kfree(req);
+		i++;
+	}
+
+	WARN_ON(i != XEN_BLKIF_REQS);
+
 	kmem_cache_free(xen_blkif_cachep, blkif);
 }
 
@@ -678,6 +753,11 @@ again:
 				 dev->nodename);
 		goto abort;
 	}
+	err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u",
+			    MAX_INDIRECT_SEGMENTS);
+	if (err)
+		dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
+			 dev->nodename, err);
 
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    (unsigned long long)vbd_sz(&be->blkif->vbd));
@@ -704,6 +784,11 @@ again:
 				 dev->nodename);
 		goto abort;
 	}
+	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
+			    bdev_physical_block_size(be->blkif->vbd.bdev));
+	if (err)
+		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
+				 dev->nodename);
 
 	err = xenbus_transaction_end(xbt, 0);
 	if (err == -EAGAIN)

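xen_blkif_alloc() pre-allocates XEN_BLKIF_REQS pending requests, each with per-segment and per-indirect-page buffers, and its fail path frees whatever subset was already allocated. The rollback shape, reduced to userspace C (sizes are illustrative):

#include <stdlib.h>

#define NREQS 4
#define NSEGS 8

struct req {
	void *segments[NSEGS];
};

static struct req *reqs[NREQS];

static void rollback(void)
{
	for (int i = 0; i < NREQS; i++) {
		if (!reqs[i])
			break;	/* allocation stopped here */
		for (int j = 0; j < NSEGS; j++)
			free(reqs[i]->segments[j]);	/* free(NULL) is a no-op */
		free(reqs[i]);
		reqs[i] = NULL;
	}
}

static int alloc_all(void)
{
	for (int i = 0; i < NREQS; i++) {
		reqs[i] = calloc(1, sizeof(*reqs[i]));
		if (!reqs[i])
			goto fail;
		for (int j = 0; j < NSEGS; j++) {
			reqs[i]->segments[j] = calloc(1, 64);
			if (!reqs[i]->segments[j])
				goto fail;
		}
	}
	return 0;
fail:
	rollback();
	return -1;	/* the driver returns ERR_PTR(-ENOMEM) here */
}

int main(void) { return alloc_all() ? 1 : 0; }
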
+ 432 - 100
drivers/block/xen-blkfront.c

@@ -74,12 +74,30 @@ struct grant {
 struct blk_shadow {
 	struct blkif_request req;
 	struct request *request;
-	struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct grant **grants_used;
+	struct grant **indirect_grants;
+	struct scatterlist *sg;
+};
+
+struct split_bio {
+	struct bio *bio;
+	atomic_t pending;
+	int err;
 };
 
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;
 
+/*
+ * Maximum number of segments in indirect requests; the actual value used by
+ * the frontend driver is the minimum of this value and the value provided
+ * by the backend driver.
+ */
+
+static unsigned int xen_blkif_max_segments = 32;
+module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
+MODULE_PARM_DESC(max, "Maximum number of segments in indirect requests (default is 32)");
+
 #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
 
 /*
@@ -98,7 +116,6 @@ struct blkfront_info
 	enum blkif_state connected;
 	int ring_ref;
 	struct blkif_front_ring ring;
-	struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	unsigned int evtchn, irq;
 	struct request_queue *rq;
 	struct work_struct work;
@@ -114,6 +131,7 @@ struct blkfront_info
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
 	unsigned int feature_persistent:1;
+	unsigned int max_indirect_segments;
 	int is_ready;
 };
 
@@ -142,6 +160,13 @@ static DEFINE_SPINLOCK(minor_lock);
 
 #define DEV_NAME	"xvd"	/* name in /dev */
 
+#define SEGS_PER_INDIRECT_FRAME \
+	(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+#define INDIRECT_GREFS(_segs) \
+	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+
+static int blkfront_setup_indirect(struct blkfront_info *info);
+
 static int get_id_from_freelist(struct blkfront_info *info)
 {
 	unsigned long free = info->shadow_free;
@@ -358,7 +383,8 @@ static int blkif_queue_request(struct request *req)
 	struct blkif_request *ring_req;
 	unsigned long id;
 	unsigned int fsect, lsect;
-	int i, ref;
+	int i, ref, n;
+	struct blkif_request_segment_aligned *segments = NULL;
 
 	/*
 	 * Used to store if we are able to queue the request by just using
@@ -369,21 +395,27 @@ static int blkif_queue_request(struct request *req)
 	grant_ref_t gref_head;
 	struct grant *gnt_list_entry = NULL;
 	struct scatterlist *sg;
+	int nseg, max_grefs;
 
 	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
 		return 1;
 
-	/* Check if we have enought grants to allocate a requests */
-	if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+	max_grefs = info->max_indirect_segments ?
+		    info->max_indirect_segments +
+		    INDIRECT_GREFS(info->max_indirect_segments) :
+		    BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+	/* Check if we have enough grants to allocate a request */
+	if (info->persistent_gnts_c < max_grefs) {
 		new_persistent_gnts = 1;
 		if (gnttab_alloc_grant_references(
-		    BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
+		    max_grefs - info->persistent_gnts_c,
 		    &gref_head) < 0) {
 			gnttab_request_free_callback(
 				&info->callback,
 				blkif_restart_queue_callback,
 				info,
-				BLKIF_MAX_SEGMENTS_PER_REQUEST);
+				max_grefs);
 			return 1;
 		}
 	} else
@@ -394,42 +426,67 @@ static int blkif_queue_request(struct request *req)
 	id = get_id_from_freelist(info);
 	info->shadow[id].request = req;
 
-	ring_req->u.rw.id = id;
-	ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
-	ring_req->u.rw.handle = info->handle;
-
-	ring_req->operation = rq_data_dir(req) ?
-		BLKIF_OP_WRITE : BLKIF_OP_READ;
-
-	if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
-		/*
-		 * Ideally we can do an unordered flush-to-disk. In case the
-		 * backend onlysupports barriers, use that. A barrier request
-		 * a superset of FUA, so we can implement it the same
-		 * way.  (It's also a FLUSH+FUA, since it is
-		 * guaranteed ordered WRT previous writes.)
-		 */
-		ring_req->operation = info->flush_op;
-	}
-
 	if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
-		/* id, sector_number and handle are set above. */
 		ring_req->operation = BLKIF_OP_DISCARD;
 		ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
+		ring_req->u.discard.id = id;
+		ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req);
 		if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
 			ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
 		else
 			ring_req->u.discard.flag = 0;
 	} else {
-		ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
-							   info->sg);
-		BUG_ON(ring_req->u.rw.nr_segments >
-		       BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
-		for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
+		BUG_ON(info->max_indirect_segments == 0 &&
+		       req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+		BUG_ON(info->max_indirect_segments &&
+		       req->nr_phys_segments > info->max_indirect_segments);
+		nseg = blk_rq_map_sg(req->q, req, info->shadow[id].sg);
+		ring_req->u.rw.id = id;
+		if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+			/*
+			 * The indirect operation can only be a BLKIF_OP_READ or
+			 * BLKIF_OP_WRITE
+			 */
+			BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA));
+			ring_req->operation = BLKIF_OP_INDIRECT;
+			ring_req->u.indirect.indirect_op = rq_data_dir(req) ?
+				BLKIF_OP_WRITE : BLKIF_OP_READ;
+			ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req);
+			ring_req->u.indirect.handle = info->handle;
+			ring_req->u.indirect.nr_segments = nseg;
+		} else {
+			ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
+			ring_req->u.rw.handle = info->handle;
+			ring_req->operation = rq_data_dir(req) ?
+				BLKIF_OP_WRITE : BLKIF_OP_READ;
+			if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
+				/*
+				 * Ideally we can do an unordered flush-to-disk. In case the
+				 * backend onlysupports barriers, use that. A barrier request
+				 * a superset of FUA, so we can implement it the same
+				 * way.  (It's also a FLUSH+FUA, since it is
+				 * guaranteed ordered WRT previous writes.)
+				 */
+				ring_req->operation = info->flush_op;
+			}
+			ring_req->u.rw.nr_segments = nseg;
+		}
+		for_each_sg(info->shadow[id].sg, sg, nseg, i) {
 			fsect = sg->offset >> 9;
 			lsect = fsect + (sg->length >> 9) - 1;
 
+			if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
+			    (i % SEGS_PER_INDIRECT_FRAME == 0)) {
+				if (segments)
+					kunmap_atomic(segments);
+
+				n = i / SEGS_PER_INDIRECT_FRAME;
+				gnt_list_entry = get_grant(&gref_head, info);
+				info->shadow[id].indirect_grants[n] = gnt_list_entry;
+				segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
+				ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
+			}
+
 			gnt_list_entry = get_grant(&gref_head, info);
 			ref = gnt_list_entry->gref;
 
@@ -441,8 +498,7 @@ static int blkif_queue_request(struct request *req)
 
 				BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 
-				shared_data = kmap_atomic(
-					pfn_to_page(gnt_list_entry->pfn));
+				shared_data = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
 				bvec_data = kmap_atomic(sg_page(sg));
 
 				/*
@@ -461,13 +517,23 @@ static int blkif_queue_request(struct request *req)
 				kunmap_atomic(bvec_data);
 				kunmap_atomic(shared_data);
 			}
-
-			ring_req->u.rw.seg[i] =
-					(struct blkif_request_segment) {
-						.gref       = ref,
-						.first_sect = fsect,
-						.last_sect  = lsect };
+			if (ring_req->operation != BLKIF_OP_INDIRECT) {
+				ring_req->u.rw.seg[i] =
+						(struct blkif_request_segment) {
+							.gref       = ref,
+							.first_sect = fsect,
+							.last_sect  = lsect };
+			} else {
+				n = i % SEGS_PER_INDIRECT_FRAME;
+				segments[n] =
+					(struct blkif_request_segment_aligned) {
+							.gref       = ref,
+							.first_sect = fsect,
+							.last_sect  = lsect };
+			}
 		}
+		if (segments)
+			kunmap_atomic(segments);
 	}
 
 	info->ring.req_prod_pvt++;
@@ -542,7 +608,9 @@ wait:
 		flush_requests(info);
 }
 
-static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
+static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
+				unsigned int physical_sector_size,
+				unsigned int segments)
 {
 	struct request_queue *rq;
 	struct blkfront_info *info = gd->private_data;
@@ -564,14 +632,15 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
 	blk_queue_logical_block_size(rq, sector_size);
-	blk_queue_max_hw_sectors(rq, 512);
+	blk_queue_physical_block_size(rq, physical_sector_size);
+	blk_queue_max_hw_sectors(rq, (segments * PAGE_SIZE) / 512);
 
 	/* Each segment in a request is up to an aligned page in size. */
 	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
 	blk_queue_max_segment_size(rq, PAGE_SIZE);
 
 	/* Ensure a merged request will fit in a single I/O ring slot. */
-	blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	blk_queue_max_segments(rq, segments);
 
 	/* Make sure buffer addresses are sector-aligned. */
 	blk_queue_dma_alignment(rq, 511);
@@ -588,13 +657,16 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 static void xlvbd_flush(struct blkfront_info *info)
 {
 	blk_queue_flush(info->rq, info->feature_flush);
-	printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
+	printk(KERN_INFO "blkfront: %s: %s: %s %s %s %s %s\n",
 	       info->gd->disk_name,
 	       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
 		"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
 		"flush diskcache" : "barrier or flush"),
-	       info->feature_flush ? "enabled" : "disabled",
-	       info->feature_persistent ? "using persistent grants" : "");
+	       info->feature_flush ? "enabled;" : "disabled;",
+	       "persistent grants:",
+	       info->feature_persistent ? "enabled;" : "disabled;",
+	       "indirect descriptors:",
+	       info->max_indirect_segments ? "enabled;" : "disabled;");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -667,7 +739,8 @@ static char *encode_disk_name(char *ptr, unsigned int n)
 
 static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 			       struct blkfront_info *info,
-			       u16 vdisk_info, u16 sector_size)
+			       u16 vdisk_info, u16 sector_size,
+			       unsigned int physical_sector_size)
 {
 	struct gendisk *gd;
 	int nr_minors = 1;
@@ -734,7 +807,9 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 	gd->driverfs_dev = &(info->xbdev->dev);
 	set_capacity(gd, capacity);
 
-	if (xlvbd_init_blk_queue(gd, sector_size)) {
+	if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size,
+				 info->max_indirect_segments ? :
+				 BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
 		del_gendisk(gd);
 		goto release;
 	}
@@ -818,6 +893,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 {
 	struct grant *persistent_gnt;
 	struct grant *n;
+	int i, j, segs;
 
 	/* Prevent new requests being issued until we fix things up. */
 	spin_lock_irq(&info->io_lock);
@@ -843,6 +919,47 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	}
 	BUG_ON(info->persistent_gnts_c != 0);
 
+	for (i = 0; i < BLK_RING_SIZE; i++) {
+		/*
+		 * Clear persistent grants present in requests already
+		 * on the shared ring
+		 */
+		if (!info->shadow[i].request)
+			goto free_shadow;
+
+		segs = info->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
+		       info->shadow[i].req.u.indirect.nr_segments :
+		       info->shadow[i].req.u.rw.nr_segments;
+		for (j = 0; j < segs; j++) {
+			persistent_gnt = info->shadow[i].grants_used[j];
+			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+			__free_page(pfn_to_page(persistent_gnt->pfn));
+			kfree(persistent_gnt);
+		}
+
+		if (info->shadow[i].req.operation != BLKIF_OP_INDIRECT)
+			/*
+			 * If this is not an indirect operation don't try to
+			 * free indirect segments
+			 */
+			goto free_shadow;
+
+		for (j = 0; j < INDIRECT_GREFS(segs); j++) {
+			persistent_gnt = info->shadow[i].indirect_grants[j];
+			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+			__free_page(pfn_to_page(persistent_gnt->pfn));
+			kfree(persistent_gnt);
+		}
+
+free_shadow:
+		kfree(info->shadow[i].grants_used);
+		info->shadow[i].grants_used = NULL;
+		kfree(info->shadow[i].indirect_grants);
+		info->shadow[i].indirect_grants = NULL;
+		kfree(info->shadow[i].sg);
+		info->shadow[i].sg = NULL;
+	}
+
 	/* No more gnttab callback work. */
 	gnttab_cancel_free_callback(&info->callback);
 	spin_unlock_irq(&info->io_lock);
@@ -867,12 +984,13 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
 			     struct blkif_response *bret)
 {
 	int i = 0;
-	struct bio_vec *bvec;
-	struct req_iterator iter;
-	unsigned long flags;
+	struct scatterlist *sg;
 	char *bvec_data;
 	void *shared_data;
-	unsigned int offset = 0;
+	int nseg;
+
+	nseg = s->req.operation == BLKIF_OP_INDIRECT ?
+		s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
 
 	if (bret->operation == BLKIF_OP_READ) {
 		/*
@@ -881,26 +999,29 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
 		 * than PAGE_SIZE, we have to keep track of the current offset,
 		 * to be sure we are copying the data from the right shared page.
 		 */
-		rq_for_each_segment(bvec, s->request, iter) {
-			BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
-			if (bvec->bv_offset < offset)
-				i++;
-			BUG_ON(i >= s->req.u.rw.nr_segments);
+		for_each_sg(s->sg, sg, nseg, i) {
+			BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 			shared_data = kmap_atomic(
 				pfn_to_page(s->grants_used[i]->pfn));
-			bvec_data = bvec_kmap_irq(bvec, &flags);
-			memcpy(bvec_data, shared_data + bvec->bv_offset,
-				bvec->bv_len);
-			bvec_kunmap_irq(bvec_data, &flags);
+			bvec_data = kmap_atomic(sg_page(sg));
+			memcpy(bvec_data   + sg->offset,
+			       shared_data + sg->offset,
+			       sg->length);
+			kunmap_atomic(bvec_data);
 			kunmap_atomic(shared_data);
-			offset = bvec->bv_offset + bvec->bv_len;
 		}
 	}
 	/* Add the persistent grant into the list of free grants */
-	for (i = 0; i < s->req.u.rw.nr_segments; i++) {
+	for (i = 0; i < nseg; i++) {
 		list_add(&s->grants_used[i]->node, &info->persistent_gnts);
 		info->persistent_gnts_c++;
 	}
+	if (s->req.operation == BLKIF_OP_INDIRECT) {
+		for (i = 0; i < INDIRECT_GREFS(nseg); i++) {
+			list_add(&s->indirect_grants[i]->node, &info->persistent_gnts);
+			info->persistent_gnts_c++;
+		}
+	}
 }
 
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -1034,14 +1155,6 @@ static int setup_blkring(struct xenbus_device *dev,
 	SHARED_RING_INIT(sring);
 	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
 
-	sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
-	/* Allocate memory for grants */
-	err = fill_grant_buffer(info, BLK_RING_SIZE *
-	                              BLKIF_MAX_SEGMENTS_PER_REQUEST);
-	if (err)
-		goto fail;
-
 	err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
 	if (err < 0) {
 		free_page((unsigned long)sring);
@@ -1223,13 +1336,84 @@ static int blkfront_probe(struct xenbus_device *dev,
 	return 0;
 }
 
+/*
+ * This is a clone of md_trim_bio, used to split a bio into smaller ones
+ */
+static void trim_bio(struct bio *bio, int offset, int size)
+{
+	/* 'bio' is a cloned bio which we need to trim to match
+	 * the given offset and size.
+	 * This requires adjusting bi_sector, bi_size, and bi_io_vec
+	 */
+	int i;
+	struct bio_vec *bvec;
+	int sofar = 0;
+
+	size <<= 9;
+	if (offset == 0 && size == bio->bi_size)
+		return;
+
+	bio->bi_sector += offset;
+	bio->bi_size = size;
+	offset <<= 9;
+	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
+	while (bio->bi_idx < bio->bi_vcnt &&
+	       bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
+		/* remove this whole bio_vec */
+		offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
+		bio->bi_idx++;
+	}
+	if (bio->bi_idx < bio->bi_vcnt) {
+		bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
+		bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
+	}
+	/* avoid any complications with bi_idx being non-zero */
+	if (bio->bi_idx) {
+		memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
+			(bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
+		bio->bi_vcnt -= bio->bi_idx;
+		bio->bi_idx = 0;
+	}
+	/* Make sure vcnt and last bv are not too big */
+	bio_for_each_segment(bvec, bio, i) {
+		if (sofar + bvec->bv_len > size)
+			bvec->bv_len = size - sofar;
+		if (bvec->bv_len == 0) {
+			bio->bi_vcnt = i;
+			break;
+		}
+		sofar += bvec->bv_len;
+	}
+}
+
+static void split_bio_end(struct bio *bio, int error)
+{
+	struct split_bio *split_bio = bio->bi_private;
+
+	if (error)
+		split_bio->err = error;
+
+	if (atomic_dec_and_test(&split_bio->pending)) {
+		split_bio->bio->bi_phys_segments = 0;
+		bio_endio(split_bio->bio, split_bio->err);
+		kfree(split_bio);
+	}
+	bio_put(bio);
+}
 
 static int blkif_recover(struct blkfront_info *info)
 {
 	int i;
-	struct blkif_request *req;
+	struct request *req, *n;
 	struct blk_shadow *copy;
-	int j;
+	int rc;
+	struct bio *bio, *cloned_bio;
+	struct bio_list bio_list, merge_bio;
+	unsigned int segs, offset;
+	int pending, size;
+	struct split_bio *split_bio;
+	struct list_head requests;
 
 	/* Stage 1: Make a safe copy of the shadow state. */
 	copy = kmemdup(info->shadow, sizeof(info->shadow),
@@ -1244,36 +1428,64 @@ static int blkif_recover(struct blkfront_info *info)
 	info->shadow_free = info->ring.req_prod_pvt;
 	info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
 
-	/* Stage 3: Find pending requests and requeue them. */
+	rc = blkfront_setup_indirect(info);
+	if (rc) {
+		kfree(copy);
+		return rc;
+	}
+
+	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	blk_queue_max_segments(info->rq, segs);
+	bio_list_init(&bio_list);
+	INIT_LIST_HEAD(&requests);
 	for (i = 0; i < BLK_RING_SIZE; i++) {
 		/* Not in use? */
 		if (!copy[i].request)
 			continue;
 
-		/* Grab a request slot and copy shadow state into it. */
-		req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
-		*req = copy[i].req;
-
-		/* We get a new request id, and must reset the shadow state. */
-		req->u.rw.id = get_id_from_freelist(info);
-		memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));
-
-		if (req->operation != BLKIF_OP_DISCARD) {
-		/* Rewrite any grant references invalidated by susp/resume. */
-			for (j = 0; j < req->u.rw.nr_segments; j++)
-				gnttab_grant_foreign_access_ref(
-					req->u.rw.seg[j].gref,
-					info->xbdev->otherend_id,
-					pfn_to_mfn(copy[i].grants_used[j]->pfn),
-					0);
+		/*
+		 * Get the bios in the request so we can re-queue them.
+		 */
+		if (copy[i].request->cmd_flags &
+		    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+			/*
+			 * Flush operations don't contain bios, so
+			 * we need to requeue the whole request
+			 */
+			list_add(&copy[i].request->queuelist, &requests);
+			continue;
 		}
-		info->shadow[req->u.rw.id].req = *req;
-
-		info->ring.req_prod_pvt++;
+		merge_bio.head = copy[i].request->bio;
+		merge_bio.tail = copy[i].request->biotail;
+		bio_list_merge(&bio_list, &merge_bio);
+		copy[i].request->bio = NULL;
+		blk_put_request(copy[i].request);
 	}
 
 	kfree(copy);
 
+	/*
+	 * Empty the queue; this is important because we might have
+	 * requests in the queue with more segments than we can
+	 * handle now.
+	 */
+	spin_lock_irq(&info->io_lock);
+	while ((req = blk_fetch_request(info->rq)) != NULL) {
+		if (req->cmd_flags &
+		    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+			list_add(&req->queuelist, &requests);
+			continue;
+		}
+		merge_bio.head = req->bio;
+		merge_bio.tail = req->biotail;
+		bio_list_merge(&bio_list, &merge_bio);
+		req->bio = NULL;
+		if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
+			pr_alert("diskcache flush request found!\n");
+		__blk_put_request(info->rq, req);
+	}
+	spin_unlock_irq(&info->io_lock);
+
 	xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
 	spin_lock_irq(&info->io_lock);
@@ -1281,14 +1493,50 @@ static int blkif_recover(struct blkfront_info *info)
 	/* Now safe for us to use the shared ring */
 	info->connected = BLKIF_STATE_CONNECTED;
 
-	/* Send off requeued requests */
-	flush_requests(info);
-
 	/* Kick any other new requests queued since we resumed */
 	kick_pending_request_queues(info);
 
+	list_for_each_entry_safe(req, n, &requests, queuelist) {
+		/* Requeue pending requests (flush or discard) */
+		list_del_init(&req->queuelist);
+		BUG_ON(req->nr_phys_segments > segs);
+		blk_requeue_request(info->rq, req);
+	}
 	spin_unlock_irq(&info->io_lock);
 
+	while ((bio = bio_list_pop(&bio_list)) != NULL) {
+		/* Traverse the list of pending bios and re-queue them */
+		if (bio_segments(bio) > segs) {
+			/*
+			 * This bio has more segments than we can
+			 * handle, so we have to split it.
+			 */
+			pending = (bio_segments(bio) + segs - 1) / segs;
+			split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
+			BUG_ON(split_bio == NULL);
+			atomic_set(&split_bio->pending, pending);
+			split_bio->bio = bio;
+			for (i = 0; i < pending; i++) {
+				offset = (i * segs * PAGE_SIZE) >> 9;
+				size = min((unsigned int)(segs * PAGE_SIZE) >> 9,
+					   (unsigned int)(bio->bi_size >> 9) - offset);
+				cloned_bio = bio_clone(bio, GFP_NOIO);
+				BUG_ON(cloned_bio == NULL);
+				trim_bio(cloned_bio, offset, size);
+				cloned_bio->bi_private = split_bio;
+				cloned_bio->bi_end_io = split_bio_end;
+				submit_bio(cloned_bio->bi_rw, cloned_bio);
+			}
+			/*
+			 * Now we have to wait for all those smaller bios to
+			 * end, so we can also end the "parent" bio.
+			 */
+			continue;
+		}
+		/* We don't need to split this bio */
+		submit_bio(bio->bi_rw, bio);
+	}
+
 	return 0;
 }
 
@@ -1308,8 +1556,12 @@ static int blkfront_resume(struct xenbus_device *dev)
 	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
 	err = talk_to_blkback(dev, info);
-	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
-		err = blkif_recover(info);
+
+	/*
+	 * We have to wait for the backend to switch to
+	 * connected state, since we want to read which
+	 * features it supports.
+	 */
 
 	return err;
 }
@@ -1387,6 +1639,60 @@ static void blkfront_setup_discard(struct blkfront_info *info)
 	kfree(type);
 }
 
+static int blkfront_setup_indirect(struct blkfront_info *info)
+{
+	unsigned int indirect_segments, segs;
+	int err, i;
+
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-max-indirect-segments", "%u", &indirect_segments,
+			    NULL);
+	if (err) {
+		info->max_indirect_segments = 0;
+		segs = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	} else {
+		info->max_indirect_segments = min(indirect_segments,
+						  xen_blkif_max_segments);
+		segs = info->max_indirect_segments;
+	}
+
+	err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE);
+	if (err)
+		goto out_of_memory;
+
+	for (i = 0; i < BLK_RING_SIZE; i++) {
+		info->shadow[i].grants_used = kzalloc(
+			sizeof(info->shadow[i].grants_used[0]) * segs,
+			GFP_NOIO);
+		info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * segs, GFP_NOIO);
+		if (info->max_indirect_segments)
+			info->shadow[i].indirect_grants = kzalloc(
+				sizeof(info->shadow[i].indirect_grants[0]) *
+				INDIRECT_GREFS(segs),
+				GFP_NOIO);
+		if ((info->shadow[i].grants_used == NULL) ||
+			(info->shadow[i].sg == NULL) ||
+		     (info->max_indirect_segments &&
+		     (info->shadow[i].indirect_grants == NULL)))
+			goto out_of_memory;
+		sg_init_table(info->shadow[i].sg, segs);
+	}
+
+
+	return 0;
+
+out_of_memory:
+	for (i = 0; i < BLK_RING_SIZE; i++) {
+		kfree(info->shadow[i].grants_used);
+		info->shadow[i].grants_used = NULL;
+		kfree(info->shadow[i].sg);
+		info->shadow[i].sg = NULL;
+		kfree(info->shadow[i].indirect_grants);
+		info->shadow[i].indirect_grants = NULL;
+	}
+	return -ENOMEM;
+}
+
 /*
 * Invoked when the backend is finally 'ready' (and has produced
  * the details about the physical device - #sectors, size, etc).
@@ -1395,6 +1701,7 @@ static void blkfront_connect(struct blkfront_info *info)
 {
 	unsigned long long sectors;
 	unsigned long sector_size;
+	unsigned int physical_sector_size;
 	unsigned int binfo;
 	int err;
 	int barrier, flush, discard, persistent;
@@ -1414,8 +1721,15 @@ static void blkfront_connect(struct blkfront_info *info)
 		set_capacity(info->gd, sectors);
 		revalidate_disk(info->gd);
 
-		/* fall through */
+		return;
 	case BLKIF_STATE_SUSPENDED:
+		/*
+		 * If we are recovering from suspension, we need to wait
+		 * for the backend to announce its features before
+		 * reconnecting; at the least we need to know whether the
+		 * backend supports indirect descriptors, and how many.
+		 */
+		blkif_recover(info);
 		return;
 
 	default:
@@ -1437,6 +1751,16 @@ static void blkfront_connect(struct blkfront_info *info)
 		return;
 	}
 
+	/*
+	 * physical-sector-size is a newer field, so old backends may not
+	 * provide this. Assume physical sector size to be the same as
+	 * sector_size in that case.
+	 */
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "physical-sector-size", "%u", &physical_sector_size);
+	if (err != 1)
+		physical_sector_size = sector_size;
+
 	info->feature_flush = 0;
 	info->flush_op = 0;
 
@@ -1483,7 +1807,15 @@ static void blkfront_connect(struct blkfront_info *info)
 	else
 		info->feature_persistent = persistent;
 
-	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
+	err = blkfront_setup_indirect(info);
+	if (err) {
+		xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
+				 info->xbdev->otherend);
+		return;
+	}
+
+	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
+				  physical_sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
 				 info->xbdev->otherend);

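blkif_recover() splits an oversized bio into ceil(bio_segments / segs) clones, each spanning at most segs pages, with offsets and sizes kept in 512-byte sectors. The chunking arithmetic, checked standalone (PAGE_SIZE assumed to be 4096):

#include <stdio.h>

#define PAGE_SIZE 4096U

int main(void)
{
	unsigned int segs = 32;			/* max segments per request */
	unsigned int bio_segments = 100;	/* segments in the big bio */
	unsigned int bi_size = bio_segments * PAGE_SIZE;	/* bytes */
	unsigned int pending = (bio_segments + segs - 1) / segs;	/* = 4 */

	for (unsigned int i = 0; i < pending; i++) {
		unsigned int offset = (i * segs * PAGE_SIZE) >> 9;	/* sectors */
		unsigned int size = (segs * PAGE_SIZE) >> 9;
		unsigned int remain = (bi_size >> 9) - offset;
		if (size > remain)
			size = remain;	/* last clone is short */
		printf("clone %u: offset %u sectors, size %u sectors\n",
		       i, offset, size);
	}
	return 0;
}
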
+ 3 - 1
drivers/cpufreq/cpufreq.c

@@ -1942,13 +1942,15 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
 	if (dev) {
 		switch (action) {
 		case CPU_ONLINE:
+		case CPU_ONLINE_FROZEN:
 			cpufreq_add_dev(dev, NULL);
 			break;
 		case CPU_DOWN_PREPARE:
-		case CPU_UP_CANCELED_FROZEN:
+		case CPU_DOWN_PREPARE_FROZEN:
 			__cpufreq_remove_dev(dev, NULL);
 			break;
 		case CPU_DOWN_FAILED:
+		case CPU_DOWN_FAILED_FROZEN:
 			cpufreq_add_dev(dev, NULL);
 			break;
 		}

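The cpufreq notifiers now list each *_FROZEN action (emitted during suspend/resume) next to its normal counterpart. An equivalent pattern sometimes seen in notifier callbacks masks the freeze bit once instead of spelling out both cases; a hedged sketch with stand-in constants (not copied from the kernel headers):

#include <stdio.h>

/* Stand-ins for linux/cpu.h values; the kernel OR's a freeze flag
 * into the action during suspend/resume. */
#define CPU_ONLINE		0x0002UL
#define CPU_DOWN_PREPARE	0x0005UL
#define CPU_TASKS_FROZEN	0x0010UL

static void handle(unsigned long action)
{
	switch (action & ~CPU_TASKS_FROZEN) {	/* one case covers both */
	case CPU_ONLINE:
		printf("add dev (frozen=%lu)\n", action & CPU_TASKS_FROZEN);
		break;
	case CPU_DOWN_PREPARE:
		printf("remove dev (frozen=%lu)\n", action & CPU_TASKS_FROZEN);
		break;
	}
}

int main(void)
{
	handle(CPU_ONLINE);
	handle(CPU_ONLINE | CPU_TASKS_FROZEN);
	return 0;
}
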
+ 0 - 3
drivers/cpufreq/cpufreq_governor.c

@@ -25,7 +25,6 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
-#include <linux/cpu.h>
 
 #include "cpufreq_governor.h"
 
@@ -137,10 +136,8 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
 	if (!all_cpus) {
 		__gov_queue_work(smp_processor_id(), dbs_data, delay);
 	} else {
-		get_online_cpus();
 		for_each_cpu(i, policy->cpus)
 			__gov_queue_work(i, dbs_data, delay);
-		put_online_cpus();
 	}
 }
 EXPORT_SYMBOL_GPL(gov_queue_work);

+ 2 - 4
drivers/cpufreq/cpufreq_stats.c

@@ -353,13 +353,11 @@ static int cpufreq_stat_cpu_callback(struct notifier_block *nfb,
 		cpufreq_update_policy(cpu);
 		break;
 	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
 		cpufreq_stats_free_sysfs(cpu);
 		break;
 	case CPU_DEAD:
-		cpufreq_stats_free_table(cpu);
-		break;
-	case CPU_UP_CANCELED_FROZEN:
-		cpufreq_stats_free_sysfs(cpu);
+	case CPU_DEAD_FROZEN:
 		cpufreq_stats_free_table(cpu);
 		break;
 	}

+ 2 - 2
drivers/cpufreq/s3c24xx-cpufreq.c

@@ -49,7 +49,7 @@ static struct clk *clk_hclk;
 static struct clk *clk_pclk;
 static struct clk *clk_arm;
 
-#ifdef CONFIG_CPU_FREQ_S3C24XX_DEBUGFS
+#ifdef CONFIG_ARM_S3C24XX_CPUFREQ_DEBUGFS
 struct s3c_cpufreq_config *s3c_cpufreq_getconfig(void)
 {
 	return &cpu_cur;
@@ -59,7 +59,7 @@ struct s3c_iotimings *s3c_cpufreq_getiotimings(void)
 {
 	return &s3c24xx_iotiming;
 }
-#endif /* CONFIG_CPU_FREQ_S3C24XX_DEBUGFS */
+#endif /* CONFIG_ARM_S3C24XX_CPUFREQ_DEBUGFS */
 
 static void s3c_cpufreq_getcur(struct s3c_cpufreq_config *cfg)
 {

+ 1 - 1
drivers/gpio/gpio-msm-v2.c

@@ -378,7 +378,7 @@ static int msm_gpio_probe(struct platform_device *pdev)
 	int ret, ngpio;
 	struct resource *res;
 
-	if (!of_property_read_u32(pdev->dev.of_node, "ngpio", &ngpio)) {
+	if (of_property_read_u32(pdev->dev.of_node, "ngpio", &ngpio)) {
 		dev_err(&pdev->dev, "%s: ngpio property missing\n", __func__);
 		return -EINVAL;
 	}

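The one-character gpio-msm-v2 fix above is easy to misread: of_property_read_u32() returns 0 on success, so the old check "if (!of_property_read_u32(...))" took the error path precisely when the property was present. The zero-means-success idiom, demonstrated with a stand-in reader (not the OF API):

#include <stdio.h>

/* Stand-in: returns 0 on success, negative on failure,
 * like of_property_read_u32(). */
static int read_u32(int present, unsigned int *out)
{
	if (!present)
		return -22;	/* -EINVAL */
	*out = 42;
	return 0;
}

int main(void)
{
	unsigned int ngpio;

	/* Correct: error path taken only when the read fails. */
	if (read_u32(1, &ngpio)) {
		printf("ngpio property missing\n");
		return 1;
	}
	printf("ngpio = %u\n", ngpio);
	return 0;
}
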
+ 70 - 14
drivers/gpio/gpio-omap.c

@@ -1037,6 +1037,18 @@ omap_mpuio_alloc_gc(struct gpio_bank *bank, unsigned int irq_start,
 			       IRQ_NOREQUEST | IRQ_NOPROBE, 0);
 }
 
+#if defined(CONFIG_OF_GPIO)
+static inline bool omap_gpio_chip_boot_dt(struct gpio_chip *chip)
+{
+	return chip->of_node != NULL;
+}
+#else
+static inline bool omap_gpio_chip_boot_dt(struct gpio_chip *chip)
+{
+	return false;
+}
+#endif
+
 static void omap_gpio_chip_init(struct gpio_bank *bank)
 {
 	int j;
@@ -1068,24 +1080,68 @@ static void omap_gpio_chip_init(struct gpio_bank *bank)
 
 	gpiochip_add(&bank->chip);
 
-	for (j = 0; j < bank->width; j++) {
-		int irq = irq_create_mapping(bank->domain, j);
-		irq_set_lockdep_class(irq, &gpio_lock_class);
-		irq_set_chip_data(irq, bank);
-		if (bank->is_mpuio) {
-			omap_mpuio_alloc_gc(bank, irq, bank->width);
-		} else {
-			irq_set_chip_and_handler(irq, &gpio_irq_chip,
-						 handle_simple_irq);
-			set_irq_flags(irq, IRQF_VALID);
-		}
-	}
+	/*
+	 * REVISIT these explicit calls to irq_create_mapping()
+	 * to do the GPIO to IRQ domain mapping for each GPIO in
+	 * the bank can be removed once all OMAP platforms have
+	 * been migrated to Device Tree boot only, since in DT
+	 * boot irq_create_mapping() is called from
+	 * irq_create_of_mapping() only for the GPIO lines that
+	 * are used as interrupts.
+	 */
+	if (!omap_gpio_chip_boot_dt(&bank->chip))
+		for (j = 0; j < bank->width; j++)
+			irq_create_mapping(bank->domain, j);
 	irq_set_chained_handler(bank->irq, gpio_irq_handler);
 	irq_set_handler_data(bank->irq, bank);
 }
 
 static const struct of_device_id omap_gpio_match[];
 
+static int omap_gpio_irq_map(struct irq_domain *d, unsigned int virq,
+			     irq_hw_number_t hwirq)
+{
+	struct gpio_bank *bank = d->host_data;
+	int gpio;
+	int ret;
+
+	if (!bank)
+		return -EINVAL;
+
+	irq_set_lockdep_class(virq, &gpio_lock_class);
+	irq_set_chip_data(virq, bank);
+	if (bank->is_mpuio) {
+		omap_mpuio_alloc_gc(bank, virq, bank->width);
+	} else {
+		irq_set_chip_and_handler(virq, &gpio_irq_chip,
+					 handle_simple_irq);
+		set_irq_flags(virq, IRQF_VALID);
+	}
+
+	/*
+	 * REVISIT most GPIO IRQ chip drivers need to call
+	 * gpio_request() before a GPIO line can be used as an
+	 * IRQ. Ideally this should be handled by the IRQ core
+	 * but until then this has to be done on a per driver
+	 * basis. Remove this once this is managed by the core.
+	 */
+	if (omap_gpio_chip_boot_dt(&bank->chip)) {
+		gpio = irq_to_gpio(bank, hwirq);
+		ret = gpio_request_one(gpio, GPIOF_IN, NULL);
+		if (ret) {
+			dev_err(bank->dev, "Could not request GPIO%d\n", gpio);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static struct irq_domain_ops omap_gpio_irq_ops = {
+	.xlate  = irq_domain_xlate_onetwocell,
+	.map    = omap_gpio_irq_map,
+};
+
 static int omap_gpio_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -1151,10 +1207,10 @@ static int omap_gpio_probe(struct platform_device *pdev)
 	}
 
 	bank->domain = irq_domain_add_legacy(node, bank->width, irq_base,
-					     0, &irq_domain_simple_ops, NULL);
+					     0, &omap_gpio_irq_ops, bank);
 #else
 	bank->domain = irq_domain_add_linear(node, bank->width,
-					     &irq_domain_simple_ops, NULL);
+					     &omap_gpio_irq_ops, bank);
 #endif
 	if (!bank->domain) {
 		dev_err(dev, "Couldn't register an IRQ domain\n");

+ 11 - 16
drivers/gpu/drm/drm_crtc_helper.c

@@ -677,6 +677,11 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 					/* don't break so fail path works correctly */
 					fail = 1;
+
+				if (connector->dpms != DRM_MODE_DPMS_ON) {
+					DRM_DEBUG_KMS("connector dpms not on, full mode switch\n");
+					mode_changed = true;
+				}
 				break;
 			}
 		}
 
@@ -754,6 +759,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 				ret = -EINVAL;
 				goto fail;
 			}
+			DRM_DEBUG_KMS("Setting connector DPMS state to on\n");
+			for (i = 0; i < set->num_connectors; i++) {
+				DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id,
+					      drm_get_connector_name(set->connectors[i]));
+				set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON);
+			}
 		}
 		drm_helper_disable_unused_functions(dev);
 	} else if (fb_changed) {
@@ -771,22 +782,6 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 		}
 	}
 
-	/*
-	 * crtc set_config helpers implicit set the crtc and all connected
-	 * encoders to DPMS on for a full mode set. But for just an fb update it
-	 * doesn't do that. To not confuse userspace, do an explicit DPMS_ON
-	 * unconditionally. This will also ensure driver internal dpms state is
-	 * consistent again.
-	 */
-	if (set->crtc->enabled) {
-		DRM_DEBUG_KMS("Setting connector DPMS state to on\n");
-		for (i = 0; i < set->num_connectors; i++) {
-			DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id,
-				      drm_get_connector_name(set->connectors[i]));
-			set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON);
-		}
-	}
-
 	kfree(save_connectors);
 	kfree(save_encoders);
 	kfree(save_crtcs);

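Net effect of the drm_crtc_helper.c hunks: the unconditional DPMS-on that used to run after every set_config (even pure fb updates) is gone; instead, a connector that is not DPMS_ON forces a full modeset, and DPMS is reasserted only on that path. Condensed sketch of the resulting policy (not the literal helper code):

	if (mode_changed) {
		/* full modeset: bring every requested connector back on */
		for (i = 0; i < set->num_connectors; i++)
			set->connectors[i]->funcs->dpms(set->connectors[i],
							DRM_MODE_DPMS_ON);
	} else if (fb_changed) {
		/* fb/plane-only update: DPMS state is left untouched */
	}
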
+ 1 - 0
drivers/gpu/drm/i915/Makefile

@@ -38,6 +38,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  intel_sprite.o \
 	  intel_opregion.o \
 	  intel_sideband.o \
+	  intel_uncore.o \
 	  dvo_ch7xxx.o \
 	  dvo_ch7017.o \
 	  dvo_ivch.o \

+ 1 - 1
drivers/gpu/drm/i915/dvo_ch7xxx.c

@@ -307,7 +307,7 @@ static void ch7xxx_mode_set(struct intel_dvo_device *dvo,
 		idf |= CH7xxx_IDF_HSP;
 
 	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
-		idf |= CH7xxx_IDF_HSP;
+		idf |= CH7xxx_IDF_VSP;
 
 	ch7xxx_writeb(dvo, CH7xxx_IDF, idf);
 }

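The dvo_ch7xxx.c change is a classic copy-and-paste fix: the PVSYNC branch OR'ed in the hsync-polarity bit a second time, so vsync polarity was never programmed. After the fix the two flags map independently:

	if (mode->flags & DRM_MODE_FLAG_PHSYNC)
		idf |= CH7xxx_IDF_HSP;
	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
		idf |= CH7xxx_IDF_VSP;	/* was CH7xxx_IDF_HSP */
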
+ 163 - 15
drivers/gpu/drm/i915/i915_debugfs.c

@@ -135,7 +135,8 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
 	uintptr_t list = (uintptr_t) node->info_ent->data;
 	struct list_head *head;
 	struct drm_device *dev = node->minor->dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj;
 	size_t total_obj_size, total_gtt_size;
 	int count, ret;
@@ -147,11 +148,11 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
 	switch (list) {
 	case ACTIVE_LIST:
 		seq_puts(m, "Active:\n");
-		head = &dev_priv->mm.active_list;
+		head = &vm->active_list;
 		break;
 	case INACTIVE_LIST:
 		seq_puts(m, "Inactive:\n");
-		head = &dev_priv->mm.inactive_list;
+		head = &vm->inactive_list;
 		break;
 	default:
 		mutex_unlock(&dev->struct_mutex);
@@ -219,6 +220,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 	u32 count, mappable_count, purgeable_count;
 	size_t size, mappable_size, purgeable_size;
 	struct drm_i915_gem_object *obj;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_file *file;
 	int ret;
 
@@ -236,12 +238,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		   count, mappable_count, size, mappable_size);
 
 	size = count = mappable_size = mappable_count = 0;
-	count_objects(&dev_priv->mm.active_list, mm_list);
+	count_objects(&vm->active_list, mm_list);
 	seq_printf(m, "  %u [%u] active objects, %zu [%zu] bytes\n",
 		   count, mappable_count, size, mappable_size);
 
 	size = count = mappable_size = mappable_count = 0;
-	count_objects(&dev_priv->mm.inactive_list, mm_list);
+	count_objects(&vm->inactive_list, mm_list);
 	seq_printf(m, "  %u [%u] inactive objects, %zu [%zu] bytes\n",
 		   count, mappable_count, size, mappable_size);
 
@@ -276,8 +278,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		   count, size);
 
 	seq_printf(m, "%zu [%lu] gtt total\n",
-		   dev_priv->gtt.total,
-		   dev_priv->gtt.mappable_end - dev_priv->gtt.start);
+		   dev_priv->gtt.base.total,
+		   dev_priv->gtt.mappable_end - dev_priv->gtt.base.start);
 
 	seq_putc(m, '\n');
 	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
@@ -987,9 +989,9 @@ static int gen6_drpc_info(struct seq_file *m)
 	if (ret)
 		return ret;
 
-	spin_lock_irq(&dev_priv->gt_lock);
-	forcewake_count = dev_priv->forcewake_count;
-	spin_unlock_irq(&dev_priv->gt_lock);
+	spin_lock_irq(&dev_priv->uncore.lock);
+	forcewake_count = dev_priv->uncore.forcewake_count;
+	spin_unlock_irq(&dev_priv->uncore.lock);
 
 	if (forcewake_count) {
 		seq_puts(m, "RC information inaccurate because somebody "
@@ -1002,7 +1004,7 @@ static int gen6_drpc_info(struct seq_file *m)
 	}
 
 	gt_core_status = readl(dev_priv->regs + GEN6_GT_CORE_STATUS);
-	trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4);
+	trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true);
 
 	rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
 	rcctl1 = I915_READ(GEN6_RC_CONTROL);
@@ -1373,9 +1375,9 @@ static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned forcewake_count;
 
-	spin_lock_irq(&dev_priv->gt_lock);
-	forcewake_count = dev_priv->forcewake_count;
-	spin_unlock_irq(&dev_priv->gt_lock);
+	spin_lock_irq(&dev_priv->uncore.lock);
+	forcewake_count = dev_priv->uncore.forcewake_count;
+	spin_unlock_irq(&dev_priv->uncore.lock);
 
 	seq_printf(m, "forcewake count = %u\n", forcewake_count);
 
@@ -1530,6 +1532,148 @@ static int i915_dpio_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int i915_llc(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* Size calculation for LLC is a bit of a pain. Ignore for now. */
+	seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(dev)));
+	seq_printf(m, "eLLC: %zuMB\n", dev_priv->ellc_size);
+
+	return 0;
+}
+
+static int i915_edp_psr_status(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 psrstat, psrperf;
+
+	if (!IS_HASWELL(dev)) {
+		seq_puts(m, "PSR not supported on this platform\n");
+	} else if (I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE) {
+		seq_puts(m, "PSR enabled\n");
+	} else {
+		seq_puts(m, "PSR disabled: ");
+		switch (dev_priv->no_psr_reason) {
+		case PSR_NO_SOURCE:
+			seq_puts(m, "not supported on this platform");
+			break;
+		case PSR_NO_SINK:
+			seq_puts(m, "not supported by panel");
+			break;
+		case PSR_MODULE_PARAM:
+			seq_puts(m, "disabled by flag");
+			break;
+		case PSR_CRTC_NOT_ACTIVE:
+			seq_puts(m, "crtc not active");
+			break;
+		case PSR_PWR_WELL_ENABLED:
+			seq_puts(m, "power well enabled");
+			break;
+		case PSR_NOT_TILED:
+			seq_puts(m, "not tiled");
+			break;
+		case PSR_SPRITE_ENABLED:
+			seq_puts(m, "sprite enabled");
+			break;
+		case PSR_S3D_ENABLED:
+			seq_puts(m, "stereo 3d enabled");
+			break;
+		case PSR_INTERLACED_ENABLED:
+			seq_puts(m, "interlaced enabled");
+			break;
+		case PSR_HSW_NOT_DDIA:
+			seq_puts(m, "HSW ties PSR to DDI A (eDP)");
+			break;
+		default:
+			seq_puts(m, "unknown reason");
+		}
+		seq_puts(m, "\n");
+		return 0;
+	}
+
+	psrstat = I915_READ(EDP_PSR_STATUS_CTL);
+
+	seq_puts(m, "PSR Current State: ");
+	switch (psrstat & EDP_PSR_STATUS_STATE_MASK) {
+	case EDP_PSR_STATUS_STATE_IDLE:
+		seq_puts(m, "Reset state\n");
+		break;
+	case EDP_PSR_STATUS_STATE_SRDONACK:
+		seq_puts(m, "Wait for TG/Stream to send one frame of data after SRD conditions are met\n");
+		break;
+	case EDP_PSR_STATUS_STATE_SRDENT:
+		seq_puts(m, "SRD entry\n");
+		break;
+	case EDP_PSR_STATUS_STATE_BUFOFF:
+		seq_puts(m, "Wait for buffer turn off\n");
+		break;
+	case EDP_PSR_STATUS_STATE_BUFON:
+		seq_puts(m, "Wait for buffer turn on\n");
+		break;
+	case EDP_PSR_STATUS_STATE_AUXACK:
+		seq_puts(m, "Wait for AUX to acknowledge on SRD exit\n");
+		break;
+	case EDP_PSR_STATUS_STATE_SRDOFFACK:
+		seq_puts(m, "Wait for TG/Stream to acknowledge the SRD VDM exit\n");
+		break;
+	default:
+		seq_puts(m, "Unknown\n");
+		break;
+	}
+
+	seq_puts(m, "Link Status: ");
+	switch (psrstat & EDP_PSR_STATUS_LINK_MASK) {
+	case EDP_PSR_STATUS_LINK_FULL_OFF:
+		seq_puts(m, "Link is fully off\n");
+		break;
+	case EDP_PSR_STATUS_LINK_FULL_ON:
+		seq_puts(m, "Link is fully on\n");
+		break;
+	case EDP_PSR_STATUS_LINK_STANDBY:
+		seq_puts(m, "Link is in standby\n");
+		break;
+	default:
+		seq_puts(m, "Unknown\n");
+		break;
+	}
+
+	seq_printf(m, "PSR Entry Count: %u\n",
+		   psrstat >> EDP_PSR_STATUS_COUNT_SHIFT &
+		   EDP_PSR_STATUS_COUNT_MASK);
+
+	seq_printf(m, "Max Sleep Timer Counter: %u\n",
+		   psrstat >> EDP_PSR_STATUS_MAX_SLEEP_TIMER_SHIFT &
+		   EDP_PSR_STATUS_MAX_SLEEP_TIMER_MASK);
+
+	seq_printf(m, "Had AUX error: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_AUX_ERROR));
+
+	seq_printf(m, "Sending AUX: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_AUX_SENDING));
+
+	seq_printf(m, "Sending Idle: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_SENDING_IDLE));
+
+	seq_printf(m, "Sending TP2 TP3: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_SENDING_TP2_TP3));
+
+	seq_printf(m, "Sending TP1: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_SENDING_TP1));
+
+	seq_printf(m, "Idle Count: %u\n",
+		   psrstat & EDP_PSR_STATUS_IDLE_MASK);
+
+	psrperf = (I915_READ(EDP_PSR_PERF_CNT)) & EDP_PSR_PERF_CNT_MASK;
+	seq_printf(m, "Performance Counter: %u\n", psrperf);
+
+	return 0;
+}
+
 static int
 i915_wedged_get(void *data, u64 *val)
 {
@@ -1612,6 +1756,7 @@ i915_drop_caches_set(void *data, u64 val)
 	struct drm_device *dev = data;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj, *next;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	int ret;
 
 	DRM_DEBUG_DRIVER("Dropping caches: 0x%08llx\n", val);
@@ -1632,7 +1777,8 @@ i915_drop_caches_set(void *data, u64 val)
 		i915_gem_retire_requests(dev);
 
 	if (val & DROP_BOUND) {
-		list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list, mm_list)
+		list_for_each_entry_safe(obj, next, &vm->inactive_list,
+					 mm_list)
 			if (obj->pin_count == 0) {
 				ret = i915_gem_object_unbind(obj);
 				if (ret)
@@ -1959,6 +2105,8 @@ static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_swizzle_info", i915_swizzle_info, 0},
 	{"i915_ppgtt_info", i915_ppgtt_info, 0},
 	{"i915_dpio", i915_dpio_info, 0},
+	{"i915_llc", i915_llc, 0},
+	{"i915_edp_psr_status", i915_edp_psr_status, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 

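The two new debugfs files follow the existing drm_info_list pattern, so adding another is mechanical; minimal sketch (i915_example_info is made up):

	static int i915_example_info(struct seq_file *m, void *data)
	{
		struct drm_info_node *node = m->private;
		struct drm_device *dev = node->minor->dev;
		struct drm_i915_private *dev_priv = dev->dev_private;

		seq_printf(m, "eLLC: %zuMB\n", dev_priv->ellc_size);
		return 0;
	}

plus a table entry {"i915_example_info", i915_example_info, 0} in i915_debugfs_list.
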
+ 34 - 31
drivers/gpu/drm/i915/i915_dma.c

@@ -1358,7 +1358,7 @@ cleanup_gem:
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
 	i915_gem_cleanup_aliasing_ppgtt(dev);
-	drm_mm_takedown(&dev_priv->mm.gtt_space);
+	drm_mm_takedown(&dev_priv->gtt.base.mm);
 cleanup_irq:
 	drm_irq_uninstall(dev);
 cleanup_gem_stolen:
@@ -1435,22 +1435,6 @@ static void i915_dump_device_info(struct drm_i915_private *dev_priv)
 #undef SEP_COMMA
 }
 
-/**
- * intel_early_sanitize_regs - clean up BIOS state
- * @dev: DRM device
- *
- * This function must be called before we do any I915_READ or I915_WRITE. Its
- * purpose is to clean up any state left by the BIOS that may affect us when
- * reading and/or writing registers.
- */
-static void intel_early_sanitize_regs(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	if (HAS_FPGA_DBG_UNCLAIMED(dev))
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
-}
-
 /**
  * i915_driver_load - setup chip and create an initial config
  * @dev: DRM device
@@ -1490,8 +1474,21 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	dev_priv->dev = dev;
 	dev_priv->info = info;
 
+	spin_lock_init(&dev_priv->irq_lock);
+	spin_lock_init(&dev_priv->gpu_error.lock);
+	spin_lock_init(&dev_priv->backlight.lock);
+	spin_lock_init(&dev_priv->uncore.lock);
+	spin_lock_init(&dev_priv->mm.object_stat_lock);
+	mutex_init(&dev_priv->dpio_lock);
+	mutex_init(&dev_priv->rps.hw_lock);
+	mutex_init(&dev_priv->modeset_restore_lock);
+
 	i915_dump_device_info(dev_priv);
 
+	INIT_LIST_HEAD(&dev_priv->vm_list);
+	INIT_LIST_HEAD(&dev_priv->gtt.base.global_link);
+	list_add(&dev_priv->gtt.base.global_link, &dev_priv->vm_list);
+
 	if (i915_get_bridge_dev(dev)) {
 		ret = -EIO;
 		goto free_priv;
@@ -1517,7 +1514,17 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 		goto put_bridge;
 	}
 
-	intel_early_sanitize_regs(dev);
+	intel_uncore_early_sanitize(dev);
+
+	if (IS_HASWELL(dev) && (I915_READ(HSW_EDRAM_PRESENT) == 1)) {
+		/* The docs do not explain exactly how the calculation can be
+		 * made. It is somewhat guessable, but for now, it's always
+		 * 128MB.
+		 * NB: We can't write IDICR yet because we do not have gt funcs
+		 * set up */
+		dev_priv->ellc_size = 128;
+		DRM_INFO("Found %zuMB of eLLC\n", dev_priv->ellc_size);
+	}
 
 	ret = i915_gem_gtt_init(dev);
 	if (ret)
@@ -1580,7 +1587,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	intel_detect_pch(dev);
 
 	intel_irq_init(dev);
-	intel_gt_init(dev);
+	intel_pm_init(dev);
+	intel_uncore_sanitize(dev);
+	intel_uncore_init(dev);
 
 	/* Try to make sure MCHBAR is enabled before poking at it */
 	intel_setup_mchbar(dev);
@@ -1605,14 +1614,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	if (!IS_I945G(dev) && !IS_I945GM(dev))
 		pci_enable_msi(dev->pdev);
 
-	spin_lock_init(&dev_priv->irq_lock);
-	spin_lock_init(&dev_priv->gpu_error.lock);
-	spin_lock_init(&dev_priv->backlight.lock);
-	mutex_init(&dev_priv->dpio_lock);
-
-	mutex_init(&dev_priv->rps.hw_lock);
-	mutex_init(&dev_priv->modeset_restore_lock);
-
 	dev_priv->num_plane = 1;
 	if (IS_VALLEYVIEW(dev))
 		dev_priv->num_plane = 2;
@@ -1642,7 +1643,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	if (INTEL_INFO(dev)->num_pipes) {
 		/* Must be done after probing outputs */
 		intel_opregion_init(dev);
-		acpi_video_register();
+		acpi_video_register_with_quirks();
 	}
 
 	if (IS_GEN5(dev))
@@ -1663,7 +1664,7 @@ out_gem_unload:
 out_mtrrfree:
 	arch_phys_wc_del(dev_priv->gtt.mtrr);
 	io_mapping_free(dev_priv->gtt.mappable);
-	dev_priv->gtt.gtt_remove(dev);
+	dev_priv->gtt.base.cleanup(&dev_priv->gtt.base);
 out_rmmap:
 	pci_iounmap(dev->pdev, dev_priv->regs);
 put_bridge:
@@ -1748,7 +1749,9 @@ int i915_driver_unload(struct drm_device *dev)
 			i915_free_hws(dev);
 	}
 
-	drm_mm_takedown(&dev_priv->mm.gtt_space);
+	list_del(&dev_priv->gtt.base.global_link);
+	WARN_ON(!list_empty(&dev_priv->vm_list));
+	drm_mm_takedown(&dev_priv->gtt.base.mm);
 	if (dev_priv->regs != NULL)
 		pci_iounmap(dev->pdev, dev_priv->regs);
 
@@ -1758,7 +1761,7 @@ int i915_driver_unload(struct drm_device *dev)
 	destroy_workqueue(dev_priv->wq);
 	pm_qos_remove_request(&dev_priv->pm_qos);
 
-	dev_priv->gtt.gtt_remove(dev);
+	dev_priv->gtt.base.cleanup(&dev_priv->gtt.base);
 
 	if (dev_priv->slab)
 		kmem_cache_destroy(dev_priv->slab);

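Two details worth calling out in the i915_dma.c hunk: the lock/mutex initialization moves to the top of i915_driver_load() because the reworked accessors may take uncore.lock on the very first register access, and the eLLC size has to be probed from a register since it cannot be derived from the PCI ID. The probe pairs with a consumer later in the series; condensed:

	/* detection in i915_driver_load(); 128MB is hard-coded until the
	 * docs explain the real formula */
	if (IS_HASWELL(dev) && I915_READ(HSW_EDRAM_PRESENT) == 1)
		dev_priv->ellc_size = 128;

	/* consumer in i915_gem_init_hw() (see the i915_gem.c hunk below) */
	if (dev_priv->ellc_size)
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
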
+ 11 - 266
drivers/gpu/drm/i915/i915_drv.c

@@ -118,6 +118,10 @@ module_param_named(i915_enable_ppgtt, i915_enable_ppgtt, int, 0600);
 MODULE_PARM_DESC(i915_enable_ppgtt,
 		"Enable PPGTT (default: true)");
 
+int i915_enable_psr __read_mostly = 0;
+module_param_named(enable_psr, i915_enable_psr, int, 0600);
+MODULE_PARM_DESC(enable_psr, "Enable PSR (default: false)");
+
 unsigned int i915_preliminary_hw_support __read_mostly = 0;
 module_param_named(preliminary_hw_support, i915_preliminary_hw_support, int, 0600);
 MODULE_PARM_DESC(preliminary_hw_support,
@@ -137,6 +141,11 @@ module_param_named(fastboot, i915_fastboot, bool, 0600);
 MODULE_PARM_DESC(fastboot, "Try to skip unnecessary mode sets at boot time "
 		 "(default: false)");
 
+bool i915_prefault_disable __read_mostly;
+module_param_named(prefault_disable, i915_prefault_disable, bool, 0600);
+MODULE_PARM_DESC(prefault_disable,
+		"Disable page prefaulting for pread/pwrite/reloc (default: false). For developers only.");
+
 static struct drm_driver driver;
 extern int intel_agp_enabled;
 
@@ -714,7 +723,7 @@ static int i915_drm_thaw(struct drm_device *dev)
 {
 	int error = 0;
 
-	intel_gt_reset(dev);
+	intel_uncore_sanitize(dev);
 
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
 		mutex_lock(&dev->struct_mutex);
@@ -740,7 +749,7 @@ int i915_resume(struct drm_device *dev)
 
 	pci_set_master(dev->pdev);
 
-	intel_gt_reset(dev);
+	intel_uncore_sanitize(dev);
 
 	/*
 	 * Platforms with opregion should have sane BIOS, older ones (gen3 and
@@ -761,140 +770,6 @@ int i915_resume(struct drm_device *dev)
 	return 0;
 }
 
-static int i8xx_do_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	if (IS_I85X(dev))
-		return -ENODEV;
-
-	I915_WRITE(D_STATE, I915_READ(D_STATE) | DSTATE_GFX_RESET_I830);
-	POSTING_READ(D_STATE);
-
-	if (IS_I830(dev) || IS_845G(dev)) {
-		I915_WRITE(DEBUG_RESET_I830,
-			   DEBUG_RESET_DISPLAY |
-			   DEBUG_RESET_RENDER |
-			   DEBUG_RESET_FULL);
-		POSTING_READ(DEBUG_RESET_I830);
-		msleep(1);
-
-		I915_WRITE(DEBUG_RESET_I830, 0);
-		POSTING_READ(DEBUG_RESET_I830);
-	}
-
-	msleep(1);
-
-	I915_WRITE(D_STATE, I915_READ(D_STATE) & ~DSTATE_GFX_RESET_I830);
-	POSTING_READ(D_STATE);
-
-	return 0;
-}
-
-static int i965_reset_complete(struct drm_device *dev)
-{
-	u8 gdrst;
-	pci_read_config_byte(dev->pdev, I965_GDRST, &gdrst);
-	return (gdrst & GRDOM_RESET_ENABLE) == 0;
-}
-
-static int i965_do_reset(struct drm_device *dev)
-{
-	int ret;
-
-	/*
-	 * Set the domains we want to reset (GRDOM/bits 2 and 3) as
-	 * well as the reset bit (GR/bit 0).  Setting the GR bit
-	 * triggers the reset; when done, the hardware will clear it.
-	 */
-	pci_write_config_byte(dev->pdev, I965_GDRST,
-			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
-	ret =  wait_for(i965_reset_complete(dev), 500);
-	if (ret)
-		return ret;
-
-	/* We can't reset render&media without also resetting display ... */
-	pci_write_config_byte(dev->pdev, I965_GDRST,
-			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
-
-	ret =  wait_for(i965_reset_complete(dev), 500);
-	if (ret)
-		return ret;
-
-	pci_write_config_byte(dev->pdev, I965_GDRST, 0);
-
-	return 0;
-}
-
-static int ironlake_do_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 gdrst;
-	int ret;
-
-	gdrst = I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR);
-	gdrst &= ~GRDOM_MASK;
-	I915_WRITE(MCHBAR_MIRROR_BASE + ILK_GDSR,
-		   gdrst | GRDOM_RENDER | GRDOM_RESET_ENABLE);
-	ret = wait_for(I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR) & 0x1, 500);
-	if (ret)
-		return ret;
-
-	/* We can't reset render&media without also resetting display ... */
-	gdrst = I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR);
-	gdrst &= ~GRDOM_MASK;
-	I915_WRITE(MCHBAR_MIRROR_BASE + ILK_GDSR,
-		   gdrst | GRDOM_MEDIA | GRDOM_RESET_ENABLE);
-	return wait_for(I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR) & 0x1, 500);
-}
-
-static int gen6_do_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int	ret;
-	unsigned long irqflags;
-
-	/* Hold gt_lock across reset to prevent any register access
-	 * with forcewake not set correctly
-	 */
-	spin_lock_irqsave(&dev_priv->gt_lock, irqflags);
-
-	/* Reset the chip */
-
-	/* GEN6_GDRST is not in the gt power well, no need to check
-	 * for fifo space for the write or forcewake the chip for
-	 * the read
-	 */
-	I915_WRITE_NOTRACE(GEN6_GDRST, GEN6_GRDOM_FULL);
-
-	/* Spin waiting for the device to ack the reset request */
-	ret = wait_for((I915_READ_NOTRACE(GEN6_GDRST) & GEN6_GRDOM_FULL) == 0, 500);
-
-	/* If reset with a user forcewake, try to restore, otherwise turn it off */
-	if (dev_priv->forcewake_count)
-		dev_priv->gt.force_wake_get(dev_priv);
-	else
-		dev_priv->gt.force_wake_put(dev_priv);
-
-	/* Restore fifo count */
-	dev_priv->gt_fifo_count = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
-
-	spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags);
-	return ret;
-}
-
-int intel_gpu_reset(struct drm_device *dev)
-{
-	switch (INTEL_INFO(dev)->gen) {
-	case 7:
-	case 6: return gen6_do_reset(dev);
-	case 5: return ironlake_do_reset(dev);
-	case 4: return i965_do_reset(dev);
-	case 2: return i8xx_do_reset(dev);
-	default: return -ENODEV;
-	}
-}
-
 /**
  * i915_reset - reset chip after a hang
  * @dev: drm device to reset
@@ -1224,133 +1099,3 @@ module_exit(i915_exit);
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL and additional rights");
-
-/* We give fast paths for the really cool registers */
-#define NEEDS_FORCE_WAKE(dev_priv, reg) \
-	((HAS_FORCE_WAKE((dev_priv)->dev)) && \
-	 ((reg) < 0x40000) &&            \
-	 ((reg) != FORCEWAKE))
-static void
-ilk_dummy_write(struct drm_i915_private *dev_priv)
-{
-	/* WaIssueDummyWriteToWakeupFromRC6:ilk Issue a dummy write to wake up
-	 * the chip from rc6 before touching it for real. MI_MODE is masked,
-	 * hence harmless to write 0 into. */
-	I915_WRITE_NOTRACE(MI_MODE, 0);
-}
-
-static void
-hsw_unclaimed_reg_clear(struct drm_i915_private *dev_priv, u32 reg)
-{
-	if (HAS_FPGA_DBG_UNCLAIMED(dev_priv->dev) &&
-	    (I915_READ_NOTRACE(FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
-		DRM_ERROR("Unknown unclaimed register before writing to %x\n",
-			  reg);
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
-	}
-}
-
-static void
-hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg)
-{
-	if (HAS_FPGA_DBG_UNCLAIMED(dev_priv->dev) &&
-	    (I915_READ_NOTRACE(FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
-		DRM_ERROR("Unclaimed write to %x\n", reg);
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
-	}
-}
-
-#define __i915_read(x, y) \
-u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
-	u##x val = 0; \
-	if (IS_GEN5(dev_priv->dev)) \
-		ilk_dummy_write(dev_priv); \
-	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		unsigned long irqflags; \
-		spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \
-		if (dev_priv->forcewake_count == 0) \
-			dev_priv->gt.force_wake_get(dev_priv); \
-		val = read##y(dev_priv->regs + reg); \
-		if (dev_priv->forcewake_count == 0) \
-			dev_priv->gt.force_wake_put(dev_priv); \
-		spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \
-	} else { \
-		val = read##y(dev_priv->regs + reg); \
-	} \
-	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
-	return val; \
-}
-
-__i915_read(8, b)
-__i915_read(16, w)
-__i915_read(32, l)
-__i915_read(64, q)
-#undef __i915_read
-
-#define __i915_write(x, y) \
-void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
-	u32 __fifo_ret = 0; \
-	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
-	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
-	} \
-	if (IS_GEN5(dev_priv->dev)) \
-		ilk_dummy_write(dev_priv); \
-	hsw_unclaimed_reg_clear(dev_priv, reg); \
-	write##y(val, dev_priv->regs + reg); \
-	if (unlikely(__fifo_ret)) { \
-		gen6_gt_check_fifodbg(dev_priv); \
-	} \
-	hsw_unclaimed_reg_check(dev_priv, reg); \
-}
-__i915_write(8, b)
-__i915_write(16, w)
-__i915_write(32, l)
-__i915_write(64, q)
-#undef __i915_write
-
-static const struct register_whitelist {
-	uint64_t offset;
-	uint32_t size;
-	uint32_t gen_bitmask; /* support gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
-} whitelist[] = {
-	{ RING_TIMESTAMP(RENDER_RING_BASE), 8, 0xF0 },
-};
-
-int i915_reg_read_ioctl(struct drm_device *dev,
-			void *data, struct drm_file *file)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_reg_read *reg = data;
-	struct register_whitelist const *entry = whitelist;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
-		if (entry->offset == reg->offset &&
-		    (1 << INTEL_INFO(dev)->gen & entry->gen_bitmask))
-			break;
-	}
-
-	if (i == ARRAY_SIZE(whitelist))
-		return -EINVAL;
-
-	switch (entry->size) {
-	case 8:
-		reg->val = I915_READ64(reg->offset);
-		break;
-	case 4:
-		reg->val = I915_READ(reg->offset);
-		break;
-	case 2:
-		reg->val = I915_READ16(reg->offset);
-		break;
-	case 1:
-		reg->val = I915_READ8(reg->offset);
-		break;
-	default:
-		WARN_ON(1);
-		return -EINVAL;
-	}
-
-	return 0;
-}

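Nothing removed from i915_drv.c above is gone for good: the reset helpers and the __i915_read/__i915_write families move into the new intel_uncore.c (added to the Makefile earlier), with the forcewake bookkeeping relocated into dev_priv->uncore. Rough shape of a relocated read accessor, assuming the same logic as the removed macro body plus the new trace flag; the NEEDS_FORCE_WAKE range check and the gen5 dummy write are elided, and the real body lives in intel_uncore.c, which is not part of this excerpt:

	u32 i915_read32(struct drm_i915_private *dev_priv, u32 reg, bool trace)
	{
		unsigned long irqflags;
		u32 val;

		spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
		if (dev_priv->uncore.forcewake_count == 0)
			dev_priv->uncore.funcs.force_wake_get(dev_priv);
		val = readl(dev_priv->regs + reg);
		if (dev_priv->uncore.forcewake_count == 0)
			dev_priv->uncore.funcs.force_wake_put(dev_priv);
		spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
		trace_i915_reg_rw(false, reg, val, sizeof(val), trace);
		return val;
	}
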
+ 158 - 103
drivers/gpu/drm/i915/i915_drv.h

@@ -391,11 +391,20 @@ struct drm_i915_display_funcs {
 	/* pll clock increase/decrease */
 };
 
-struct drm_i915_gt_funcs {
+struct intel_uncore_funcs {
 	void (*force_wake_get)(struct drm_i915_private *dev_priv);
 	void (*force_wake_put)(struct drm_i915_private *dev_priv);
 };
 
+struct intel_uncore {
+	spinlock_t lock; /** lock is also taken in irq contexts. */
+
+	struct intel_uncore_funcs funcs;
+
+	unsigned fifo_count;
+	unsigned forcewake_count;
+};
+
 #define DEV_INFO_FOR_EACH_FLAG(func, sep) \
 	func(is_mobile) sep \
 	func(is_i85x) sep \
@@ -446,6 +455,54 @@ enum i915_cache_level {
 
 typedef uint32_t gen6_gtt_pte_t;
 
+struct i915_address_space {
+	struct drm_mm mm;
+	struct drm_device *dev;
+	struct list_head global_link;
+	unsigned long start;		/* Start offset always 0 for dri2 */
+	size_t total;		/* size addr space maps (ex. 2GB for ggtt) */
+
+	struct {
+		dma_addr_t addr;
+		struct page *page;
+	} scratch;
+
+	/**
+	 * List of objects currently involved in rendering.
+	 *
+	 * Includes buffers having the contents of their GPU caches
+	 * flushed, not necessarily primitives.  last_rendering_seqno
+	 * represents when the rendering involved will be completed.
+	 *
+	 * A reference is held on the buffer while on this list.
+	 */
+	struct list_head active_list;
+
+	/**
+	 * LRU list of objects which are not in the ringbuffer and
+	 * are ready to unbind, but are still in the GTT.
+	 *
+	 * last_rendering_seqno is 0 while an object is in this list.
+	 *
+	 * A reference is not held on the buffer while on this list,
+	 * as merely being GTT-bound shouldn't prevent its being
+	 * freed, and we'll pull it off the list in the free path.
+	 */
+	struct list_head inactive_list;
+
+	/* FIXME: Need a more generic return type */
+	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
+				     enum i915_cache_level level);
+	void (*clear_range)(struct i915_address_space *vm,
+			    unsigned int first_entry,
+			    unsigned int num_entries);
+	void (*insert_entries)(struct i915_address_space *vm,
+			       struct sg_table *st,
+			       unsigned int first_entry,
+			       enum i915_cache_level cache_level);
+	void (*cleanup)(struct i915_address_space *vm);
+};
+
 /* The Graphics Translation Table is the way in which GEN hardware translates a
  * Graphics Virtual Address into a Physical Address. In addition to the normal
  * collateral associated with any va->pa translations GEN hardware also has a
@@ -454,8 +511,7 @@ typedef uint32_t gen6_gtt_pte_t;
  * the spec.
  */
 struct i915_gtt {
-	unsigned long start;		/* Start offset of used GTT */
-	size_t total;			/* Total size GTT can map */
+	struct i915_address_space base;
 	size_t stolen_size;		/* Total size of stolen memory */
 
 	unsigned long mappable_end;	/* End offset that we can CPU map */
@@ -466,10 +522,6 @@ struct i915_gtt {
 	void __iomem *gsm;
 
 	bool do_idle_maps;
-	struct {
-		dma_addr_t addr;
-		struct page *page;
-	} scratch;
 
 	int mtrr;
 
@@ -477,38 +529,28 @@ struct i915_gtt {
 	int (*gtt_probe)(struct drm_device *dev, size_t *gtt_total,
 			  size_t *stolen, phys_addr_t *mappable_base,
 			  unsigned long *mappable_end);
-	void (*gtt_remove)(struct drm_device *dev);
-	void (*gtt_clear_range)(struct drm_device *dev,
-				unsigned int first_entry,
-				unsigned int num_entries);
-	void (*gtt_insert_entries)(struct drm_device *dev,
-				   struct sg_table *st,
-				   unsigned int pg_start,
-				   enum i915_cache_level cache_level);
-	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
-				     enum i915_cache_level level);
 };
-#define gtt_total_entries(gtt) ((gtt).total >> PAGE_SHIFT)
+#define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
 
 struct i915_hw_ppgtt {
-	struct drm_device *dev;
+	struct i915_address_space base;
 	unsigned num_pd_entries;
 	struct page **pt_pages;
 	uint32_t pd_offset;
 	dma_addr_t *pt_dma_addr;
 
-	/* pte functions, mirroring the interface of the global gtt. */
-	void (*clear_range)(struct i915_hw_ppgtt *ppgtt,
-			    unsigned int first_entry,
-			    unsigned int num_entries);
-	void (*insert_entries)(struct i915_hw_ppgtt *ppgtt,
-			       struct sg_table *st,
-			       unsigned int pg_start,
-			       enum i915_cache_level cache_level);
-	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
-				     enum i915_cache_level level);
 	int (*enable)(struct drm_device *dev);
-	void (*cleanup)(struct i915_hw_ppgtt *ppgtt);
+};
+
+/* To make things as simple as possible (i.e. no refcounting), a VMA's lifetime
+ * will always be <= an object's lifetime. So object refcounting should cover us.
+ */
+struct i915_vma {
+	struct drm_mm_node node;
+	struct drm_i915_gem_object *obj;
+	struct i915_address_space *vm;
+
+	struct list_head vma_link; /* Link in the object's VMA list */
 };
 
 struct i915_ctx_hang_stats {
@@ -560,6 +602,18 @@ struct i915_fbc {
 	} no_fbc_reason;
 };
 
+enum no_psr_reason {
+	PSR_NO_SOURCE, /* Not supported on platform */
+	PSR_NO_SINK, /* Not supported by panel */
+	PSR_MODULE_PARAM,
+	PSR_CRTC_NOT_ACTIVE,
+	PSR_PWR_WELL_ENABLED,
+	PSR_NOT_TILED,
+	PSR_SPRITE_ENABLED,
+	PSR_S3D_ENABLED,
+	PSR_INTERLACED_ENABLED,
+	PSR_HSW_NOT_DDIA,
+};
 
 enum intel_pch {
 	PCH_NONE = 0,	/* No PCH present */
@@ -577,6 +631,7 @@ enum intel_sbi_destination {
 #define QUIRK_PIPEA_FORCE (1<<0)
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
 #define QUIRK_INVERT_BRIGHTNESS (1<<2)
+#define QUIRK_NO_PCH_PWM_ENABLE (1<<3)
 
 struct intel_fbdev;
 struct intel_fbc_work;
@@ -834,8 +889,6 @@ struct intel_l3_parity {
 struct i915_gem_mm {
 	/** Memory allocator for GTT stolen memory */
 	struct drm_mm stolen;
-	/** Memory allocator for GTT */
-	struct drm_mm gtt_space;
 	/** List of all objects in gtt_space. Used to restore gtt
 	 * mappings on resume */
 	struct list_head bound_list;
@@ -855,29 +908,6 @@ struct i915_gem_mm {
 	struct shrinker inactive_shrinker;
 	bool shrinker_no_lock_stealing;
 
-	/**
-	 * List of objects currently involved in rendering.
-	 *
-	 * Includes buffers having the contents of their GPU caches
-	 * flushed, not necessarily primitives.  last_rendering_seqno
-	 * represents when the rendering involved will be completed.
-	 *
-	 * A reference is held on the buffer while on this list.
-	 */
-	struct list_head active_list;
-
-	/**
-	 * LRU list of objects which are not in the ringbuffer and
-	 * are ready to unbind, but are still in the GTT.
-	 *
-	 * last_rendering_seqno is 0 while an object is in this list.
-	 *
-	 * A reference is not held on the buffer while on this list,
-	 * as merely being GTT-bound shouldn't prevent its being
-	 * freed, and we'll pull it off the list in the free path.
-	 */
-	struct list_head inactive_list;
-
 	/** LRU list of objects with fence regs on them. */
 	struct list_head fence_list;
 
@@ -905,6 +935,7 @@ struct i915_gem_mm {
 	struct drm_i915_gem_phys_object *phys_objs[I915_MAX_PHYS_OBJECT];
 
 	/* accounting, useful for userland debugging */
+	spinlock_t object_stat_lock;
 	size_t object_memory;
 	u32 object_count;
 };
@@ -1024,14 +1055,7 @@ typedef struct drm_i915_private {
 
 	void __iomem *regs;
 
-	struct drm_i915_gt_funcs gt;
-	/** gt_fifo_count and the subsequent register write are synchronized
-	 * with dev->struct_mutex. */
-	unsigned gt_fifo_count;
-	/** forcewake_count is protected by gt_lock */
-	unsigned forcewake_count;
-	/** gt_lock is also taken in irq contexts. */
-	spinlock_t gt_lock;
+	struct intel_uncore uncore;
 
 	struct intel_gmbus gmbus[GMBUS_NUM_PORTS];
 
@@ -1124,7 +1148,8 @@ typedef struct drm_i915_private {
 	enum modeset_restore modeset_restore;
 	struct mutex modeset_restore_lock;
 
-	struct i915_gtt gtt;
+	struct list_head vm_list; /* Global list of all address spaces */
+	struct i915_gtt gtt; /* VMA representing the global address space */
 
 	struct i915_gem_mm mm;
 
@@ -1151,6 +1176,9 @@ typedef struct drm_i915_private {
 
 	struct intel_l3_parity l3_parity;
 
+	/* Cannot be determined by PCIID. You must always read a register. */
+	size_t ellc_size;
+
 	/* gen6+ rps state */
 	struct intel_gen6_power_mgmt rps;
 
@@ -1161,6 +1189,8 @@ typedef struct drm_i915_private {
 	/* Haswell power well */
 	struct i915_power_well power_well;
 
+	enum no_psr_reason no_psr_reason;
+
 	struct i915_gpu_error gpu_error;
 
 	struct drm_i915_gem_object *vlv_pctx;
@@ -1228,8 +1258,9 @@ struct drm_i915_gem_object {
 
 	const struct drm_i915_gem_object_ops *ops;
 
-	/** Current space allocated to this object in the GTT, if any. */
-	struct drm_mm_node gtt_space;
+	/** List of VMAs backed by this object */
+	struct list_head vma_list;
+
 	/** Stolen memory for this object, instead of being backed by shmem. */
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
@@ -1355,18 +1386,32 @@ struct drm_i915_gem_object {
 
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
-/* Offset of the first PTE pointing to this object */
-static inline unsigned long
-i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o)
+/* This is a temporary define to help transition us to real VMAs. If you see
+ * this, you're either reviewing code, or bisecting it. */
+static inline struct i915_vma *
+__i915_gem_obj_to_vma(struct drm_i915_gem_object *obj)
 {
-	return o->gtt_space.start;
+	if (list_empty(&obj->vma_list))
+		return NULL;
+	return list_first_entry(&obj->vma_list, struct i915_vma, vma_link);
 }
 
 /* Whether or not this object is currently mapped by the translation tables */
 static inline bool
 i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *o)
 {
-	return drm_mm_node_allocated(&o->gtt_space);
+	struct i915_vma *vma = __i915_gem_obj_to_vma(o);
+	if (vma == NULL)
+		return false;
+	return drm_mm_node_allocated(&vma->node);
+}
+
+/* Offset of the first PTE pointing to this object */
+static inline unsigned long
+i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o)
+{
+	BUG_ON(list_empty(&o->vma_list));
+	return __i915_gem_obj_to_vma(o)->node.start;
 }
 
 /* The size used in the translation tables may be larger than the actual size of
@@ -1376,14 +1421,15 @@ i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *o)
 static inline unsigned long
 i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
 {
-	return o->gtt_space.size;
+	BUG_ON(list_empty(&o->vma_list));
+	return __i915_gem_obj_to_vma(o)->node.size;
 }
 
 static inline void
 i915_gem_obj_ggtt_set_color(struct drm_i915_gem_object *o,
 			    enum i915_cache_level color)
 {
-	o->gtt_space.color = color;
+	__i915_gem_obj_to_vma(o)->node.color = color;
 }
 
 /**
@@ -1580,10 +1626,12 @@ extern int i915_enable_rc6 __read_mostly;
 extern int i915_enable_fbc __read_mostly;
 extern bool i915_enable_hangcheck __read_mostly;
 extern int i915_enable_ppgtt __read_mostly;
+extern int i915_enable_psr __read_mostly;
 extern unsigned int i915_preliminary_hw_support __read_mostly;
 extern int i915_disable_power_well __read_mostly;
 extern int i915_enable_ips __read_mostly;
 extern bool i915_fastboot __read_mostly;
+extern bool i915_prefault_disable __read_mostly;
 
 extern int i915_suspend(struct drm_device *dev, pm_message_t state);
 extern int i915_resume(struct drm_device *dev);
@@ -1619,13 +1667,20 @@ extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 extern void intel_console_resume(struct work_struct *work);
 
 /* i915_irq.c */
+void i915_queue_hangcheck(struct drm_device *dev);
 void i915_hangcheck_elapsed(unsigned long data);
 void i915_handle_error(struct drm_device *dev, bool wedged);
 
 extern void intel_irq_init(struct drm_device *dev);
 extern void intel_hpd_init(struct drm_device *dev);
-extern void intel_gt_init(struct drm_device *dev);
-extern void intel_gt_reset(struct drm_device *dev);
+extern void intel_pm_init(struct drm_device *dev);
+
+extern void intel_uncore_sanitize(struct drm_device *dev);
+extern void intel_uncore_early_sanitize(struct drm_device *dev);
+extern void intel_uncore_init(struct drm_device *dev);
+extern void intel_uncore_reset(struct drm_device *dev);
+extern void intel_uncore_clear_errors(struct drm_device *dev);
+extern void intel_uncore_check_errors(struct drm_device *dev);
 
 void
 i915_enable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask);
@@ -1689,6 +1744,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
 void i915_gem_free_object(struct drm_gem_object *obj);
+struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj,
+				     struct i915_address_space *vm);
+void i915_gem_vma_destroy(struct i915_vma *vma);
 
 int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
 				     uint32_t alignment,
@@ -1857,7 +1915,7 @@ static inline void i915_gem_context_unreference(struct i915_hw_context *ctx)
 }
 
 struct i915_ctx_hang_stats * __must_check
-i915_gem_context_get_hang_stats(struct intel_ring_buffer *ring,
+i915_gem_context_get_hang_stats(struct drm_device *dev,
 				struct drm_file *file,
 				u32 id);
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
@@ -2056,7 +2114,6 @@ extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
  */
 void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
 void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
-int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
 
 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val);
 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val);
@@ -2075,39 +2132,37 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
 int vlv_gpu_freq(int ddr_freq, int val);
 int vlv_freq_opcode(int ddr_freq, int val);
 
-#define __i915_read(x, y) \
-	u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg);
-
-__i915_read(8, b)
-__i915_read(16, w)
-__i915_read(32, l)
-__i915_read(64, q)
+#define __i915_read(x) \
+	u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg, bool trace);
+__i915_read(8)
+__i915_read(16)
+__i915_read(32)
+__i915_read(64)
 #undef __i915_read
 
-#define __i915_write(x, y) \
-	void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val);
-
-__i915_write(8, b)
-__i915_write(16, w)
-__i915_write(32, l)
-__i915_write(64, q)
+#define __i915_write(x) \
+	void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val, bool trace);
+__i915_write(8)
+__i915_write(16)
+__i915_write(32)
+__i915_write(64)
 #undef __i915_write
 
-#define I915_READ8(reg)		i915_read8(dev_priv, (reg))
-#define I915_WRITE8(reg, val)	i915_write8(dev_priv, (reg), (val))
+#define I915_READ8(reg)		i915_read8(dev_priv, (reg), true)
+#define I915_WRITE8(reg, val)	i915_write8(dev_priv, (reg), (val), true)
 
-#define I915_READ16(reg)	i915_read16(dev_priv, (reg))
-#define I915_WRITE16(reg, val)	i915_write16(dev_priv, (reg), (val))
-#define I915_READ16_NOTRACE(reg)	readw(dev_priv->regs + (reg))
-#define I915_WRITE16_NOTRACE(reg, val)	writew(val, dev_priv->regs + (reg))
+#define I915_READ16(reg)	i915_read16(dev_priv, (reg), true)
+#define I915_WRITE16(reg, val)	i915_write16(dev_priv, (reg), (val), true)
+#define I915_READ16_NOTRACE(reg)	i915_read16(dev_priv, (reg), false)
+#define I915_WRITE16_NOTRACE(reg, val)	i915_write16(dev_priv, (reg), (val), false)
 
-#define I915_READ(reg)		i915_read32(dev_priv, (reg))
-#define I915_WRITE(reg, val)	i915_write32(dev_priv, (reg), (val))
-#define I915_READ_NOTRACE(reg)		readl(dev_priv->regs + (reg))
-#define I915_WRITE_NOTRACE(reg, val)	writel(val, dev_priv->regs + (reg))
+#define I915_READ(reg)		i915_read32(dev_priv, (reg), true)
+#define I915_WRITE(reg, val)	i915_write32(dev_priv, (reg), (val), true)
+#define I915_READ_NOTRACE(reg)		i915_read32(dev_priv, (reg), false)
+#define I915_WRITE_NOTRACE(reg, val)	i915_write32(dev_priv, (reg), (val), false)
 
-#define I915_WRITE64(reg, val)	i915_write64(dev_priv, (reg), (val))
-#define I915_READ64(reg)	i915_read64(dev_priv, (reg))
+#define I915_WRITE64(reg, val)	i915_write64(dev_priv, (reg), (val), true)
+#define I915_READ64(reg)	i915_read64(dev_priv, (reg), true)
 
 #define POSTING_READ(reg)	(void)I915_READ_NOTRACE(reg)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)

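The vm_list/global_link plumbing in i915_drv.h means per-address-space state can now be walked generically, even though only the global GTT is on the list at this point in the series. Tiny sketch:

	struct i915_address_space *vm;

	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
		/* only &dev_priv->gtt.base so far; ppgtt VMs come later */
		WARN_ON(!list_empty(&vm->active_list));
	}
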
+ 117 - 56
drivers/gpu/drm/i915/i915_gem.c

@@ -76,15 +76,19 @@ static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
 				  size_t size)
 {
+	spin_lock(&dev_priv->mm.object_stat_lock);
 	dev_priv->mm.object_count++;
 	dev_priv->mm.object_memory += size;
+	spin_unlock(&dev_priv->mm.object_stat_lock);
 }
 
 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
 				     size_t size)
 {
+	spin_lock(&dev_priv->mm.object_stat_lock);
 	dev_priv->mm.object_count--;
 	dev_priv->mm.object_memory -= size;
+	spin_unlock(&dev_priv->mm.object_stat_lock);
 }
 
 static int
@@ -182,7 +186,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 			pinned += i915_gem_obj_ggtt_size(obj);
 	mutex_unlock(&dev->struct_mutex);
 
-	args->aper_size = dev_priv->gtt.total;
+	args->aper_size = dev_priv->gtt.base.total;
 	args->aper_available_size = args->aper_size - pinned;
 
 	return 0;
@@ -220,16 +224,10 @@ i915_gem_create(struct drm_file *file,
 		return -ENOMEM;
 
 	ret = drm_gem_handle_create(file, &obj->base, &handle);
-	if (ret) {
-		drm_gem_object_release(&obj->base);
-		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
-		i915_gem_object_free(obj);
-		return ret;
-	}
-
 	/* drop reference from allocate - handle holds it now */
-	drm_gem_object_unreference(&obj->base);
-	trace_i915_gem_object_create(obj);
+	drm_gem_object_unreference_unlocked(&obj->base);
+	if (ret)
+		return ret;
 
 	*handle_p = handle;
 	return 0;
@@ -459,7 +457,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
 
 		mutex_unlock(&dev->struct_mutex);
 
-		if (!prefaulted) {
+		if (likely(!i915_prefault_disable) && !prefaulted) {
 			ret = fault_in_multipages_writeable(user_data, remain);
 			/* Userspace is tricking us, but we've already clobbered
 			 * its pages with the prefault and promised to write the
@@ -854,10 +852,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		       args->size))
 		return -EFAULT;
 
-	ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
-					   args->size);
-	if (ret)
-		return -EFAULT;
+	if (likely(!i915_prefault_disable)) {
+		ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
+						   args->size);
+		if (ret)
+			return -EFAULT;
+	}
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
@@ -1679,6 +1679,7 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 		  bool purgeable_only)
 {
 	struct drm_i915_gem_object *obj, *next;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	long count = 0;
 
 	list_for_each_entry_safe(obj, next,
@@ -1692,9 +1693,7 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
 		}
 	}
 
-	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.inactive_list,
-				 mm_list) {
+	list_for_each_entry_safe(obj, next, &vm->inactive_list, mm_list) {
 		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
 		    i915_gem_object_unbind(obj) == 0 &&
 		    i915_gem_object_put_pages(obj) == 0) {
@@ -1865,6 +1864,7 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	u32 seqno = intel_ring_get_seqno(ring);
 
 	BUG_ON(ring == NULL);
@@ -1881,7 +1881,7 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 	}
 
 	/* Move from whatever list we were on to the tail of execution. */
-	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
+	list_move_tail(&obj->mm_list, &vm->active_list);
 	list_move_tail(&obj->ring_list, &ring->active_list);
 
 	obj->last_read_seqno = seqno;
@@ -1905,11 +1905,12 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 
 	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
 	BUG_ON(!obj->active);
 
-	list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+	list_move_tail(&obj->mm_list, &vm->inactive_list);
 
 	list_del_init(&obj->ring_list);
 	obj->ring = NULL;
@@ -2074,10 +2075,8 @@ int __i915_add_request(struct intel_ring_buffer *ring,
 	ring->outstanding_lazy_request = 0;
 
 	if (!dev_priv->ums.mm_suspended) {
-		if (i915_enable_hangcheck) {
-			mod_timer(&dev_priv->gpu_error.hangcheck_timer,
-				  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
-		}
+		i915_queue_hangcheck(ring->dev);
+
 		if (was_empty) {
 			queue_delayed_work(dev_priv->wq,
 					   &dev_priv->mm.retire_work,
@@ -2246,13 +2245,24 @@ void i915_gem_restore_fences(struct drm_device *dev)
 
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
-		i915_gem_write_fence(dev, i, reg->obj);
+
+		/*
+		 * Commit delayed tiling changes if we have an object still
+		 * attached to the fence, otherwise just clear the fence.
+		 */
+		if (reg->obj) {
+			i915_gem_object_update_fence(reg->obj, reg,
+						     reg->obj->tiling_mode);
+		} else {
+			i915_gem_write_fence(dev, i, NULL);
+		}
 	}
 }
 
 void i915_gem_reset(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj;
 	struct intel_ring_buffer *ring;
 	int i;
@@ -2263,12 +2273,8 @@ void i915_gem_reset(struct drm_device *dev)
 	/* Move everything out of the GPU domains to ensure we do any
 	 * necessary invalidation upon reuse.
 	 */
-	list_for_each_entry(obj,
-			    &dev_priv->mm.inactive_list,
-			    mm_list)
-	{
+	list_for_each_entry(obj, &vm->inactive_list, mm_list)
 		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
-	}
 
 	i915_gem_restore_fences(dev);
 }
@@ -2571,6 +2577,7 @@ int
 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
 	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
+	struct i915_vma *vma;
 	int ret;
 
 	if (!i915_gem_obj_ggtt_bound(obj))
@@ -2608,11 +2615,20 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	i915_gem_object_unpin_pages(obj);
 
 	list_del(&obj->mm_list);
-	list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 	/* Avoid an unnecessary call to unbind on rebind. */
 	obj->map_and_fenceable = true;
 
-	drm_mm_remove_node(&obj->gtt_space);
+	vma = __i915_gem_obj_to_vma(obj);
+	list_del(&vma->vma_link);
+	drm_mm_remove_node(&vma->node);
+	i915_gem_vma_destroy(vma);
+
+	/* Since the unbound list is global, only move to that list if
+	 * no more VMAs exist.
+	 * NB: Until we have real VMAs there will only ever be one. */
+	WARN_ON(!list_empty(&obj->vma_list));
+	if (list_empty(&obj->vma_list))
+		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 
 	return 0;
 }
@@ -2781,6 +2797,10 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
 	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
 		mb();
 
+	WARN(obj && (!obj->stride || !obj->tiling_mode),
+	     "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
+	     obj->stride, obj->tiling_mode);
+
 	switch (INTEL_INFO(dev)->gen) {
 	case 7:
 	case 6:
@@ -2822,6 +2842,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 		fence->obj = NULL;
 		list_del_init(&fence->lru_list);
 	}
+	obj->fence_dirty = false;
 }
 
 static int
@@ -2951,7 +2972,6 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 		return 0;
 
 	i915_gem_object_update_fence(obj, reg, enable);
-	obj->fence_dirty = false;
 
 	return 0;
 }
@@ -3037,12 +3057,17 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	u32 size, fence_size, fence_alignment, unfenced_alignment;
 	bool mappable, fenceable;
 	size_t gtt_max = map_and_fenceable ?
-		dev_priv->gtt.mappable_end : dev_priv->gtt.total;
+		dev_priv->gtt.mappable_end : dev_priv->gtt.base.total;
+	struct i915_vma *vma;
 	int ret;
 
+	if (WARN_ON(!list_empty(&obj->vma_list)))
+		return -EBUSY;
+
 	fence_size = i915_gem_get_gtt_size(dev,
 					   obj->base.size,
 					   obj->tiling_mode);
@@ -3081,9 +3106,15 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_pin_pages(obj);
 
+	vma = i915_gem_vma_create(obj, &dev_priv->gtt.base);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unpin;
+	}
+
 search_free:
-	ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space,
-						  &obj->gtt_space,
+	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
+						  &vma->node,
 						  size, alignment,
 						  obj->cache_level, 0, gtt_max,
 						  DRM_MM_SEARCH_DEFAULT);
@@ -3095,25 +3126,21 @@ search_free:
 		if (ret == 0)
 			goto search_free;
 
-		i915_gem_object_unpin_pages(obj);
-		return ret;
+		goto err_free_vma;
 	}
-	if (WARN_ON(!i915_gem_valid_gtt_space(dev, &obj->gtt_space,
+	if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node,
 					      obj->cache_level))) {
-		i915_gem_object_unpin_pages(obj);
-		drm_mm_remove_node(&obj->gtt_space);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_remove_node;
 	}
 
 	ret = i915_gem_gtt_prepare_object(obj);
-	if (ret) {
-		i915_gem_object_unpin_pages(obj);
-		drm_mm_remove_node(&obj->gtt_space);
-		return ret;
-	}
+	if (ret)
+		goto err_remove_node;
 
 	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
-	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+	list_add_tail(&obj->mm_list, &vm->inactive_list);
+	list_add(&vma->vma_link, &obj->vma_list);
 
 	fenceable =
 		i915_gem_obj_ggtt_size(obj) == fence_size &&
@@ -3127,6 +3154,14 @@ search_free:
 	trace_i915_gem_object_bind(obj, map_and_fenceable);
 	i915_gem_verify_gtt(dev);
 	return 0;
+
+err_remove_node:
+	drm_mm_remove_node(&vma->node);
+err_free_vma:
+	i915_gem_vma_destroy(vma);
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+	return ret;
 }
 
 void
@@ -3261,7 +3296,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 
 	/* And bump the LRU for this access */
 	if (i915_gem_object_is_inactive(obj))
-		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+		list_move_tail(&obj->mm_list,
+			       &dev_priv->gtt.base.inactive_list);
 
 	return 0;
 }
@@ -3271,6 +3307,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_vma *vma = __i915_gem_obj_to_vma(obj);
 	int ret;
 
 	if (obj->cache_level == cache_level)
@@ -3281,7 +3318,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		return -EBUSY;
 	}
 
-	if (!i915_gem_valid_gtt_space(dev, &obj->gtt_space, cache_level)) {
+	if (vma && !i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) {
 		ret = i915_gem_object_unbind(obj);
 		if (ret)
 			return ret;
@@ -3826,6 +3863,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&obj->global_list);
 	INIT_LIST_HEAD(&obj->ring_list);
 	INIT_LIST_HEAD(&obj->exec_list);
+	INIT_LIST_HEAD(&obj->vma_list);
 
 	obj->ops = ops;
 
@@ -3890,6 +3928,8 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	} else
 		obj->cache_level = I915_CACHE_NONE;
 
+	trace_i915_gem_object_create(obj);
+
 	return obj;
 }
 
@@ -3946,6 +3986,26 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	i915_gem_object_free(obj);
 }
 
+struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj,
+				     struct i915_address_space *vm)
+{
+	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	if (vma == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&vma->vma_link);
+	vma->vm = vm;
+	vma->obj = obj;
+
+	return vma;
+}
+
+void i915_gem_vma_destroy(struct i915_vma *vma)
+{
+	WARN_ON(vma->node.allocated);
+	kfree(vma);
+}
+
 int
 i915_gem_idle(struct drm_device *dev)
 {
@@ -4105,8 +4165,8 @@ i915_gem_init_hw(struct drm_device *dev)
 	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
 		return -EIO;
 
-	if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
-		I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000);
+	if (dev_priv->ellc_size)
+		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
 
 	if (HAS_PCH_NOP(dev)) {
 		u32 temp = I915_READ(GEN7_MSG_CTL);
@@ -4202,7 +4262,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 		return ret;
 	}
 
-	BUG_ON(!list_empty(&dev_priv->mm.active_list));
+	BUG_ON(!list_empty(&dev_priv->gtt.base.active_list));
 	mutex_unlock(&dev->struct_mutex);
 
 	ret = drm_irq_install(dev);
@@ -4280,8 +4340,8 @@ i915_gem_load(struct drm_device *dev)
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
 
-	INIT_LIST_HEAD(&dev_priv->mm.active_list);
-	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
+	INIT_LIST_HEAD(&dev_priv->gtt.base.active_list);
+	INIT_LIST_HEAD(&dev_priv->gtt.base.inactive_list);
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
@@ -4552,6 +4612,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 			     struct drm_i915_private,
 			     mm.inactive_shrinker);
 	struct drm_device *dev = dev_priv->dev;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj;
 	int nr_to_scan = sc->nr_to_scan;
 	bool unlock = true;
@@ -4580,7 +4641,7 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
 		if (obj->pages_pin_count == 0)
 			cnt += obj->base.size >> PAGE_SHIFT;
-	list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list)
+	list_for_each_entry(obj, &vm->inactive_list, mm_list)
 		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
 			cnt += obj->base.size >> PAGE_SHIFT;
 

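The VMA lifetime rule these i915_gem.c hunks establish: a VMA is created at bind time, its drm_mm node must be removed before destruction (i915_gem_vma_destroy() WARNs on a still-allocated node), and the object moves to the global unbound list only once its vma_list is empty. In caller terms, roughly:

	struct i915_vma *vma = i915_gem_vma_create(obj, &dev_priv->gtt.base);

	if (IS_ERR(vma))
		return PTR_ERR(vma);
	/* bind: drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, ...),
	 * then list_add(&vma->vma_link, &obj->vma_list) */

	/* unbind, in this order: */
	list_del(&vma->vma_link);
	drm_mm_remove_node(&vma->node);	/* clears vma->node.allocated */
	i915_gem_vma_destroy(vma);	/* would WARN on an allocated node */
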
+ 8 - 15
drivers/gpu/drm/i915/i915_gem_context.c

@@ -304,31 +304,24 @@ static int context_idr_cleanup(int id, void *p, void *data)
 }
 
 struct i915_ctx_hang_stats *
-i915_gem_context_get_hang_stats(struct intel_ring_buffer *ring,
+i915_gem_context_get_hang_stats(struct drm_device *dev,
 				struct drm_file *file,
 				u32 id)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_hw_context *to;
-
-	if (dev_priv->hw_contexts_disabled)
-		return ERR_PTR(-ENOENT);
-
-	if (ring->id != RCS)
-		return ERR_PTR(-EINVAL);
-
-	if (file == NULL)
-		return ERR_PTR(-EINVAL);
+	struct i915_hw_context *ctx;
 
 	if (id == DEFAULT_CONTEXT_ID)
 		return &file_priv->hang_stats;
 
-	to = i915_gem_context_get(file->driver_priv, id);
-	if (to == NULL)
+	ctx = NULL;
+	if (!dev_priv->hw_contexts_disabled)
+		ctx = i915_gem_context_get(file->driver_priv, id);
+	if (ctx == NULL)
 		return ERR_PTR(-ENOENT);
 
-	return &to->hang_stats;
+	return &ctx->hang_stats;
 }
 
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)

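After the i915_gem_context.c change, hang stats are looked up per device rather than per ring, so the default context's stats stay reachable even when HW contexts are disabled. Caller-side sketch; batch_active is assumed from the hangcheck-stats work and does not appear in this excerpt:

	struct i915_ctx_hang_stats *hs;

	hs = i915_gem_context_get_hang_stats(dev, file, DEFAULT_CONTEXT_ID);
	if (IS_ERR(hs))
		return PTR_ERR(hs);
	hs->batch_active++;	/* assumed field name */
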
+ 1 - 1
drivers/gpu/drm/i915/i915_gem_debug.c

@@ -97,7 +97,7 @@ i915_verify_lists(struct drm_device *dev)
 		}
 	}
 
-	list_for_each_entry(obj, &dev_priv->mm.inactive_list, list) {
+	list_for_each_entry(obj, &i915_gtt_vm->inactive_list, list) {
 		if (obj->base.dev != dev ||
 		    !atomic_read(&obj->base.refcount.refcount)) {
 			DRM_ERROR("freed inactive %p\n", obj);

+ 19 - 15
drivers/gpu/drm/i915/i915_gem_evict.c

@@ -34,11 +34,13 @@
 static bool
 mark_free(struct drm_i915_gem_object *obj, struct list_head *unwind)
 {
+	struct i915_vma *vma = __i915_gem_obj_to_vma(obj);
+
 	if (obj->pin_count)
 		return false;
 
 	list_add(&obj->exec_list, unwind);
-	return drm_mm_scan_add_block(&obj->gtt_space);
+	return drm_mm_scan_add_block(&vma->node);
 }
 
 int
@@ -47,7 +49,9 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
 			 bool mappable, bool nonblocking)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct list_head eviction_list, unwind_list;
+	struct i915_vma *vma;
 	struct drm_i915_gem_object *obj;
 	int ret = 0;
 
@@ -78,15 +82,14 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
 
 	INIT_LIST_HEAD(&unwind_list);
 	if (mappable)
-		drm_mm_init_scan_with_range(&dev_priv->mm.gtt_space,
-					    min_size, alignment, cache_level,
-					    0, dev_priv->gtt.mappable_end);
+		drm_mm_init_scan_with_range(&vm->mm, min_size,
+					    alignment, cache_level, 0,
+					    dev_priv->gtt.mappable_end);
 	else
-		drm_mm_init_scan(&dev_priv->mm.gtt_space,
-				 min_size, alignment, cache_level);
+		drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
 
 	/* First see if there is a large enough contiguous idle region... */
-	list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list) {
+	list_for_each_entry(obj, &vm->inactive_list, mm_list) {
 		if (mark_free(obj, &unwind_list))
 			goto found;
 	}
@@ -95,7 +98,7 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
 		goto none;
 
 	/* Now merge in the soon-to-be-expired objects... */
-	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
+	list_for_each_entry(obj, &vm->active_list, mm_list) {
 		if (mark_free(obj, &unwind_list))
 			goto found;
 	}
@@ -106,8 +109,8 @@ none:
 		obj = list_first_entry(&unwind_list,
 				       struct drm_i915_gem_object,
 				       exec_list);
-
-		ret = drm_mm_scan_remove_block(&obj->gtt_space);
+		vma = __i915_gem_obj_to_vma(obj);
+		ret = drm_mm_scan_remove_block(&vma->node);
 		BUG_ON(ret);
 
 		list_del_init(&obj->exec_list);
@@ -127,7 +130,8 @@ found:
 		obj = list_first_entry(&unwind_list,
 				       struct drm_i915_gem_object,
 				       exec_list);
-		if (drm_mm_scan_remove_block(&obj->gtt_space)) {
+		vma = __i915_gem_obj_to_vma(obj);
+		if (drm_mm_scan_remove_block(&vma->node)) {
 			list_move(&obj->exec_list, &eviction_list);
 			drm_gem_object_reference(&obj->base);
 			continue;
@@ -154,12 +158,13 @@ int
 i915_gem_evict_everything(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj, *next;
 	bool lists_empty;
 	int ret;
 
-	lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
-		       list_empty(&dev_priv->mm.active_list));
+	lists_empty = (list_empty(&vm->inactive_list) &&
+		       list_empty(&vm->active_list));
 	if (lists_empty)
 		return -ENOSPC;
 
@@ -176,8 +181,7 @@ i915_gem_evict_everything(struct drm_device *dev)
 	i915_gem_retire_requests(dev);
 
 	/* Having flushed everything, unbind() should never raise an error */
-	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.inactive_list, mm_list)
+	list_for_each_entry_safe(obj, next, &vm->inactive_list, mm_list)
 		if (obj->pin_count == 0)
 			WARN_ON(i915_gem_object_unbind(obj));
 

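The eviction path above follows the drm_mm scan protocol: candidates are fed to drm_mm_scan_add_block() until a large-enough hole is found, then every candidate must be handed back to drm_mm_scan_remove_block() in reverse order, which reports whether that block is part of the hole and must actually be evicted. A toy userspace analogue of the shape of that protocol — it tracks only a running total, not real address ranges, and the names are hypothetical:

#include <stdio.h>

#define MAX_CAND 16

struct scan {
	unsigned long need;		/* bytes we must free */
	unsigned long freed;		/* bytes covered by candidates so far */
	unsigned long cand[MAX_CAND];	/* candidate sizes, LIFO order */
	int n;
};

/* Returns non-zero once the accumulated candidates would satisfy the
 * request -- the analogue of drm_mm_scan_add_block() reporting a fit. */
static int scan_add_block(struct scan *s, unsigned long size)
{
	s->cand[s->n++] = size;
	s->freed += size;
	return s->freed >= s->need;
}

/* Candidates must be unwound in reverse order of addition, exactly as
 * the unwind loops in i915_gem_evict_something() above do. */
static void scan_remove_block(struct scan *s)
{
	s->freed -= s->cand[--s->n];
}

int main(void)
{
	struct scan s = { .need = 24 };
	unsigned long blocks[] = { 8, 8, 16 };
	int i;

	for (i = 0; i < 3; i++)
		if (scan_add_block(&s, blocks[i])) {
			printf("fit after %d candidates\n", i + 1);
			break;
		}
	while (s.n)
		scan_remove_block(&s);	/* evict the marked blocks here */
	return 0;
}
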
+ 9 - 7
drivers/gpu/drm/i915/i915_gem_execbuffer.c

@@ -255,7 +255,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 
 	reloc->delta += target_offset;
 	if (use_cpu_reloc(obj)) {
-		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
+		uint32_t page_offset = offset_in_page(reloc->offset);
 		char *vaddr;
 
 		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
@@ -284,7 +284,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
 						      reloc->offset & PAGE_MASK);
 		reloc_entry = (uint32_t __iomem *)
-			(reloc_page + (reloc->offset & ~PAGE_MASK));
+			(reloc_page + offset_in_page(reloc->offset));
 		iowrite32(reloc->delta, reloc_entry);
 		io_mapping_unmap_atomic(reloc_page);
 	}
@@ -759,8 +759,10 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 		if (!access_ok(VERIFY_WRITE, ptr, length))
 			return -EFAULT;
 
-		if (fault_in_multipages_readable(ptr, length))
-			return -EFAULT;
+		if (likely(!i915_prefault_disable)) {
+			if (fault_in_multipages_readable(ptr, length))
+				return -EFAULT;
+		}
 	}
 
 	return 0;
@@ -873,7 +875,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		break;
 	case I915_EXEC_BSD:
 		ring = &dev_priv->ring[VCS];
-		if (ctx_id != 0) {
+		if (ctx_id != DEFAULT_CONTEXT_ID) {
 			DRM_DEBUG("Ring %s doesn't support contexts\n",
 				  ring->name);
 			return -EPERM;
@@ -881,7 +883,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		break;
 	case I915_EXEC_BLT:
 		ring = &dev_priv->ring[BCS];
-		if (ctx_id != 0) {
+		if (ctx_id != DEFAULT_CONTEXT_ID) {
 			DRM_DEBUG("Ring %s doesn't support contexts\n",
 				  ring->name);
 			return -EPERM;
@@ -889,7 +891,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		break;
 	case I915_EXEC_VEBOX:
 		ring = &dev_priv->ring[VECS];
-		if (ctx_id != 0) {
+		if (ctx_id != DEFAULT_CONTEXT_ID) {
 			DRM_DEBUG("Ring %s doesn't support contexts\n",
 				  ring->name);
 			return -EPERM;

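The relocation hunks above replace the open-coded reloc->offset & ~PAGE_MASK with offset_in_page(). Splitting an offset into a page-aligned base plus an in-page remainder is lossless, as this small standalone check illustrates (PAGE_SIZE assumed 4 KiB; the macro mirrors the kernel definition):

#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)

int main(void)
{
	uint64_t off = 0x12345;

	/* in-page remainder: the low 12 bits */
	assert(offset_in_page(off) == 0x345);
	/* page base + remainder reassembles the original offset */
	assert((off & PAGE_MASK) + offset_in_page(off) == off);
	return 0;
}
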
+ 129 - 84
drivers/gpu/drm/i915/i915_gem_gtt.c

@@ -33,6 +33,7 @@
 
 /* PPGTT stuff */
 #define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
+#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
 
 #define GEN6_PDE_VALID			(1 << 0)
 /* gen6+ has bit 11-4 for physical addr bit 39-32 */
@@ -44,6 +45,15 @@
 #define GEN6_PTE_CACHE_LLC		(2 << 1)
 #define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
+#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
+
+/* Cacheability Control is a 4-bit value. The low three bits are stored in *
+ * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
+ */
+#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
+					 (((bits) & 0x8) << (11 - 3)))
+#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
+#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
 
 static gen6_gtt_pte_t gen6_pte_encode(dma_addr_t addr,
 				      enum i915_cache_level level)
@@ -92,17 +102,29 @@ static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level)
 {
 	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
-	pte |= GEN6_PTE_ADDR_ENCODE(addr);
+	pte |= HSW_PTE_ADDR_ENCODE(addr);
 
 	if (level != I915_CACHE_NONE)
-		pte |= GEN6_PTE_CACHE_LLC;
+		pte |= HSW_WB_LLC_AGE0;
+
+	return pte;
+}
+
+static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
+				      enum i915_cache_level level)
+{
+	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
+	pte |= HSW_PTE_ADDR_ENCODE(addr);
+
+	if (level != I915_CACHE_NONE)
+		pte |= HSW_WB_ELLC_LLC_AGE0;
 
 	return pte;
 }
 
 static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
 {
-	struct drm_i915_private *dev_priv = ppgtt->dev->dev_private;
+	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
 	gen6_gtt_pte_t __iomem *pd_addr;
 	uint32_t pd_entry;
 	int i;
@@ -181,18 +203,18 @@ static int gen6_ppgtt_enable(struct drm_device *dev)
 }
 
 /* PPGTT support for Sandybridge/Gen6 and later */
-static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
+static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 				   unsigned first_entry,
 				   unsigned num_entries)
 {
-	struct drm_i915_private *dev_priv = ppgtt->dev->dev_private;
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
 	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	unsigned last_pte, i;
 
-	scratch_pte = ppgtt->pte_encode(dev_priv->gtt.scratch.addr,
-					I915_CACHE_LLC);
+	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC);
 
 	while (num_entries) {
 		last_pte = first_pte + num_entries;
@@ -212,11 +234,13 @@ static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 	}
 }
 
-static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
+static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      unsigned first_entry,
 				      enum i915_cache_level cache_level)
 {
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr;
 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
 	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
@@ -227,7 +251,7 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
 		dma_addr_t page_addr;
 
 		page_addr = sg_page_iter_dma_address(&sg_iter);
-		pt_vaddr[act_pte] = ppgtt->pte_encode(page_addr, cache_level);
+		pt_vaddr[act_pte] = vm->pte_encode(page_addr, cache_level);
 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
 			kunmap_atomic(pt_vaddr);
 			act_pt++;
@@ -239,13 +263,17 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
 	kunmap_atomic(pt_vaddr);
 }
 
-static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
+static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 {
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
 	int i;
 
+	drm_mm_takedown(&ppgtt->base.mm);
+
 	if (ppgtt->pt_dma_addr) {
 		for (i = 0; i < ppgtt->num_pd_entries; i++)
-			pci_unmap_page(ppgtt->dev->pdev,
+			pci_unmap_page(ppgtt->base.dev->pdev,
 				       ppgtt->pt_dma_addr[i],
 				       4096, PCI_DMA_BIDIRECTIONAL);
 	}
@@ -259,7 +287,7 @@ static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
 
 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 {
-	struct drm_device *dev = ppgtt->dev;
+	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned first_pd_entry_in_global_pt;
 	int i;
@@ -271,17 +299,18 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	first_pd_entry_in_global_pt = gtt_total_entries(dev_priv->gtt);
 
 	if (IS_HASWELL(dev)) {
-		ppgtt->pte_encode = hsw_pte_encode;
+		ppgtt->base.pte_encode = hsw_pte_encode;
 	} else if (IS_VALLEYVIEW(dev)) {
-		ppgtt->pte_encode = byt_pte_encode;
+		ppgtt->base.pte_encode = byt_pte_encode;
 	} else {
-		ppgtt->pte_encode = gen6_pte_encode;
+		ppgtt->base.pte_encode = gen6_pte_encode;
 	}
 	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
 	ppgtt->enable = gen6_ppgtt_enable;
-	ppgtt->clear_range = gen6_ppgtt_clear_range;
-	ppgtt->insert_entries = gen6_ppgtt_insert_entries;
-	ppgtt->cleanup = gen6_ppgtt_cleanup;
+	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
+	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
+	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
+	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
 	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
 				  GFP_KERNEL);
 	if (!ppgtt->pt_pages)
@@ -312,8 +341,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 		ppgtt->pt_dma_addr[i] = pt_addr;
 	}
 
-	ppgtt->clear_range(ppgtt, 0,
-			   ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);
+	ppgtt->base.clear_range(&ppgtt->base, 0,
+				ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES);
 
 	ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);
 
@@ -346,7 +375,7 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 	if (!ppgtt)
 		return -ENOMEM;
 
-	ppgtt->dev = dev;
+	ppgtt->base.dev = dev;
 
 	if (INTEL_INFO(dev)->gen < 8)
 		ret = gen6_ppgtt_init(ppgtt);
@@ -355,8 +384,11 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 
 	if (ret)
 		kfree(ppgtt);
-	else
+	else {
 		dev_priv->mm.aliasing_ppgtt = ppgtt;
+		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
+			    ppgtt->base.total);
+	}
 
 	return ret;
 }
@@ -369,7 +401,7 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
 	if (!ppgtt)
 		return;
 
-	ppgtt->cleanup(ppgtt);
+	ppgtt->base.cleanup(&ppgtt->base);
 	dev_priv->mm.aliasing_ppgtt = NULL;
 }
 
@@ -377,17 +409,17 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
 			    struct drm_i915_gem_object *obj,
 			    enum i915_cache_level cache_level)
 {
-	ppgtt->insert_entries(ppgtt, obj->pages,
-			      i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
-			      cache_level);
+	ppgtt->base.insert_entries(&ppgtt->base, obj->pages,
+				   i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
+				   cache_level);
 }
 
 void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
 			      struct drm_i915_gem_object *obj)
 {
-	ppgtt->clear_range(ppgtt,
-			   i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
-			   obj->base.size >> PAGE_SHIFT);
+	ppgtt->base.clear_range(&ppgtt->base,
+				i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
+				obj->base.size >> PAGE_SHIFT);
 }
 
 extern int intel_iommu_gfx_mapped;
@@ -434,8 +466,9 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 	struct drm_i915_gem_object *obj;
 
 	/* First fill our portion of the GTT with scratch pages */
-	dev_priv->gtt.gtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE,
-				      dev_priv->gtt.total / PAGE_SIZE);
+	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+				       dev_priv->gtt.base.start / PAGE_SIZE,
+				       dev_priv->gtt.base.total / PAGE_SIZE);
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 		i915_gem_clflush_object(obj);
@@ -464,12 +497,12 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
  * within the global GTT as well as accessible by the GPU through the GMADR
  * mapped BAR (dev_priv->mm.gtt->gtt).
  */
-static void gen6_ggtt_insert_entries(struct drm_device *dev,
+static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     unsigned int first_entry,
 				     enum i915_cache_level level)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	gen6_gtt_pte_t __iomem *gtt_entries =
 		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
 	int i = 0;
@@ -478,8 +511,7 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 
 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
 		addr = sg_page_iter_dma_address(&sg_iter);
-		iowrite32(dev_priv->gtt.pte_encode(addr, level),
-			  &gtt_entries[i]);
+		iowrite32(vm->pte_encode(addr, level), &gtt_entries[i]);
 		i++;
 	}
 
@@ -490,8 +522,8 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 	 * hardware should work, we must keep this posting read for paranoia.
 	 */
 	if (i != 0)
-		WARN_ON(readl(&gtt_entries[i-1])
-			!= dev_priv->gtt.pte_encode(addr, level));
+		WARN_ON(readl(&gtt_entries[i-1]) !=
+			vm->pte_encode(addr, level));
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -501,11 +533,11 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
 }
 
-static void gen6_ggtt_clear_range(struct drm_device *dev,
+static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 				  unsigned int first_entry,
 				  unsigned int num_entries)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
 		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
 	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
@@ -516,15 +548,14 @@ static void gen6_ggtt_clear_range(struct drm_device *dev,
 		 first_entry, num_entries, max_entries))
 		num_entries = max_entries;
 
-	scratch_pte = dev_priv->gtt.pte_encode(dev_priv->gtt.scratch.addr,
-					       I915_CACHE_LLC);
+	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC);
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
 	readl(gtt_base);
 }
 
 
-static void i915_ggtt_insert_entries(struct drm_device *dev,
+static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     unsigned int pg_start,
 				     enum i915_cache_level cache_level)
@@ -536,7 +567,7 @@ static void i915_ggtt_insert_entries(struct drm_device *dev,
 
 }
 
-static void i915_ggtt_clear_range(struct drm_device *dev,
+static void i915_ggtt_clear_range(struct i915_address_space *vm,
 				  unsigned int first_entry,
 				  unsigned int num_entries)
 {
@@ -549,10 +580,11 @@ void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
 
-	dev_priv->gtt.gtt_insert_entries(dev, obj->pages,
-					 i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
-					 cache_level);
+	dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages,
+					  entry,
+					  cache_level);
 
 	obj->has_global_gtt_mapping = 1;
 }
@@ -561,10 +593,11 @@ void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
 
-	dev_priv->gtt.gtt_clear_range(obj->base.dev,
-				      i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
-				      obj->base.size >> PAGE_SHIFT);
+	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+				       entry,
+				       obj->base.size >> PAGE_SHIFT);
 
 	obj->has_global_gtt_mapping = 0;
 }
@@ -623,38 +656,42 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
 	BUG_ON(mappable_end > end);
 
 	/* Subtract the guard page ... */
-	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
+	drm_mm_init(&dev_priv->gtt.base.mm, start, end - start - PAGE_SIZE);
 	if (!HAS_LLC(dev))
-		dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;
+		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
 
 	/* Mark any preallocated objects as occupied */
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		struct i915_vma *vma = __i915_gem_obj_to_vma(obj);
 		int ret;
 		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
 			      i915_gem_obj_ggtt_offset(obj), obj->base.size);
 
 		WARN_ON(i915_gem_obj_ggtt_bound(obj));
-		ret = drm_mm_reserve_node(&dev_priv->mm.gtt_space,
-					  &obj->gtt_space);
+		ret = drm_mm_reserve_node(&dev_priv->gtt.base.mm, &vma->node);
 		if (ret)
 			DRM_DEBUG_KMS("Reservation failed\n");
 		obj->has_global_gtt_mapping = 1;
+		list_add(&vma->vma_link, &obj->vma_list);
 	}
 
-	dev_priv->gtt.start = start;
-	dev_priv->gtt.total = end - start;
+	dev_priv->gtt.base.start = start;
+	dev_priv->gtt.base.total = end - start;
 
 	/* Clear any non-preallocated blocks */
-	drm_mm_for_each_hole(entry, &dev_priv->mm.gtt_space,
+	drm_mm_for_each_hole(entry, &dev_priv->gtt.base.mm,
 			     hole_start, hole_end) {
+		const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
 			      hole_start, hole_end);
-		dev_priv->gtt.gtt_clear_range(dev, hole_start / PAGE_SIZE,
-					      (hole_end-hole_start) / PAGE_SIZE);
+		dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+					       hole_start / PAGE_SIZE,
+					       count);
 	}
 
 	/* And finally clear the reserved guard page */
-	dev_priv->gtt.gtt_clear_range(dev, end / PAGE_SIZE - 1, 1);
+	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+				       end / PAGE_SIZE - 1, 1);
 }
 
 static bool
@@ -677,7 +714,7 @@ void i915_gem_init_global_gtt(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned long gtt_size, mappable_size;
 
-	gtt_size = dev_priv->gtt.total;
+	gtt_size = dev_priv->gtt.base.total;
 	mappable_size = dev_priv->gtt.mappable_end;
 
 	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
@@ -696,7 +733,7 @@ void i915_gem_init_global_gtt(struct drm_device *dev)
 			return;
 
 		DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
-		drm_mm_takedown(&dev_priv->mm.gtt_space);
+		drm_mm_takedown(&dev_priv->gtt.base.mm);
 		gtt_size += GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
 	}
 	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
@@ -722,8 +759,8 @@ static int setup_scratch_page(struct drm_device *dev)
 #else
 	dma_addr = page_to_phys(page);
 #endif
-	dev_priv->gtt.scratch.page = page;
-	dev_priv->gtt.scratch.addr = dma_addr;
+	dev_priv->gtt.base.scratch.page = page;
+	dev_priv->gtt.base.scratch.addr = dma_addr;
 
 	return 0;
 }
@@ -731,11 +768,13 @@ static int setup_scratch_page(struct drm_device *dev)
 static void teardown_scratch_page(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	set_pages_wb(dev_priv->gtt.scratch.page, 1);
-	pci_unmap_page(dev->pdev, dev_priv->gtt.scratch.addr,
+	struct page *page = dev_priv->gtt.base.scratch.page;
+
+	set_pages_wb(page, 1);
+	pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
 		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	put_page(dev_priv->gtt.scratch.page);
-	__free_page(dev_priv->gtt.scratch.page);
+	put_page(page);
+	__free_page(page);
 }
 
 static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
@@ -798,17 +837,18 @@ static int gen6_gmch_probe(struct drm_device *dev,
 	if (ret)
 		DRM_ERROR("Scratch setup failed\n");
 
-	dev_priv->gtt.gtt_clear_range = gen6_ggtt_clear_range;
-	dev_priv->gtt.gtt_insert_entries = gen6_ggtt_insert_entries;
+	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
+	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
 
 	return ret;
 }
 
-static void gen6_gmch_remove(struct drm_device *dev)
+static void gen6_gmch_remove(struct i915_address_space *vm)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	iounmap(dev_priv->gtt.gsm);
-	teardown_scratch_page(dev_priv->dev);
+	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
+	iounmap(gtt->gsm);
+	teardown_scratch_page(vm->dev);
 }
 
 static int i915_gmch_probe(struct drm_device *dev,
@@ -829,13 +869,13 @@ static int i915_gmch_probe(struct drm_device *dev,
 	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
 
 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
-	dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range;
-	dev_priv->gtt.gtt_insert_entries = i915_ggtt_insert_entries;
+	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
+	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
 
 	return 0;
 }
 
-static void i915_gmch_remove(struct drm_device *dev)
+static void i915_gmch_remove(struct i915_address_space *vm)
 {
 	intel_gmch_remove();
 }
@@ -848,25 +888,30 @@ int i915_gem_gtt_init(struct drm_device *dev)
 
 	if (INTEL_INFO(dev)->gen <= 5) {
 		gtt->gtt_probe = i915_gmch_probe;
-		gtt->gtt_remove = i915_gmch_remove;
+		gtt->base.cleanup = i915_gmch_remove;
 	} else {
 		gtt->gtt_probe = gen6_gmch_probe;
-		gtt->gtt_remove = gen6_gmch_remove;
-		if (IS_HASWELL(dev))
-			gtt->pte_encode = hsw_pte_encode;
+		gtt->base.cleanup = gen6_gmch_remove;
+		if (IS_HASWELL(dev) && dev_priv->ellc_size)
+			gtt->base.pte_encode = iris_pte_encode;
+		else if (IS_HASWELL(dev))
+			gtt->base.pte_encode = hsw_pte_encode;
 		else if (IS_VALLEYVIEW(dev))
-			gtt->pte_encode = byt_pte_encode;
+			gtt->base.pte_encode = byt_pte_encode;
 		else
-			gtt->pte_encode = gen6_pte_encode;
+			gtt->base.pte_encode = gen6_pte_encode;
 	}
 
-	ret = gtt->gtt_probe(dev, &gtt->total, &gtt->stolen_size,
+	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
 			     &gtt->mappable_base, &gtt->mappable_end);
 	if (ret)
 		return ret;
 
+	gtt->base.dev = dev;
+
 	/* GMADR is the PCI mmio aperture into the global GTT. */
-	DRM_INFO("Memory usable by graphics device = %zdM\n", gtt->total >> 20);
+	DRM_INFO("Memory usable by graphics device = %zdM\n",
+		 gtt->base.total >> 20);
 	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
 	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
 

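The thread running through this file is the new struct i915_address_space: both the global GTT and the PPGTT embed it as .base, the clear_range/insert_entries/cleanup callbacks now take the base pointer, and the outer structure is recovered with container_of(). A self-contained sketch of that embedding pattern, with hypothetical names (address_space, hw_ppgtt) and a simplified container_of:

#include <stdio.h>
#include <stddef.h>

/* Simplified version of the kernel macro. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct address_space {
	unsigned long start, total;
	void (*clear_range)(struct address_space *vm);
};

struct hw_ppgtt {
	int num_pd_entries;
	struct address_space base;	/* embedded, like i915_hw_ppgtt */
};

static void ppgtt_clear_range(struct address_space *vm)
{
	/* The callback gets the common base; recover the outer object. */
	struct hw_ppgtt *ppgtt = container_of(vm, struct hw_ppgtt, base);

	printf("clearing ppgtt with %d PD entries\n", ppgtt->num_pd_entries);
}

int main(void)
{
	struct hw_ppgtt ppgtt = { .num_pd_entries = 512 };

	ppgtt.base.clear_range = ppgtt_clear_range;
	ppgtt.base.clear_range(&ppgtt.base);
	return 0;
}

As for the new Haswell PTE bits: HSW_CACHEABILITY_CONTROL() scatters the 4-bit cacheability value across the PTE, so HSW_WB_LLC_AGE0 (0x3) works out to 0x6 in PTE bits 2:1, while HSW_WB_ELLC_LLC_AGE0 (0xb) additionally sets PTE bit 11, giving 0x806.
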
+ 30 - 38
drivers/gpu/drm/i915/i915_gem_stolen.c

@@ -45,45 +45,27 @@
 static unsigned long i915_stolen_to_physical(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct pci_dev *pdev = dev_priv->bridge_dev;
 	struct resource *r;
 	u32 base;
 
-	/* On the machines I have tested the Graphics Base of Stolen Memory
-	 * is unreliable, so on those compute the base by subtracting the
-	 * stolen memory from the Top of Low Usable DRAM which is where the
-	 * BIOS places the graphics stolen memory.
+	/* Almost universally we can find the Graphics Base of Stolen Memory
+	 * at offset 0x5c in the igfx configuration space. On a few (desktop)
+	 * machines this is also mirrored in the bridge device at different
+	 * locations, or in the MCHBAR. On gen2, the layout is again slightly
+	 * different with the Graphics Segment immediately following Top of
+	 * Memory (or Top of Usable DRAM). Note it appears that TOUD is only
+	 * reported by 865g, so we just use the top of memory as determined
+	 * by the e820 probe.
 	 *
-	 * On gen2, the layout is slightly different with the Graphics Segment
-	 * immediately following Top of Memory (or Top of Usable DRAM). Note
-	 * it appears that TOUD is only reported by 865g, so we just use the
-	 * top of memory as determined by the e820 probe.
-	 *
-	 * XXX gen2 requires an unavailable symbol and 945gm fails with
-	 * its value of TOLUD.
+	 * XXX However gen2 requires an unavailable symbol.
 	 */
 	base = 0;
-	if (IS_VALLEYVIEW(dev)) {
+	if (INTEL_INFO(dev)->gen >= 3) {
+		/* Read Graphics Base of Stolen Memory directly */
 		pci_read_config_dword(dev->pdev, 0x5c, &base);
 		base &= ~((1<<20) - 1);
-	} else if (INTEL_INFO(dev)->gen >= 6) {
-		/* Read Base Data of Stolen Memory Register (BDSM) directly.
-		 * Note that there is also a MCHBAR miror at 0x1080c0 or
-		 * we could use device 2:0x5c instead.
-		*/
-		pci_read_config_dword(pdev, 0xB0, &base);
-		base &= ~4095; /* lower bits used for locking register */
-	} else if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
-		/* Read Graphics Base of Stolen Memory directly */
-		pci_read_config_dword(pdev, 0xA4, &base);
+	} else { /* GEN2 */
 #if 0
-	} else if (IS_GEN3(dev)) {
-		u8 val;
-		/* Stolen is immediately below Top of Low Usable DRAM */
-		pci_read_config_byte(pdev, 0x9c, &val);
-		base = val >> 3 << 27;
-		base -= dev_priv->mm.gtt->stolen_size;
-	} else {
 		/* Stolen is immediately above Top of Memory */
 		base = max_low_pfn_mapped << PAGE_SHIFT;
 #endif
@@ -367,8 +349,10 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
 					       u32 size)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj;
 	struct drm_mm_node *stolen;
+	struct i915_vma *vma;
 	int ret;
 
 	if (!drm_mm_initialized(&dev_priv->mm.stolen))
@@ -409,30 +393,38 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
 	if (gtt_offset == I915_GTT_OFFSET_NONE)
 		return obj;
 
+	vma = i915_gem_vma_create(obj, &dev_priv->gtt.base);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_out;
+	}
+
 	/* To simplify the initialisation sequence between KMS and GTT,
 	 * we allow construction of the stolen object prior to
 	 * setting up the GTT space. The actual reservation will occur
 	 * later.
 	 */
-	obj->gtt_space.start = gtt_offset;
-	obj->gtt_space.size = size;
-	if (drm_mm_initialized(&dev_priv->mm.gtt_space)) {
-		ret = drm_mm_reserve_node(&dev_priv->mm.gtt_space,
-					  &obj->gtt_space);
+	vma->node.start = gtt_offset;
+	vma->node.size = size;
+	if (drm_mm_initialized(&dev_priv->gtt.base.mm)) {
+		ret = drm_mm_reserve_node(&dev_priv->gtt.base.mm, &vma->node);
 		if (ret) {
 			DRM_DEBUG_KMS("failed to allocate stolen GTT space\n");
-			goto unref_out;
+			i915_gem_vma_destroy(vma);
+			goto err_out;
 		}
 	}
 
 	obj->has_global_gtt_mapping = 1;
 
 	list_add_tail(&obj->global_list, &dev_priv->mm.bound_list);
-	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+	list_add_tail(&obj->mm_list, &vm->inactive_list);
 
 	return obj;
 
-unref_out:
+err_out:
+	drm_mm_remove_node(stolen);
+	kfree(stolen);
 	drm_gem_object_unreference(&obj->base);
 	return NULL;
 }

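The simplified i915_stolen_to_physical() above reads the Graphics Base of Stolen Memory from config offset 0x5c on gen3+ and rounds it down to a 1 MiB boundary by clearing the low 20 bits. A tiny check of that masking, with a made-up raw register value:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t base = 0x7f9fabcd;	/* hypothetical raw register value */

	base &= ~((1u << 20) - 1);	/* clear bits 19:0 */
	assert(base == 0x7f900000);
	assert(base % (1u << 20) == 0);	/* 1 MiB aligned */
	return 0;
}
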
+ 5 - 3
drivers/gpu/drm/i915/i915_gpu_error.c

@@ -622,6 +622,7 @@ static struct drm_i915_error_object *
 i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 			     struct intel_ring_buffer *ring)
 {
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj;
 	u32 seqno;
 
@@ -641,7 +642,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 	}
 
 	seqno = ring->get_seqno(ring, false);
-	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
+	list_for_each_entry(obj, &vm->active_list, mm_list) {
 		if (obj->ring != ring)
 			continue;
 
@@ -773,11 +774,12 @@ static void i915_gem_record_rings(struct drm_device *dev,
 static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
 				     struct drm_i915_error_state *error)
 {
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj;
 	int i;
 
 	i = 0;
-	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list)
+	list_for_each_entry(obj, &vm->active_list, mm_list)
 		i++;
 	error->active_bo_count = i;
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
@@ -797,7 +799,7 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
 		error->active_bo_count =
 			capture_active_bo(error->active_bo,
 					  error->active_bo_count,
-					  &dev_priv->mm.active_list);
+					  &vm->active_list);
 
 	if (error->pinned_bo)
 		error->pinned_bo_count =

+ 224 - 328
drivers/gpu/drm/i915/i915_irq.c

@@ -698,18 +698,13 @@ static void ironlake_rps_change_irq_handler(struct drm_device *dev)
 static void notify_ring(struct drm_device *dev,
 			struct intel_ring_buffer *ring)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
 	if (ring->obj == NULL)
 		return;
 
 	trace_i915_gem_request_complete(ring, ring->get_seqno(ring, false));
 
 	wake_up_all(&ring->irq_queue);
-	if (i915_enable_hangcheck) {
-		mod_timer(&dev_priv->gpu_error.hangcheck_timer,
-			  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
-	}
+	i915_queue_hangcheck(dev);
 }
 
 static void gen6_pm_rps_work(struct work_struct *work)
@@ -817,7 +812,7 @@ static void ivybridge_parity_work(struct work_struct *work)
 
 	mutex_unlock(&dev_priv->dev->struct_mutex);
 
-	parity_event[0] = "L3_PARITY_ERROR=1";
+	parity_event[0] = I915_L3_PARITY_UEVENT "=1";
 	parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row);
 	parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank);
 	parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank);
@@ -849,6 +844,17 @@ static void ivybridge_parity_error_irq_handler(struct drm_device *dev)
 	queue_work(dev_priv->wq, &dev_priv->l3_parity.error_work);
 }
 
+static void ilk_gt_irq_handler(struct drm_device *dev,
+			       struct drm_i915_private *dev_priv,
+			       u32 gt_iir)
+{
+	if (gt_iir &
+	    (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT))
+		notify_ring(dev, &dev_priv->ring[RCS]);
+	if (gt_iir & ILK_BSD_USER_INTERRUPT)
+		notify_ring(dev, &dev_priv->ring[VCS]);
+}
+
 static void snb_gt_irq_handler(struct drm_device *dev,
 			       struct drm_i915_private *dev_priv,
 			       u32 gt_iir)
@@ -913,6 +919,10 @@ static inline void intel_hpd_irq_handler(struct drm_device *dev,
 	spin_lock(&dev_priv->irq_lock);
 	for (i = 1; i < HPD_NUM_PINS; i++) {
 
+		WARN(((hpd[i] & hotplug_trigger) &&
+		      dev_priv->hpd_stats[i].hpd_mark != HPD_ENABLED),
+		     "Received HPD interrupt although disabled\n");
+
 		if (!(hpd[i] & hotplug_trigger) ||
 		    dev_priv->hpd_stats[i].hpd_mark != HPD_ENABLED)
 			continue;
@@ -923,6 +933,7 @@ static inline void intel_hpd_irq_handler(struct drm_device *dev,
 				   + msecs_to_jiffies(HPD_STORM_DETECT_PERIOD))) {
 			dev_priv->hpd_stats[i].hpd_last_jiffies = jiffies;
 			dev_priv->hpd_stats[i].hpd_cnt = 0;
+			DRM_DEBUG_KMS("Received HPD interrupt on PIN %d - cnt: 0\n", i);
 		} else if (dev_priv->hpd_stats[i].hpd_cnt > HPD_STORM_THRESHOLD) {
 			dev_priv->hpd_stats[i].hpd_mark = HPD_MARK_DISABLED;
 			dev_priv->hpd_event_bits &= ~(1 << i);
@@ -930,6 +941,8 @@ static inline void intel_hpd_irq_handler(struct drm_device *dev,
 			storm_detected = true;
 		} else {
 			dev_priv->hpd_stats[i].hpd_cnt++;
+			DRM_DEBUG_KMS("Received HPD interrupt on PIN %d - cnt: %d\n", i,
+				      dev_priv->hpd_stats[i].hpd_cnt);
 		}
 	}
 
@@ -1202,27 +1215,111 @@ static void cpt_irq_handler(struct drm_device *dev, u32 pch_iir)
 		cpt_serr_int_handler(dev);
 }
 
-static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
+static void ilk_display_irq_handler(struct drm_device *dev, u32 de_iir)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (de_iir & DE_AUX_CHANNEL_A)
+		dp_aux_irq_handler(dev);
+
+	if (de_iir & DE_GSE)
+		intel_opregion_asle_intr(dev);
+
+	if (de_iir & DE_PIPEA_VBLANK)
+		drm_handle_vblank(dev, 0);
+
+	if (de_iir & DE_PIPEB_VBLANK)
+		drm_handle_vblank(dev, 1);
+
+	if (de_iir & DE_POISON)
+		DRM_ERROR("Poison interrupt\n");
+
+	if (de_iir & DE_PIPEA_FIFO_UNDERRUN)
+		if (intel_set_cpu_fifo_underrun_reporting(dev, PIPE_A, false))
+			DRM_DEBUG_DRIVER("Pipe A FIFO underrun\n");
+
+	if (de_iir & DE_PIPEB_FIFO_UNDERRUN)
+		if (intel_set_cpu_fifo_underrun_reporting(dev, PIPE_B, false))
+			DRM_DEBUG_DRIVER("Pipe B FIFO underrun\n");
+
+	if (de_iir & DE_PLANEA_FLIP_DONE) {
+		intel_prepare_page_flip(dev, 0);
+		intel_finish_page_flip_plane(dev, 0);
+	}
+
+	if (de_iir & DE_PLANEB_FLIP_DONE) {
+		intel_prepare_page_flip(dev, 1);
+		intel_finish_page_flip_plane(dev, 1);
+	}
+
+	/* check event from PCH */
+	if (de_iir & DE_PCH_EVENT) {
+		u32 pch_iir = I915_READ(SDEIIR);
+
+		if (HAS_PCH_CPT(dev))
+			cpt_irq_handler(dev, pch_iir);
+		else
+			ibx_irq_handler(dev, pch_iir);
+
+		/* should clear PCH hotplug event before clearing CPU irq */
+		I915_WRITE(SDEIIR, pch_iir);
+	}
+
+	if (IS_GEN5(dev) && de_iir & DE_PCU_EVENT)
+		ironlake_rps_change_irq_handler(dev);
+}
+
+static void ivb_display_irq_handler(struct drm_device *dev, u32 de_iir)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int i;
+
+	if (de_iir & DE_ERR_INT_IVB)
+		ivb_err_int_handler(dev);
+
+	if (de_iir & DE_AUX_CHANNEL_A_IVB)
+		dp_aux_irq_handler(dev);
+
+	if (de_iir & DE_GSE_IVB)
+		intel_opregion_asle_intr(dev);
+
+	for (i = 0; i < 3; i++) {
+		if (de_iir & (DE_PIPEA_VBLANK_IVB << (5 * i)))
+			drm_handle_vblank(dev, i);
+		if (de_iir & (DE_PLANEA_FLIP_DONE_IVB << (5 * i))) {
+			intel_prepare_page_flip(dev, i);
+			intel_finish_page_flip_plane(dev, i);
+		}
+	}
+
+	/* check event from PCH */
+	if (!HAS_PCH_NOP(dev) && (de_iir & DE_PCH_EVENT_IVB)) {
+		u32 pch_iir = I915_READ(SDEIIR);
+
+		cpt_irq_handler(dev, pch_iir);
+
+		/* clear PCH hotplug event before clearing CPU irq */
+		I915_WRITE(SDEIIR, pch_iir);
+	}
+}
+
+static irqreturn_t ironlake_irq_handler(int irq, void *arg)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	u32 de_iir, gt_iir, de_ier, pm_iir, sde_ier = 0;
+	u32 de_iir, gt_iir, de_ier, sde_ier = 0;
 	irqreturn_t ret = IRQ_NONE;
-	int i;
 
 	atomic_inc(&dev_priv->irq_received);
 
 	/* We get interrupts on unclaimed registers, so check for this before we
 	 * do any I915_{READ,WRITE}. */
-	if (IS_HASWELL(dev) &&
-	    (I915_READ_NOTRACE(FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
-		DRM_ERROR("Unclaimed register before interrupt\n");
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
-	}
+	intel_uncore_check_errors(dev);
 
 	/* disable master interrupt before clearing iir  */
 	de_ier = I915_READ(DEIER);
 	I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);
+	POSTING_READ(DEIER);
 
 	/* Disable south interrupts. We'll only write to SDEIIR once, so further
 	 * interrupts will be stored on its back queue, and then we'll be
@@ -1246,53 +1343,34 @@ static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
 
 	gt_iir = I915_READ(GTIIR);
 	if (gt_iir) {
-		snb_gt_irq_handler(dev, dev_priv, gt_iir);
+		if (INTEL_INFO(dev)->gen >= 6)
+			snb_gt_irq_handler(dev, dev_priv, gt_iir);
+		else
+			ilk_gt_irq_handler(dev, dev_priv, gt_iir);
 		I915_WRITE(GTIIR, gt_iir);
 		ret = IRQ_HANDLED;
 	}
 
 	de_iir = I915_READ(DEIIR);
 	if (de_iir) {
-		if (de_iir & DE_ERR_INT_IVB)
-			ivb_err_int_handler(dev);
-
-		if (de_iir & DE_AUX_CHANNEL_A_IVB)
-			dp_aux_irq_handler(dev);
-
-		if (de_iir & DE_GSE_IVB)
-			intel_opregion_asle_intr(dev);
-
-		for (i = 0; i < 3; i++) {
-			if (de_iir & (DE_PIPEA_VBLANK_IVB << (5 * i)))
-				drm_handle_vblank(dev, i);
-			if (de_iir & (DE_PLANEA_FLIP_DONE_IVB << (5 * i))) {
-				intel_prepare_page_flip(dev, i);
-				intel_finish_page_flip_plane(dev, i);
-			}
-		}
-
-		/* check event from PCH */
-		if (!HAS_PCH_NOP(dev) && (de_iir & DE_PCH_EVENT_IVB)) {
-			u32 pch_iir = I915_READ(SDEIIR);
-
-			cpt_irq_handler(dev, pch_iir);
-
-			/* clear PCH hotplug event before clear CPU irq */
-			I915_WRITE(SDEIIR, pch_iir);
-		}
-
+		if (INTEL_INFO(dev)->gen >= 7)
+			ivb_display_irq_handler(dev, de_iir);
+		else
+			ilk_display_irq_handler(dev, de_iir);
 		I915_WRITE(DEIIR, de_iir);
 		ret = IRQ_HANDLED;
 	}
 
-	pm_iir = I915_READ(GEN6_PMIIR);
-	if (pm_iir) {
-		if (IS_HASWELL(dev))
-			hsw_pm_irq_handler(dev_priv, pm_iir);
-		else if (pm_iir & GEN6_PM_RPS_EVENTS)
-			gen6_rps_irq_handler(dev_priv, pm_iir);
-		I915_WRITE(GEN6_PMIIR, pm_iir);
-		ret = IRQ_HANDLED;
+	if (INTEL_INFO(dev)->gen >= 6) {
+		u32 pm_iir = I915_READ(GEN6_PMIIR);
+		if (pm_iir) {
+			if (IS_HASWELL(dev))
+				hsw_pm_irq_handler(dev_priv, pm_iir);
+			else if (pm_iir & GEN6_PM_RPS_EVENTS)
+				gen6_rps_irq_handler(dev_priv, pm_iir);
+			I915_WRITE(GEN6_PMIIR, pm_iir);
+			ret = IRQ_HANDLED;
+		}
 	}
 
 	if (IS_HASWELL(dev)) {
@@ -1312,119 +1390,6 @@ static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
 	return ret;
 }
 
-static void ilk_gt_irq_handler(struct drm_device *dev,
-			       struct drm_i915_private *dev_priv,
-			       u32 gt_iir)
-{
-	if (gt_iir &
-	    (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT))
-		notify_ring(dev, &dev_priv->ring[RCS]);
-	if (gt_iir & ILK_BSD_USER_INTERRUPT)
-		notify_ring(dev, &dev_priv->ring[VCS]);
-}
-
-static irqreturn_t ironlake_irq_handler(int irq, void *arg)
-{
-	struct drm_device *dev = (struct drm_device *) arg;
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	int ret = IRQ_NONE;
-	u32 de_iir, gt_iir, de_ier, pm_iir, sde_ier;
-
-	atomic_inc(&dev_priv->irq_received);
-
-	/* disable master interrupt before clearing iir  */
-	de_ier = I915_READ(DEIER);
-	I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);
-	POSTING_READ(DEIER);
-
-	/* Disable south interrupts. We'll only write to SDEIIR once, so further
-	 * interrupts will will be stored on its back queue, and then we'll be
-	 * able to process them after we restore SDEIER (as soon as we restore
-	 * it, we'll get an interrupt if SDEIIR still has something to process
-	 * due to its back queue). */
-	sde_ier = I915_READ(SDEIER);
-	I915_WRITE(SDEIER, 0);
-	POSTING_READ(SDEIER);
-
-	de_iir = I915_READ(DEIIR);
-	gt_iir = I915_READ(GTIIR);
-	pm_iir = I915_READ(GEN6_PMIIR);
-
-	if (de_iir == 0 && gt_iir == 0 && (!IS_GEN6(dev) || pm_iir == 0))
-		goto done;
-
-	ret = IRQ_HANDLED;
-
-	if (IS_GEN5(dev))
-		ilk_gt_irq_handler(dev, dev_priv, gt_iir);
-	else
-		snb_gt_irq_handler(dev, dev_priv, gt_iir);
-
-	if (de_iir & DE_AUX_CHANNEL_A)
-		dp_aux_irq_handler(dev);
-
-	if (de_iir & DE_GSE)
-		intel_opregion_asle_intr(dev);
-
-	if (de_iir & DE_PIPEA_VBLANK)
-		drm_handle_vblank(dev, 0);
-
-	if (de_iir & DE_PIPEB_VBLANK)
-		drm_handle_vblank(dev, 1);
-
-	if (de_iir & DE_POISON)
-		DRM_ERROR("Poison interrupt\n");
-
-	if (de_iir & DE_PIPEA_FIFO_UNDERRUN)
-		if (intel_set_cpu_fifo_underrun_reporting(dev, PIPE_A, false))
-			DRM_DEBUG_DRIVER("Pipe A FIFO underrun\n");
-
-	if (de_iir & DE_PIPEB_FIFO_UNDERRUN)
-		if (intel_set_cpu_fifo_underrun_reporting(dev, PIPE_B, false))
-			DRM_DEBUG_DRIVER("Pipe B FIFO underrun\n");
-
-	if (de_iir & DE_PLANEA_FLIP_DONE) {
-		intel_prepare_page_flip(dev, 0);
-		intel_finish_page_flip_plane(dev, 0);
-	}
-
-	if (de_iir & DE_PLANEB_FLIP_DONE) {
-		intel_prepare_page_flip(dev, 1);
-		intel_finish_page_flip_plane(dev, 1);
-	}
-
-	/* check event from PCH */
-	if (de_iir & DE_PCH_EVENT) {
-		u32 pch_iir = I915_READ(SDEIIR);
-
-		if (HAS_PCH_CPT(dev))
-			cpt_irq_handler(dev, pch_iir);
-		else
-			ibx_irq_handler(dev, pch_iir);
-
-		/* should clear PCH hotplug event before clear CPU irq */
-		I915_WRITE(SDEIIR, pch_iir);
-	}
-
-	if (IS_GEN5(dev) &&  de_iir & DE_PCU_EVENT)
-		ironlake_rps_change_irq_handler(dev);
-
-	if (IS_GEN6(dev) && pm_iir & GEN6_PM_RPS_EVENTS)
-		gen6_rps_irq_handler(dev_priv, pm_iir);
-
-	I915_WRITE(GTIIR, gt_iir);
-	I915_WRITE(DEIIR, de_iir);
-	I915_WRITE(GEN6_PMIIR, pm_iir);
-
-done:
-	I915_WRITE(DEIER, de_ier);
-	POSTING_READ(DEIER);
-	I915_WRITE(SDEIER, sde_ier);
-	POSTING_READ(SDEIER);
-
-	return ret;
-}
-
 /**
  * i915_error_work_func - do process context error handling work
  * @work: work struct
@@ -1440,9 +1405,9 @@ static void i915_error_work_func(struct work_struct *work)
 						    gpu_error);
 	struct drm_device *dev = dev_priv->dev;
 	struct intel_ring_buffer *ring;
-	char *error_event[] = { "ERROR=1", NULL };
-	char *reset_event[] = { "RESET=1", NULL };
-	char *reset_done_event[] = { "ERROR=0", NULL };
+	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
+	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
+	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
 	int i, ret;
 
 	kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event);
@@ -1696,29 +1661,14 @@ static int ironlake_enable_vblank(struct drm_device *dev, int pipe)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	unsigned long irqflags;
+	uint32_t bit = (INTEL_INFO(dev)->gen >= 7) ? DE_PIPE_VBLANK_IVB(pipe) :
+						     DE_PIPE_VBLANK_ILK(pipe);
 
 	if (!i915_pipe_enabled(dev, pipe))
 		return -EINVAL;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	ironlake_enable_display_irq(dev_priv, (pipe == 0) ?
-				    DE_PIPEA_VBLANK : DE_PIPEB_VBLANK);
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
-
-	return 0;
-}
-
-static int ivybridge_enable_vblank(struct drm_device *dev, int pipe)
-{
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	unsigned long irqflags;
-
-	if (!i915_pipe_enabled(dev, pipe))
-		return -EINVAL;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	ironlake_enable_display_irq(dev_priv,
-				    DE_PIPEA_VBLANK_IVB << (5 * pipe));
+	ironlake_enable_display_irq(dev_priv, bit);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 
 	return 0;
@@ -1769,21 +1719,11 @@ static void ironlake_disable_vblank(struct drm_device *dev, int pipe)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	unsigned long irqflags;
+	uint32_t bit = (INTEL_INFO(dev)->gen >= 7) ? DE_PIPE_VBLANK_IVB(pipe) :
+						     DE_PIPE_VBLANK_ILK(pipe);
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	ironlake_disable_display_irq(dev_priv, (pipe == 0) ?
-				     DE_PIPEA_VBLANK : DE_PIPEB_VBLANK);
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
-}
-
-static void ivybridge_disable_vblank(struct drm_device *dev, int pipe)
-{
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	unsigned long irqflags;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	ironlake_disable_display_irq(dev_priv,
-				     DE_PIPEA_VBLANK_IVB << (pipe * 5));
+	ironlake_disable_display_irq(dev_priv, bit);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
@@ -2030,9 +1970,17 @@ void i915_hangcheck_elapsed(unsigned long data)
 	if (busy_count)
 		/* Reset the timer in case the chip hangs without another
 		 * request being added */
-		mod_timer(&dev_priv->gpu_error.hangcheck_timer,
-			  round_jiffies_up(jiffies +
-					   DRM_I915_HANGCHECK_JIFFIES));
+		i915_queue_hangcheck(dev);
+}
+
+void i915_queue_hangcheck(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	if (!i915_enable_hangcheck)
+		return;
+
+	mod_timer(&dev_priv->gpu_error.hangcheck_timer,
+		  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
 }
 
 static void ibx_irq_preinstall(struct drm_device *dev)
@@ -2054,31 +2002,26 @@ static void ibx_irq_preinstall(struct drm_device *dev)
 	POSTING_READ(SDEIER);
 }
 
-/* drm_dma.h hooks
-*/
-static void ironlake_irq_preinstall(struct drm_device *dev)
+static void gen5_gt_irq_preinstall(struct drm_device *dev)
 {
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-
-	atomic_set(&dev_priv->irq_received, 0);
-
-	I915_WRITE(HWSTAM, 0xeffe);
-
-	/* XXX hotplug from PCH */
-
-	I915_WRITE(DEIMR, 0xffffffff);
-	I915_WRITE(DEIER, 0x0);
-	POSTING_READ(DEIER);
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	/* and GT */
 	I915_WRITE(GTIMR, 0xffffffff);
 	I915_WRITE(GTIER, 0x0);
 	POSTING_READ(GTIER);
 
-	ibx_irq_preinstall(dev);
+	if (INTEL_INFO(dev)->gen >= 6) {
+		/* and PM */
+		I915_WRITE(GEN6_PMIMR, 0xffffffff);
+		I915_WRITE(GEN6_PMIER, 0x0);
+		POSTING_READ(GEN6_PMIER);
+	}
 }
 
-static void ivybridge_irq_preinstall(struct drm_device *dev)
+/* drm_dma.h hooks
+*/
+static void ironlake_irq_preinstall(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 
@@ -2086,21 +2029,11 @@ static void ivybridge_irq_preinstall(struct drm_device *dev)
 
 	I915_WRITE(HWSTAM, 0xeffe);
 
-	/* XXX hotplug from PCH */
-
 	I915_WRITE(DEIMR, 0xffffffff);
 	I915_WRITE(DEIER, 0x0);
 	POSTING_READ(DEIER);
 
-	/* and GT */
-	I915_WRITE(GTIMR, 0xffffffff);
-	I915_WRITE(GTIER, 0x0);
-	POSTING_READ(GTIER);
-
-	/* Power management */
-	I915_WRITE(GEN6_PMIMR, 0xffffffff);
-	I915_WRITE(GEN6_PMIER, 0x0);
-	POSTING_READ(GEN6_PMIER);
+	gen5_gt_irq_preinstall(dev);
 
 	ibx_irq_preinstall(dev);
 }
@@ -2121,9 +2054,8 @@ static void valleyview_irq_preinstall(struct drm_device *dev)
 	/* and GT */
 	I915_WRITE(GTIIR, I915_READ(GTIIR));
 	I915_WRITE(GTIIR, I915_READ(GTIIR));
-	I915_WRITE(GTIMR, 0xffffffff);
-	I915_WRITE(GTIER, 0x0);
-	POSTING_READ(GTIER);
+
+	gen5_gt_irq_preinstall(dev);
 
 	I915_WRITE(DPINVGTT, 0xff);
 
@@ -2193,120 +2125,99 @@ static void ibx_irq_postinstall(struct drm_device *dev)
 	I915_WRITE(SDEIMR, ~mask);
 }
 
-static int ironlake_irq_postinstall(struct drm_device *dev)
+static void gen5_gt_irq_postinstall(struct drm_device *dev)
 {
-	unsigned long irqflags;
-
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	/* enable kind of interrupts always enabled */
-	u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT |
-			   DE_PLANEA_FLIP_DONE | DE_PLANEB_FLIP_DONE |
-			   DE_AUX_CHANNEL_A | DE_PIPEB_FIFO_UNDERRUN |
-			   DE_PIPEA_FIFO_UNDERRUN | DE_POISON;
-	u32 gt_irqs;
-
-	dev_priv->irq_mask = ~display_mask;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 pm_irqs, gt_irqs;
 
-	/* should always can generate irq */
-	I915_WRITE(DEIIR, I915_READ(DEIIR));
-	I915_WRITE(DEIMR, dev_priv->irq_mask);
-	I915_WRITE(DEIER, display_mask |
-			  DE_PIPEA_VBLANK | DE_PIPEB_VBLANK | DE_PCU_EVENT);
-	POSTING_READ(DEIER);
+	pm_irqs = gt_irqs = 0;
 
 	dev_priv->gt_irq_mask = ~0;
+	if (HAS_L3_GPU_CACHE(dev)) {
+		/* L3 parity interrupt is always unmasked. */
+		dev_priv->gt_irq_mask = ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
+		gt_irqs |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
+	}
 
-	I915_WRITE(GTIIR, I915_READ(GTIIR));
-	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-
-	gt_irqs = GT_RENDER_USER_INTERRUPT;
-
-	if (IS_GEN6(dev))
-		gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT;
-	else
+	gt_irqs |= GT_RENDER_USER_INTERRUPT;
+	if (IS_GEN5(dev)) {
 		gt_irqs |= GT_RENDER_PIPECTL_NOTIFY_INTERRUPT |
 			   ILK_BSD_USER_INTERRUPT;
+	} else {
+		gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT;
+	}
 
+	I915_WRITE(GTIIR, I915_READ(GTIIR));
+	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
 	I915_WRITE(GTIER, gt_irqs);
 	POSTING_READ(GTIER);
 
-	ibx_irq_postinstall(dev);
+	if (INTEL_INFO(dev)->gen >= 6) {
+		pm_irqs |= GEN6_PM_RPS_EVENTS;
 
-	if (IS_IRONLAKE_M(dev)) {
-		/* Enable PCU event interrupts
-		 *
-		 * spinlocking not required here for correctness since interrupt
-		 * setup is guaranteed to run in single-threaded context. But we
-		 * need it to make the assert_spin_locked happy. */
-		spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-		ironlake_enable_display_irq(dev_priv, DE_PCU_EVENT);
-		spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
-	}
+		if (HAS_VEBOX(dev))
+			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
 
-	return 0;
+		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
+		I915_WRITE(GEN6_PMIMR, 0xffffffff);
+		I915_WRITE(GEN6_PMIER, pm_irqs);
+		POSTING_READ(GEN6_PMIER);
+	}
 }
 
-static int ivybridge_irq_postinstall(struct drm_device *dev)
+static int ironlake_irq_postinstall(struct drm_device *dev)
 {
+	unsigned long irqflags;
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	/* enable kind of interrupts always enabled */
-	u32 display_mask =
-		DE_MASTER_IRQ_CONTROL | DE_GSE_IVB | DE_PCH_EVENT_IVB |
-		DE_PLANEC_FLIP_DONE_IVB |
-		DE_PLANEB_FLIP_DONE_IVB |
-		DE_PLANEA_FLIP_DONE_IVB |
-		DE_AUX_CHANNEL_A_IVB |
-		DE_ERR_INT_IVB;
-	u32 pm_irqs = GEN6_PM_RPS_EVENTS;
-	u32 gt_irqs;
+	u32 display_mask, extra_mask;
+
+	if (INTEL_INFO(dev)->gen >= 7) {
+		display_mask = (DE_MASTER_IRQ_CONTROL | DE_GSE_IVB |
+				DE_PCH_EVENT_IVB | DE_PLANEC_FLIP_DONE_IVB |
+				DE_PLANEB_FLIP_DONE_IVB |
+				DE_PLANEA_FLIP_DONE_IVB | DE_AUX_CHANNEL_A_IVB |
+				DE_ERR_INT_IVB);
+		extra_mask = (DE_PIPEC_VBLANK_IVB | DE_PIPEB_VBLANK_IVB |
+			      DE_PIPEA_VBLANK_IVB);
+
+		I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
+	} else {
+		display_mask = (DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT |
+				DE_PLANEA_FLIP_DONE | DE_PLANEB_FLIP_DONE |
+				DE_AUX_CHANNEL_A | DE_PIPEB_FIFO_UNDERRUN |
+				DE_PIPEA_FIFO_UNDERRUN | DE_POISON);
+		extra_mask = DE_PIPEA_VBLANK | DE_PIPEB_VBLANK | DE_PCU_EVENT;
+	}
 
 	dev_priv->irq_mask = ~display_mask;
 
 	/* should always be able to generate an irq */
-	I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
 	I915_WRITE(DEIIR, I915_READ(DEIIR));
 	I915_WRITE(DEIMR, dev_priv->irq_mask);
-	I915_WRITE(DEIER,
-		   display_mask |
-		   DE_PIPEC_VBLANK_IVB |
-		   DE_PIPEB_VBLANK_IVB |
-		   DE_PIPEA_VBLANK_IVB);
+	I915_WRITE(DEIER, display_mask | extra_mask);
 	POSTING_READ(DEIER);
 
-	dev_priv->gt_irq_mask = ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
-
-	I915_WRITE(GTIIR, I915_READ(GTIIR));
-	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-
-	gt_irqs = GT_RENDER_USER_INTERRUPT | GT_BSD_USER_INTERRUPT |
-		  GT_BLT_USER_INTERRUPT | GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
-	I915_WRITE(GTIER, gt_irqs);
-	POSTING_READ(GTIER);
-
-	I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
-	if (HAS_VEBOX(dev))
-		pm_irqs |= PM_VEBOX_USER_INTERRUPT;
-
-	/* Our enable/disable rps functions may touch these registers so
-	 * make sure to set a known state for only the non-RPS bits.
-	 * The RMW is extra paranoia since this should be called after being set
-	 * to a known state in preinstall.
-	 * */
-	I915_WRITE(GEN6_PMIMR,
-		   (I915_READ(GEN6_PMIMR) | ~GEN6_PM_RPS_EVENTS) & ~pm_irqs);
-	I915_WRITE(GEN6_PMIER,
-		   (I915_READ(GEN6_PMIER) & GEN6_PM_RPS_EVENTS) | pm_irqs);
-	POSTING_READ(GEN6_PMIER);
+	gen5_gt_irq_postinstall(dev);
 
 	ibx_irq_postinstall(dev);
 
+	if (IS_IRONLAKE_M(dev)) {
+		/* Enable PCU event interrupts
+		 *
+		 * spinlocking not required here for correctness since interrupt
+		 * setup is guaranteed to run in single-threaded context. But we
+		 * need it to make the assert_spin_locked happy. */
+		spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+		ironlake_enable_display_irq(dev_priv, DE_PCU_EVENT);
+		spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+	}
+
 	return 0;
 }
 
 static int valleyview_irq_postinstall(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	u32 gt_irqs;
 	u32 enable_mask;
 	u32 pipestat_enable = PLANE_FLIP_DONE_INT_EN_VLV;
 	unsigned long irqflags;
@@ -2346,13 +2257,7 @@ static int valleyview_irq_postinstall(struct drm_device *dev)
 	I915_WRITE(VLV_IIR, 0xffffffff);
 	I915_WRITE(VLV_IIR, 0xffffffff);
 
-	I915_WRITE(GTIIR, I915_READ(GTIIR));
-	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-
-	gt_irqs = GT_RENDER_USER_INTERRUPT | GT_BSD_USER_INTERRUPT |
-		GT_BLT_USER_INTERRUPT;
-	I915_WRITE(GTIER, gt_irqs);
-	POSTING_READ(GTIER);
+	gen5_gt_irq_postinstall(dev);
 
 	/* ack & enable invalid PTE error interrupts */
 #if 0 /* FIXME: add support to irq handler for checking these bits */
@@ -3118,15 +3023,6 @@ void intel_irq_init(struct drm_device *dev)
 		dev->driver->enable_vblank = valleyview_enable_vblank;
 		dev->driver->disable_vblank = valleyview_disable_vblank;
 		dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup;
-	} else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
-		/* Share uninstall handlers with ILK/SNB */
-		dev->driver->irq_handler = ivybridge_irq_handler;
-		dev->driver->irq_preinstall = ivybridge_irq_preinstall;
-		dev->driver->irq_postinstall = ivybridge_irq_postinstall;
-		dev->driver->irq_uninstall = ironlake_irq_uninstall;
-		dev->driver->enable_vblank = ivybridge_enable_vblank;
-		dev->driver->disable_vblank = ivybridge_disable_vblank;
-		dev_priv->display.hpd_irq_setup = ibx_hpd_irq_setup;
 	} else if (HAS_PCH_SPLIT(dev)) {
 		dev->driver->irq_handler = ironlake_irq_handler;
 		dev->driver->irq_preinstall = ironlake_irq_preinstall;

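With ivybridge_enable/disable_vblank() folded into the ironlake paths, the per-pipe vblank bit is now computed by the two macros added to i915_reg.h further down. A standalone check of the bit layouts they encode (8 bits per pipe on ILK/SNB, 5 on IVB/HSW), matching the per-pipe constants the old code used:

#include <assert.h>

#define DE_PIPE_VBLANK_ILK(pipe)	(1 << ((pipe) * 8 + 7))
#define DE_PIPE_VBLANK_IVB(pipe)	(1 << ((pipe) * 5))

int main(void)
{
	/* ILK/SNB: pipe A vblank is bit 7, pipe B bit 15. */
	assert(DE_PIPE_VBLANK_ILK(0) == (1 << 7));
	assert(DE_PIPE_VBLANK_ILK(1) == (1 << 15));

	/* IVB/HSW: pipe A bit 0, pipe B bit 5, pipe C bit 10. */
	assert(DE_PIPE_VBLANK_IVB(0) == (1 << 0));
	assert(DE_PIPE_VBLANK_IVB(1) == (1 << 5));
	assert(DE_PIPE_VBLANK_IVB(2) == (1 << 10));
	return 0;
}
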
+ 103 - 1
drivers/gpu/drm/i915/i915_reg.h

@@ -61,6 +61,12 @@
 #define   GC_LOW_FREQUENCY_ENABLE	(1 << 7)
 #define   GC_DISPLAY_CLOCK_190_200_MHZ	(0 << 4)
 #define   GC_DISPLAY_CLOCK_333_MHZ	(4 << 4)
+#define   GC_DISPLAY_CLOCK_267_MHZ_PNV	(0 << 4)
+#define   GC_DISPLAY_CLOCK_333_MHZ_PNV	(1 << 4)
+#define   GC_DISPLAY_CLOCK_444_MHZ_PNV	(2 << 4)
+#define   GC_DISPLAY_CLOCK_200_MHZ_PNV	(5 << 4)
+#define   GC_DISPLAY_CLOCK_133_MHZ_PNV	(6 << 4)
+#define   GC_DISPLAY_CLOCK_167_MHZ_PNV	(7 << 4)
 #define   GC_DISPLAY_CLOCK_MASK		(7 << 4)
 #define   GM45_GC_RENDER_CLOCK_MASK	(0xf << 0)
 #define   GM45_GC_RENDER_CLOCK_266_MHZ	(8 << 0)
@@ -1779,6 +1785,71 @@
 #define BCLRPAT(pipe) _PIPE(pipe, _BCLRPAT_A, _BCLRPAT_B)
 #define VSYNCSHIFT(trans) _TRANSCODER(trans, _VSYNCSHIFT_A, _VSYNCSHIFT_B)
 
+/* HSW eDP PSR registers */
+#define EDP_PSR_CTL				0x64800
+#define   EDP_PSR_ENABLE			(1<<31)
+#define   EDP_PSR_LINK_DISABLE			(0<<27)
+#define   EDP_PSR_LINK_STANDBY			(1<<27)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_MASK	(3<<25)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES	(0<<25)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_4_LINES	(1<<25)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_2_LINES	(2<<25)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_0_LINES	(3<<25)
+#define   EDP_PSR_MAX_SLEEP_TIME_SHIFT		20
+#define   EDP_PSR_SKIP_AUX_EXIT			(1<<12)
+#define   EDP_PSR_TP1_TP2_SEL			(0<<11)
+#define   EDP_PSR_TP1_TP3_SEL			(1<<11)
+#define   EDP_PSR_TP2_TP3_TIME_500us		(0<<8)
+#define   EDP_PSR_TP2_TP3_TIME_100us		(1<<8)
+#define   EDP_PSR_TP2_TP3_TIME_2500us		(2<<8)
+#define   EDP_PSR_TP2_TP3_TIME_0us		(3<<8)
+#define   EDP_PSR_TP1_TIME_500us		(0<<4)
+#define   EDP_PSR_TP1_TIME_100us		(1<<4)
+#define   EDP_PSR_TP1_TIME_2500us		(2<<4)
+#define   EDP_PSR_TP1_TIME_0us			(3<<4)
+#define   EDP_PSR_IDLE_FRAME_SHIFT		0
+
+#define EDP_PSR_AUX_CTL			0x64810
+#define EDP_PSR_AUX_DATA1		0x64814
+#define   EDP_PSR_DPCD_COMMAND		0x80060000
+#define EDP_PSR_AUX_DATA2		0x64818
+#define   EDP_PSR_DPCD_NORMAL_OPERATION	(1<<24)
+#define EDP_PSR_AUX_DATA3		0x6481c
+#define EDP_PSR_AUX_DATA4		0x64820
+#define EDP_PSR_AUX_DATA5		0x64824
+
+#define EDP_PSR_STATUS_CTL			0x64840
+#define   EDP_PSR_STATUS_STATE_MASK		(7<<29)
+#define   EDP_PSR_STATUS_STATE_IDLE		(0<<29)
+#define   EDP_PSR_STATUS_STATE_SRDONACK		(1<<29)
+#define   EDP_PSR_STATUS_STATE_SRDENT		(2<<29)
+#define   EDP_PSR_STATUS_STATE_BUFOFF		(3<<29)
+#define   EDP_PSR_STATUS_STATE_BUFON		(4<<29)
+#define   EDP_PSR_STATUS_STATE_AUXACK		(5<<29)
+#define   EDP_PSR_STATUS_STATE_SRDOFFACK	(6<<29)
+#define   EDP_PSR_STATUS_LINK_MASK		(3<<26)
+#define   EDP_PSR_STATUS_LINK_FULL_OFF		(0<<26)
+#define   EDP_PSR_STATUS_LINK_FULL_ON		(1<<26)
+#define   EDP_PSR_STATUS_LINK_STANDBY		(2<<26)
+#define   EDP_PSR_STATUS_MAX_SLEEP_TIMER_SHIFT	20
+#define   EDP_PSR_STATUS_MAX_SLEEP_TIMER_MASK	0x1f
+#define   EDP_PSR_STATUS_COUNT_SHIFT		16
+#define   EDP_PSR_STATUS_COUNT_MASK		0xf
+#define   EDP_PSR_STATUS_AUX_ERROR		(1<<15)
+#define   EDP_PSR_STATUS_AUX_SENDING		(1<<12)
+#define   EDP_PSR_STATUS_SENDING_IDLE		(1<<9)
+#define   EDP_PSR_STATUS_SENDING_TP2_TP3	(1<<8)
+#define   EDP_PSR_STATUS_SENDING_TP1		(1<<4)
+#define   EDP_PSR_STATUS_IDLE_MASK		0xf
+
+#define EDP_PSR_PERF_CNT		0x64844
+#define   EDP_PSR_PERF_CNT_MASK		0xffffff
+
+#define EDP_PSR_DEBUG_CTL		0x64860
+#define   EDP_PSR_DEBUG_MASK_LPSP	(1<<27)
+#define   EDP_PSR_DEBUG_MASK_MEMUP	(1<<26)
+#define   EDP_PSR_DEBUG_MASK_HPD	(1<<25)
+
 /* VGA port control */
 #define ADPA			0x61100
 #define PCH_ADPA                0xe1100
@@ -2048,6 +2119,7 @@
  * (Haswell and newer) to see which VIDEO_DIP_DATA byte corresponds to each byte
  * of the infoframe structure specified by CEA-861. */
 #define   VIDEO_DIP_DATA_SIZE	32
+#define   VIDEO_DIP_VSC_DATA_SIZE	36
 #define VIDEO_DIP_CTL		0x61170
 /* Pre HSW: */
 #define   VIDEO_DIP_ENABLE		(1 << 31)
@@ -2195,6 +2267,8 @@
 #define BLC_PWM_CPU_CTL2	0x48250
 #define BLC_PWM_CPU_CTL		0x48254
 
+#define HSW_BLC_PWM2_CTL	0x48350
+
 /* PCH CTL1 is totally different, all but the below bits are reserved. CTL2 is
  * like the normal CTL from gen4 and earlier. Hooray for confusing naming. */
 #define BLC_PWM_PCH_CTL1	0xc8250
@@ -2203,6 +2277,12 @@
 #define   BLM_PCH_POLARITY			(1 << 29)
 #define BLC_PWM_PCH_CTL2	0xc8254
 
+#define UTIL_PIN_CTL		0x48400
+#define   UTIL_PIN_ENABLE	(1 << 31)
+
+#define PCH_GTC_CTL		0xe7000
+#define   PCH_GTC_ENABLE	(1 << 31)
+
 /* TV port control */
 #define TV_CTL			0x68000
 /** Enables the TV encoder */
@@ -3721,6 +3801,9 @@
 #define DE_PLANEA_FLIP_DONE_IVB		(1<<3)
 #define DE_PIPEA_VBLANK_IVB		(1<<0)
 
+#define DE_PIPE_VBLANK_ILK(pipe)	(1 << (((pipe) * 8) + 7))
+#define DE_PIPE_VBLANK_IVB(pipe)	(1 << ((pipe) * 5))
+
 #define VLV_MASTER_IER			0x4400c /* Gunit master IER */
 #define   MASTER_INTERRUPT_ENABLE	(1<<31)
 
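For orientation, the two per-pipe vblank helpers above expand as follows: the ILK-style display interrupt register packs each pipe's bits in groups of eight with vblank at bit 7 of the group, while the IVB layout uses a stride of five with vblank at bit 0, lining up with the existing per-pipe definitions:

	DE_PIPE_VBLANK_ILK(0) == (1 << 7)	/* matches DE_PIPEA_VBLANK */
	DE_PIPE_VBLANK_ILK(1) == (1 << 15)	/* matches DE_PIPEB_VBLANK */
	DE_PIPE_VBLANK_IVB(0) == (1 << 0)	/* matches DE_PIPEA_VBLANK_IVB */
	DE_PIPE_VBLANK_IVB(1) == (1 << 5)	/* matches DE_PIPEB_VBLANK_IVB */
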
@@ -4084,6 +4167,13 @@
 #define HSW_TVIDEO_DIP_VSC_DATA(trans) \
 	 _TRANSCODER(trans, HSW_VIDEO_DIP_VSC_DATA_A, HSW_VIDEO_DIP_VSC_DATA_B)
 
+#define HSW_STEREO_3D_CTL_A	0x70020
+#define   S3D_ENABLE		(1<<31)
+#define HSW_STEREO_3D_CTL_B	0x71020
+
+#define HSW_STEREO_3D_CTL(trans) \
+	_TRANSCODER(trans, HSW_STEREO_3D_CTL_A, HSW_STEREO_3D_CTL_B)
+
 #define _PCH_TRANS_HTOTAL_B          0xe1000
 #define _PCH_TRANS_HBLANK_B          0xe1004
 #define _PCH_TRANS_HSYNC_B           0xe1008
@@ -4472,6 +4562,10 @@
 #define  GT_FIFO_FREE_ENTRIES			0x120008
 #define    GT_FIFO_NUM_RESERVED_ENTRIES		20
 
+#define  HSW_IDICR				0x9008
+#define    IDIHASHMSK(x)			(((x) & 0x3f) << 16)
+#define  HSW_EDRAM_PRESENT			0x120010
+
 #define GEN6_UCGCTL1				0x9400
 # define GEN6_BLBUNIT_CLOCK_GATE_DISABLE		(1 << 5)
 # define GEN6_CSUNIT_CLOCK_GATE_DISABLE			(1 << 7)
@@ -4862,7 +4956,8 @@
 #define  SBI_SSCAUXDIV6				0x0610
 #define   SBI_SSCAUXDIV_FINALDIV2SEL(x)		((x)<<4)
 #define  SBI_DBUFF0				0x2a00
-#define   SBI_DBUFF0_ENABLE			(1<<0)
+#define  SBI_GEN0				0x1f00
+#define   SBI_GEN0_CFG_BUFFENABLE_DISABLE	(1<<0)
 
 /* LPT PIXCLK_GATE */
 #define PIXCLK_GATE			0xC6020
@@ -4928,7 +5023,14 @@
 #define  LCPLL_CLK_FREQ_450		(0<<26)
 #define  LCPLL_CD_CLOCK_DISABLE		(1<<25)
 #define  LCPLL_CD2X_CLOCK_DISABLE	(1<<23)
+#define  LCPLL_POWER_DOWN_ALLOW		(1<<22)
 #define  LCPLL_CD_SOURCE_FCLK		(1<<21)
+#define  LCPLL_CD_SOURCE_FCLK_DONE	(1<<19)
+
+#define D_COMP				(MCHBAR_MIRROR_BASE_SNB + 0x5F0C)
+#define  D_COMP_RCOMP_IN_PROGRESS	(1<<9)
+#define  D_COMP_COMP_FORCE		(1<<8)
+#define  D_COMP_COMP_DISABLE		(1<<0)
 
 /* Pipe WM_LINETIME - watermark line time */
 #define PIPE_WM_LINETIME_A		0x45270

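To make the PSR register block above concrete, here is a minimal, hedged sketch of composing a single EDP_PSR_CTL write from these fields, assuming the driver's usual I915_WRITE() accessor. The real programming belongs to the new intel_edp_psr_* code in intel_dp.c; the function name and field values here are illustrative, not tuned:

	/* Sketch only: idle_frames and max_sleep_time are made-up example
	 * values; the 5-bit sleep-timer width is inferred from the status
	 * register's mask above. */
	static void example_edp_psr_ctl_write(struct drm_i915_private *dev_priv)
	{
		uint32_t max_sleep_time = 0x1f;
		uint32_t idle_frames = 1;
		uint32_t val = EDP_PSR_ENABLE;

		val |= EDP_PSR_LINK_STANDBY;	/* keep the main link in standby */
		val |= EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES;
		val |= EDP_PSR_TP1_TIME_100us | EDP_PSR_TP2_TP3_TIME_100us;
		val |= max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT;
		val |= idle_frames << EDP_PSR_IDLE_FRAME_SHIFT;

		I915_WRITE(EDP_PSR_CTL, val);
	}
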
+ 5 - 3
drivers/gpu/drm/i915/i915_trace.h

@@ -406,10 +406,12 @@ TRACE_EVENT(i915_flip_complete,
 	    TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj)
 );
 
-TRACE_EVENT(i915_reg_rw,
-	TP_PROTO(bool write, u32 reg, u64 val, int len),
+TRACE_EVENT_CONDITION(i915_reg_rw,
+	TP_PROTO(bool write, u32 reg, u64 val, int len, bool trace),
 
-	TP_ARGS(write, reg, val, len),
+	TP_ARGS(write, reg, val, len, trace),
+
+	TP_CONDITION(trace),
 
 	TP_STRUCT__entry(
 		__field(u64, val)

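The point of switching to TRACE_EVENT_CONDITION is that the generated trace_i915_reg_rw() collapses to a no-op whenever the new trace argument is false, so hot register paths can opt out per call site. A hedged sketch of what a caller looks like after this change (the accessor name is illustrative; the real ones live in the colocated GT access routines):

	static inline u32 example_reg_read32(struct drm_i915_private *dev_priv,
					     u32 reg, bool trace)
	{
		u32 val = readl(dev_priv->regs + reg);

		/* Skipped entirely when trace == false, via TP_CONDITION. */
		trace_i915_reg_rw(false, reg, val, sizeof(val), trace);
		return val;
	}
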
+ 4 - 0
drivers/gpu/drm/i915/intel_crt.c

@@ -613,6 +613,10 @@ intel_crt_detect(struct drm_connector *connector, bool force)
 	enum drm_connector_status status;
 	struct intel_load_detect_pipe tmp;
 
+	DRM_DEBUG_KMS("[CONNECTOR:%d:%s] force=%d\n",
+		      connector->base.id, drm_get_connector_name(connector),
+		      force);
+
 	if (I915_HAS_HOTPLUG(dev)) {
 		/* We can not rely on the HPD pin always being correctly wired
 		 * up, for example many KVM do not pass it through, and so

+ 8 - 4
drivers/gpu/drm/i915/intel_ddi.c

@@ -301,7 +301,7 @@ static void intel_ddi_mode_set(struct drm_encoder *encoder,
 		struct intel_digital_port *intel_dig_port =
 			enc_to_dig_port(encoder);
 
-		intel_dp->DP = intel_dig_port->port_reversal |
+		intel_dp->DP = intel_dig_port->saved_port_bits |
 			       DDI_BUF_CTL_ENABLE | DDI_BUF_EMP_400MV_0DB_HSW;
 		intel_dp->DP |= DDI_PORT_WIDTH(intel_dp->lane_count);
 
@@ -1109,7 +1109,8 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder)
 		 * enabling the port.
 		 */
 		I915_WRITE(DDI_BUF_CTL(port),
-			   intel_dig_port->port_reversal | DDI_BUF_CTL_ENABLE);
+			   intel_dig_port->saved_port_bits |
+			   DDI_BUF_CTL_ENABLE);
 	} else if (type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
@@ -1117,6 +1118,7 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder)
 			intel_dp_stop_link_train(intel_dp);
 
 		ironlake_edp_backlight_on(intel_dp);
+		intel_edp_psr_enable(intel_dp);
 	}
 
 	if (intel_crtc->eld_vld && type != INTEL_OUTPUT_EDP) {
@@ -1147,6 +1149,7 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder)
 	if (type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
+		intel_edp_psr_disable(intel_dp);
 		ironlake_edp_backlight_off(intel_dp);
 	}
 }
@@ -1347,8 +1350,9 @@ void intel_ddi_init(struct drm_device *dev, enum port port)
 	intel_encoder->get_config = intel_ddi_get_config;
 
 	intel_dig_port->port = port;
-	intel_dig_port->port_reversal = I915_READ(DDI_BUF_CTL(port)) &
-					DDI_BUF_PORT_REVERSAL;
+	intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
+					  (DDI_BUF_PORT_REVERSAL |
+					   DDI_A_4_LANES);
 	intel_dig_port->dp.output_reg = DDI_BUF_CTL(port);
 
 	intel_encoder->type = INTEL_OUTPUT_UNKNOWN;

+ 348 - 125
drivers/gpu/drm/i915/intel_display.c

@@ -2274,6 +2274,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 	}
 
 	intel_update_fbc(dev);
+	intel_edp_psr_update(dev);
 	mutex_unlock(&dev->struct_mutex);
 
 	intel_crtc_update_sarea_pos(crtc, x, y);
@@ -4162,6 +4163,30 @@ static int i9xx_misc_get_display_clock_speed(struct drm_device *dev)
 	return 200000;
 }
 
+static int pnv_get_display_clock_speed(struct drm_device *dev)
+{
+	u16 gcfgc = 0;
+
+	pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
+
+	switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
+	case GC_DISPLAY_CLOCK_267_MHZ_PNV:
+		return 267000;
+	case GC_DISPLAY_CLOCK_333_MHZ_PNV:
+		return 333000;
+	case GC_DISPLAY_CLOCK_444_MHZ_PNV:
+		return 444000;
+	case GC_DISPLAY_CLOCK_200_MHZ_PNV:
+		return 200000;
+	default:
+		DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc);
+	case GC_DISPLAY_CLOCK_133_MHZ_PNV:
+		return 133000;
+	case GC_DISPLAY_CLOCK_167_MHZ_PNV:
+		return 167000;
+	}
+}
+
 static int i915gm_get_display_clock_speed(struct drm_device *dev)
 {
 	u16 gcfgc = 0;
@@ -4946,22 +4971,19 @@ static void i9xx_get_pfit_config(struct intel_crtc *crtc,
 	uint32_t tmp;
 
 	tmp = I915_READ(PFIT_CONTROL);
+	if (!(tmp & PFIT_ENABLE))
+		return;
 
+	/* Check whether the pfit is attached to our pipe. */
 	if (INTEL_INFO(dev)->gen < 4) {
 		if (crtc->pipe != PIPE_B)
 			return;
-
-		/* gen2/3 store dither state in pfit control, needs to match */
-		pipe_config->gmch_pfit.control = tmp & PANEL_8TO6_DITHER_ENABLE;
 	} else {
 		if ((tmp & PFIT_PIPE_MASK) != (crtc->pipe << PFIT_PIPE_SHIFT))
 			return;
 	}
 
-	if (!(tmp & PFIT_ENABLE))
-		return;
-
-	pipe_config->gmch_pfit.control = I915_READ(PFIT_CONTROL);
+	pipe_config->gmch_pfit.control = tmp;
 	pipe_config->gmch_pfit.pgm_ratios = I915_READ(PFIT_PGM_RATIOS);
 	if (INTEL_INFO(dev)->gen < 5)
 		pipe_config->gmch_pfit.lvds_border_bits =
@@ -5166,74 +5188,37 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
 	BUG_ON(val != final);
 }
 
-/* Sequence to enable CLKOUT_DP for FDI usage and configure PCH FDI I/O. */
-static void lpt_init_pch_refclk(struct drm_device *dev)
+static void lpt_reset_fdi_mphy(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_mode_config *mode_config = &dev->mode_config;
-	struct intel_encoder *encoder;
-	bool has_vga = false;
-	bool is_sdv = false;
-	u32 tmp;
-
-	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
-		switch (encoder->type) {
-		case INTEL_OUTPUT_ANALOG:
-			has_vga = true;
-			break;
-		}
-	}
-
-	if (!has_vga)
-		return;
-
-	mutex_lock(&dev_priv->dpio_lock);
-
-	/* XXX: Rip out SDV support once Haswell ships for real. */
-	if (IS_HASWELL(dev) && (dev->pci_device & 0xFF00) == 0x0C00)
-		is_sdv = true;
+	uint32_t tmp;
 
-	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
-	tmp &= ~SBI_SSCCTL_DISABLE;
-	tmp |= SBI_SSCCTL_PATHALT;
-	intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+	tmp = I915_READ(SOUTH_CHICKEN2);
+	tmp |= FDI_MPHY_IOSFSB_RESET_CTL;
+	I915_WRITE(SOUTH_CHICKEN2, tmp);
 
-	udelay(24);
+	if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) &
+			       FDI_MPHY_IOSFSB_RESET_STATUS, 100))
+		DRM_ERROR("FDI mPHY reset assert timeout\n");
 
-	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
-	tmp &= ~SBI_SSCCTL_PATHALT;
-	intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+	tmp = I915_READ(SOUTH_CHICKEN2);
+	tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL;
+	I915_WRITE(SOUTH_CHICKEN2, tmp);
 
-	if (!is_sdv) {
-		tmp = I915_READ(SOUTH_CHICKEN2);
-		tmp |= FDI_MPHY_IOSFSB_RESET_CTL;
-		I915_WRITE(SOUTH_CHICKEN2, tmp);
-
-		if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) &
-				       FDI_MPHY_IOSFSB_RESET_STATUS, 100))
-			DRM_ERROR("FDI mPHY reset assert timeout\n");
-
-		tmp = I915_READ(SOUTH_CHICKEN2);
-		tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL;
-		I915_WRITE(SOUTH_CHICKEN2, tmp);
+	if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) &
+				FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100))
+		DRM_ERROR("FDI mPHY reset de-assert timeout\n");
+}
 
-		if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) &
-				        FDI_MPHY_IOSFSB_RESET_STATUS) == 0,
-				       100))
-			DRM_ERROR("FDI mPHY reset de-assert timeout\n");
-	}
+/* WaMPhyProgramming:hsw */
+static void lpt_program_fdi_mphy(struct drm_i915_private *dev_priv)
+{
+	uint32_t tmp;
 
 	tmp = intel_sbi_read(dev_priv, 0x8008, SBI_MPHY);
 	tmp &= ~(0xFF << 24);
 	tmp |= (0x12 << 24);
 	intel_sbi_write(dev_priv, 0x8008, tmp, SBI_MPHY);
 
-	if (is_sdv) {
-		tmp = intel_sbi_read(dev_priv, 0x800C, SBI_MPHY);
-		tmp |= 0x7FFF;
-		intel_sbi_write(dev_priv, 0x800C, tmp, SBI_MPHY);
-	}
-
 	tmp = intel_sbi_read(dev_priv, 0x2008, SBI_MPHY);
 	tmp |= (1 << 11);
 	intel_sbi_write(dev_priv, 0x2008, tmp, SBI_MPHY);
@@ -5242,24 +5227,6 @@ static void lpt_init_pch_refclk(struct drm_device *dev)
 	tmp |= (1 << 11);
 	intel_sbi_write(dev_priv, 0x2108, tmp, SBI_MPHY);
 
-	if (is_sdv) {
-		tmp = intel_sbi_read(dev_priv, 0x2038, SBI_MPHY);
-		tmp |= (0x3F << 24) | (0xF << 20) | (0xF << 16);
-		intel_sbi_write(dev_priv, 0x2038, tmp, SBI_MPHY);
-
-		tmp = intel_sbi_read(dev_priv, 0x2138, SBI_MPHY);
-		tmp |= (0x3F << 24) | (0xF << 20) | (0xF << 16);
-		intel_sbi_write(dev_priv, 0x2138, tmp, SBI_MPHY);
-
-		tmp = intel_sbi_read(dev_priv, 0x203C, SBI_MPHY);
-		tmp |= (0x3F << 8);
-		intel_sbi_write(dev_priv, 0x203C, tmp, SBI_MPHY);
-
-		tmp = intel_sbi_read(dev_priv, 0x213C, SBI_MPHY);
-		tmp |= (0x3F << 8);
-		intel_sbi_write(dev_priv, 0x213C, tmp, SBI_MPHY);
-	}
-
 	tmp = intel_sbi_read(dev_priv, 0x206C, SBI_MPHY);
 	tmp |= (1 << 24) | (1 << 21) | (1 << 18);
 	intel_sbi_write(dev_priv, 0x206C, tmp, SBI_MPHY);
@@ -5268,17 +5235,15 @@ static void lpt_init_pch_refclk(struct drm_device *dev)
 	tmp |= (1 << 24) | (1 << 21) | (1 << 18);
 	intel_sbi_write(dev_priv, 0x216C, tmp, SBI_MPHY);
 
-	if (!is_sdv) {
-		tmp = intel_sbi_read(dev_priv, 0x2080, SBI_MPHY);
-		tmp &= ~(7 << 13);
-		tmp |= (5 << 13);
-		intel_sbi_write(dev_priv, 0x2080, tmp, SBI_MPHY);
+	tmp = intel_sbi_read(dev_priv, 0x2080, SBI_MPHY);
+	tmp &= ~(7 << 13);
+	tmp |= (5 << 13);
+	intel_sbi_write(dev_priv, 0x2080, tmp, SBI_MPHY);
 
-		tmp = intel_sbi_read(dev_priv, 0x2180, SBI_MPHY);
-		tmp &= ~(7 << 13);
-		tmp |= (5 << 13);
-		intel_sbi_write(dev_priv, 0x2180, tmp, SBI_MPHY);
-	}
+	tmp = intel_sbi_read(dev_priv, 0x2180, SBI_MPHY);
+	tmp &= ~(7 << 13);
+	tmp |= (5 << 13);
+	intel_sbi_write(dev_priv, 0x2180, tmp, SBI_MPHY);
 
 	tmp = intel_sbi_read(dev_priv, 0x208C, SBI_MPHY);
 	tmp &= ~0xFF;
@@ -5300,34 +5265,120 @@ static void lpt_init_pch_refclk(struct drm_device *dev)
 	tmp |= (0x1C << 16);
 	intel_sbi_write(dev_priv, 0x2198, tmp, SBI_MPHY);
 
-	if (!is_sdv) {
-		tmp = intel_sbi_read(dev_priv, 0x20C4, SBI_MPHY);
-		tmp |= (1 << 27);
-		intel_sbi_write(dev_priv, 0x20C4, tmp, SBI_MPHY);
+	tmp = intel_sbi_read(dev_priv, 0x20C4, SBI_MPHY);
+	tmp |= (1 << 27);
+	intel_sbi_write(dev_priv, 0x20C4, tmp, SBI_MPHY);
+
+	tmp = intel_sbi_read(dev_priv, 0x21C4, SBI_MPHY);
+	tmp |= (1 << 27);
+	intel_sbi_write(dev_priv, 0x21C4, tmp, SBI_MPHY);
+
+	tmp = intel_sbi_read(dev_priv, 0x20EC, SBI_MPHY);
+	tmp &= ~(0xF << 28);
+	tmp |= (4 << 28);
+	intel_sbi_write(dev_priv, 0x20EC, tmp, SBI_MPHY);
+
+	tmp = intel_sbi_read(dev_priv, 0x21EC, SBI_MPHY);
+	tmp &= ~(0xF << 28);
+	tmp |= (4 << 28);
+	intel_sbi_write(dev_priv, 0x21EC, tmp, SBI_MPHY);
+}
+
+/* Implements 3 different sequences from BSpec chapter "Display iCLK
+ * Programming" based on the parameters passed:
+ * - Sequence to enable CLKOUT_DP
+ * - Sequence to enable CLKOUT_DP without spread
+ * - Sequence to enable CLKOUT_DP for FDI usage and configure PCH FDI I/O
+ */
+static void lpt_enable_clkout_dp(struct drm_device *dev, bool with_spread,
+				 bool with_fdi)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t reg, tmp;
+
+	if (WARN(with_fdi && !with_spread, "FDI requires downspread\n"))
+		with_spread = true;
+	if (WARN(dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE &&
+		 with_fdi, "LP PCH doesn't have FDI\n"))
+		with_fdi = false;
 
-		tmp = intel_sbi_read(dev_priv, 0x21C4, SBI_MPHY);
-		tmp |= (1 << 27);
-		intel_sbi_write(dev_priv, 0x21C4, tmp, SBI_MPHY);
+	mutex_lock(&dev_priv->dpio_lock);
 
-		tmp = intel_sbi_read(dev_priv, 0x20EC, SBI_MPHY);
-		tmp &= ~(0xF << 28);
-		tmp |= (4 << 28);
-		intel_sbi_write(dev_priv, 0x20EC, tmp, SBI_MPHY);
+	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
+	tmp &= ~SBI_SSCCTL_DISABLE;
+	tmp |= SBI_SSCCTL_PATHALT;
+	intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+
+	udelay(24);
 
-		tmp = intel_sbi_read(dev_priv, 0x21EC, SBI_MPHY);
-		tmp &= ~(0xF << 28);
-		tmp |= (4 << 28);
-		intel_sbi_write(dev_priv, 0x21EC, tmp, SBI_MPHY);
+	if (with_spread) {
+		tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
+		tmp &= ~SBI_SSCCTL_PATHALT;
+		intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+
+		if (with_fdi) {
+			lpt_reset_fdi_mphy(dev_priv);
+			lpt_program_fdi_mphy(dev_priv);
+		}
 	}
 
-	/* ULT uses SBI_GEN0, but ULT doesn't have VGA, so we don't care. */
-	tmp = intel_sbi_read(dev_priv, SBI_DBUFF0, SBI_ICLK);
-	tmp |= SBI_DBUFF0_ENABLE;
-	intel_sbi_write(dev_priv, SBI_DBUFF0, tmp, SBI_ICLK);
+	reg = (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) ?
+	       SBI_GEN0 : SBI_DBUFF0;
+	tmp = intel_sbi_read(dev_priv, reg, SBI_ICLK);
+	tmp |= SBI_GEN0_CFG_BUFFENABLE_DISABLE;
+	intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK);
+
+	mutex_unlock(&dev_priv->dpio_lock);
+}
+
+/* Sequence to disable CLKOUT_DP */
+static void lpt_disable_clkout_dp(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t reg, tmp;
+
+	mutex_lock(&dev_priv->dpio_lock);
+
+	reg = (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) ?
+	       SBI_GEN0 : SBI_DBUFF0;
+	tmp = intel_sbi_read(dev_priv, reg, SBI_ICLK);
+	tmp &= ~SBI_GEN0_CFG_BUFFENABLE_DISABLE;
+	intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK);
+
+	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
+	if (!(tmp & SBI_SSCCTL_DISABLE)) {
+		if (!(tmp & SBI_SSCCTL_PATHALT)) {
+			tmp |= SBI_SSCCTL_PATHALT;
+			intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+			udelay(32);
+		}
+		tmp |= SBI_SSCCTL_DISABLE;
+		intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+	}
 
 	mutex_unlock(&dev_priv->dpio_lock);
 }
 
+static void lpt_init_pch_refclk(struct drm_device *dev)
+{
+	struct drm_mode_config *mode_config = &dev->mode_config;
+	struct intel_encoder *encoder;
+	bool has_vga = false;
+
+	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
+		switch (encoder->type) {
+		case INTEL_OUTPUT_ANALOG:
+			has_vga = true;
+			break;
+		}
+	}
+
+	if (has_vga)
+		lpt_enable_clkout_dp(dev, true, true);
+	else
+		lpt_disable_clkout_dp(dev);
+}
+
 /*
  * Initialize reference clocks when the driver loads
  */
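The three BSpec "Display iCLK Programming" sequences that lpt_enable_clkout_dp() advertises map directly onto its two flags; lpt_init_pch_refclk() above only ever uses the first combination, and the other two are sketched here for orientation (these calls are not part of this diff):

	lpt_enable_clkout_dp(dev, true, true);		/* CLKOUT_DP with spread, plus FDI mPHY setup */
	lpt_enable_clkout_dp(dev, true, false);		/* CLKOUT_DP with spread */
	lpt_enable_clkout_dp(dev, false, false);	/* CLKOUT_DP without spread */
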
@@ -5895,6 +5946,142 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 	return true;
 }
 
+static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	struct intel_ddi_plls *plls = &dev_priv->ddi_plls;
+	struct intel_crtc *crtc;
+	unsigned long irqflags;
+	uint32_t val, pch_hpd_mask;
+
+	pch_hpd_mask = SDE_PORTB_HOTPLUG_CPT | SDE_PORTC_HOTPLUG_CPT;
+	if (dev_priv->pch_id != INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
+		pch_hpd_mask |= SDE_PORTD_HOTPLUG_CPT | SDE_CRT_HOTPLUG_CPT;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head)
+		WARN(crtc->base.enabled, "CRTC for pipe %c enabled\n",
+		     pipe_name(crtc->pipe));
+
+	WARN(I915_READ(HSW_PWR_WELL_DRIVER), "Power well on\n");
+	WARN(plls->spll_refcount, "SPLL enabled\n");
+	WARN(plls->wrpll1_refcount, "WRPLL1 enabled\n");
+	WARN(plls->wrpll2_refcount, "WRPLL2 enabled\n");
+	WARN(I915_READ(PCH_PP_STATUS) & PP_ON, "Panel power on\n");
+	WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE,
+	     "CPU PWM1 enabled\n");
+	WARN(I915_READ(HSW_BLC_PWM2_CTL) & BLM_PWM_ENABLE,
+	     "CPU PWM2 enabled\n");
+	WARN(I915_READ(BLC_PWM_PCH_CTL1) & BLM_PCH_PWM_ENABLE,
+	     "PCH PWM1 enabled\n");
+	WARN(I915_READ(UTIL_PIN_CTL) & UTIL_PIN_ENABLE,
+	     "Utility pin enabled\n");
+	WARN(I915_READ(PCH_GTC_CTL) & PCH_GTC_ENABLE, "PCH GTC enabled\n");
+
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+	val = I915_READ(DEIMR);
+	WARN((val & ~DE_PCH_EVENT_IVB) != val,
+	     "Unexpected DEIMR bits enabled: 0x%x\n", val);
+	val = I915_READ(SDEIMR);
+	WARN((val & ~pch_hpd_mask) != val,
+	     "Unexpected SDEIMR bits enabled: 0x%x\n", val);
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+}
+
+/*
+ * This function implements pieces of two sequences from BSpec:
+ * - Sequence for display software to disable LCPLL
+ * - Sequence for display software to allow package C8+
+ * The steps implemented here are just the steps that actually touch the LCPLL
+ * register. Callers should take care of disabling all the display engine
+ * functions, doing the mode unset, fixing interrupts, etc.
+ */
+void hsw_disable_lcpll(struct drm_i915_private *dev_priv,
+		       bool switch_to_fclk, bool allow_power_down)
+{
+	uint32_t val;
+
+	assert_can_disable_lcpll(dev_priv);
+
+	val = I915_READ(LCPLL_CTL);
+
+	if (switch_to_fclk) {
+		val |= LCPLL_CD_SOURCE_FCLK;
+		I915_WRITE(LCPLL_CTL, val);
+
+		if (wait_for_atomic_us(I915_READ(LCPLL_CTL) &
+				       LCPLL_CD_SOURCE_FCLK_DONE, 1))
+			DRM_ERROR("Switching to FCLK failed\n");
+
+		val = I915_READ(LCPLL_CTL);
+	}
+
+	val |= LCPLL_PLL_DISABLE;
+	I915_WRITE(LCPLL_CTL, val);
+	POSTING_READ(LCPLL_CTL);
+
+	if (wait_for((I915_READ(LCPLL_CTL) & LCPLL_PLL_LOCK) == 0, 1))
+		DRM_ERROR("LCPLL still locked\n");
+
+	val = I915_READ(D_COMP);
+	val |= D_COMP_COMP_DISABLE;
+	I915_WRITE(D_COMP, val);
+	POSTING_READ(D_COMP);
+	ndelay(100);
+
+	if (wait_for((I915_READ(D_COMP) & D_COMP_RCOMP_IN_PROGRESS) == 0, 1))
+		DRM_ERROR("D_COMP RCOMP still in progress\n");
+
+	if (allow_power_down) {
+		val = I915_READ(LCPLL_CTL);
+		val |= LCPLL_POWER_DOWN_ALLOW;
+		I915_WRITE(LCPLL_CTL, val);
+		POSTING_READ(LCPLL_CTL);
+	}
+}
+
+/*
+ * Fully restores LCPLL, disallowing power down and switching back to LCPLL
+ * source.
+ */
+void hsw_restore_lcpll(struct drm_i915_private *dev_priv)
+{
+	uint32_t val;
+
+	val = I915_READ(LCPLL_CTL);
+
+	if ((val & (LCPLL_PLL_LOCK | LCPLL_PLL_DISABLE | LCPLL_CD_SOURCE_FCLK |
+		    LCPLL_POWER_DOWN_ALLOW)) == LCPLL_PLL_LOCK)
+		return;
+
+	if (val & LCPLL_POWER_DOWN_ALLOW) {
+		val &= ~LCPLL_POWER_DOWN_ALLOW;
+		I915_WRITE(LCPLL_CTL, val);
+	}
+
+	val = I915_READ(D_COMP);
+	val |= D_COMP_COMP_FORCE;
+	val &= ~D_COMP_COMP_DISABLE;
+	I915_WRITE(D_COMP, val);
+	I915_READ(D_COMP);
+
+	val = I915_READ(LCPLL_CTL);
+	val &= ~LCPLL_PLL_DISABLE;
+	I915_WRITE(LCPLL_CTL, val);
+
+	if (wait_for(I915_READ(LCPLL_CTL) & LCPLL_PLL_LOCK, 5))
+		DRM_ERROR("LCPLL not locked yet\n");
+
+	if (val & LCPLL_CD_SOURCE_FCLK) {
+		val = I915_READ(LCPLL_CTL);
+		val &= ~LCPLL_CD_SOURCE_FCLK;
+		I915_WRITE(LCPLL_CTL, val);
+
+		if (wait_for_atomic_us((I915_READ(LCPLL_CTL) &
+					LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1))
+			DRM_ERROR("Switching back to LCPLL failed\n");
+	}
+}
+
 static void haswell_modeset_global_resources(struct drm_device *dev)
 {
 	bool enable = false;
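hsw_disable_lcpll() and hsw_restore_lcpll() are deliberately symmetric. As a rough, hedged sketch of how the upcoming PC8+ code is expected to pair them (no such caller exists in this diff):

	/* Display must already be fully quiesced;
	 * assert_can_disable_lcpll() enforces that. */
	hsw_disable_lcpll(dev_priv, true, true);	/* FCLK + allow power down */
	/* ... package C8+ residency ... */
	hsw_restore_lcpll(dev_priv);
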
@@ -8434,6 +8621,8 @@ check_shared_dpll_state(struct drm_device *dev)
 		     pll->active, pll->refcount);
 		WARN(pll->active && !pll->on,
 		     "pll in active use but not on in sw tracking\n");
+		WARN(pll->on && !pll->active,
+		     "pll in on but not on in use in sw tracking\n");
 		WARN(pll->on != active,
 		     "pll on state mismatch (expected %i, found %i)\n",
 		     pll->on, active);
@@ -8658,15 +8847,20 @@ static void intel_set_config_restore_state(struct drm_device *dev,
 }
 
 static bool
-is_crtc_connector_off(struct drm_crtc *crtc, struct drm_connector *connectors,
-		      int num_connectors)
+is_crtc_connector_off(struct drm_mode_set *set)
 {
 	int i;
 
-	for (i = 0; i < num_connectors; i++)
-		if (connectors[i].encoder &&
-		    connectors[i].encoder->crtc == crtc &&
-		    connectors[i].dpms != DRM_MODE_DPMS_ON)
+	if (set->num_connectors == 0)
+		return false;
+
+	if (WARN_ON(set->connectors == NULL))
+		return false;
+
+	for (i = 0; i < set->num_connectors; i++)
+		if (set->connectors[i]->encoder &&
+		    set->connectors[i]->encoder->crtc == set->crtc &&
+		    set->connectors[i]->dpms != DRM_MODE_DPMS_ON)
 			return true;
 
 	return false;
@@ -8679,10 +8873,8 @@ intel_set_config_compute_mode_changes(struct drm_mode_set *set,
 
 	/* We should be able to check here if the fb has the same properties
 	 * and then just flip_or_move it */
-	if (set->connectors != NULL &&
-	    is_crtc_connector_off(set->crtc, *set->connectors,
-				  set->num_connectors)) {
-			config->mode_changed = true;
+	if (is_crtc_connector_off(set)) {
+		config->mode_changed = true;
 	} else if (set->crtc->fb != set->fb) {
 		/* If we have no fb then treat it as a full mode set */
 		if (set->crtc->fb == NULL) {
@@ -9437,9 +9629,12 @@ static void intel_init_display(struct drm_device *dev)
 	else if (IS_I915G(dev))
 		dev_priv->display.get_display_clock_speed =
 			i915_get_display_clock_speed;
-	else if (IS_I945GM(dev) || IS_845G(dev) || IS_PINEVIEW_M(dev))
+	else if (IS_I945GM(dev) || IS_845G(dev))
 		dev_priv->display.get_display_clock_speed =
 			i9xx_misc_get_display_clock_speed;
+	else if (IS_PINEVIEW(dev))
+		dev_priv->display.get_display_clock_speed =
+			pnv_get_display_clock_speed;
 	else if (IS_I915GM(dev))
 		dev_priv->display.get_display_clock_speed =
 			i915gm_get_display_clock_speed;
@@ -9536,6 +9731,17 @@ static void quirk_invert_brightness(struct drm_device *dev)
 	DRM_INFO("applying inverted panel brightness quirk\n");
 }
 
+/*
+ * Some machines (Dell XPS13) suffer broken backlight controls if
+ * BLM_PCH_PWM_ENABLE is set.
+ */
+static void quirk_no_pch_pwm_enable(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	dev_priv->quirks |= QUIRK_NO_PCH_PWM_ENABLE;
+	DRM_INFO("applying no-PCH_PWM_ENABLE quirk\n");
+}
+
 struct intel_quirk {
 	int device;
 	int subsystem_vendor;
@@ -9605,6 +9811,11 @@ static struct intel_quirk intel_quirks[] = {
 
 	/* Acer Aspire 4736Z */
 	{ 0x2a42, 0x1025, 0x0260, quirk_invert_brightness },
+
+	/* Dell XPS13 HD Sandy Bridge */
+	{ 0x0116, 0x1028, 0x052e, quirk_no_pch_pwm_enable },
+	/* Dell XPS13 HD and XPS13 FHD Ivy Bridge */
+	{ 0x0166, 0x1028, 0x058b, quirk_no_pch_pwm_enable },
 };
 
 static void intel_init_quirks(struct drm_device *dev)
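For context, entries such as the two new XPS13 ones are matched purely on PCI device and subsystem IDs; a hedged sketch of the walk intel_init_quirks() performs over this table (its real body sits outside this hunk):

	struct pci_dev *d = dev->pdev;
	int i;

	for (i = 0; i < ARRAY_SIZE(intel_quirks); i++) {
		struct intel_quirk *q = &intel_quirks[i];

		if (d->device == q->device &&
		    (d->subsystem_vendor == q->subsystem_vendor ||
		     q->subsystem_vendor == PCI_ANY_ID) &&
		    (d->subsystem_device == q->subsystem_device ||
		     q->subsystem_device == PCI_ANY_ID))
			q->hook(dev);
	}
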
@@ -9955,8 +10166,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 		}
 		pll->refcount = pll->active;
 
-		DRM_DEBUG_KMS("%s hw state readout: refcount %i\n",
-			      pll->name, pll->refcount);
+		DRM_DEBUG_KMS("%s hw state readout: refcount %i, on %i\n",
+			      pll->name, pll->refcount, pll->on);
 	}
 
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
@@ -10016,6 +10227,7 @@ void intel_modeset_setup_hw_state(struct drm_device *dev,
 	struct drm_plane *plane;
 	struct intel_crtc *crtc;
 	struct intel_encoder *encoder;
+	int i;
 
 	intel_modeset_readout_hw_state(dev);
 
@@ -10047,6 +10259,18 @@ void intel_modeset_setup_hw_state(struct drm_device *dev,
 		intel_dump_pipe_config(crtc, &crtc->config, "[setup_hw_state]");
 	}
 
+	for (i = 0; i < dev_priv->num_shared_dpll; i++) {
+		struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i];
+
+		if (!pll->on || pll->active)
+			continue;
+
+		DRM_DEBUG_KMS("%s enabled but not in use, disabling\n", pll->name);
+
+		pll->disable(dev_priv, pll);
+		pll->on = false;
+	}
+
 	if (force_restore) {
 		/*
 		 * We need to use raw interfaces for restoring state to avoid
@@ -10256,8 +10480,7 @@ intel_display_capture_error_state(struct drm_device *dev)
 	 * well was on, so here we have to clear the FPGA_DBG_RM_NOCLAIM bit to
 	 * prevent the next I915_WRITE from detecting it and printing an error
 	 * message. */
-	if (HAS_POWER_WELL(dev))
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
+	intel_uncore_clear_errors(dev);
 
 	return error;
 }

Some files were not shown because too many files changed in this diff