Browse Source

Merge branch 'hw-breakpoint' of git://repo.or.cz/linux-2.6/linux-wd into devel-stable

Russell King 14 years ago
parent
commit
2f841ed13b
100 changed files with 1793 additions and 847 deletions
  1. 4 4
      Documentation/edac.txt
  2. 1 0
      Documentation/networking/ip-sysctl.txt
  3. 1 1
      Makefile
  4. 2 2
      arch/arm/include/asm/hw_breakpoint.h
  5. 4 0
      arch/arm/kernel/entry-armv.S
  6. 19 0
      arch/arm/kernel/entry-header.S
  7. 319 224
      arch/arm/kernel/hw_breakpoint.c
  8. 2 2
      arch/arm/kernel/ptrace.c
  9. 7 0
      arch/arm/mach-omap2/pm24xx.c
  10. 10 0
      arch/arm/mach-omap2/pm34xx.c
  11. 6 1
      arch/arm/mach-omap2/serial.c
  12. 4 4
      arch/arm/mach-s3c2410/h1940-bluetooth.c
  13. 2 3
      arch/arm/mach-s3c2416/irq.c
  14. 2 3
      arch/arm/mach-s3c2443/irq.c
  15. 1 1
      arch/arm/mach-s3c64xx/mach-mini6410.c
  16. 1 1
      arch/arm/mach-s3c64xx/mach-real6410.c
  17. 1 0
      arch/arm/mach-s5pv210/mach-smdkc110.c
  18. 1 0
      arch/arm/mach-s5pv210/mach-smdkv210.c
  19. 111 36
      arch/arm/mach-shmobile/board-ap4evb.c
  20. 8 25
      arch/arm/mach-shmobile/clock-sh7372.c
  21. 3 0
      arch/arm/plat-pxa/include/plat/sdhci.h
  22. 3 3
      arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c
  23. 3 3
      arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c
  24. 3 3
      arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c
  25. 1 1
      arch/powerpc/mm/pgtable.c
  26. 4 6
      arch/s390/kernel/nmi.c
  27. 9 5
      arch/s390/lib/delay.c
  28. 1 1
      arch/x86/Kconfig
  29. 1 1
      arch/x86/include/asm/msr-index.h
  30. 5 5
      arch/x86/include/asm/paravirt.h
  31. 4 0
      arch/x86/include/asm/uv/uv_hub.h
  32. 18 1
      arch/x86/include/asm/uv/uv_mmrs.h
  33. 4 3
      arch/x86/kernel/apic/hw_nmi.c
  34. 23 2
      arch/x86/kernel/apic/x2apic_uv_x.c
  35. 20 0
      arch/x86/kernel/cpu/perf_event.c
  36. 1 1
      arch/x86/kernel/entry_32.S
  37. 2 0
      arch/x86/kernel/entry_64.S
  38. 4 0
      arch/x86/kernel/hw_breakpoint.c
  39. 30 34
      arch/x86/kernel/mmconf-fam10h_64.c
  40. 3 2
      arch/x86/mm/tlb.c
  41. 1 1
      arch/x86/platform/uv/tlb_uv.c
  42. 3 1
      arch/x86/platform/uv/uv_time.c
  43. 1 1
      block/blk-throttle.c
  44. 1 1
      drivers/block/amiflop.c
  45. 1 1
      drivers/block/ataflop.c
  46. 1 2
      drivers/block/cciss.c
  47. 33 22
      drivers/block/xen-blkfront.c
  48. 24 0
      drivers/char/tpm/tpm_tis.c
  49. 4 4
      drivers/edac/Makefile
  50. 1 1
      drivers/edac/mce_amd_inj.c
  51. 90 70
      drivers/firewire/net.c
  52. 4 3
      drivers/isdn/icn/icn.c
  53. 1 1
      drivers/mmc/core/core.c
  54. 6 2
      drivers/mmc/core/mmc.c
  55. 35 16
      drivers/mmc/core/sdio.c
  56. 22 11
      drivers/mmc/core/sdio_bus.c
  57. 1 1
      drivers/mmc/host/omap_hsmmc.c
  58. 9 3
      drivers/mmc/host/sdhci-esdhc-imx.c
  59. 23 8
      drivers/mmc/host/sdhci-pci.c
  60. 4 0
      drivers/mmc/host/sdhci-pxa.c
  61. 43 11
      drivers/mmc/host/sdhci.c
  62. 8 1
      drivers/mmc/host/sdhci.h
  63. 22 8
      drivers/mmc/host/ushc.c
  64. 9 7
      drivers/mtd/ubi/scan.c
  65. 3 3
      drivers/net/Kconfig
  66. 5 5
      drivers/net/au1000_eth.c
  67. 45 28
      drivers/net/cxgb4vf/cxgb4vf_main.c
  68. 59 35
      drivers/net/cxgb4vf/t4vf_hw.c
  69. 14 4
      drivers/net/ehea/ehea_main.c
  70. 3 3
      drivers/net/pch_gbe/pch_gbe_main.c
  71. 4 4
      drivers/net/pch_gbe/pch_gbe_param.c
  72. 22 21
      drivers/net/ppp_generic.c
  73. 2 1
      drivers/net/ucc_geth.h
  74. 6 4
      drivers/net/usb/hso.c
  75. 6 5
      drivers/net/wan/x25_asy.c
  76. 1 1
      drivers/net/wireless/ath/ath9k/recv.c
  77. 1 1
      drivers/net/wireless/ath/carl9170/main.c
  78. 1 0
      drivers/net/wireless/b43/sdio.c
  79. 1 1
      drivers/s390/cio/qdio_thinint.c
  80. 1 0
      drivers/ssb/b43_pci_bridge.c
  81. 3 2
      drivers/vhost/net.c
  82. 1 14
      fs/btrfs/compression.c
  83. 3 3
      fs/btrfs/ctree.h
  84. 32 6
      fs/btrfs/disk-io.c
  85. 76 0
      fs/btrfs/export.c
  86. 1 1
      fs/btrfs/extent-tree.c
  87. 62 15
      fs/btrfs/extent_io.c
  88. 3 0
      fs/btrfs/extent_io.h
  89. 7 0
      fs/btrfs/file.c
  90. 237 57
      fs/btrfs/inode.c
  91. 21 10
      fs/btrfs/ioctl.c
  92. 67 0
      fs/btrfs/ordered-data.c
  93. 3 0
      fs/btrfs/ordered-data.h
  94. 38 3
      fs/btrfs/super.c
  95. 4 1
      fs/btrfs/transaction.c
  96. 17 4
      fs/btrfs/tree-log.c
  97. 8 7
      fs/gfs2/quota.c
  98. 6 25
      fs/ioprio.c
  99. 34 28
      fs/nfs/dir.c
  100. 1 1
      fs/nfs/direct.c

+ 4 - 4
Documentation/edac.txt

@@ -196,7 +196,7 @@ csrow3.
 The representation of the above is reflected in the directory tree
 in EDAC's sysfs interface. Starting in directory
 /sys/devices/system/edac/mc each memory controller will be represented
-by its own 'mcX' directory, where 'X" is the index of the MC.
+by its own 'mcX' directory, where 'X' is the index of the MC.
 
 
 	..../edac/mc/
@@ -207,7 +207,7 @@ by its own 'mcX' directory, where 'X" is the index of the MC.
 		   ....
 
 Under each 'mcX' directory each 'csrowX' is again represented by a
-'csrowX', where 'X" is the csrow index:
+'csrowX', where 'X' is the csrow index:
 
 
 	.../mc/mc0/
@@ -232,7 +232,7 @@ EDAC control and attribute files.
 
 
 In 'mcX' directories are EDAC control and attribute files for
-this 'X" instance of the memory controllers:
+this 'X' instance of the memory controllers:
 
 
 Counter reset control file:
@@ -343,7 +343,7 @@ Sdram memory scrubbing rate:
 'csrowX' DIRECTORIES
 
 In the 'csrowX' directories are EDAC control and attribute files for
-this 'X" instance of csrow:
+this 'X' instance of csrow:
 
 
 Total Uncorrectable Errors count attribute file:

+ 1 - 0
Documentation/networking/ip-sysctl.txt

@@ -144,6 +144,7 @@ tcp_adv_win_scale - INTEGER
 	Count buffering overhead as bytes/2^tcp_adv_win_scale
 	(if tcp_adv_win_scale > 0) or bytes-bytes/2^(-tcp_adv_win_scale),
 	if it is <= 0.
+	Possible values are [-31, 31], inclusive.
 	Default: 2
 
 tcp_allowed_congestion_control - STRING

+ 1 - 1
Makefile

@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 37
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = Flesh-Eating Bats with Fangs
 
 # *DOCUMENTATION*

+ 2 - 2
arch/arm/include/asm/hw_breakpoint.h

@@ -20,8 +20,8 @@ struct arch_hw_breakpoint_ctrl {
 struct arch_hw_breakpoint {
 	u32	address;
 	u32	trigger;
-	struct perf_event *suspended_wp;
-	struct arch_hw_breakpoint_ctrl ctrl;
+	struct	arch_hw_breakpoint_ctrl step_ctrl;
+	struct	arch_hw_breakpoint_ctrl ctrl;
 };
 
 static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl)

+ 4 - 0
arch/arm/kernel/entry-armv.S

@@ -198,6 +198,7 @@ __dabt_svc:
 	@
 	@ set desired IRQ state, then call main handler
 	@
+	debug_entry r1
 	msr	cpsr_c, r9
 	mov	r2, sp
 	bl	do_DataAbort
@@ -324,6 +325,7 @@ __pabt_svc:
 #else
 	bl	CPU_PABORT_HANDLER
 #endif
+	debug_entry r1
 	msr	cpsr_c, r9			@ Maybe enable interrupts
 	mov	r2, sp				@ regs
 	bl	do_PrefetchAbort		@ call abort handler
@@ -439,6 +441,7 @@ __dabt_usr:
 	@
 	@ IRQs on, then call the main handler
 	@
+	debug_entry r1
 	enable_irq
 	mov	r2, sp
 	adr	lr, BSYM(ret_from_exception)
@@ -703,6 +706,7 @@ __pabt_usr:
 #else
 	bl	CPU_PABORT_HANDLER
 #endif
+	debug_entry r1
 	enable_irq				@ Enable interrupts
 	mov	r2, sp				@ regs
 	bl	do_PrefetchAbort		@ call abort handler

+ 19 - 0
arch/arm/kernel/entry-header.S

@@ -165,6 +165,25 @@
 	.endm
 #endif	/* !CONFIG_THUMB2_KERNEL */
 
+	@
+	@ Debug exceptions are taken as prefetch or data aborts.
+	@ We must disable preemption during the handler so that
+	@ we can access the debug registers safely.
+	@
+	.macro	debug_entry, fsr
+#if defined(CONFIG_HAVE_HW_BREAKPOINT) && defined(CONFIG_PREEMPT)
+	ldr	r4, =0x40f		@ mask out fsr.fs
+	and	r5, r4, \fsr
+	cmp	r5, #2			@ debug exception
+	bne	1f
+	get_thread_info r10
+	ldr	r6, [r10, #TI_PREEMPT]	@ get preempt count
+	add	r11, r6, #1		@ increment it
+	str	r11, [r10, #TI_PREEMPT]
+1:
+#endif
+	.endm
+
 /*
  * These are the registers used in the syscall handler, and allow us to
  * have in theory up to 7 arguments to a function - r0 to r6.

+ 319 - 224
arch/arm/kernel/hw_breakpoint.c

@@ -24,6 +24,7 @@
 #define pr_fmt(fmt) "hw-breakpoint: " fmt
 
 #include <linux/errno.h>
+#include <linux/hardirq.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/smp.h>
@@ -44,6 +45,7 @@ static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]);
 
 /* Number of BRP/WRP registers on this CPU. */
 static int core_num_brps;
+static int core_num_reserved_brps;
 static int core_num_wrps;
 
 /* Debug architecture version. */
@@ -52,87 +54,6 @@ static u8 debug_arch;
 /* Maximum supported watchpoint length. */
 static u8 max_watchpoint_len;
 
-/* Determine number of BRP registers available. */
-static int get_num_brps(void)
-{
-	u32 didr;
-	ARM_DBG_READ(c0, 0, didr);
-	return ((didr >> 24) & 0xf) + 1;
-}
-
-/* Determine number of WRP registers available. */
-static int get_num_wrps(void)
-{
-	/*
-	 * FIXME: When a watchpoint fires, the only way to work out which
-	 * watchpoint it was is by disassembling the faulting instruction
-	 * and working out the address of the memory access.
-	 *
-	 * Furthermore, we can only do this if the watchpoint was precise
-	 * since imprecise watchpoints prevent us from calculating register
-	 * based addresses.
-	 *
-	 * For the time being, we only report 1 watchpoint register so we
-	 * always know which watchpoint fired. In the future we can either
-	 * add a disassembler and address generation emulator, or we can
-	 * insert a check to see if the DFAR is set on watchpoint exception
-	 * entry [the ARM ARM states that the DFAR is UNKNOWN, but
-	 * experience shows that it is set on some implementations].
-	 */
-
-#if 0
-	u32 didr, wrps;
-	ARM_DBG_READ(c0, 0, didr);
-	return ((didr >> 28) & 0xf) + 1;
-#endif
-
-	return 1;
-}
-
-int hw_breakpoint_slots(int type)
-{
-	/*
-	 * We can be called early, so don't rely on
-	 * our static variables being initialised.
-	 */
-	switch (type) {
-	case TYPE_INST:
-		return get_num_brps();
-	case TYPE_DATA:
-		return get_num_wrps();
-	default:
-		pr_warning("unknown slot type: %d\n", type);
-		return 0;
-	}
-}
-
-/* Determine debug architecture. */
-static u8 get_debug_arch(void)
-{
-	u32 didr;
-
-	/* Do we implement the extended CPUID interface? */
-	if (((read_cpuid_id() >> 16) & 0xf) != 0xf) {
-		pr_warning("CPUID feature registers not supported. "
-				"Assuming v6 debug is present.\n");
-		return ARM_DEBUG_ARCH_V6;
-	}
-
-	ARM_DBG_READ(c0, 0, didr);
-	return (didr >> 16) & 0xf;
-}
-
-/* Does this core support mismatch breakpoints? */
-static int core_has_mismatch_bps(void)
-{
-	return debug_arch >= ARM_DEBUG_ARCH_V7_ECP14 && core_num_brps > 1;
-}
-
-u8 arch_get_debug_arch(void)
-{
-	return debug_arch;
-}
-
 #define READ_WB_REG_CASE(OP2, M, VAL)		\
 	case ((OP2 << 4) + M):			\
 		ARM_DBG_READ(c ## M, OP2, VAL); \
@@ -210,6 +131,94 @@ static void write_wb_reg(int n, u32 val)
 	isb();
 }
 
+/* Determine debug architecture. */
+static u8 get_debug_arch(void)
+{
+	u32 didr;
+
+	/* Do we implement the extended CPUID interface? */
+	if (((read_cpuid_id() >> 16) & 0xf) != 0xf) {
+		pr_warning("CPUID feature registers not supported. "
+				"Assuming v6 debug is present.\n");
+		return ARM_DEBUG_ARCH_V6;
+	}
+
+	ARM_DBG_READ(c0, 0, didr);
+	return (didr >> 16) & 0xf;
+}
+
+u8 arch_get_debug_arch(void)
+{
+	return debug_arch;
+}
+
+/* Determine number of BRP register available. */
+static int get_num_brp_resources(void)
+{
+	u32 didr;
+	ARM_DBG_READ(c0, 0, didr);
+	return ((didr >> 24) & 0xf) + 1;
+}
+
+/* Does this core support mismatch breakpoints? */
+static int core_has_mismatch_brps(void)
+{
+	return (get_debug_arch() >= ARM_DEBUG_ARCH_V7_ECP14 &&
+		get_num_brp_resources() > 1);
+}
+
+/* Determine number of usable WRPs available. */
+static int get_num_wrps(void)
+{
+	/*
+	 * FIXME: When a watchpoint fires, the only way to work out which
+	 * watchpoint it was is by disassembling the faulting instruction
+	 * and working out the address of the memory access.
+	 *
+	 * Furthermore, we can only do this if the watchpoint was precise
+	 * since imprecise watchpoints prevent us from calculating register
+	 * based addresses.
+	 *
+	 * Providing we have more than 1 breakpoint register, we only report
+	 * a single watchpoint register for the time being. This way, we always
+	 * know which watchpoint fired. In the future we can either add a
+	 * disassembler and address generation emulator, or we can insert a
+	 * check to see if the DFAR is set on watchpoint exception entry
+	 * [the ARM ARM states that the DFAR is UNKNOWN, but experience shows
+	 * that it is set on some implementations].
+	 */
+
+#if 0
+	int wrps;
+	u32 didr;
+	ARM_DBG_READ(c0, 0, didr);
+	wrps = ((didr >> 28) & 0xf) + 1;
+#endif
+	int wrps = 1;
+
+	if (core_has_mismatch_brps() && wrps >= get_num_brp_resources())
+		wrps = get_num_brp_resources() - 1;
+
+	return wrps;
+}
+
+/* We reserve one breakpoint for each watchpoint. */
+static int get_num_reserved_brps(void)
+{
+	if (core_has_mismatch_brps())
+		return get_num_wrps();
+	return 0;
+}
+
+/* Determine number of usable BRPs available. */
+static int get_num_brps(void)
+{
+	int brps = get_num_brp_resources();
+	if (core_has_mismatch_brps())
+		brps -= get_num_reserved_brps();
+	return brps;
+}
+
 /*
  * In order to access the breakpoint/watchpoint control registers,
  * we must be running in debug monitor mode. Unfortunately, we can
@@ -230,8 +239,12 @@ static int enable_monitor_mode(void)
 		goto out;
 	}
 
+	/* If monitor mode is already enabled, just return. */
+	if (dscr & ARM_DSCR_MDBGEN)
+		goto out;
+
 	/* Write to the corresponding DSCR. */
-	switch (debug_arch) {
+	switch (get_debug_arch()) {
 	case ARM_DEBUG_ARCH_V6:
 	case ARM_DEBUG_ARCH_V6_1:
 		ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN));
@@ -246,15 +259,30 @@ static int enable_monitor_mode(void)
 
 	/* Check that the write made it through. */
 	ARM_DBG_READ(c1, 0, dscr);
-	if (WARN_ONCE(!(dscr & ARM_DSCR_MDBGEN),
-				"failed to enable monitor mode.")) {
+	if (!(dscr & ARM_DSCR_MDBGEN))
 		ret = -EPERM;
-	}
 
 out:
 	return ret;
 }
 
+int hw_breakpoint_slots(int type)
+{
+	/*
+	 * We can be called early, so don't rely on
+	 * our static variables being initialised.
+	 */
+	switch (type) {
+	case TYPE_INST:
+		return get_num_brps();
+	case TYPE_DATA:
+		return get_num_wrps();
+	default:
+		pr_warning("unknown slot type: %d\n", type);
+		return 0;
+	}
+}
+
 /*
  * Check if 8-bit byte-address select is available.
  * This clobbers WRP 0.
@@ -268,9 +296,6 @@ static u8 get_max_wp_len(void)
 	if (debug_arch < ARM_DEBUG_ARCH_V7_ECP14)
 		goto out;
 
-	if (enable_monitor_mode())
-		goto out;
-
 	memset(&ctrl, 0, sizeof(ctrl));
 	ctrl.len = ARM_BREAKPOINT_LEN_8;
 	ctrl_reg = encode_ctrl_reg(ctrl);
@@ -289,23 +314,6 @@ u8 arch_get_max_wp_len(void)
 	return max_watchpoint_len;
 }
 
-/*
- * Handler for reactivating a suspended watchpoint when the single
- * step `mismatch' breakpoint is triggered.
- */
-static void wp_single_step_handler(struct perf_event *bp, int unused,
-				   struct perf_sample_data *data,
-				   struct pt_regs *regs)
-{
-	perf_event_enable(counter_arch_bp(bp)->suspended_wp);
-	unregister_hw_breakpoint(bp);
-}
-
-static int bp_is_single_step(struct perf_event *bp)
-{
-	return bp->overflow_handler == wp_single_step_handler;
-}
-
 /*
  * Install a perf counter breakpoint.
  */
@@ -314,30 +322,41 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 	struct perf_event **slot, **slots;
 	int i, max_slots, ctrl_base, val_base, ret = 0;
+	u32 addr, ctrl;
 
 	/* Ensure that we are in monitor mode and halting mode is disabled. */
 	ret = enable_monitor_mode();
 	if (ret)
 		goto out;
 
+	addr = info->address;
+	ctrl = encode_ctrl_reg(info->ctrl) | 0x1;
+
 	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
 		/* Breakpoint */
 		ctrl_base = ARM_BASE_BCR;
 		val_base = ARM_BASE_BVR;
-		slots = __get_cpu_var(bp_on_reg);
-		max_slots = core_num_brps - 1;
-
-		if (bp_is_single_step(bp)) {
-			info->ctrl.mismatch = 1;
-			i = max_slots;
-			slots[i] = bp;
-			goto setup;
+		slots = (struct perf_event **)__get_cpu_var(bp_on_reg);
+		max_slots = core_num_brps;
+		if (info->step_ctrl.enabled) {
+			/* Override the breakpoint data with the step data. */
+			addr = info->trigger & ~0x3;
+			ctrl = encode_ctrl_reg(info->step_ctrl);
 		}
 	} else {
 		/* Watchpoint */
-		ctrl_base = ARM_BASE_WCR;
-		val_base = ARM_BASE_WVR;
-		slots = __get_cpu_var(wp_on_reg);
+		if (info->step_ctrl.enabled) {
+			/* Install into the reserved breakpoint region. */
+			ctrl_base = ARM_BASE_BCR + core_num_brps;
+			val_base = ARM_BASE_BVR + core_num_brps;
+			/* Override the watchpoint data with the step data. */
+			addr = info->trigger & ~0x3;
+			ctrl = encode_ctrl_reg(info->step_ctrl);
+		} else {
+			ctrl_base = ARM_BASE_WCR;
+			val_base = ARM_BASE_WVR;
+		}
+		slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 		max_slots = core_num_wrps;
 	}
 
@@ -355,12 +374,11 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 		goto out;
 	}
 
-setup:
 	/* Setup the address register. */
-	write_wb_reg(val_base + i, info->address);
+	write_wb_reg(val_base + i, addr);
 
 	/* Setup the control register. */
-	write_wb_reg(ctrl_base + i, encode_ctrl_reg(info->ctrl) | 0x1);
+	write_wb_reg(ctrl_base + i, ctrl);
 
 out:
 	return ret;
@@ -375,18 +393,15 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
 		/* Breakpoint */
 		base = ARM_BASE_BCR;
-		slots = __get_cpu_var(bp_on_reg);
-		max_slots = core_num_brps - 1;
-
-		if (bp_is_single_step(bp)) {
-			i = max_slots;
-			slots[i] = NULL;
-			goto reset;
-		}
+		slots = (struct perf_event **)__get_cpu_var(bp_on_reg);
+		max_slots = core_num_brps;
 	} else {
 		/* Watchpoint */
-		base = ARM_BASE_WCR;
-		slots = __get_cpu_var(wp_on_reg);
+		if (info->step_ctrl.enabled)
+			base = ARM_BASE_BCR + core_num_brps;
+		else
+			base = ARM_BASE_WCR;
+		slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 		max_slots = core_num_wrps;
 	}
 
@@ -403,7 +418,6 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 	if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot"))
 		return;
 
-reset:
 	/* Reset the control register. */
 	write_wb_reg(base + i, 0);
 }
@@ -537,12 +551,23 @@ static int arch_build_bp_info(struct perf_event *bp)
 		return -EINVAL;
 	}
 
+	/*
+	 * Breakpoints must be of length 2 (thumb) or 4 (ARM) bytes.
+	 * Watchpoints can be of length 1, 2, 4 or 8 bytes if supported
+	 * by the hardware and must be aligned to the appropriate number of
+	 * bytes.
+	 */
+	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE &&
+	    info->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
+	    info->ctrl.len != ARM_BREAKPOINT_LEN_4)
+		return -EINVAL;
+
 	/* Address */
 	info->address = bp->attr.bp_addr;
 
 	/* Privilege */
 	info->ctrl.privilege = ARM_BREAKPOINT_USER;
-	if (arch_check_bp_in_kernelspace(bp) && !bp_is_single_step(bp))
+	if (arch_check_bp_in_kernelspace(bp))
 		info->ctrl.privilege |= ARM_BREAKPOINT_PRIV;
 
 	/* Enabled? */
@@ -561,7 +586,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 {
 	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 	int ret = 0;
-	u32 bytelen, max_len, offset, alignment_mask = 0x3;
+	u32 offset, alignment_mask = 0x3;
 
 	/* Build the arch_hw_breakpoint. */
 	ret = arch_build_bp_info(bp);
@@ -571,84 +596,85 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	/* Check address alignment. */
 	if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
 		alignment_mask = 0x7;
-	if (info->address & alignment_mask) {
-		/*
-		 * Try to fix the alignment. This may result in a length
-		 * that is too large, so we must check for that.
-		 */
-		bytelen = get_hbp_len(info->ctrl.len);
-		max_len = info->ctrl.type == ARM_BREAKPOINT_EXECUTE ? 4 :
-				max_watchpoint_len;
-
-		if (max_len >= 8)
-			offset = info->address & 0x7;
-		else
-			offset = info->address & 0x3;
-
-		if (bytelen > (1 << ((max_len - (offset + 1)) >> 1))) {
-			ret = -EFBIG;
-			goto out;
-		}
-
-		info->ctrl.len <<= offset;
-		info->address &= ~offset;
-
-		pr_debug("breakpoint alignment fixup: length = 0x%x, "
-			"address = 0x%x\n", info->ctrl.len, info->address);
+	offset = info->address & alignment_mask;
+	switch (offset) {
+	case 0:
+		/* Aligned */
+		break;
+	case 1:
+		/* Allow single byte watchpoint. */
+		if (info->ctrl.len == ARM_BREAKPOINT_LEN_1)
+			break;
+	case 2:
+		/* Allow halfword watchpoints and breakpoints. */
+		if (info->ctrl.len == ARM_BREAKPOINT_LEN_2)
+			break;
+	default:
+		ret = -EINVAL;
+		goto out;
 	}
 
+	info->address &= ~alignment_mask;
+	info->ctrl.len <<= offset;
+
 	/*
 	 * Currently we rely on an overflow handler to take
 	 * care of single-stepping the breakpoint when it fires.
 	 * In the case of userspace breakpoints on a core with V7 debug,
-	 * we can use the mismatch feature as a poor-man's hardware single-step.
+	 * we can use the mismatch feature as a poor-man's hardware
+	 * single-step, but this only works for per-task breakpoints.
 	 */
 	if (WARN_ONCE(!bp->overflow_handler &&
-		(arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_bps()),
+		(arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps()
+		 || !bp->hw.bp_target),
 			"overflow handler required but none found")) {
 		ret = -EINVAL;
-		goto out;
 	}
 out:
 	return ret;
 }
 
-static void update_mismatch_flag(int idx, int flag)
+/*
+ * Enable/disable single-stepping over the breakpoint bp at address addr.
+ */
+static void enable_single_step(struct perf_event *bp, u32 addr)
 {
-	struct perf_event *bp = __get_cpu_var(bp_on_reg[idx]);
-	struct arch_hw_breakpoint *info;
-
-	if (bp == NULL)
-		return;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
-	info = counter_arch_bp(bp);
+	arch_uninstall_hw_breakpoint(bp);
+	info->step_ctrl.mismatch  = 1;
+	info->step_ctrl.len	  = ARM_BREAKPOINT_LEN_4;
+	info->step_ctrl.type	  = ARM_BREAKPOINT_EXECUTE;
+	info->step_ctrl.privilege = info->ctrl.privilege;
+	info->step_ctrl.enabled	  = 1;
+	info->trigger		  = addr;
+	arch_install_hw_breakpoint(bp);
+}
 
-	/* Update the mismatch field to enter/exit `single-step' mode */
-	if (!bp->overflow_handler && info->ctrl.mismatch != flag) {
-		info->ctrl.mismatch = flag;
-		write_wb_reg(ARM_BASE_BCR + idx, encode_ctrl_reg(info->ctrl) | 0x1);
-	}
+static void disable_single_step(struct perf_event *bp)
+{
+	arch_uninstall_hw_breakpoint(bp);
+	counter_arch_bp(bp)->step_ctrl.enabled = 0;
+	arch_install_hw_breakpoint(bp);
 }
 
 static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
 {
 	int i;
-	struct perf_event *bp, **slots = __get_cpu_var(wp_on_reg);
+	struct perf_event *wp, **slots;
 	struct arch_hw_breakpoint *info;
-	struct perf_event_attr attr;
+
+	slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 
 	/* Without a disassembler, we can only handle 1 watchpoint. */
 	BUG_ON(core_num_wrps > 1);
 
-	hw_breakpoint_init(&attr);
-	attr.bp_addr	= regs->ARM_pc & ~0x3;
-	attr.bp_len	= HW_BREAKPOINT_LEN_4;
-	attr.bp_type	= HW_BREAKPOINT_X;
-
 	for (i = 0; i < core_num_wrps; ++i) {
 		rcu_read_lock();
 
-		if (slots[i] == NULL) {
+		wp = slots[i];
+
+		if (wp == NULL) {
 			rcu_read_unlock();
 			continue;
 		}
@@ -658,24 +684,51 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
 		 * single watchpoint, we can set the trigger to the lowest
 		 * possible faulting address.
 		 */
-		info = counter_arch_bp(slots[i]);
-		info->trigger = slots[i]->attr.bp_addr;
+		info = counter_arch_bp(wp);
+		info->trigger = wp->attr.bp_addr;
 		pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
-		perf_bp_event(slots[i], regs);
+		perf_bp_event(wp, regs);
 
 		/*
 		 * If no overflow handler is present, insert a temporary
 		 * mismatch breakpoint so we can single-step over the
 		 * watchpoint trigger.
 		 */
-		if (!slots[i]->overflow_handler) {
-			bp = register_user_hw_breakpoint(&attr,
-							 wp_single_step_handler,
-							 current);
-			counter_arch_bp(bp)->suspended_wp = slots[i];
-			perf_event_disable(slots[i]);
-		}
+		if (!wp->overflow_handler)
+			enable_single_step(wp, instruction_pointer(regs));
+
+		rcu_read_unlock();
+	}
+}
 
+static void watchpoint_single_step_handler(unsigned long pc)
+{
+	int i;
+	struct perf_event *wp, **slots;
+	struct arch_hw_breakpoint *info;
+
+	slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
+
+	for (i = 0; i < core_num_reserved_brps; ++i) {
+		rcu_read_lock();
+
+		wp = slots[i];
+
+		if (wp == NULL)
+			goto unlock;
+
+		info = counter_arch_bp(wp);
+		if (!info->step_ctrl.enabled)
+			goto unlock;
+
+		/*
+		 * Restore the original watchpoint if we've completed the
+		 * single-step.
+		 */
+		if (info->trigger != pc)
+			disable_single_step(wp);
+
+unlock:
 		rcu_read_unlock();
 	}
 }
@@ -683,62 +736,69 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
 static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs)
 {
 	int i;
-	int mismatch;
 	u32 ctrl_reg, val, addr;
-	struct perf_event *bp, **slots = __get_cpu_var(bp_on_reg);
+	struct perf_event *bp, **slots;
 	struct arch_hw_breakpoint *info;
 	struct arch_hw_breakpoint_ctrl ctrl;
 
+	slots = (struct perf_event **)__get_cpu_var(bp_on_reg);
+
 	/* The exception entry code places the amended lr in the PC. */
 	addr = regs->ARM_pc;
 
+	/* Check the currently installed breakpoints first. */
 	for (i = 0; i < core_num_brps; ++i) {
 		rcu_read_lock();
 
 		bp = slots[i];
 
-		if (bp == NULL) {
-			rcu_read_unlock();
-			continue;
-		}
+		if (bp == NULL)
+			goto unlock;
 
-		mismatch = 0;
+		info = counter_arch_bp(bp);
 
 		/* Check if the breakpoint value matches. */
 		val = read_wb_reg(ARM_BASE_BVR + i);
 		if (val != (addr & ~0x3))
-			goto unlock;
+			goto mismatch;
 
 		/* Possible match, check the byte address select to confirm. */
 		ctrl_reg = read_wb_reg(ARM_BASE_BCR + i);
 		decode_ctrl_reg(ctrl_reg, &ctrl);
 		if ((1 << (addr & 0x3)) & ctrl.len) {
-			mismatch = 1;
-			info = counter_arch_bp(bp);
 			info->trigger = addr;
-		}
-
-unlock:
-		if ((mismatch && !info->ctrl.mismatch) || bp_is_single_step(bp)) {
 			pr_debug("breakpoint fired: address = 0x%x\n", addr);
 			perf_bp_event(bp, regs);
+			if (!bp->overflow_handler)
+				enable_single_step(bp, addr);
+			goto unlock;
 		}
 
-		update_mismatch_flag(i, mismatch);
+mismatch:
+		/* If we're stepping a breakpoint, it can now be restored. */
+		if (info->step_ctrl.enabled)
+			disable_single_step(bp);
+unlock:
 		rcu_read_unlock();
 	}
+
+	/* Handle any pending watchpoint single-step breakpoints. */
+	watchpoint_single_step_handler(addr);
 }
 
 /*
  * Called from either the Data Abort Handler [watchpoint] or the
- * Prefetch Abort Handler [breakpoint].
+ * Prefetch Abort Handler [breakpoint] with preemption disabled.
  */
 static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 				 struct pt_regs *regs)
 {
-	int ret = 1; /* Unhandled fault. */
+	int ret = 0;
 	u32 dscr;
 
+	/* We must be called with preemption disabled. */
+	WARN_ON(preemptible());
+
 	/* We only handle watchpoints and hardware breakpoints. */
 	ARM_DBG_READ(c1, 0, dscr);
 
@@ -753,25 +813,47 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 		watchpoint_handler(addr, regs);
 		break;
 	default:
-		goto out;
+		ret = 1; /* Unhandled fault. */
 	}
 
-	ret = 0;
-out:
+	/*
+	 * Re-enable preemption after it was disabled in the
+	 * low-level exception handling code.
+	 */
+	preempt_enable();
+
 	return ret;
 }
 
 /*
  * One-time initialisation.
  */
-static void __init reset_ctrl_regs(void *unused)
+static void reset_ctrl_regs(void *unused)
 {
 	int i;
 
+	/*
+	 * v7 debug contains save and restore registers so that debug state
+	 * can be maintained across low-power modes without leaving
+	 * the debug logic powered up. It is IMPLEMENTATION DEFINED whether
+	 * we can write to the debug registers out of reset, so we must
+	 * unlock the OS Lock Access Register to avoid taking undefined
+	 * instruction exceptions later on.
+	 */
+	if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) {
+		/*
+		 * Unconditionally clear the lock by writing a value
+		 * other than 0xC5ACCE55 to the access register.
+		 */
+		asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0));
+		isb();
+	}
+
 	if (enable_monitor_mode())
 		return;
 
-	for (i = 0; i < core_num_brps; ++i) {
+	/* We must also reset any reserved registers. */
+	for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) {
 		write_wb_reg(ARM_BASE_BCR + i, 0UL);
 		write_wb_reg(ARM_BASE_BVR + i, 0UL);
 	}
@@ -782,45 +864,57 @@ static void __init reset_ctrl_regs(void *unused)
 	}
 }
 
+static int __cpuinit dbg_reset_notify(struct notifier_block *self,
+				      unsigned long action, void *cpu)
+{
+	if (action == CPU_ONLINE)
+		smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata dbg_reset_nb = {
+	.notifier_call = dbg_reset_notify,
+};
+
 static int __init arch_hw_breakpoint_init(void)
 {
-	int ret = 0;
 	u32 dscr;
 
 	debug_arch = get_debug_arch();
 
 	if (debug_arch > ARM_DEBUG_ARCH_V7_ECP14) {
 		pr_info("debug architecture 0x%x unsupported.\n", debug_arch);
-		ret = -ENODEV;
-		goto out;
+		return 0;
 	}
 
 	/* Determine how many BRPs/WRPs are available. */
 	core_num_brps = get_num_brps();
+	core_num_reserved_brps = get_num_reserved_brps();
 	core_num_wrps = get_num_wrps();
 
 	pr_info("found %d breakpoint and %d watchpoint registers.\n",
-			core_num_brps, core_num_wrps);
+		core_num_brps + core_num_reserved_brps, core_num_wrps);
 
-	if (core_has_mismatch_bps())
-		pr_info("1 breakpoint reserved for watchpoint single-step.\n");
+	if (core_num_reserved_brps)
+		pr_info("%d breakpoint(s) reserved for watchpoint "
+				"single-step.\n", core_num_reserved_brps);
 
 	ARM_DBG_READ(c1, 0, dscr);
 	if (dscr & ARM_DSCR_HDBGEN) {
 		pr_warning("halting debug mode enabled. Assuming maximum "
 				"watchpoint size of 4 bytes.");
 	} else {
-		/* Work out the maximum supported watchpoint length. */
-		max_watchpoint_len = get_max_wp_len();
-		pr_info("maximum watchpoint size is %u bytes.\n",
-				max_watchpoint_len);
-
 		/*
 		 * Reset the breakpoint resources. We assume that a halting
 		 * debugger will leave the world in a nice state for us.
 		 */
 		smp_call_function(reset_ctrl_regs, NULL, 1);
 		reset_ctrl_regs(NULL);
+
+		/* Work out the maximum supported watchpoint length. */
+		max_watchpoint_len = get_max_wp_len();
+		pr_info("maximum watchpoint size is %u bytes.\n",
+				max_watchpoint_len);
 	}
 
 	/* Register debug fault handler. */
@@ -829,8 +923,9 @@ static int __init arch_hw_breakpoint_init(void)
 	hook_ifault_code(2, hw_breakpoint_pending, SIGTRAP, TRAP_HWBKPT,
 			"breakpoint debug exception");
 
-out:
-	return ret;
+	/* Register hotplug notifier. */
+	register_cpu_notifier(&dbg_reset_nb);
+	return 0;
 }
 arch_initcall(arch_hw_breakpoint_init);
 

+ 2 - 2
arch/arm/kernel/ptrace.c

@@ -1060,8 +1060,8 @@ static int ptrace_sethbpregs(struct task_struct *tsk, long num,
 			goto out;
 
 		if ((gen_type & implied_type) != gen_type) {
-				ret = -EINVAL;
-				goto out;
+			ret = -EINVAL;
+			goto out;
 		}
 
 		attr.bp_len	= gen_len;

+ 7 - 0
arch/arm/mach-omap2/pm24xx.c

@@ -30,6 +30,7 @@
 #include <linux/irq.h>
 #include <linux/time.h>
 #include <linux/gpio.h>
+#include <linux/console.h>
 
 #include <asm/mach/time.h>
 #include <asm/mach/irq.h>
@@ -118,6 +119,10 @@ static void omap2_enter_full_retention(void)
 	if (omap_irq_pending())
 		goto no_sleep;
 
+	/* Block console output in case it is on one of the OMAP UARTs */
+	if (try_acquire_console_sem())
+		goto no_sleep;
+
 	omap_uart_prepare_idle(0);
 	omap_uart_prepare_idle(1);
 	omap_uart_prepare_idle(2);
@@ -131,6 +136,8 @@ static void omap2_enter_full_retention(void)
 	omap_uart_resume_idle(1);
 	omap_uart_resume_idle(0);
 
+	release_console_sem();
+
 no_sleep:
 	if (omap2_pm_debug) {
 		unsigned long long tmp;

+ 10 - 0
arch/arm/mach-omap2/pm34xx.c

@@ -28,6 +28,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/console.h>
 
 #include <plat/sram.h>
 #include <plat/clockdomain.h>
@@ -385,6 +386,12 @@ void omap_sram_idle(void)
 		omap3_enable_io_chain();
 	}
 
+	/* Block console output in case it is on one of the OMAP UARTs */
+	if (per_next_state < PWRDM_POWER_ON ||
+	    core_next_state < PWRDM_POWER_ON)
+		if (try_acquire_console_sem())
+			goto console_still_active;
+
 	/* PER */
 	if (per_next_state < PWRDM_POWER_ON) {
 		omap_uart_prepare_idle(2);
@@ -463,6 +470,9 @@ void omap_sram_idle(void)
 		omap_uart_resume_idle(3);
 	}
 
+	release_console_sem();
+
+console_still_active:
 	/* Disable IO-PAD and IO-CHAIN wakeup */
 	if (omap3_has_io_wakeup() &&
 	    (per_next_state < PWRDM_POWER_ON ||

+ 6 - 1
arch/arm/mach-omap2/serial.c

@@ -27,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/serial_8250.h>
 #include <linux/pm_runtime.h>
+#include <linux/console.h>
 
 #ifdef CONFIG_SERIAL_OMAP
 #include <plat/omap-serial.h>
@@ -406,7 +407,7 @@ void omap_uart_resume_idle(int num)
 	struct omap_uart_state *uart;
 
 	list_for_each_entry(uart, &uart_list, node) {
-		if (num == uart->num) {
+		if (num == uart->num && uart->can_sleep) {
 			omap_uart_enable_clocks(uart);
 
 			/* Check for IO pad wakeup */
@@ -807,6 +808,8 @@ void __init omap_serial_init_port(int port)
 
 	oh->dev_attr = uart;
 
+	acquire_console_sem(); /* in case the earlycon is on the UART */
+
 	/*
 	 * Because of early UART probing, UART did not get idled
 	 * on init.  Now that omap_device is ready, ensure full idle
@@ -831,6 +834,8 @@ void __init omap_serial_init_port(int port)
 	omap_uart_block_sleep(uart);
 	uart->timeout = DEFAULT_TIMEOUT;
 
+	release_console_sem();
+
 	if ((cpu_is_omap34xx() && uart->padconf) ||
 	    (uart->wk_en && uart->wk_mask)) {
 		device_init_wakeup(&od->pdev.dev, true);

+ 4 - 4
arch/arm/mach-s3c2410/h1940-bluetooth.c

@@ -77,13 +77,13 @@ static int __devinit h1940bt_probe(struct platform_device *pdev)
 
 	/* Configures BT serial port GPIOs */
 	s3c_gpio_cfgpin(S3C2410_GPH(0), S3C2410_GPH0_nCTS0);
-	s3c_gpio_cfgpull(S3C2410_GPH(0), S3C_GPIO_PULL_NONE);
+	s3c_gpio_setpull(S3C2410_GPH(0), S3C_GPIO_PULL_NONE);
 	s3c_gpio_cfgpin(S3C2410_GPH(1), S3C2410_GPIO_OUTPUT);
-	s3c_gpio_cfgpull(S3C2410_GPH(1), S3C_GPIO_PULL_NONE);
+	s3c_gpio_setpull(S3C2410_GPH(1), S3C_GPIO_PULL_NONE);
 	s3c_gpio_cfgpin(S3C2410_GPH(2), S3C2410_GPH2_TXD0);
-	s3c_gpio_cfgpull(S3C2410_GPH(2), S3C_GPIO_PULL_NONE);
+	s3c_gpio_setpull(S3C2410_GPH(2), S3C_GPIO_PULL_NONE);
 	s3c_gpio_cfgpin(S3C2410_GPH(3), S3C2410_GPH3_RXD0);
-	s3c_gpio_cfgpull(S3C2410_GPH(3), S3C_GPIO_PULL_NONE);
+	s3c_gpio_setpull(S3C2410_GPH(3), S3C_GPIO_PULL_NONE);
 
 
 	rfk = rfkill_alloc(DRV_NAME, &pdev->dev, RFKILL_TYPE_BLUETOOTH,

+ 2 - 3
arch/arm/mach-s3c2416/irq.c

@@ -168,12 +168,11 @@ static struct irq_chip s3c2416_irq_dma = {
 
 static void s3c2416_irq_demux_uart3(unsigned int irq, struct irq_desc *desc)
 {
-	s3c2416_irq_demux(IRQ_S3C2443_UART3, 3);
+	s3c2416_irq_demux(IRQ_S3C2443_RX3, 3);
 }
 
 #define INTMSK_UART3	(1UL << (IRQ_S3C2443_UART3 - IRQ_EINT0))
-#define SUBMSK_UART3	(0xf << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0)))
-
+#define SUBMSK_UART3	(0x7 << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0)))
 
 static void s3c2416_irq_uart3_mask(unsigned int irqno)
 {

+ 2 - 3
arch/arm/mach-s3c2443/irq.c

@@ -166,12 +166,11 @@ static struct irq_chip s3c2443_irq_dma = {
 
 static void s3c2443_irq_demux_uart3(unsigned int irq, struct irq_desc *desc)
 {
-	s3c2443_irq_demux(IRQ_S3C2443_UART3, 3);
+	s3c2443_irq_demux(IRQ_S3C2443_RX3, 3);
 }
 
 #define INTMSK_UART3	(1UL << (IRQ_S3C2443_UART3 - IRQ_EINT0))
-#define SUBMSK_UART3	(0xf << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0)))
-
+#define SUBMSK_UART3	(0x7 << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0)))
 
 static void s3c2443_irq_uart3_mask(unsigned int irqno)
 {

+ 1 - 1
arch/arm/mach-s3c64xx/mach-mini6410.c

@@ -45,7 +45,7 @@
 
 #include <video/platform_lcd.h>
 
-#define UCON (S3C2410_UCON_DEFAULT | S3C2410_UCON_UCLK)
+#define UCON S3C2410_UCON_DEFAULT
 #define ULCON (S3C2410_LCON_CS8 | S3C2410_LCON_PNONE | S3C2410_LCON_STOPB)
 #define UFCON (S3C2410_UFCON_RXTRIG8 | S3C2410_UFCON_FIFOMODE)
 

+ 1 - 1
arch/arm/mach-s3c64xx/mach-real6410.c

@@ -46,7 +46,7 @@
 
 #include <video/platform_lcd.h>
 
-#define UCON (S3C2410_UCON_DEFAULT | S3C2410_UCON_UCLK)
+#define UCON S3C2410_UCON_DEFAULT
 #define ULCON (S3C2410_LCON_CS8 | S3C2410_LCON_PNONE | S3C2410_LCON_STOPB)
 #define UFCON (S3C2410_UFCON_RXTRIG8 | S3C2410_UFCON_FIFOMODE)
 

+ 1 - 0
arch/arm/mach-s5pv210/mach-smdkc110.c

@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/serial_core.h>
 #include <linux/i2c.h>
+#include <linux/sysdev.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>

+ 1 - 0
arch/arm/mach-s5pv210/mach-smdkv210.c

@@ -13,6 +13,7 @@
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/serial_core.h>
+#include <linux/sysdev.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>

+ 111 - 36
arch/arm/mach-shmobile/board-ap4evb.c

@@ -567,38 +567,127 @@ static struct platform_device *qhd_devices[] __initdata = {
 
 /* FSI */
 #define IRQ_FSI		evt2irq(0x1840)
+static int __fsi_set_rate(struct clk *clk, long rate, int enable)
+{
+	int ret = 0;
+
+	if (rate <= 0)
+		return ret;
+
+	if (enable) {
+		ret = clk_set_rate(clk, rate);
+		if (0 == ret)
+			ret = clk_enable(clk);
+	} else {
+		clk_disable(clk);
+	}
+
+	return ret;
+}
+
+static int __fsi_set_round_rate(struct clk *clk, long rate, int enable)
+{
+	return __fsi_set_rate(clk, clk_round_rate(clk, rate), enable);
+}
 
-static int fsi_set_rate(int is_porta, int rate)
+static int fsi_ak4642_set_rate(struct device *dev, int rate, int enable)
+{
+	struct clk *fsia_ick;
+	struct clk *fsiack;
+	int ret = -EIO;
+
+	fsia_ick = clk_get(dev, "icka");
+	if (IS_ERR(fsia_ick))
+		return PTR_ERR(fsia_ick);
+
+	/*
+	 * FSIACK is connected to AK4642,
+	 * and use external clock pin from it.
+	 * it is parent of fsia_ick now.
+	 */
+	fsiack = clk_get_parent(fsia_ick);
+	if (!fsiack)
+		goto fsia_ick_out;
+
+	/*
+	 * we get 1/1 divided clock by setting same rate to fsiack and fsia_ick
+	 *
+	 ** FIXME **
+	 * Because the freq_table of external clk (fsiack) are all 0,
+	 * the return value of clk_round_rate became 0.
+	 * So, it use __fsi_set_rate here.
+	 */
+	ret = __fsi_set_rate(fsiack, rate, enable);
+	if (ret < 0)
+		goto fsiack_out;
+
+	ret = __fsi_set_round_rate(fsia_ick, rate, enable);
+	if ((ret < 0) && enable)
+		__fsi_set_round_rate(fsiack, rate, 0); /* disable FSI ACK */
+
+fsiack_out:
+	clk_put(fsiack);
+
+fsia_ick_out:
+	clk_put(fsia_ick);
+
+	return 0;
+}
+
+static int fsi_hdmi_set_rate(struct device *dev, int rate, int enable)
 {
 	struct clk *fsib_clk;
 	struct clk *fdiv_clk = &sh7372_fsidivb_clk;
+	long fsib_rate = 0;
+	long fdiv_rate = 0;
+	int ackmd_bpfmd;
 	int ret;
 
-	/* set_rate is not needed if port A */
-	if (is_porta)
-		return 0;
-
-	fsib_clk = clk_get(NULL, "fsib_clk");
-	if (IS_ERR(fsib_clk))
-		return -EINVAL;
-
 	switch (rate) {
 	case 44100:
-		clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 11283000));
-		ret = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64;
+		fsib_rate	= rate * 256;
+		ackmd_bpfmd	= SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64;
 		break;
 	case 48000:
-		clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 85428000));
-		clk_set_rate(fdiv_clk, clk_round_rate(fdiv_clk, 12204000));
-		ret = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64;
+		fsib_rate	= 85428000; /* around 48kHz x 256 x 7 */
+		fdiv_rate	= rate * 256;
+		ackmd_bpfmd	= SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64;
 		break;
 	default:
 		pr_err("unsupported rate in FSI2 port B\n");
-		ret = -EINVAL;
-		break;
+		return -EINVAL;
 	}
 
+	/* FSI B setting */
+	fsib_clk = clk_get(dev, "ickb");
+	if (IS_ERR(fsib_clk))
+		return -EIO;
+
+	ret = __fsi_set_round_rate(fsib_clk, fsib_rate, enable);
 	clk_put(fsib_clk);
+	if (ret < 0)
+		return ret;
+
+	/* FSI DIV setting */
+	ret = __fsi_set_round_rate(fdiv_clk, fdiv_rate, enable);
+	if (ret < 0) {
+		/* disable FSI B */
+		if (enable)
+			__fsi_set_round_rate(fsib_clk, fsib_rate, 0);
+		return ret;
+	}
+
+	return ackmd_bpfmd;
+}
+
+static int fsi_set_rate(struct device *dev, int is_porta, int rate, int enable)
+{
+	int ret;
+
+	if (is_porta)
+		ret = fsi_ak4642_set_rate(dev, rate, enable);
+	else
+		ret = fsi_hdmi_set_rate(dev, rate, enable);
 
 	return ret;
 }
@@ -880,6 +969,11 @@ static int __init hdmi_init_pm_clock(void)
 		goto out;
 	}
 
+	ret = clk_enable(&sh7372_pllc2_clk);
+	if (ret < 0) {
+		pr_err("Cannot enable pllc2 clock\n");
+		goto out;
+	}
 	pr_debug("PLLC2 set frequency %lu\n", rate);
 
 	ret = clk_set_parent(hdmi_ick, &sh7372_pllc2_clk);
@@ -896,23 +990,11 @@ out:
 
 device_initcall(hdmi_init_pm_clock);
 
-#define FSIACK_DUMMY_RATE 48000
 static int __init fsi_init_pm_clock(void)
 {
 	struct clk *fsia_ick;
 	int ret;
 
-	/*
-	 * FSIACK is connected to AK4642,
-	 * and the rate is depend on playing sound rate.
-	 * So, set dummy rate (= 48k) here
-	 */
-	ret = clk_set_rate(&sh7372_fsiack_clk, FSIACK_DUMMY_RATE);
-	if (ret < 0) {
-		pr_err("Cannot set FSIACK dummy rate: %d\n", ret);
-		return ret;
-	}
-
 	fsia_ick = clk_get(&fsi_device.dev, "icka");
 	if (IS_ERR(fsia_ick)) {
 		ret = PTR_ERR(fsia_ick);
@@ -921,16 +1003,9 @@ static int __init fsi_init_pm_clock(void)
 	}
 
 	ret = clk_set_parent(fsia_ick, &sh7372_fsiack_clk);
-	if (ret < 0) {
-		pr_err("Cannot set FSI-A parent: %d\n", ret);
-		goto out;
-	}
-
-	ret = clk_set_rate(fsia_ick, FSIACK_DUMMY_RATE);
 	if (ret < 0)
-		pr_err("Cannot set FSI-A rate: %d\n", ret);
+		pr_err("Cannot set FSI-A parent: %d\n", ret);
 
-out:
 	clk_put(fsia_ick);
 
 	return ret;

+ 8 - 25
arch/arm/mach-shmobile/clock-sh7372.c

@@ -229,21 +229,13 @@ static int pllc2_set_rate(struct clk *clk, unsigned long rate)
 	if (idx < 0)
 		return idx;
 
-	if (rate == clk->parent->rate) {
-		pllc2_disable(clk);
-		return 0;
-	}
+	if (rate == clk->parent->rate)
+		return -EINVAL;
 
 	value = __raw_readl(PLLC2CR) & ~(0x3f << 24);
 
-	if (value & 0x80000000)
-		pllc2_disable(clk);
-
 	__raw_writel((value & ~0x80000000) | ((idx + 19) << 24), PLLC2CR);
 
-	if (value & 0x80000000)
-		return pllc2_enable(clk);
-
 	return 0;
 }
 
@@ -452,10 +444,8 @@ static int fsidiv_enable(struct clk *clk)
 	unsigned long value;
 
 	value  = __raw_readl(clk->mapping->base) >> 16;
-	if (value < 2) {
-		fsidiv_disable(clk);
-		return -ENOENT;
-	}
+	if (value < 2)
+		return -EIO;
 
 	__raw_writel((value << 16) | 0x3, clk->mapping->base);
 
@@ -466,17 +456,12 @@ static int fsidiv_set_rate(struct clk *clk, unsigned long rate)
 {
 	int idx;
 
-	if (clk->parent->rate == rate) {
-		fsidiv_disable(clk);
-		return 0;
-	}
-
 	idx = (clk->parent->rate / rate) & 0xffff;
 	if (idx < 2)
-		return -ENOENT;
+		return -EINVAL;
 
 	__raw_writel(idx << 16, clk->mapping->base);
-	return fsidiv_enable(clk);
+	return 0;
 }
 
 static struct clk_ops fsidiv_clk_ops = {
@@ -607,8 +592,6 @@ static struct clk_lookup lookups[] = {
 	CLKDEV_CON_ID("vck3_clk", &div6_clks[DIV6_VCK3]),
 	CLKDEV_CON_ID("fmsi_clk", &div6_clks[DIV6_FMSI]),
 	CLKDEV_CON_ID("fmso_clk", &div6_clks[DIV6_FMSO]),
-	CLKDEV_CON_ID("fsia_clk", &div6_reparent_clks[DIV6_FSIA]),
-	CLKDEV_CON_ID("fsib_clk", &div6_reparent_clks[DIV6_FSIB]),
 	CLKDEV_CON_ID("sub_clk", &div6_clks[DIV6_SUB]),
 	CLKDEV_CON_ID("spu_clk", &div6_clks[DIV6_SPU]),
 	CLKDEV_CON_ID("vou_clk", &div6_clks[DIV6_VOU]),
@@ -645,8 +628,8 @@ static struct clk_lookup lookups[] = {
 	CLKDEV_DEV_ID("sh_cmt.10", &mstp_clks[MSTP329]), /* CMT10 */
 	CLKDEV_DEV_ID("sh_fsi2", &mstp_clks[MSTP328]), /* FSI2 */
 	CLKDEV_DEV_ID("i2c-sh_mobile.1", &mstp_clks[MSTP323]), /* IIC1 */
-	CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP323]), /* USB0 */
-	CLKDEV_DEV_ID("r8a66597_udc.0", &mstp_clks[MSTP323]), /* USB0 */
+	CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP322]), /* USB0 */
+	CLKDEV_DEV_ID("r8a66597_udc.0", &mstp_clks[MSTP322]), /* USB0 */
 	CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[MSTP314]), /* SDHI0 */
 	CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[MSTP313]), /* SDHI1 */
 	CLKDEV_DEV_ID("sh_mmcif.0", &mstp_clks[MSTP312]), /* MMC */

+ 3 - 0
arch/arm/plat-pxa/include/plat/sdhci.h

@@ -17,6 +17,9 @@
 /* Require clock free running */
 #define PXA_FLAG_DISABLE_CLOCK_GATING (1<<0)
 
+/* Board design supports 8-bit data on SD/SDIO BUS */
+#define PXA_FLAG_SD_8_BIT_CAPABLE_SLOT (1<<2)
+
 /*
  * struct pxa_sdhci_platdata() - Platform device data for PXA SDHCI
  * @max_speed: the maximum speed supported

+ 3 - 3
arch/arm/plat-s3c24xx/spi-bus0-gpe11_12_13.c

@@ -29,8 +29,8 @@ void s3c24xx_spi_gpiocfg_bus0_gpe11_12_13(struct s3c2410_spi_info *spi,
 	} else {
 		s3c_gpio_cfgpin(S3C2410_GPE(13), S3C2410_GPIO_INPUT);
 		s3c_gpio_cfgpin(S3C2410_GPE(11), S3C2410_GPIO_INPUT);
-		s3c_gpio_cfgpull(S3C2410_GPE(11), S3C_GPIO_PULL_NONE);
-		s3c_gpio_cfgpull(S3C2410_GPE(12), S3C_GPIO_PULL_NONE);
-		s3c_gpio_cfgpull(S3C2410_GPE(13), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPE(11), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPE(12), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPE(13), S3C_GPIO_PULL_NONE);
 	}
 }

+ 3 - 3
arch/arm/plat-s3c24xx/spi-bus1-gpd8_9_10.c

@@ -31,8 +31,8 @@ void s3c24xx_spi_gpiocfg_bus1_gpd8_9_10(struct s3c2410_spi_info *spi,
 	} else {
 		s3c_gpio_cfgpin(S3C2410_GPD(8), S3C2410_GPIO_INPUT);
 		s3c_gpio_cfgpin(S3C2410_GPD(9), S3C2410_GPIO_INPUT);
-		s3c_gpio_cfgpull(S3C2410_GPD(10), S3C_GPIO_PULL_NONE);
-		s3c_gpio_cfgpull(S3C2410_GPD(9), S3C_GPIO_PULL_NONE);
-		s3c_gpio_cfgpull(S3C2410_GPD(8), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPD(10), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPD(9), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPD(8), S3C_GPIO_PULL_NONE);
 	}
 }

+ 3 - 3
arch/arm/plat-s3c24xx/spi-bus1-gpg5_6_7.c

@@ -29,8 +29,8 @@ void s3c24xx_spi_gpiocfg_bus1_gpg5_6_7(struct s3c2410_spi_info *spi,
 	} else {
 		s3c_gpio_cfgpin(S3C2410_GPG(7), S3C2410_GPIO_INPUT);
 		s3c_gpio_cfgpin(S3C2410_GPG(5), S3C2410_GPIO_INPUT);
-		s3c_gpio_cfgpull(S3C2410_GPG(5), S3C_GPIO_PULL_NONE);
-		s3c_gpio_cfgpull(S3C2410_GPG(6), S3C_GPIO_PULL_NONE);
-		s3c_gpio_cfgpull(S3C2410_GPG(7), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPG(5), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPG(6), S3C_GPIO_PULL_NONE);
+		s3c_gpio_setpull(S3C2410_GPG(7), S3C_GPIO_PULL_NONE);
 	}
 }

+ 1 - 1
arch/powerpc/mm/pgtable.c

@@ -92,7 +92,7 @@ static void pte_free_rcu_callback(struct rcu_head *head)
 
 static void pte_free_submit(struct pte_freelist_batch *batch)
 {
-	call_rcu(&batch->rcu, pte_free_rcu_callback);
+	call_rcu_sched(&batch->rcu, pte_free_rcu_callback);
 }
 
 void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)

+ 4 - 6
arch/s390/kernel/nmi.c

@@ -95,7 +95,6 @@ EXPORT_SYMBOL_GPL(s390_handle_mcck);
 static int notrace s390_revalidate_registers(struct mci *mci)
 {
 	int kill_task;
-	u64 tmpclock;
 	u64 zero;
 	void *fpt_save_area, *fpt_creg_save_area;
 
@@ -214,11 +213,10 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 			: "0", "cc");
 #endif
 	/* Revalidate clock comparator register */
-	asm volatile(
-		"	stck	0(%1)\n"
-		"	sckc	0(%1)"
-		: "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory");
-
+	if (S390_lowcore.clock_comparator == -1)
+		set_clock_comparator(S390_lowcore.mcck_clock);
+	else
+		set_clock_comparator(S390_lowcore.clock_comparator);
 	/* Check if old PSW is valid */
 	if (!mci->wp)
 		/*

+ 9 - 5
arch/s390/lib/delay.c

@@ -29,17 +29,21 @@ static void __udelay_disabled(unsigned long long usecs)
 {
 	unsigned long mask, cr0, cr0_saved;
 	u64 clock_saved;
+	u64 end;
 
+	mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
+	end = get_clock() + (usecs << 12);
 	clock_saved = local_tick_disable();
-	set_clock_comparator(get_clock() + (usecs << 12));
 	__ctl_store(cr0_saved, 0, 0);
 	cr0 = (cr0_saved & 0xffff00e0) | 0x00000800;
 	__ctl_load(cr0 , 0, 0);
-	mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
 	lockdep_off();
-	trace_hardirqs_on();
-	__load_psw_mask(mask);
-	local_irq_disable();
+	do {
+		set_clock_comparator(end);
+		trace_hardirqs_on();
+		__load_psw_mask(mask);
+		local_irq_disable();
+	} while (get_clock() < end);
 	lockdep_on();
 	__ctl_load(cr0_saved, 0, 0);
 	local_tick_enable(clock_saved);

+ 1 - 1
arch/x86/Kconfig

@@ -21,7 +21,7 @@ config X86
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
-	select HAVE_PERF_EVENTS if (!M386 && !M486)
+	select HAVE_PERF_EVENTS
 	select HAVE_IRQ_WORK
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES

+ 1 - 1
arch/x86/include/asm/msr-index.h

@@ -128,7 +128,7 @@
 #define FAM10H_MMIO_CONF_ENABLE		(1<<0)
 #define FAM10H_MMIO_CONF_BUSRANGE_MASK	0xf
 #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
-#define FAM10H_MMIO_CONF_BASE_MASK	0xfffffff
+#define FAM10H_MMIO_CONF_BASE_MASK	0xfffffffULL
 #define FAM10H_MMIO_CONF_BASE_SHIFT	20
 #define MSR_FAM10H_NODE_ID		0xc001100c
 

+ 5 - 5
arch/x86/include/asm/paravirt.h

@@ -824,27 +824,27 @@ static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
 #define __PV_IS_CALLEE_SAVE(func)			\
 	((struct paravirt_callee_save) { func })
 
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long arch_local_save_flags(void)
 {
 	return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
 }
 
-static inline void arch_local_irq_restore(unsigned long f)
+static inline notrace void arch_local_irq_restore(unsigned long f)
 {
 	PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
 }
 
-static inline void arch_local_irq_disable(void)
+static inline notrace void arch_local_irq_disable(void)
 {
 	PVOP_VCALLEE0(pv_irq_ops.irq_disable);
 }
 
-static inline void arch_local_irq_enable(void)
+static inline notrace void arch_local_irq_enable(void)
 {
 	PVOP_VCALLEE0(pv_irq_ops.irq_enable);
 }
 
-static inline unsigned long arch_local_irq_save(void)
+static inline notrace unsigned long arch_local_irq_save(void)
 {
 	unsigned long f;
 

+ 4 - 0
arch/x86/include/asm/uv/uv_hub.h

@@ -199,6 +199,8 @@ union uvh_apicid {
 #define UVH_APICID		0x002D0E00L
 #define UV_APIC_PNODE_SHIFT	6
 
+#define UV_APICID_HIBIT_MASK	0xffff0000
+
 /* Local Bus from cpu's perspective */
 #define LOCAL_BUS_BASE		0x1c00000
 #define LOCAL_BUS_SIZE		(4 * 1024 * 1024)
@@ -491,8 +493,10 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
 	}
 }
 
+extern unsigned int uv_apicid_hibits;
 static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode)
 {
+	apicid |= uv_apicid_hibits;
 	return (1UL << UVH_IPI_INT_SEND_SHFT) |
 			((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
 			(mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |

+ 18 - 1
arch/x86/include/asm/uv/uv_mmrs.h

@@ -5,7 +5,7 @@
  *
  * SGI UV MMR definitions
  *
- * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
  */
 
 #ifndef _ASM_X86_UV_UV_MMRS_H
@@ -753,6 +753,23 @@ union uvh_lb_bau_sb_descriptor_base_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                   UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK                     */
+/* ========================================================================= */
+#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL
+#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x009f0
+
+#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0
+#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL
+
+union uvh_lb_target_physical_apic_id_mask_u {
+	unsigned long v;
+	struct uvh_lb_target_physical_apic_id_mask_s {
+		unsigned long bit_enables : 32;  /* RW */
+		unsigned long rsvd_32_63  : 32;  /*    */
+	} s;
+};
+
 /* ========================================================================= */
 /*                               UVH_NODE_ID                                 */
 /* ========================================================================= */

+ 4 - 3
arch/x86/kernel/apic/hw_nmi.c

@@ -17,15 +17,16 @@
 #include <linux/nmi.h>
 #include <linux/module.h>
 
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
 u64 hw_nmi_get_sample_period(void)
 {
 	return (u64)(cpu_khz) * 1000 * 60;
 }
 
 #ifdef ARCH_HAS_NMI_WATCHDOG
+
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+
 void arch_trigger_all_cpu_backtrace(void)
 {
 	int i;

+ 23 - 2
arch/x86/kernel/apic/x2apic_uv_x.c

@@ -44,6 +44,8 @@ static u64 gru_start_paddr, gru_end_paddr;
 static union uvh_apicid uvh_apicid;
 int uv_min_hub_revision_id;
 EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
+unsigned int uv_apicid_hibits;
+EXPORT_SYMBOL_GPL(uv_apicid_hibits);
 static DEFINE_SPINLOCK(uv_nmi_lock);
 
 static inline bool is_GRU_range(u64 start, u64 end)
@@ -85,6 +87,23 @@ static void __init early_get_apic_pnode_shift(void)
 		uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
 }
 
+/*
+ * Add an extra bit as dictated by bios to the destination apicid of
+ * interrupts potentially passing through the UV HUB.  This prevents
+ * a deadlock between interrupts and IO port operations.
+ */
+static void __init uv_set_apicid_hibit(void)
+{
+	union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
+	unsigned long *mmr;
+
+	mmr = early_ioremap(UV_LOCAL_MMR_BASE |
+		UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr));
+	apicid_mask.v = *mmr;
+	early_iounmap(mmr, sizeof(*mmr));
+	uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
+}
+
 static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
 	int nodeid;
@@ -102,6 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 			__get_cpu_var(x2apic_extra_bits) =
 				nodeid << (uvh_apicid.s.pnode_shift - 1);
 			uv_system_type = UV_NON_UNIQUE_APIC;
+			uv_set_apicid_hibit();
 			return 1;
 		}
 	}
@@ -155,6 +175,7 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
 	int pnode;
 
 	pnode = uv_apicid_to_pnode(phys_apicid);
+	phys_apicid |= uv_apicid_hibits;
 	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
 	    (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
 	    ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
@@ -236,7 +257,7 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
 	int cpu = cpumask_first(cpumask);
 
 	if ((unsigned)cpu < nr_cpu_ids)
-		return per_cpu(x86_cpu_to_apicid, cpu);
+		return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
 	else
 		return BAD_APICID;
 }
@@ -255,7 +276,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 		if (cpumask_test_cpu(cpu, cpu_online_mask))
 			break;
 	}
-	return per_cpu(x86_cpu_to_apicid, cpu);
+	return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
 }
 
 static unsigned int x2apic_get_apic_id(unsigned long x)

+ 20 - 0
arch/x86/kernel/cpu/perf_event.c

@@ -381,6 +381,20 @@ static void release_pmc_hardware(void) {}
 
 #endif
 
+static bool check_hw_exists(void)
+{
+	u64 val, val_new = 0;
+	int ret = 0;
+
+	val = 0xabcdUL;
+	ret |= checking_wrmsrl(x86_pmu.perfctr, val);
+	ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
+	if (ret || val != val_new)
+		return false;
+
+	return true;
+}
+
 static void reserve_ds_buffers(void);
 static void release_ds_buffers(void);
 
@@ -1372,6 +1386,12 @@ void __init init_hw_perf_events(void)
 
 	pmu_check_apic();
 
+	/* sanity check that the hardware exists or is emulated */
+	if (!check_hw_exists()) {
+		pr_cont("Broken PMU hardware detected, software events only.\n");
+		return;
+	}
+
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
 	if (x86_pmu.quirks)

+ 1 - 1
arch/x86/kernel/entry_32.S

@@ -395,7 +395,7 @@ sysenter_past_esp:
 	 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
 	 * pushed above; +8 corresponds to copy_thread's esp0 setting.
 	 */
-	pushl_cfi (TI_sysenter_return-THREAD_SIZE_asm+8+4*4)(%esp)
+	pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
 	CFI_REL_OFFSET eip, 0
 
 	pushl_cfi %eax

+ 2 - 0
arch/x86/kernel/entry_64.S

@@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64)
 	.endm
 
 /* save partial stack frame */
+	.pushsection .kprobes.text, "ax"
 ENTRY(save_args)
 	XCPT_FRAME
 	cld
@@ -334,6 +335,7 @@ ENTRY(save_args)
 	ret
 	CFI_ENDPROC
 END(save_args)
+	.popsection
 
 ENTRY(save_rest)
 	PARTIAL_FRAME 1 REST_SKIP+8

+ 4 - 0
arch/x86/kernel/hw_breakpoint.c

@@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 	dr6_p = (unsigned long *)ERR_PTR(args->err);
 	dr6 = *dr6_p;
 
+	/* If it's a single step, TRAP bits are random */
+	if (dr6 & DR_STEP)
+		return NOTIFY_DONE;
+
 	/* Do an early return if no trap bits are set in DR6 */
 	if ((dr6 & DR_TRAP_BITS) == 0)
 		return NOTIFY_DONE;

+ 30 - 34
arch/x86/kernel/mmconf-fam10h_64.c

@@ -25,7 +25,6 @@ struct pci_hostbridge_probe {
 };
 
 static u64 __cpuinitdata fam10h_pci_mmconf_base;
-static int __cpuinitdata fam10h_pci_mmconf_base_status;
 
 static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = {
 	{ 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 },
@@ -44,10 +43,12 @@ static int __cpuinit cmp_range(const void *x1, const void *x2)
 	return start1 - start2;
 }
 
-/*[47:0] */
-/* need to avoid (0xfd<<32) and (0xfe<<32), ht used space */
+#define MMCONF_UNIT (1ULL << FAM10H_MMIO_CONF_BASE_SHIFT)
+#define MMCONF_MASK (~(MMCONF_UNIT - 1))
+#define MMCONF_SIZE (MMCONF_UNIT << 8)
+/* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */
 #define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32)
-#define BASE_VALID(b) ((b != (0xfdULL << 32)) && (b != (0xfeULL << 32)))
+#define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40))
 static void __cpuinit get_fam10h_pci_mmconf_base(void)
 {
 	int i;
@@ -64,12 +65,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
 	struct range range[8];
 
 	/* only try to get setting from BSP */
-	/* -1 or 1 */
-	if (fam10h_pci_mmconf_base_status)
+	if (fam10h_pci_mmconf_base)
 		return;
 
 	if (!early_pci_allowed())
-		goto fail;
+		return;
 
 	found = 0;
 	for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
@@ -91,7 +91,7 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
 	}
 
 	if (!found)
-		goto fail;
+		return;
 
 	/* SYS_CFG */
 	address = MSR_K8_SYSCFG;
@@ -99,16 +99,16 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
 
 	/* TOP_MEM2 is not enabled? */
 	if (!(val & (1<<21))) {
-		tom2 = 0;
+		tom2 = 1ULL << 32;
 	} else {
 		/* TOP_MEM2 */
 		address = MSR_K8_TOP_MEM2;
 		rdmsrl(address, val);
-		tom2 = val & (0xffffULL<<32);
+		tom2 = max(val & 0xffffff800000ULL, 1ULL << 32);
 	}
 
 	if (base <= tom2)
-		base = tom2 + (1ULL<<32);
+		base = (tom2 + 2 * MMCONF_UNIT - 1) & MMCONF_MASK;
 
 	/*
 	 * need to check if the range is in the high mmio range that is
@@ -123,11 +123,11 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
 		if (!(reg & 3))
 			continue;
 
-		start = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/
+		start = (u64)(reg & 0xffffff00) << 8; /* 39:16 on 31:8*/
 		reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3));
-		end = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/
+		end = ((u64)(reg & 0xffffff00) << 8) | 0xffff; /* 39:16 on 31:8*/
 
-		if (!end)
+		if (end < tom2)
 			continue;
 
 		range[hi_mmio_num].start = start;
@@ -143,32 +143,27 @@ static void __cpuinit get_fam10h_pci_mmconf_base(void)
 
 	if (range[hi_mmio_num - 1].end < base)
 		goto out;
-	if (range[0].start > base)
+	if (range[0].start > base + MMCONF_SIZE)
 		goto out;
 
 	/* need to find one window */
-	base = range[0].start - (1ULL << 32);
+	base = (range[0].start & MMCONF_MASK) - MMCONF_UNIT;
 	if ((base > tom2) && BASE_VALID(base))
 		goto out;
-	base = range[hi_mmio_num - 1].end + (1ULL << 32);
-	if ((base > tom2) && BASE_VALID(base))
+	base = (range[hi_mmio_num - 1].end + MMCONF_UNIT) & MMCONF_MASK;
+	if (BASE_VALID(base))
 		goto out;
 	/* need to find window between ranges */
-	if (hi_mmio_num > 1)
-	for (i = 0; i < hi_mmio_num - 1; i++) {
-		if (range[i + 1].start > (range[i].end + (1ULL << 32))) {
-			base = range[i].end + (1ULL << 32);
-			if ((base > tom2) && BASE_VALID(base))
-				goto out;
-		}
+	for (i = 1; i < hi_mmio_num; i++) {
+		base = (range[i - 1].end + MMCONF_UNIT) & MMCONF_MASK;
+		val = range[i].start & MMCONF_MASK;
+		if (val >= base + MMCONF_SIZE && BASE_VALID(base))
+			goto out;
 	}
-
-fail:
-	fam10h_pci_mmconf_base_status = -1;
 	return;
+
 out:
 	fam10h_pci_mmconf_base = base;
-	fam10h_pci_mmconf_base_status = 1;
 }
 
 void __cpuinit fam10h_check_enable_mmcfg(void)
@@ -190,11 +185,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
 
 		/* only trust the one handle 256 buses, if acpi=off */
 		if (!acpi_pci_disabled || busnbits >= 8) {
-			u64 base;
-			base = val & (0xffffULL << 32);
-			if (fam10h_pci_mmconf_base_status <= 0) {
+			u64 base = val & MMCONF_MASK;
+
+			if (!fam10h_pci_mmconf_base) {
 				fam10h_pci_mmconf_base = base;
-				fam10h_pci_mmconf_base_status = 1;
 				return;
 			} else if (fam10h_pci_mmconf_base ==  base)
 				return;
@@ -206,8 +200,10 @@ void __cpuinit fam10h_check_enable_mmcfg(void)
 	 * with 256 buses
 	 */
 	get_fam10h_pci_mmconf_base();
-	if (fam10h_pci_mmconf_base_status <= 0)
+	if (!fam10h_pci_mmconf_base) {
+		pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF;
 		return;
+	}
 
 	printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n");
 	val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) |

+ 3 - 2
arch/x86/mm/tlb.c

@@ -223,7 +223,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 
 static void __cpuinit calculate_tlb_offset(void)
 {
-	int cpu, node, nr_node_vecs;
+	int cpu, node, nr_node_vecs, idx = 0;
 	/*
 	 * we are changing tlb_vector_offset for each CPU in runtime, but this
 	 * will not cause inconsistency, as the write is atomic under X86. we
@@ -239,7 +239,7 @@ static void __cpuinit calculate_tlb_offset(void)
 		nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
 
 	for_each_online_node(node) {
-		int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) *
+		int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) *
 			nr_node_vecs;
 		int cpu_offset = 0;
 		for_each_cpu(cpu, cpumask_of_node(node)) {
@@ -248,6 +248,7 @@ static void __cpuinit calculate_tlb_offset(void)
 			cpu_offset++;
 			cpu_offset = cpu_offset % nr_node_vecs;
 		}
+		idx++;
 	}
 }
 

+ 1 - 1
arch/x86/platform/uv/tlb_uv.c

@@ -1455,7 +1455,7 @@ static void __init uv_init_uvhub(int uvhub, int vector)
 	 * the below initialization can't be in firmware because the
 	 * messaging IRQ will be determined by the OS
 	 */
-	apicid = uvhub_to_first_apicid(uvhub);
+	apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
 	uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
 				      ((apicid << 32) | vector));
 }

+ 3 - 1
arch/x86/platform/uv/uv_time.c

@@ -89,6 +89,7 @@ static void uv_rtc_send_IPI(int cpu)
 
 	apicid = cpu_physical_id(cpu);
 	pnode = uv_apicid_to_pnode(apicid);
+	apicid |= uv_apicid_hibits;
 	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
 	      (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
 	      (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
@@ -107,6 +108,7 @@ static int uv_intr_pending(int pnode)
 static int uv_setup_intr(int cpu, u64 expires)
 {
 	u64 val;
+	unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits;
 	int pnode = uv_cpu_to_pnode(cpu);
 
 	uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG,
@@ -117,7 +119,7 @@ static int uv_setup_intr(int cpu, u64 expires)
 		UVH_EVENT_OCCURRED0_RTC1_MASK);
 
 	val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
-		((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
+		((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
 
 	/* Set configuration */
 	uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val);

+ 1 - 1
block/blk-throttle.c

@@ -645,7 +645,7 @@ static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg,
 {
 	unsigned int nr_reads = 0, nr_writes = 0;
 	unsigned int max_nr_reads = throtl_grp_quantum*3/4;
-	unsigned int max_nr_writes = throtl_grp_quantum - nr_reads;
+	unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads;
 	struct bio *bio;
 
 	/* Try to dispatch 75% READS and 25% WRITES */

+ 1 - 1
drivers/block/amiflop.c

@@ -1341,7 +1341,7 @@ static struct request *set_next_request(void)
 {
 	struct request_queue *q;
 	int cnt = FD_MAX_UNITS;
-	struct request *rq;
+	struct request *rq = NULL;
 
 	/* Find next queue we can dispatch from */
 	fdc_queue = fdc_queue + 1;

+ 1 - 1
drivers/block/ataflop.c

@@ -1399,7 +1399,7 @@ static struct request *set_next_request(void)
 {
 	struct request_queue *q;
 	int old_pos = fdc_queue;
-	struct request *rq;
+	struct request *rq = NULL;
 
 	do {
 		q = unit[fdc_queue].disk->queue;

+ 1 - 2
drivers/block/cciss.c

@@ -66,6 +66,7 @@ MODULE_VERSION("3.6.26");
 MODULE_LICENSE("GPL");
 
 static DEFINE_MUTEX(cciss_mutex);
+static struct proc_dir_entry *proc_cciss;
 
 #include "cciss_cmd.h"
 #include "cciss.h"
@@ -363,8 +364,6 @@ static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
 #define ENG_GIG_FACTOR (ENG_GIG/512)
 #define ENGAGE_SCSI	"engage scsi"
 
-static struct proc_dir_entry *proc_cciss;
-
 static void cciss_seq_show_header(struct seq_file *seq)
 {
 	ctlr_info_t *h = seq->private;

+ 33 - 22
drivers/block/xen-blkfront.c

@@ -65,7 +65,7 @@ enum blkif_state {
 
 struct blk_shadow {
 	struct blkif_request req;
-	unsigned long request;
+	struct request *request;
 	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
@@ -136,7 +136,7 @@ static void add_id_to_freelist(struct blkfront_info *info,
 			       unsigned long id)
 {
 	info->shadow[id].req.id  = info->shadow_free;
-	info->shadow[id].request = 0;
+	info->shadow[id].request = NULL;
 	info->shadow_free = id;
 }
 
@@ -245,14 +245,11 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
 }
 
 /*
- * blkif_queue_request
+ * Generate a Xen blkfront IO request from a blk layer request.  Reads
+ * and writes are handled as expected.  Since we lack a loose flush
+ * request, we map flushes into a full ordered barrier.
  *
- * request block io
- *
- * id: for guest use only.
- * operation: BLKIF_OP_{READ,WRITE,PROBE}
- * buffer: buffer to read/write into. this should be a
- *   virtual address in the guest os.
+ * @req: a request struct
  */
 static int blkif_queue_request(struct request *req)
 {
@@ -281,7 +278,7 @@ static int blkif_queue_request(struct request *req)
 	/* Fill out a communications ring structure. */
 	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
 	id = get_id_from_freelist(info);
-	info->shadow[id].request = (unsigned long)req;
+	info->shadow[id].request = req;
 
 	ring_req->id = id;
 	ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
@@ -290,6 +287,18 @@ static int blkif_queue_request(struct request *req)
 	ring_req->operation = rq_data_dir(req) ?
 		BLKIF_OP_WRITE : BLKIF_OP_READ;
 
+	if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
+		/*
+		 * Ideally we could just do an unordered
+		 * flush-to-disk, but all we have is a full write
+		 * barrier at the moment.  However, a barrier write is
+		 * a superset of FUA, so we can implement it the same
+		 * way.  (It's also a FLUSH+FUA, since it is
+		 * guaranteed ordered WRT previous writes.)
+		 */
+		ring_req->operation = BLKIF_OP_WRITE_BARRIER;
+	}
+
 	ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
 	BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
 
@@ -634,7 +643,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 
 		bret = RING_GET_RESPONSE(&info->ring, i);
 		id   = bret->id;
-		req  = (struct request *)info->shadow[id].request;
+		req  = info->shadow[id].request;
 
 		blkif_completion(&info->shadow[id]);
 
@@ -647,6 +656,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 				printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
 				       info->gd->disk_name);
 				error = -EOPNOTSUPP;
+			}
+			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
+				     info->shadow[id].req.nr_segments == 0)) {
+				printk(KERN_WARNING "blkfront: %s: empty write barrier op failed\n",
+				       info->gd->disk_name);
+				error = -EOPNOTSUPP;
+			}
+			if (unlikely(error)) {
+				if (error == -EOPNOTSUPP)
+					error = 0;
 				info->feature_flush = 0;
 				xlvbd_flush(info);
 			}
@@ -899,7 +918,7 @@ static int blkif_recover(struct blkfront_info *info)
 	/* Stage 3: Find pending requests and requeue them. */
 	for (i = 0; i < BLK_RING_SIZE; i++) {
 		/* Not in use? */
-		if (copy[i].request == 0)
+		if (!copy[i].request)
 			continue;
 
 		/* Grab a request slot and copy shadow state into it. */
@@ -916,9 +935,7 @@ static int blkif_recover(struct blkfront_info *info)
 				req->seg[j].gref,
 				info->xbdev->otherend_id,
 				pfn_to_mfn(info->shadow[req->id].frame[j]),
-				rq_data_dir(
-					(struct request *)
-					info->shadow[req->id].request));
+				rq_data_dir(info->shadow[req->id].request));
 		info->shadow[req->id].req = *req;
 
 		info->ring.req_prod_pvt++;
@@ -1067,14 +1084,8 @@ static void blkfront_connect(struct blkfront_info *info)
 	 */
 	info->feature_flush = 0;
 
-	/*
-	 * The driver doesn't properly handled empty flushes, so
-	 * lets disable barrier support for now.
-	 */
-#if 0
 	if (!err && barrier)
-		info->feature_flush = REQ_FLUSH;
-#endif
+		info->feature_flush = REQ_FLUSH | REQ_FUA;
 
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {

+ 24 - 0
drivers/char/tpm/tpm_tis.c

@@ -25,6 +25,7 @@
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/wait.h>
+#include <linux/acpi.h>
 #include "tpm.h"
 
 #define TPM_HEADER_SIZE 10
@@ -78,6 +79,26 @@ enum tis_defaults {
 static LIST_HEAD(tis_chips);
 static DEFINE_SPINLOCK(tis_lock);
 
+#ifdef CONFIG_ACPI
+static int is_itpm(struct pnp_dev *dev)
+{
+	struct acpi_device *acpi = pnp_acpi_device(dev);
+	struct acpi_hardware_id *id;
+
+	list_for_each_entry(id, &acpi->pnp.ids, list) {
+		if (!strcmp("INTC0102", id->id))
+			return 1;
+	}
+
+	return 0;
+}
+#else
+static int is_itpm(struct pnp_dev *dev)
+{
+	return 0;
+}
+#endif
+
 static int check_locality(struct tpm_chip *chip, int l)
 {
 	if ((ioread8(chip->vendor.iobase + TPM_ACCESS(l)) &
@@ -472,6 +493,9 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
 		 "1.2 TPM (device-id 0x%X, rev-id %d)\n",
 		 vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0)));
 
+	if (is_itpm(to_pnp_dev(dev)))
+		itpm = 1;
+
 	if (itpm)
 		dev_info(dev, "Intel iTPM workaround enabled\n");
 

+ 4 - 4
drivers/edac/Makefile

@@ -10,16 +10,16 @@ obj-$(CONFIG_EDAC)			:= edac_stub.o
 obj-$(CONFIG_EDAC_MM_EDAC)		+= edac_core.o
 obj-$(CONFIG_EDAC_MCE)			+= edac_mce.o
 
-edac_core-objs	:= edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o
-edac_core-objs	+= edac_module.o edac_device_sysfs.o
+edac_core-y	:= edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o
+edac_core-y	+= edac_module.o edac_device_sysfs.o
 
 ifdef CONFIG_PCI
-edac_core-objs	+= edac_pci.o edac_pci_sysfs.o
+edac_core-y	+= edac_pci.o edac_pci_sysfs.o
 endif
 
 obj-$(CONFIG_EDAC_MCE_INJ)		+= mce_amd_inj.o
 
-edac_mce_amd-objs			:= mce_amd.o
+edac_mce_amd-y				:= mce_amd.o
 obj-$(CONFIG_EDAC_DECODE_MCE)		+= edac_mce_amd.o
 
 obj-$(CONFIG_EDAC_AMD76X)		+= amd76x_edac.o

+ 1 - 1
drivers/edac/mce_amd_inj.c

@@ -139,7 +139,7 @@ static int __init edac_init_mce_inject(void)
 	return 0;
 
 err_sysfs_create:
-	while (i-- >= 0)
+	while (--i >= 0)
 		sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr);
 
 	kobject_del(mce_kobj);

+ 90 - 70
drivers/firewire/net.c

@@ -7,6 +7,7 @@
  */
 
 #include <linux/bug.h>
+#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/firewire.h>
 #include <linux/firewire-constants.h>
@@ -26,8 +27,14 @@
 #include <asm/unaligned.h>
 #include <net/arp.h>
 
-#define FWNET_MAX_FRAGMENTS	25	/* arbitrary limit */
-#define FWNET_ISO_PAGE_COUNT	(PAGE_SIZE < 16 * 1024 ? 4 : 2)
+/* rx limits */
+#define FWNET_MAX_FRAGMENTS		30 /* arbitrary, > TX queue depth */
+#define FWNET_ISO_PAGE_COUNT		(PAGE_SIZE < 16*1024 ? 4 : 2)
+
+/* tx limits */
+#define FWNET_MAX_QUEUED_DATAGRAMS	20 /* < 64 = number of tlabels */
+#define FWNET_MIN_QUEUED_DATAGRAMS	10 /* should keep AT DMA busy enough */
+#define FWNET_TX_QUEUE_LEN		FWNET_MAX_QUEUED_DATAGRAMS /* ? */
 
 #define IEEE1394_BROADCAST_CHANNEL	31
 #define IEEE1394_ALL_NODES		(0xffc0 | 0x003f)
@@ -169,15 +176,8 @@ struct fwnet_device {
 	struct fw_address_handler handler;
 	u64 local_fifo;
 
-	/* List of packets to be sent */
-	struct list_head packet_list;
-	/*
-	 * List of packets that were broadcasted.  When we get an ISO interrupt
-	 * one of them has been sent
-	 */
-	struct list_head broadcasted_list;
-	/* List of packets that have been sent but not yet acked */
-	struct list_head sent_list;
+	/* Number of tx datagrams that have been queued but not yet acked */
+	int queued_datagrams;
 
 	struct list_head peer_list;
 	struct fw_card *card;
@@ -195,7 +195,7 @@ struct fwnet_peer {
 	unsigned pdg_size;        /* pd_list size */
 
 	u16 datagram_label;       /* outgoing datagram label */
-	unsigned max_payload;     /* includes RFC2374_FRAG_HDR_SIZE overhead */
+	u16 max_payload;          /* includes RFC2374_FRAG_HDR_SIZE overhead */
 	int node_id;
 	int generation;
 	unsigned speed;
@@ -203,22 +203,18 @@ struct fwnet_peer {
 
 /* This is our task struct. It's used for the packet complete callback.  */
 struct fwnet_packet_task {
-	/*
-	 * ptask can actually be on dev->packet_list, dev->broadcasted_list,
-	 * or dev->sent_list depending on its current state.
-	 */
-	struct list_head pt_link;
 	struct fw_transaction transaction;
 	struct rfc2734_header hdr;
 	struct sk_buff *skb;
 	struct fwnet_device *dev;
 
 	int outstanding_pkts;
-	unsigned max_payload;
 	u64 fifo_addr;
 	u16 dest_node;
+	u16 max_payload;
 	u8 generation;
 	u8 speed;
+	u8 enqueued;
 };
 
 /*
@@ -650,8 +646,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net,
 		net->stats.rx_packets++;
 		net->stats.rx_bytes += skb->len;
 	}
-	if (netif_queue_stopped(net))
-		netif_wake_queue(net);
 
 	return 0;
 
@@ -660,8 +654,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net,
 	net->stats.rx_dropped++;
 
 	dev_kfree_skb_any(skb);
-	if (netif_queue_stopped(net))
-		netif_wake_queue(net);
 
 	return -ENOENT;
 }
@@ -793,15 +785,10 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len,
 	 * Datagram is not complete, we're done for the
 	 * moment.
 	 */
-	spin_unlock_irqrestore(&dev->lock, flags);
-
-	return 0;
+	retval = 0;
  fail:
 	spin_unlock_irqrestore(&dev->lock, flags);
 
-	if (netif_queue_stopped(net))
-		netif_wake_queue(net);
-
 	return retval;
 }
 
@@ -901,11 +888,19 @@ static void fwnet_free_ptask(struct fwnet_packet_task *ptask)
 	kmem_cache_free(fwnet_packet_task_cache, ptask);
 }
 
+/* Caller must hold dev->lock. */
+static void dec_queued_datagrams(struct fwnet_device *dev)
+{
+	if (--dev->queued_datagrams == FWNET_MIN_QUEUED_DATAGRAMS)
+		netif_wake_queue(dev->netdev);
+}
+
 static int fwnet_send_packet(struct fwnet_packet_task *ptask);
 
 static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask)
 {
 	struct fwnet_device *dev = ptask->dev;
+	struct sk_buff *skb = ptask->skb;
 	unsigned long flags;
 	bool free;
 
@@ -914,10 +909,14 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask)
 	ptask->outstanding_pkts--;
 
 	/* Check whether we or the networking TX soft-IRQ is last user. */
-	free = (ptask->outstanding_pkts == 0 && !list_empty(&ptask->pt_link));
+	free = (ptask->outstanding_pkts == 0 && ptask->enqueued);
+	if (free)
+		dec_queued_datagrams(dev);
 
-	if (ptask->outstanding_pkts == 0)
-		list_del(&ptask->pt_link);
+	if (ptask->outstanding_pkts == 0) {
+		dev->netdev->stats.tx_packets++;
+		dev->netdev->stats.tx_bytes += skb->len;
+	}
 
 	spin_unlock_irqrestore(&dev->lock, flags);
 
@@ -926,7 +925,6 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask)
 		u16 fg_off;
 		u16 datagram_label;
 		u16 lf;
-		struct sk_buff *skb;
 
 		/* Update the ptask to point to the next fragment and send it */
 		lf = fwnet_get_hdr_lf(&ptask->hdr);
@@ -953,7 +951,7 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask)
 			datagram_label = fwnet_get_hdr_dgl(&ptask->hdr);
 			break;
 		}
-		skb = ptask->skb;
+
 		skb_pull(skb, ptask->max_payload);
 		if (ptask->outstanding_pkts > 1) {
 			fwnet_make_sf_hdr(&ptask->hdr, RFC2374_HDR_INTFRAG,
@@ -970,6 +968,31 @@ static void fwnet_transmit_packet_done(struct fwnet_packet_task *ptask)
 		fwnet_free_ptask(ptask);
 }
 
+static void fwnet_transmit_packet_failed(struct fwnet_packet_task *ptask)
+{
+	struct fwnet_device *dev = ptask->dev;
+	unsigned long flags;
+	bool free;
+
+	spin_lock_irqsave(&dev->lock, flags);
+
+	/* One fragment failed; don't try to send remaining fragments. */
+	ptask->outstanding_pkts = 0;
+
+	/* Check whether we or the networking TX soft-IRQ is last user. */
+	free = ptask->enqueued;
+	if (free)
+		dec_queued_datagrams(dev);
+
+	dev->netdev->stats.tx_dropped++;
+	dev->netdev->stats.tx_errors++;
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	if (free)
+		fwnet_free_ptask(ptask);
+}
+
 static void fwnet_write_complete(struct fw_card *card, int rcode,
 				 void *payload, size_t length, void *data)
 {
@@ -977,11 +1000,12 @@ static void fwnet_write_complete(struct fw_card *card, int rcode,
 
 	ptask = data;
 
-	if (rcode == RCODE_COMPLETE)
+	if (rcode == RCODE_COMPLETE) {
 		fwnet_transmit_packet_done(ptask);
-	else
+	} else {
 		fw_error("fwnet_write_complete: failed: %x\n", rcode);
-		/* ??? error recovery */
+		fwnet_transmit_packet_failed(ptask);
+	}
 }
 
 static int fwnet_send_packet(struct fwnet_packet_task *ptask)
@@ -1039,9 +1063,11 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask)
 		spin_lock_irqsave(&dev->lock, flags);
 
 		/* If the AT tasklet already ran, we may be last user. */
-		free = (ptask->outstanding_pkts == 0 && list_empty(&ptask->pt_link));
+		free = (ptask->outstanding_pkts == 0 && !ptask->enqueued);
 		if (!free)
-			list_add_tail(&ptask->pt_link, &dev->broadcasted_list);
+			ptask->enqueued = true;
+		else
+			dec_queued_datagrams(dev);
 
 		spin_unlock_irqrestore(&dev->lock, flags);
 
@@ -1056,9 +1082,11 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask)
 	spin_lock_irqsave(&dev->lock, flags);
 
 	/* If the AT tasklet already ran, we may be last user. */
-	free = (ptask->outstanding_pkts == 0 && list_empty(&ptask->pt_link));
+	free = (ptask->outstanding_pkts == 0 && !ptask->enqueued);
 	if (!free)
-		list_add_tail(&ptask->pt_link, &dev->sent_list);
+		ptask->enqueued = true;
+	else
+		dec_queued_datagrams(dev);
 
 	spin_unlock_irqrestore(&dev->lock, flags);
 
@@ -1224,6 +1252,15 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net)
 	struct fwnet_peer *peer;
 	unsigned long flags;
 
+	spin_lock_irqsave(&dev->lock, flags);
+
+	/* Can this happen? */
+	if (netif_queue_stopped(dev->netdev)) {
+		spin_unlock_irqrestore(&dev->lock, flags);
+
+		return NETDEV_TX_BUSY;
+	}
+
 	ptask = kmem_cache_alloc(fwnet_packet_task_cache, GFP_ATOMIC);
 	if (ptask == NULL)
 		goto fail;
@@ -1242,9 +1279,6 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net)
 	proto = hdr_buf.h_proto;
 	dg_size = skb->len;
 
-	/* serialize access to peer, including peer->datagram_label */
-	spin_lock_irqsave(&dev->lock, flags);
-
 	/*
 	 * Set the transmission type for the packet.  ARP packets and IP
 	 * broadcast packets are sent via GASP.
@@ -1266,7 +1300,7 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net)
 
 		peer = fwnet_peer_find_by_guid(dev, be64_to_cpu(guid));
 		if (!peer || peer->fifo == FWNET_NO_FIFO_ADDR)
-			goto fail_unlock;
+			goto fail;
 
 		generation         = peer->generation;
 		dest_node          = peer->node_id;
@@ -1320,18 +1354,21 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net)
 		max_payload += RFC2374_FRAG_HDR_SIZE;
 	}
 
+	if (++dev->queued_datagrams == FWNET_MAX_QUEUED_DATAGRAMS)
+		netif_stop_queue(dev->netdev);
+
 	spin_unlock_irqrestore(&dev->lock, flags);
 
 	ptask->max_payload = max_payload;
-	INIT_LIST_HEAD(&ptask->pt_link);
+	ptask->enqueued    = 0;
 
 	fwnet_send_packet(ptask);
 
 	return NETDEV_TX_OK;
 
- fail_unlock:
-	spin_unlock_irqrestore(&dev->lock, flags);
  fail:
+	spin_unlock_irqrestore(&dev->lock, flags);
+
 	if (ptask)
 		kmem_cache_free(fwnet_packet_task_cache, ptask);
 
@@ -1377,7 +1414,7 @@ static void fwnet_init_dev(struct net_device *net)
 	net->addr_len		= FWNET_ALEN;
 	net->hard_header_len	= FWNET_HLEN;
 	net->type		= ARPHRD_IEEE1394;
-	net->tx_queue_len	= 10;
+	net->tx_queue_len	= FWNET_TX_QUEUE_LEN;
 }
 
 /* caller must hold fwnet_device_mutex */
@@ -1457,14 +1494,9 @@ static int fwnet_probe(struct device *_dev)
 	dev->broadcast_rcv_context = NULL;
 	dev->broadcast_xmt_max_payload = 0;
 	dev->broadcast_xmt_datagramlabel = 0;
-
 	dev->local_fifo = FWNET_NO_FIFO_ADDR;
-
-	INIT_LIST_HEAD(&dev->packet_list);
-	INIT_LIST_HEAD(&dev->broadcasted_list);
-	INIT_LIST_HEAD(&dev->sent_list);
+	dev->queued_datagrams = 0;
 	INIT_LIST_HEAD(&dev->peer_list);
-
 	dev->card = card;
 	dev->netdev = net;
 
@@ -1522,7 +1554,7 @@ static int fwnet_remove(struct device *_dev)
 	struct fwnet_peer *peer = dev_get_drvdata(_dev);
 	struct fwnet_device *dev = peer->dev;
 	struct net_device *net;
-	struct fwnet_packet_task *ptask, *pt_next;
+	int i;
 
 	mutex_lock(&fwnet_device_mutex);
 
@@ -1540,21 +1572,9 @@ static int fwnet_remove(struct device *_dev)
 					      dev->card);
 			fw_iso_context_destroy(dev->broadcast_rcv_context);
 		}
-		list_for_each_entry_safe(ptask, pt_next,
-					 &dev->packet_list, pt_link) {
-			dev_kfree_skb_any(ptask->skb);
-			kmem_cache_free(fwnet_packet_task_cache, ptask);
-		}
-		list_for_each_entry_safe(ptask, pt_next,
-					 &dev->broadcasted_list, pt_link) {
-			dev_kfree_skb_any(ptask->skb);
-			kmem_cache_free(fwnet_packet_task_cache, ptask);
-		}
-		list_for_each_entry_safe(ptask, pt_next,
-					 &dev->sent_list, pt_link) {
-			dev_kfree_skb_any(ptask->skb);
-			kmem_cache_free(fwnet_packet_task_cache, ptask);
-		}
+		for (i = 0; dev->queued_datagrams && i < 5; i++)
+			ssleep(1);
+		WARN_ON(dev->queued_datagrams);
 		list_del(&dev->dev_link);
 
 		free_netdev(net);

+ 4 - 3
drivers/isdn/icn/icn.c

@@ -1627,7 +1627,7 @@ __setup("icn=", icn_setup);
 static int __init icn_init(void)
 {
 	char *p;
-	char rev[10];
+	char rev[20];
 
 	memset(&dev, 0, sizeof(icn_dev));
 	dev.memaddr = (membase & 0x0ffc000);
@@ -1637,9 +1637,10 @@ static int __init icn_init(void)
 	spin_lock_init(&dev.devlock);
 
 	if ((p = strchr(revision, ':'))) {
-		strcpy(rev, p + 1);
+		strncpy(rev, p + 1, 20);
 		p = strchr(rev, '$');
-		*p = 0;
+		if (p)
+			*p = 0;
 	} else
 		strcpy(rev, " ??? ");
 	printk(KERN_NOTICE "ICN-ISDN-driver Rev%smem=0x%08lx\n", rev,

+ 1 - 1
drivers/mmc/core/core.c

@@ -1559,7 +1559,7 @@ void mmc_stop_host(struct mmc_host *host)
 
 	if (host->caps & MMC_CAP_DISABLE)
 		cancel_delayed_work(&host->disable);
-	cancel_delayed_work(&host->detect);
+	cancel_delayed_work_sync(&host->detect);
 	mmc_flush_scheduled_work();
 
 	/* clear pm flags now and let card drivers set them as needed */

+ 6 - 2
drivers/mmc/core/mmc.c

@@ -375,7 +375,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	struct mmc_card *oldcard)
 {
 	struct mmc_card *card;
-	int err, ddr = MMC_SDR_MODE;
+	int err, ddr = 0;
 	u32 cid[4];
 	unsigned int max_dtr;
 
@@ -562,7 +562,11 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 			       1 << bus_width, ddr);
 			err = 0;
 		} else {
-			mmc_card_set_ddr_mode(card);
+			if (ddr)
+				mmc_card_set_ddr_mode(card);
+			else
+				ddr = MMC_SDR_MODE;
+
 			mmc_set_bus_width_ddr(card->host, bus_width, ddr);
 		}
 	}

+ 35 - 16
drivers/mmc/core/sdio.c

@@ -547,9 +547,11 @@ static void mmc_sdio_detect(struct mmc_host *host)
 	BUG_ON(!host->card);
 
 	/* Make sure card is powered before detecting it */
-	err = pm_runtime_get_sync(&host->card->dev);
-	if (err < 0)
-		goto out;
+	if (host->caps & MMC_CAP_POWER_OFF_CARD) {
+		err = pm_runtime_get_sync(&host->card->dev);
+		if (err < 0)
+			goto out;
+	}
 
 	mmc_claim_host(host);
 
@@ -560,6 +562,20 @@ static void mmc_sdio_detect(struct mmc_host *host)
 
 	mmc_release_host(host);
 
+	/*
+	 * Tell PM core it's OK to power off the card now.
+	 *
+	 * The _sync variant is used in order to ensure that the card
+	 * is left powered off in case an error occurred, and the card
+	 * is going to be removed.
+	 *
+	 * Since there is no specific reason to believe a new user
+	 * is about to show up at this point, the _sync variant is
+	 * desirable anyway.
+	 */
+	if (host->caps & MMC_CAP_POWER_OFF_CARD)
+		pm_runtime_put_sync(&host->card->dev);
+
 out:
 	if (err) {
 		mmc_sdio_remove(host);
@@ -568,9 +584,6 @@ out:
 		mmc_detach_bus(host);
 		mmc_release_host(host);
 	}
-
-	/* Tell PM core that we're done */
-	pm_runtime_put(&host->card->dev);
 }
 
 /*
@@ -718,16 +731,21 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr)
 	card = host->card;
 
 	/*
-	 * Let runtime PM core know our card is active
+	 * Enable runtime PM only if supported by host+card+board
 	 */
-	err = pm_runtime_set_active(&card->dev);
-	if (err)
-		goto remove;
+	if (host->caps & MMC_CAP_POWER_OFF_CARD) {
+		/*
+		 * Let runtime PM core know our card is active
+		 */
+		err = pm_runtime_set_active(&card->dev);
+		if (err)
+			goto remove;
 
-	/*
-	 * Enable runtime PM for this card
-	 */
-	pm_runtime_enable(&card->dev);
+		/*
+		 * Enable runtime PM for this card
+		 */
+		pm_runtime_enable(&card->dev);
+	}
 
 	/*
 	 * The number of functions on the card is encoded inside
@@ -745,9 +763,10 @@ int mmc_attach_sdio(struct mmc_host *host, u32 ocr)
 			goto remove;
 
 		/*
-		 * Enable Runtime PM for this func
+		 * Enable Runtime PM for this func (if supported)
 		 */
-		pm_runtime_enable(&card->sdio_func[i]->dev);
+		if (host->caps & MMC_CAP_POWER_OFF_CARD)
+			pm_runtime_enable(&card->sdio_func[i]->dev);
 	}
 
 	mmc_release_host(host);

+ 22 - 11
drivers/mmc/core/sdio_bus.c

@@ -17,6 +17,7 @@
 #include <linux/pm_runtime.h>
 
 #include <linux/mmc/card.h>
+#include <linux/mmc/host.h>
 #include <linux/mmc/sdio_func.h>
 
 #include "sdio_cis.h"
@@ -132,9 +133,11 @@ static int sdio_bus_probe(struct device *dev)
 	 * it should call pm_runtime_put_noidle() in its probe routine and
 	 * pm_runtime_get_noresume() in its remove routine.
 	 */
-	ret = pm_runtime_get_sync(dev);
-	if (ret < 0)
-		goto out;
+	if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) {
+		ret = pm_runtime_get_sync(dev);
+		if (ret < 0)
+			goto out;
+	}
 
 	/* Set the default block size so the driver is sure it's something
 	 * sensible. */
@@ -151,7 +154,8 @@ static int sdio_bus_probe(struct device *dev)
 	return 0;
 
 disable_runtimepm:
-	pm_runtime_put_noidle(dev);
+	if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
+		pm_runtime_put_noidle(dev);
 out:
 	return ret;
 }
@@ -160,12 +164,14 @@ static int sdio_bus_remove(struct device *dev)
 {
 	struct sdio_driver *drv = to_sdio_driver(dev->driver);
 	struct sdio_func *func = dev_to_sdio_func(dev);
-	int ret;
+	int ret = 0;
 
 	/* Make sure card is powered before invoking ->remove() */
-	ret = pm_runtime_get_sync(dev);
-	if (ret < 0)
-		goto out;
+	if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) {
+		ret = pm_runtime_get_sync(dev);
+		if (ret < 0)
+			goto out;
+	}
 
 	drv->remove(func);
 
@@ -178,10 +184,12 @@ static int sdio_bus_remove(struct device *dev)
 	}
 
 	/* First, undo the increment made directly above */
-	pm_runtime_put_noidle(dev);
+	if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
+		pm_runtime_put_noidle(dev);
 
 	/* Then undo the runtime PM settings in sdio_bus_probe() */
-	pm_runtime_put_noidle(dev);
+	if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
+		pm_runtime_put_noidle(dev);
 
 out:
 	return ret;
@@ -191,6 +199,8 @@ out:
 
 static int sdio_bus_pm_prepare(struct device *dev)
 {
+	struct sdio_func *func = dev_to_sdio_func(dev);
+
 	/*
 	 * Resume an SDIO device which was suspended at run time at this
 	 * point, in order to allow standard SDIO suspend/resume paths
@@ -212,7 +222,8 @@ static int sdio_bus_pm_prepare(struct device *dev)
 	 * since there is little point in failing system suspend if a
 	 * device can't be resumed.
 	 */
-	pm_runtime_resume(dev);
+	if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
+		pm_runtime_resume(dev);
 
 	return 0;
 }

+ 1 - 1
drivers/mmc/host/omap_hsmmc.c

@@ -1002,7 +1002,7 @@ static inline void omap_hsmmc_reset_controller_fsm(struct omap_hsmmc_host *host,
 	 * Monitor a 0->1 transition first
 	 */
 	if (mmc_slot(host).features & HSMMC_HAS_UPDATED_RESET) {
-		while ((!(OMAP_HSMMC_READ(host, SYSCTL) & bit))
+		while ((!(OMAP_HSMMC_READ(host->base, SYSCTL) & bit))
 					&& (i++ < limit))
 			cpu_relax();
 	}

+ 9 - 3
drivers/mmc/host/sdhci-esdhc-imx.c

@@ -17,6 +17,7 @@
 #include <linux/clk.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/sdhci-pltfm.h>
+#include <mach/hardware.h>
 #include "sdhci.h"
 #include "sdhci-pltfm.h"
 #include "sdhci-esdhc.h"
@@ -112,6 +113,13 @@ static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pd
 	clk_enable(clk);
 	pltfm_host->clk = clk;
 
+	if (cpu_is_mx35() || cpu_is_mx51())
+		host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
+
+	/* Fix errata ENGcm07207 which is present on i.MX25 and i.MX35 */
+	if (cpu_is_mx25() || cpu_is_mx35())
+		host->quirks |= SDHCI_QUIRK_NO_MULTIBLOCK;
+
 	return 0;
 }
 
@@ -133,10 +141,8 @@ static struct sdhci_ops sdhci_esdhc_ops = {
 };
 
 struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = {
-	.quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_NO_MULTIBLOCK
-			| SDHCI_QUIRK_BROKEN_ADMA,
+	.quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA,
 	/* ADMA has issues. Might be fixable */
-	/* NO_MULTIBLOCK might be MX35 only (Errata: ENGcm07207) */
 	.ops = &sdhci_esdhc_ops,
 	.init = esdhc_pltfm_init,
 	.exit = esdhc_pltfm_exit,

+ 23 - 8
drivers/mmc/host/sdhci-pci.c

@@ -149,11 +149,11 @@ static const struct sdhci_pci_fixes sdhci_cafe = {
  * ADMA operation is disabled for Moorestown platform due to
  * hardware bugs.
  */
-static int mrst_hc1_probe(struct sdhci_pci_chip *chip)
+static int mrst_hc_probe(struct sdhci_pci_chip *chip)
 {
 	/*
-	 * slots number is fixed here for MRST as SDIO3 is never used and has
-	 * hardware bugs.
+	 * slots number is fixed here for MRST as SDIO3/5 are never used and
+	 * have hardware bugs.
 	 */
 	chip->num_slots = 1;
 	return 0;
@@ -163,9 +163,9 @@ static const struct sdhci_pci_fixes sdhci_intel_mrst_hc0 = {
 	.quirks		= SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT,
 };
 
-static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1 = {
+static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1_hc2 = {
 	.quirks		= SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT,
-	.probe		= mrst_hc1_probe,
+	.probe		= mrst_hc_probe,
 };
 
 static const struct sdhci_pci_fixes sdhci_intel_mfd_sd = {
@@ -538,7 +538,15 @@ static const struct pci_device_id pci_ids[] __devinitdata = {
 		.device		= PCI_DEVICE_ID_INTEL_MRST_SD1,
 		.subvendor	= PCI_ANY_ID,
 		.subdevice	= PCI_ANY_ID,
-		.driver_data	= (kernel_ulong_t)&sdhci_intel_mrst_hc1,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_mrst_hc1_hc2,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_INTEL,
+		.device		= PCI_DEVICE_ID_INTEL_MRST_SD2,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_mrst_hc1_hc2,
 	},
 
 	{
@@ -637,6 +645,7 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state)
 {
 	struct sdhci_pci_chip *chip;
 	struct sdhci_pci_slot *slot;
+	mmc_pm_flag_t slot_pm_flags;
 	mmc_pm_flag_t pm_flags = 0;
 	int i, ret;
 
@@ -657,7 +666,11 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state)
 			return ret;
 		}
 
-		pm_flags |= slot->host->mmc->pm_flags;
+		slot_pm_flags = slot->host->mmc->pm_flags;
+		if (slot_pm_flags & MMC_PM_WAKE_SDIO_IRQ)
+			sdhci_enable_irq_wakeups(slot->host);
+
+		pm_flags |= slot_pm_flags;
 	}
 
 	if (chip->fixes && chip->fixes->suspend) {
@@ -671,8 +684,10 @@ static int sdhci_pci_suspend (struct pci_dev *pdev, pm_message_t state)
 
 	pci_save_state(pdev);
 	if (pm_flags & MMC_PM_KEEP_POWER) {
-		if (pm_flags & MMC_PM_WAKE_SDIO_IRQ)
+		if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) {
+			pci_pme_active(pdev, true);
 			pci_enable_wake(pdev, PCI_D3hot, 1);
+		}
 		pci_set_power_state(pdev, PCI_D3hot);
 	} else {
 		pci_enable_wake(pdev, pci_choose_state(pdev, state), 0);

+ 4 - 0
drivers/mmc/host/sdhci-pxa.c

@@ -141,6 +141,10 @@ static int __devinit sdhci_pxa_probe(struct platform_device *pdev)
 	if (pdata->quirks)
 		host->quirks |= pdata->quirks;
 
+	/* If slot design supports 8 bit data, indicate this to MMC. */
+	if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT)
+		host->mmc->caps |= MMC_CAP_8_BIT_DATA;
+
 	ret = sdhci_add_host(host);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to add host\n");

+ 43 - 11
drivers/mmc/host/sdhci.c

@@ -1185,17 +1185,31 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	if (host->ops->platform_send_init_74_clocks)
 		host->ops->platform_send_init_74_clocks(host, ios->power_mode);
 
-	ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
-
-	if (ios->bus_width == MMC_BUS_WIDTH_8)
-		ctrl |= SDHCI_CTRL_8BITBUS;
-	else
-		ctrl &= ~SDHCI_CTRL_8BITBUS;
+	/*
+	 * If your platform has 8-bit width support but is not a v3 controller,
+	 * or if it requires special setup code, you should implement that in
+	 * platform_8bit_width().
+	 */
+	if (host->ops->platform_8bit_width)
+		host->ops->platform_8bit_width(host, ios->bus_width);
+	else {
+		ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
+		if (ios->bus_width == MMC_BUS_WIDTH_8) {
+			ctrl &= ~SDHCI_CTRL_4BITBUS;
+			if (host->version >= SDHCI_SPEC_300)
+				ctrl |= SDHCI_CTRL_8BITBUS;
+		} else {
+			if (host->version >= SDHCI_SPEC_300)
+				ctrl &= ~SDHCI_CTRL_8BITBUS;
+			if (ios->bus_width == MMC_BUS_WIDTH_4)
+				ctrl |= SDHCI_CTRL_4BITBUS;
+			else
+				ctrl &= ~SDHCI_CTRL_4BITBUS;
+		}
+		sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
+	}
 
-	if (ios->bus_width == MMC_BUS_WIDTH_4)
-		ctrl |= SDHCI_CTRL_4BITBUS;
-	else
-		ctrl &= ~SDHCI_CTRL_4BITBUS;
+	ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
 
 	if ((ios->timing == MMC_TIMING_SD_HS ||
 	     ios->timing == MMC_TIMING_MMC_HS)
@@ -1681,6 +1695,16 @@ int sdhci_resume_host(struct sdhci_host *host)
 
 EXPORT_SYMBOL_GPL(sdhci_resume_host);
 
+void sdhci_enable_irq_wakeups(struct sdhci_host *host)
+{
+	u8 val;
+	val = sdhci_readb(host, SDHCI_WAKE_UP_CONTROL);
+	val |= SDHCI_WAKE_ON_INT;
+	sdhci_writeb(host, val, SDHCI_WAKE_UP_CONTROL);
+}
+
+EXPORT_SYMBOL_GPL(sdhci_enable_irq_wakeups);
+
 #endif /* CONFIG_PM */
 
 /*****************************************************************************\
@@ -1845,11 +1869,19 @@ int sdhci_add_host(struct sdhci_host *host)
 		mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300;
 	else
 		mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200;
+
 	mmc->f_max = host->max_clk;
 	mmc->caps |= MMC_CAP_SDIO_IRQ;
 
+	/*
+	 * A controller may support 8-bit width, but the board itself
+	 * might not have the pins brought out.  Boards that support
+	 * 8-bit width must set "mmc->caps |= MMC_CAP_8_BIT_DATA;" in
+	 * their platform code before calling sdhci_add_host(), and we
+	 * won't assume 8-bit width for hosts without that CAP.
+	 */
 	if (!(host->quirks & SDHCI_QUIRK_FORCE_1_BIT_DATA))
-		mmc->caps |= MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA;
+		mmc->caps |= MMC_CAP_4_BIT_DATA;
 
 	if (caps & SDHCI_CAN_DO_HISPD)
 		mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED;

+ 8 - 1
drivers/mmc/host/sdhci.h

@@ -76,7 +76,7 @@
 #define   SDHCI_CTRL_ADMA1	0x08
 #define   SDHCI_CTRL_ADMA32	0x10
 #define   SDHCI_CTRL_ADMA64	0x18
-#define  SDHCI_CTRL_8BITBUS	0x20
+#define   SDHCI_CTRL_8BITBUS	0x20
 
 #define SDHCI_POWER_CONTROL	0x29
 #define  SDHCI_POWER_ON		0x01
@@ -87,6 +87,9 @@
 #define SDHCI_BLOCK_GAP_CONTROL	0x2A
 
 #define SDHCI_WAKE_UP_CONTROL	0x2B
+#define  SDHCI_WAKE_ON_INT	0x01
+#define  SDHCI_WAKE_ON_INSERT	0x02
+#define  SDHCI_WAKE_ON_REMOVE	0x04
 
 #define SDHCI_CLOCK_CONTROL	0x2C
 #define  SDHCI_DIVIDER_SHIFT	8
@@ -152,6 +155,7 @@
 #define  SDHCI_CLOCK_BASE_SHIFT	8
 #define  SDHCI_MAX_BLOCK_MASK	0x00030000
 #define  SDHCI_MAX_BLOCK_SHIFT  16
+#define  SDHCI_CAN_DO_8BIT	0x00040000
 #define  SDHCI_CAN_DO_ADMA2	0x00080000
 #define  SDHCI_CAN_DO_ADMA1	0x00100000
 #define  SDHCI_CAN_DO_HISPD	0x00200000
@@ -212,6 +216,8 @@ struct sdhci_ops {
 	unsigned int	(*get_max_clock)(struct sdhci_host *host);
 	unsigned int	(*get_min_clock)(struct sdhci_host *host);
 	unsigned int	(*get_timeout_clock)(struct sdhci_host *host);
+	int		(*platform_8bit_width)(struct sdhci_host *host,
+					       int width);
 	void (*platform_send_init_74_clocks)(struct sdhci_host *host,
 					     u8 power_mode);
 	unsigned int    (*get_ro)(struct sdhci_host *host);
@@ -317,6 +323,7 @@ extern void sdhci_remove_host(struct sdhci_host *host, int dead);
 #ifdef CONFIG_PM
 extern int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state);
 extern int sdhci_resume_host(struct sdhci_host *host);
+extern void sdhci_enable_irq_wakeups(struct sdhci_host *host);
 #endif
 
 #endif /* __SDHCI_HW_H */

+ 22 - 8
drivers/mmc/host/ushc.c

@@ -425,7 +425,7 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id
 	struct usb_device *usb_dev = interface_to_usbdev(intf);
 	struct mmc_host *mmc;
 	struct ushc_data *ushc;
-	int ret = -ENOMEM;
+	int ret;
 
 	mmc = mmc_alloc_host(sizeof(struct ushc_data), &intf->dev);
 	if (mmc == NULL)
@@ -462,11 +462,15 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id
 	mmc->max_blk_count = 511;
 
 	ushc->int_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (ushc->int_urb == NULL)
+	if (ushc->int_urb == NULL) {
+		ret = -ENOMEM;
 		goto err;
+	}
 	ushc->int_data = kzalloc(sizeof(struct ushc_int_data), GFP_KERNEL);
-	if (ushc->int_data == NULL)
+	if (ushc->int_data == NULL) {
+		ret = -ENOMEM;
 		goto err;
+	}
 	usb_fill_int_urb(ushc->int_urb, ushc->usb_dev,
 			 usb_rcvintpipe(usb_dev,
 					intf->cur_altsetting->endpoint[0].desc.bEndpointAddress),
@@ -475,11 +479,15 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id
 			 intf->cur_altsetting->endpoint[0].desc.bInterval);
 
 	ushc->cbw_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (ushc->cbw_urb == NULL)
+	if (ushc->cbw_urb == NULL) {
+		ret = -ENOMEM;
 		goto err;
+	}
 	ushc->cbw = kzalloc(sizeof(struct ushc_cbw), GFP_KERNEL);
-	if (ushc->cbw == NULL)
+	if (ushc->cbw == NULL) {
+		ret = -ENOMEM;
 		goto err;
+	}
 	ushc->cbw->signature = USHC_CBW_SIGNATURE;
 
 	usb_fill_bulk_urb(ushc->cbw_urb, ushc->usb_dev, usb_sndbulkpipe(usb_dev, 2),
@@ -487,15 +495,21 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id
 			  cbw_callback, ushc);
 
 	ushc->data_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (ushc->data_urb == NULL)
+	if (ushc->data_urb == NULL) {
+		ret = -ENOMEM;
 		goto err;
+	}
 
 	ushc->csw_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (ushc->csw_urb == NULL)
+	if (ushc->csw_urb == NULL) {
+		ret = -ENOMEM;
 		goto err;
+	}
 	ushc->csw = kzalloc(sizeof(struct ushc_cbw), GFP_KERNEL);
-	if (ushc->csw == NULL)
+	if (ushc->csw == NULL) {
+		ret = -ENOMEM;
 		goto err;
+	}
 	usb_fill_bulk_urb(ushc->csw_urb, ushc->usb_dev, usb_rcvbulkpipe(usb_dev, 6),
 			  ushc->csw, sizeof(struct ushc_csw),
 			  csw_callback, ushc);

+ 9 - 7
drivers/mtd/ubi/scan.c

@@ -787,16 +787,15 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr,
 		 * erased, so it became unstable and corrupted, and should be
 		 * erased.
 		 */
-		return 0;
+		err = 0;
+		goto out_unlock;
 	}
 
 	if (err)
-		return err;
+		goto out_unlock;
 
-	if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size)) {
-		mutex_unlock(&ubi->buf_mutex);
-		return 0;
-	}
+	if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size))
+		goto out_unlock;
 
 	ubi_err("PEB %d contains corrupted VID header, and the data does not "
 		"contain all 0xFF, this may be a non-UBI PEB or a severe VID "
@@ -806,8 +805,11 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr,
 		pnum, ubi->leb_start, ubi->leb_size);
 	ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
 			       ubi->peb_buf1, ubi->leb_size, 1);
+	err = 1;
+
+out_unlock:
 	mutex_unlock(&ubi->buf_mutex);
-	return 1;
+	return err;
 }
 
 /**

+ 3 - 3
drivers/net/Kconfig

@@ -2543,10 +2543,10 @@ config PCH_GBE
 	depends on PCI
 	select MII
 	---help---
-	  This is a gigabit ethernet driver for Topcliff PCH.
-	  Topcliff PCH is the platform controller hub that is used in Intel's
+	  This is a gigabit ethernet driver for EG20T PCH.
+	  EG20T PCH is the platform controller hub that is used in Intel's
 	  general embedded platform.
-	  Topcliff PCH has Gigabit Ethernet interface.
+	  EG20T PCH has Gigabit Ethernet interface.
 	  Using this interface, it is able to access system devices connected
 	  to Gigabit Ethernet.
 	  This driver enables Gigabit Ethernet function.

+ 5 - 5
drivers/net/au1000_eth.c

@@ -155,10 +155,10 @@ static void au1000_enable_mac(struct net_device *dev, int force_reset)
 	spin_lock_irqsave(&aup->lock, flags);
 
 	if (force_reset || (!aup->mac_enabled)) {
-		writel(MAC_EN_CLOCK_ENABLE, &aup->enable);
+		writel(MAC_EN_CLOCK_ENABLE, aup->enable);
 		au_sync_delay(2);
 		writel((MAC_EN_RESET0 | MAC_EN_RESET1 | MAC_EN_RESET2
-				| MAC_EN_CLOCK_ENABLE), &aup->enable);
+				| MAC_EN_CLOCK_ENABLE), aup->enable);
 		au_sync_delay(2);
 
 		aup->mac_enabled = 1;
@@ -503,9 +503,9 @@ static void au1000_reset_mac_unlocked(struct net_device *dev)
 
 	au1000_hard_stop(dev);
 
-	writel(MAC_EN_CLOCK_ENABLE, &aup->enable);
+	writel(MAC_EN_CLOCK_ENABLE, aup->enable);
 	au_sync_delay(2);
-	writel(0, &aup->enable);
+	writel(0, aup->enable);
 	au_sync_delay(2);
 
 	aup->tx_full = 0;
@@ -1119,7 +1119,7 @@ static int __devinit au1000_probe(struct platform_device *pdev)
 	/* set a random MAC now in case platform_data doesn't provide one */
 	random_ether_addr(dev->dev_addr);
 
-	writel(0, &aup->enable);
+	writel(0, aup->enable);
 	aup->mac_enabled = 0;
 
 	pd = pdev->dev.platform_data;

+ 45 - 28
drivers/net/cxgb4vf/cxgb4vf_main.c

@@ -816,40 +816,48 @@ static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
 }
 
 /*
- * Collect up to maxaddrs worth of a netdevice's unicast addresses into an
- * array of addrss pointers and return the number collected.
+ * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting
+ * at a specified offset within the list, into an array of addrss pointers and
+ * return the number collected.
  */
-static inline int collect_netdev_uc_list_addrs(const struct net_device *dev,
-					       const u8 **addr,
-					       unsigned int maxaddrs)
+static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev,
+							const u8 **addr,
+							unsigned int offset,
+							unsigned int maxaddrs)
 {
+	unsigned int index = 0;
 	unsigned int naddr = 0;
 	const struct netdev_hw_addr *ha;
 
-	for_each_dev_addr(dev, ha) {
-		addr[naddr++] = ha->addr;
-		if (naddr >= maxaddrs)
-			break;
-	}
+	for_each_dev_addr(dev, ha)
+		if (index++ >= offset) {
+			addr[naddr++] = ha->addr;
+			if (naddr >= maxaddrs)
+				break;
+		}
 	return naddr;
 }
 
 /*
- * Collect up to maxaddrs worth of a netdevice's multicast addresses into an
- * array of addrss pointers and return the number collected.
+ * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting
+ * at a specified offset within the list, into an array of addrss pointers and
+ * return the number collected.
  */
-static inline int collect_netdev_mc_list_addrs(const struct net_device *dev,
-					       const u8 **addr,
-					       unsigned int maxaddrs)
+static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev,
+							const u8 **addr,
+							unsigned int offset,
+							unsigned int maxaddrs)
 {
+	unsigned int index = 0;
 	unsigned int naddr = 0;
 	const struct netdev_hw_addr *ha;
 
-	netdev_for_each_mc_addr(ha, dev) {
-		addr[naddr++] = ha->addr;
-		if (naddr >= maxaddrs)
-			break;
-	}
+	netdev_for_each_mc_addr(ha, dev)
+		if (index++ >= offset) {
+			addr[naddr++] = ha->addr;
+			if (naddr >= maxaddrs)
+				break;
+		}
 	return naddr;
 }
 
@@ -862,16 +870,20 @@ static int set_addr_filters(const struct net_device *dev, bool sleep)
 	u64 mhash = 0;
 	u64 uhash = 0;
 	bool free = true;
-	u16 filt_idx[7];
+	unsigned int offset, naddr;
 	const u8 *addr[7];
-	int ret, naddr = 0;
+	int ret;
 	const struct port_info *pi = netdev_priv(dev);
 
 	/* first do the secondary unicast addresses */
-	naddr = collect_netdev_uc_list_addrs(dev, addr, ARRAY_SIZE(addr));
-	if (naddr > 0) {
+	for (offset = 0; ; offset += naddr) {
+		naddr = collect_netdev_uc_list_addrs(dev, addr, offset,
+						     ARRAY_SIZE(addr));
+		if (naddr == 0)
+			break;
+
 		ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
-					  naddr, addr, filt_idx, &uhash, sleep);
+					  naddr, addr, NULL, &uhash, sleep);
 		if (ret < 0)
 			return ret;
 
@@ -879,12 +891,17 @@ static int set_addr_filters(const struct net_device *dev, bool sleep)
 	}
 
 	/* next set up the multicast addresses */
-	naddr = collect_netdev_mc_list_addrs(dev, addr, ARRAY_SIZE(addr));
-	if (naddr > 0) {
+	for (offset = 0; ; offset += naddr) {
+		naddr = collect_netdev_mc_list_addrs(dev, addr, offset,
+						     ARRAY_SIZE(addr));
+		if (naddr == 0)
+			break;
+
 		ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
-					  naddr, addr, filt_idx, &mhash, sleep);
+					  naddr, addr, NULL, &mhash, sleep);
 		if (ret < 0)
 			return ret;
+		free = false;
 	}
 
 	return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0,

+ 59 - 35
drivers/net/cxgb4vf/t4vf_hw.c

@@ -1014,48 +1014,72 @@ int t4vf_alloc_mac_filt(struct adapter *adapter, unsigned int viid, bool free,
 			unsigned int naddr, const u8 **addr, u16 *idx,
 			u64 *hash, bool sleep_ok)
 {
-	int i, ret;
+	int offset, ret = 0;
+	unsigned nfilters = 0;
+	unsigned int rem = naddr;
 	struct fw_vi_mac_cmd cmd, rpl;
-	struct fw_vi_mac_exact *p;
-	size_t len16;
 
-	if (naddr > ARRAY_SIZE(cmd.u.exact))
+	if (naddr > FW_CLS_TCAM_NUM_ENTRIES)
 		return -EINVAL;
-	len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd,
-				      u.exact[naddr]), 16);
 
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_VI_MAC_CMD) |
-				     FW_CMD_REQUEST |
-				     FW_CMD_WRITE |
-				     (free ? FW_CMD_EXEC : 0) |
-				     FW_VI_MAC_CMD_VIID(viid));
-	cmd.freemacs_to_len16 = cpu_to_be32(FW_VI_MAC_CMD_FREEMACS(free) |
-					    FW_CMD_LEN16(len16));
+	for (offset = 0; offset < naddr; /**/) {
+		unsigned int fw_naddr = (rem < ARRAY_SIZE(cmd.u.exact)
+					 ? rem
+					 : ARRAY_SIZE(cmd.u.exact));
+		size_t len16 = DIV_ROUND_UP(offsetof(struct fw_vi_mac_cmd,
+						     u.exact[fw_naddr]), 16);
+		struct fw_vi_mac_exact *p;
+		int i;
 
-	for (i = 0, p = cmd.u.exact; i < naddr; i++, p++) {
-		p->valid_to_idx =
-			cpu_to_be16(FW_VI_MAC_CMD_VALID |
-				    FW_VI_MAC_CMD_IDX(FW_VI_MAC_ADD_MAC));
-		memcpy(p->macaddr, addr[i], sizeof(p->macaddr));
-	}
+		memset(&cmd, 0, sizeof(cmd));
+		cmd.op_to_viid = cpu_to_be32(FW_CMD_OP(FW_VI_MAC_CMD) |
+					     FW_CMD_REQUEST |
+					     FW_CMD_WRITE |
+					     (free ? FW_CMD_EXEC : 0) |
+					     FW_VI_MAC_CMD_VIID(viid));
+		cmd.freemacs_to_len16 =
+			cpu_to_be32(FW_VI_MAC_CMD_FREEMACS(free) |
+				    FW_CMD_LEN16(len16));
+
+		for (i = 0, p = cmd.u.exact; i < fw_naddr; i++, p++) {
+			p->valid_to_idx = cpu_to_be16(
+				FW_VI_MAC_CMD_VALID |
+				FW_VI_MAC_CMD_IDX(FW_VI_MAC_ADD_MAC));
+			memcpy(p->macaddr, addr[offset+i], sizeof(p->macaddr));
+		}
+
+
+		ret = t4vf_wr_mbox_core(adapter, &cmd, sizeof(cmd), &rpl,
+					sleep_ok);
+		if (ret && ret != -ENOMEM)
+			break;
 
-	ret = t4vf_wr_mbox_core(adapter, &cmd, sizeof(cmd), &rpl, sleep_ok);
-	if (ret)
-		return ret;
-
-	for (i = 0, p = rpl.u.exact; i < naddr; i++, p++) {
-		u16 index = FW_VI_MAC_CMD_IDX_GET(be16_to_cpu(p->valid_to_idx));
-
-		if (idx)
-			idx[i] = (index >= FW_CLS_TCAM_NUM_ENTRIES
-				  ? 0xffff
-				  : index);
-		if (index < FW_CLS_TCAM_NUM_ENTRIES)
-			ret++;
-		else if (hash)
-			*hash |= (1 << hash_mac_addr(addr[i]));
+		for (i = 0, p = rpl.u.exact; i < fw_naddr; i++, p++) {
+			u16 index = FW_VI_MAC_CMD_IDX_GET(
+				be16_to_cpu(p->valid_to_idx));
+
+			if (idx)
+				idx[offset+i] =
+					(index >= FW_CLS_TCAM_NUM_ENTRIES
+					 ? 0xffff
+					 : index);
+			if (index < FW_CLS_TCAM_NUM_ENTRIES)
+				nfilters++;
+			else if (hash)
+				*hash |= (1ULL << hash_mac_addr(addr[offset+i]));
+		}
+
+		free = false;
+		offset += fw_naddr;
+		rem -= fw_naddr;
 	}
+
+	/*
+	 * If there were no errors or we merely ran out of room in our MAC
+	 * address arena, return the number of filters actually written.
+	 */
+	if (ret == 0 || ret == -ENOMEM)
+		ret = nfilters;
 	return ret;
 }
 

+ 14 - 4
drivers/net/ehea/ehea_main.c

@@ -400,6 +400,7 @@ static void ehea_refill_rq1(struct ehea_port_res *pr, int index, int nr_of_wqes)
 			skb_arr_rq1[index] = netdev_alloc_skb(dev,
 							      EHEA_L_PKT_SIZE);
 			if (!skb_arr_rq1[index]) {
+				ehea_info("Unable to allocate enough skb in the array\n");
 				pr->rq1_skba.os_skbs = fill_wqes - i;
 				break;
 			}
@@ -422,13 +423,20 @@ static void ehea_init_fill_rq1(struct ehea_port_res *pr, int nr_rq1a)
 	struct net_device *dev = pr->port->netdev;
 	int i;
 
-	for (i = 0; i < pr->rq1_skba.len; i++) {
+	if (nr_rq1a > pr->rq1_skba.len) {
+		ehea_error("NR_RQ1A bigger than skb array len\n");
+		return;
+	}
+
+	for (i = 0; i < nr_rq1a; i++) {
 		skb_arr_rq1[i] = netdev_alloc_skb(dev, EHEA_L_PKT_SIZE);
-		if (!skb_arr_rq1[i])
+		if (!skb_arr_rq1[i]) {
+			ehea_info("No enough memory to allocate skb array\n");
 			break;
+		}
 	}
 	/* Ring doorbell */
-	ehea_update_rq1a(pr->qp, nr_rq1a);
+	ehea_update_rq1a(pr->qp, i);
 }
 
 static int ehea_refill_rq_def(struct ehea_port_res *pr,
@@ -735,8 +743,10 @@ static int ehea_proc_rwqes(struct net_device *dev,
 
 					skb = netdev_alloc_skb(dev,
 							       EHEA_L_PKT_SIZE);
-					if (!skb)
+					if (!skb) {
+						ehea_info("Not enough memory to allocate skb\n");
 						break;
+					}
 				}
 				skb_copy_to_linear_data(skb, ((char *)cqe) + 64,
 						 cqe->num_bytes_transfered - 4);

+ 3 - 3
drivers/net/pch_gbe/pch_gbe_main.c

@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 1999 - 2010 Intel Corporation.
- * Copyright (C) 2010 OKI SEMICONDUCTOR Co., LTD.
+ * Copyright (C) 2010 OKI SEMICONDUCTOR CO., LTD.
  *
  * This code was derived from the Intel e1000e Linux driver.
  *
@@ -2464,8 +2464,8 @@ static void __exit pch_gbe_exit_module(void)
 module_init(pch_gbe_init_module);
 module_exit(pch_gbe_exit_module);
 
-MODULE_DESCRIPTION("OKI semiconductor PCH Gigabit ethernet Driver");
-MODULE_AUTHOR("OKI semiconductor, <masa-korg@dsn.okisemi.com>");
+MODULE_DESCRIPTION("EG20T PCH Gigabit ethernet Driver");
+MODULE_AUTHOR("OKI SEMICONDUCTOR, <toshiharu-linux@dsn.okisemi.com>");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 MODULE_DEVICE_TABLE(pci, pch_gbe_pcidev_id);

+ 4 - 4
drivers/net/pch_gbe/pch_gbe_param.c

@@ -434,8 +434,8 @@ void pch_gbe_check_options(struct pch_gbe_adapter *adapter)
 			.err  = "using default of "
 				__MODULE_STRING(PCH_GBE_DEFAULT_TXD),
 			.def  = PCH_GBE_DEFAULT_TXD,
-			.arg  = { .r = { .min = PCH_GBE_MIN_TXD } },
-			.arg  = { .r = { .max = PCH_GBE_MAX_TXD } }
+			.arg  = { .r = { .min = PCH_GBE_MIN_TXD,
+					 .max = PCH_GBE_MAX_TXD } }
 		};
 		struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring;
 		tx_ring->count = TxDescriptors;
@@ -450,8 +450,8 @@ void pch_gbe_check_options(struct pch_gbe_adapter *adapter)
 			.err  = "using default of "
 				__MODULE_STRING(PCH_GBE_DEFAULT_RXD),
 			.def  = PCH_GBE_DEFAULT_RXD,
-			.arg  = { .r = { .min = PCH_GBE_MIN_RXD } },
-			.arg  = { .r = { .max = PCH_GBE_MAX_RXD } }
+			.arg  = { .r = { .min = PCH_GBE_MIN_RXD,
+					 .max = PCH_GBE_MAX_RXD } }
 		};
 		struct pch_gbe_rx_ring *rx_ring = adapter->rx_ring;
 		rx_ring->count = RxDescriptors;

+ 22 - 21
drivers/net/ppp_generic.c

@@ -2584,16 +2584,16 @@ ppp_create_interface(struct net *net, int unit, int *retp)
 	 */
 	dev_net_set(dev, net);
 
-	ret = -EEXIST;
 	mutex_lock(&pn->all_ppp_mutex);
 
 	if (unit < 0) {
 		unit = unit_get(&pn->units_idr, ppp);
 		if (unit < 0) {
-			*retp = unit;
+			ret = unit;
 			goto out2;
 		}
 	} else {
+		ret = -EEXIST;
 		if (unit_find(&pn->units_idr, unit))
 			goto out2; /* unit already exists */
 		/*
@@ -2668,10 +2668,10 @@ static void ppp_shutdown_interface(struct ppp *ppp)
 		ppp->closing = 1;
 		ppp_unlock(ppp);
 		unregister_netdev(ppp->dev);
+		unit_put(&pn->units_idr, ppp->file.index);
 	} else
 		ppp_unlock(ppp);
 
-	unit_put(&pn->units_idr, ppp->file.index);
 	ppp->file.dead = 1;
 	ppp->owner = NULL;
 	wake_up_interruptible(&ppp->file.rwait);
@@ -2859,8 +2859,7 @@ static void __exit ppp_cleanup(void)
  * by holding all_ppp_mutex
  */
 
-/* associate pointer with specified number */
-static int unit_set(struct idr *p, void *ptr, int n)
+static int __unit_alloc(struct idr *p, void *ptr, int n)
 {
 	int unit, err;
 
@@ -2871,10 +2870,24 @@ again:
 	}
 
 	err = idr_get_new_above(p, ptr, n, &unit);
-	if (err == -EAGAIN)
-		goto again;
+	if (err < 0) {
+		if (err == -EAGAIN)
+			goto again;
+		return err;
+	}
+
+	return unit;
+}
+
+/* associate pointer with specified number */
+static int unit_set(struct idr *p, void *ptr, int n)
+{
+	int unit;
 
-	if (unit != n) {
+	unit = __unit_alloc(p, ptr, n);
+	if (unit < 0)
+		return unit;
+	else if (unit != n) {
 		idr_remove(p, unit);
 		return -EINVAL;
 	}
@@ -2885,19 +2898,7 @@ again:
 /* get new free unit number and associate pointer with it */
 static int unit_get(struct idr *p, void *ptr)
 {
-	int unit, err;
-
-again:
-	if (!idr_pre_get(p, GFP_KERNEL)) {
-		printk(KERN_ERR "PPP: No free memory for idr\n");
-		return -ENOMEM;
-	}
-
-	err = idr_get_new_above(p, ptr, 0, &unit);
-	if (err == -EAGAIN)
-		goto again;
-
-	return unit;
+	return __unit_alloc(p, ptr, 0);
 }
 
 /* put unit number back to a pool */

+ 2 - 1
drivers/net/ucc_geth.h

@@ -899,7 +899,8 @@ struct ucc_geth_hardware_statistics {
 #define UCC_GETH_UTFS_INIT                      512	/* Tx virtual FIFO size
 							 */
 #define UCC_GETH_UTFET_INIT                     256	/* 1/2 utfs */
-#define UCC_GETH_UTFTT_INIT                     512
+#define UCC_GETH_UTFTT_INIT                     256	/* 1/2 utfs
+							   due to errata */
 /* Gigabit Ethernet (1000 Mbps) */
 #define UCC_GETH_URFS_GIGA_INIT                 4096/*2048*/	/* Rx virtual
 								   FIFO size */

+ 6 - 4
drivers/net/usb/hso.c

@@ -2994,12 +2994,14 @@ static int hso_probe(struct usb_interface *interface,
 
 	case HSO_INTF_BULK:
 		/* It's a regular bulk interface */
-		if (((port_spec & HSO_PORT_MASK) == HSO_PORT_NETWORK) &&
-		    !disable_net)
-			hso_dev = hso_create_net_device(interface, port_spec);
-		else
+		if ((port_spec & HSO_PORT_MASK) == HSO_PORT_NETWORK) {
+			if (!disable_net)
+				hso_dev =
+				    hso_create_net_device(interface, port_spec);
+		} else {
 			hso_dev =
 			    hso_create_bulk_serial_device(interface, port_spec);
+		}
 		if (!hso_dev)
 			goto exit;
 		break;

+ 6 - 5
drivers/net/wan/x25_asy.c

@@ -498,7 +498,6 @@ norbuff:
 static int x25_asy_close(struct net_device *dev)
 {
 	struct x25_asy *sl = netdev_priv(dev);
-	int err;
 
 	spin_lock(&sl->lock);
 	if (sl->tty)
@@ -507,10 +506,6 @@ static int x25_asy_close(struct net_device *dev)
 	netif_stop_queue(dev);
 	sl->rcount = 0;
 	sl->xleft  = 0;
-	err = lapb_unregister(dev);
-	if (err != LAPB_OK)
-		printk(KERN_ERR "x25_asy_close: lapb_unregister error -%d\n",
-			err);
 	spin_unlock(&sl->lock);
 	return 0;
 }
@@ -595,6 +590,7 @@ static int x25_asy_open_tty(struct tty_struct *tty)
 static void x25_asy_close_tty(struct tty_struct *tty)
 {
 	struct x25_asy *sl = tty->disc_data;
+	int err;
 
 	/* First make sure we're connected. */
 	if (!sl || sl->magic != X25_ASY_MAGIC)
@@ -605,6 +601,11 @@ static void x25_asy_close_tty(struct tty_struct *tty)
 		dev_close(sl->dev);
 	rtnl_unlock();
 
+	err = lapb_unregister(sl->dev);
+	if (err != LAPB_OK)
+		printk(KERN_ERR "x25_asy_close: lapb_unregister error -%d\n",
+			err);
+
 	tty->disc_data = NULL;
 	sl->tty = NULL;
 	x25_asy_free(sl);

+ 1 - 1
drivers/net/wireless/ath/ath9k/recv.c

@@ -518,7 +518,7 @@ bool ath_stoprecv(struct ath_softc *sc)
 	bool stopped;
 
 	spin_lock_bh(&sc->rx.rxbuflock);
-	ath9k_hw_stoppcurecv(ah);
+	ath9k_hw_abortpcurecv(ah);
 	ath9k_hw_setrxfilter(ah, 0);
 	stopped = ath9k_hw_stopdmarecv(ah);
 

+ 1 - 1
drivers/net/wireless/ath/carl9170/main.c

@@ -647,7 +647,7 @@ init:
 	}
 
 unlock:
-	if (err && (vif_id != -1)) {
+	if (err && (vif_id >= 0)) {
 		vif_priv->active = false;
 		bitmap_release_region(&ar->vif_bitmap, vif_id, 0);
 		ar->vifs--;

+ 1 - 0
drivers/net/wireless/b43/sdio.c

@@ -163,6 +163,7 @@ static int b43_sdio_probe(struct sdio_func *func,
 err_free_ssb:
 	kfree(sdio);
 err_disable_func:
+	sdio_claim_host(func);
 	sdio_disable_func(func);
 err_release_host:
 	sdio_release_host(func);

+ 1 - 1
drivers/s390/cio/qdio_thinint.c

@@ -292,8 +292,8 @@ void qdio_shutdown_thinint(struct qdio_irq *irq_ptr)
 		return;
 
 	/* reset adapter interrupt indicators */
-	put_indicator(irq_ptr->dsci);
 	set_subchannel_ind(irq_ptr, 1);
+	put_indicator(irq_ptr->dsci);
 }
 
 void __exit tiqdio_unregister_thinints(void)

+ 1 - 0
drivers/ssb/b43_pci_bridge.c

@@ -24,6 +24,7 @@ static const struct pci_device_id b43_pci_bridge_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4312) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4315) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4318) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_BCM_GVC,  0x4318) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4319) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4320) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x4321) },

+ 3 - 2
drivers/vhost/net.c

@@ -129,8 +129,9 @@ static void handle_tx(struct vhost_net *net)
 	size_t hdr_size;
 	struct socket *sock;
 
-	sock = rcu_dereference_check(vq->private_data,
-				     lockdep_is_held(&vq->mutex));
+	/* TODO: check that we are running from vhost_worker?
+	 * Not sure it's worth it, it's straight-forward enough. */
+	sock = rcu_dereference_check(vq->private_data, 1);
 	if (!sock)
 		return;
 

+ 1 - 14
fs/btrfs/compression.c

@@ -91,23 +91,10 @@ static inline int compressed_bio_size(struct btrfs_root *root,
 static struct bio *compressed_bio_alloc(struct block_device *bdev,
 					u64 first_byte, gfp_t gfp_flags)
 {
-	struct bio *bio;
 	int nr_vecs;
 
 	nr_vecs = bio_get_nr_vecs(bdev);
-	bio = bio_alloc(gfp_flags, nr_vecs);
-
-	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
-		while (!bio && (nr_vecs /= 2))
-			bio = bio_alloc(gfp_flags, nr_vecs);
-	}
-
-	if (bio) {
-		bio->bi_size = 0;
-		bio->bi_bdev = bdev;
-		bio->bi_sector = first_byte >> 9;
-	}
-	return bio;
+	return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
 }
 
 static int check_compressed_csum(struct inode *inode,

+ 3 - 3
fs/btrfs/ctree.h

@@ -808,9 +808,9 @@ struct btrfs_block_group_cache {
 	int extents_thresh;
 	int free_extents;
 	int total_bitmaps;
-	int ro:1;
-	int dirty:1;
-	int iref:1;
+	unsigned int ro:1;
+	unsigned int dirty:1;
+	unsigned int iref:1;
 
 	int disk_cache_state;
 

+ 32 - 6
fs/btrfs/disk-io.c

@@ -28,6 +28,7 @@
 #include <linux/freezer.h>
 #include <linux/crc32c.h>
 #include <linux/slab.h>
+#include <linux/migrate.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -355,6 +356,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
 	ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
 					     btrfs_header_generation(eb));
 	BUG_ON(ret);
+	WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN));
+
 	found_start = btrfs_header_bytenr(eb);
 	if (found_start != start) {
 		WARN_ON(1);
@@ -693,6 +696,29 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 				   __btree_submit_bio_done);
 }
 
+static int btree_migratepage(struct address_space *mapping,
+			struct page *newpage, struct page *page)
+{
+	/*
+	 * we can't safely write a btree page from here,
+	 * we haven't done the locking hook
+	 */
+	if (PageDirty(page))
+		return -EAGAIN;
+	/*
+	 * Buffers may be managed in a filesystem specific way.
+	 * We must have no buffers or drop them.
+	 */
+	if (page_has_private(page) &&
+	    !try_to_release_page(page, GFP_KERNEL))
+		return -EAGAIN;
+#ifdef CONFIG_MIGRATION
+	return migrate_page(mapping, newpage, page);
+#else
+	return -ENOSYS;
+#endif
+}
+
 static int btree_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct extent_io_tree *tree;
@@ -707,8 +733,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc)
 	}
 
 	redirty_page_for_writepage(wbc, page);
-	eb = btrfs_find_tree_block(root, page_offset(page),
-				      PAGE_CACHE_SIZE);
+	eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE);
 	WARN_ON(!eb);
 
 	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
@@ -799,6 +824,9 @@ static const struct address_space_operations btree_aops = {
 	.releasepage	= btree_releasepage,
 	.invalidatepage = btree_invalidatepage,
 	.sync_page	= block_sync_page,
+#ifdef CONFIG_MIGRATION
+	.migratepage	= btree_migratepage,
+#endif
 };
 
 int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
@@ -1538,10 +1566,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 						 GFP_NOFS);
 	struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
 						 GFP_NOFS);
-	struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root),
-					       GFP_NOFS);
-	struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
-						GFP_NOFS);
+	struct btrfs_root *tree_root = btrfs_sb(sb);
+	struct btrfs_fs_info *fs_info = tree_root->fs_info;
 	struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
 						GFP_NOFS);
 	struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),

+ 76 - 0
fs/btrfs/export.c

@@ -232,9 +232,85 @@ fail:
 	return ERR_PTR(ret);
 }
 
+static int btrfs_get_name(struct dentry *parent, char *name,
+			  struct dentry *child)
+{
+	struct inode *inode = child->d_inode;
+	struct inode *dir = parent->d_inode;
+	struct btrfs_path *path;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct btrfs_inode_ref *iref;
+	struct btrfs_root_ref *rref;
+	struct extent_buffer *leaf;
+	unsigned long name_ptr;
+	struct btrfs_key key;
+	int name_len;
+	int ret;
+
+	if (!dir || !inode)
+		return -EINVAL;
+
+	if (!S_ISDIR(dir->i_mode))
+		return -EINVAL;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->leave_spinning = 1;
+
+	if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+		key.objectid = BTRFS_I(inode)->root->root_key.objectid;
+		key.type = BTRFS_ROOT_BACKREF_KEY;
+		key.offset = (u64)-1;
+		root = root->fs_info->tree_root;
+	} else {
+		key.objectid = inode->i_ino;
+		key.offset = dir->i_ino;
+		key.type = BTRFS_INODE_REF_KEY;
+	}
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		btrfs_free_path(path);
+		return ret;
+	} else if (ret > 0) {
+		if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+			path->slots[0]--;
+		} else {
+			btrfs_free_path(path);
+			return -ENOENT;
+		}
+	}
+	leaf = path->nodes[0];
+
+	if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+	       rref = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_root_ref);
+	       name_ptr = (unsigned long)(rref + 1);
+	       name_len = btrfs_root_ref_name_len(leaf, rref);
+	} else {
+		iref = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_inode_ref);
+		name_ptr = (unsigned long)(iref + 1);
+		name_len = btrfs_inode_ref_name_len(leaf, iref);
+	}
+
+	read_extent_buffer(leaf, name, name_ptr, name_len);
+	btrfs_free_path(path);
+
+	/*
+	 * have to add the null termination to make sure that reconnect_path
+	 * gets the right len for strlen
+	 */
+	name[name_len] = '\0';
+
+	return 0;
+}
+
 const struct export_operations btrfs_export_ops = {
 	.encode_fh	= btrfs_encode_fh,
 	.fh_to_dentry	= btrfs_fh_to_dentry,
 	.fh_to_parent	= btrfs_fh_to_parent,
 	.get_parent	= btrfs_get_parent,
+	.get_name	= btrfs_get_name,
 };

+ 1 - 1
fs/btrfs/extent-tree.c

@@ -3412,7 +3412,7 @@ again:
 	 * our reservation.
 	 */
 	if (unused <= space_info->total_bytes) {
-		unused -= space_info->total_bytes;
+		unused = space_info->total_bytes - unused;
 		if (unused >= num_bytes) {
 			if (!reserved)
 				space_info->bytes_reserved += orig_bytes;

+ 62 - 15
fs/btrfs/extent_io.c

@@ -1828,9 +1828,9 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
 	bio_put(bio);
 }
 
-static struct bio *
-extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
-		 gfp_t gfp_flags)
+struct bio *
+btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+		gfp_t gfp_flags)
 {
 	struct bio *bio;
 
@@ -1919,7 +1919,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
 	else
 		nr = bio_get_nr_vecs(bdev);
 
-	bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+	bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
 
 	bio_add_page(bio, page, page_size, offset);
 	bio->bi_end_io = end_io_func;
@@ -2901,21 +2901,53 @@ out:
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len, get_extent_t *get_extent)
 {
-	int ret;
+	int ret = 0;
 	u64 off = start;
 	u64 max = start + len;
 	u32 flags = 0;
+	u32 found_type;
+	u64 last;
 	u64 disko = 0;
+	struct btrfs_key found_key;
 	struct extent_map *em = NULL;
 	struct extent_state *cached_state = NULL;
+	struct btrfs_path *path;
+	struct btrfs_file_extent_item *item;
 	int end = 0;
 	u64 em_start = 0, em_len = 0;
 	unsigned long emflags;
-	ret = 0;
+	int hole = 0;
 
 	if (len == 0)
 		return -EINVAL;
 
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->leave_spinning = 1;
+
+	ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
+				       path, inode->i_ino, -1, 0);
+	if (ret < 0) {
+		btrfs_free_path(path);
+		return ret;
+	}
+	WARN_ON(!ret);
+	path->slots[0]--;
+	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			      struct btrfs_file_extent_item);
+	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+	found_type = btrfs_key_type(&found_key);
+
+	/* No extents, just return */
+	if (found_key.objectid != inode->i_ino ||
+	    found_type != BTRFS_EXTENT_DATA_KEY) {
+		btrfs_free_path(path);
+		return 0;
+	}
+	last = found_key.offset;
+	btrfs_free_path(path);
+
 	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
 			 &cached_state, GFP_NOFS);
 	em = get_extent(inode, NULL, 0, off, max - off, 0);
@@ -2925,11 +2957,18 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		ret = PTR_ERR(em);
 		goto out;
 	}
+
 	while (!end) {
+		hole = 0;
 		off = em->start + em->len;
 		if (off >= max)
 			end = 1;
 
+		if (em->block_start == EXTENT_MAP_HOLE) {
+			hole = 1;
+			goto next;
+		}
+
 		em_start = em->start;
 		em_len = em->len;
 
@@ -2939,8 +2978,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		if (em->block_start == EXTENT_MAP_LAST_BYTE) {
 			end = 1;
 			flags |= FIEMAP_EXTENT_LAST;
-		} else if (em->block_start == EXTENT_MAP_HOLE) {
-			flags |= FIEMAP_EXTENT_UNWRITTEN;
 		} else if (em->block_start == EXTENT_MAP_INLINE) {
 			flags |= (FIEMAP_EXTENT_DATA_INLINE |
 				  FIEMAP_EXTENT_NOT_ALIGNED);
@@ -2953,10 +2990,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
 			flags |= FIEMAP_EXTENT_ENCODED;
 
+next:
 		emflags = em->flags;
 		free_extent_map(em);
 		em = NULL;
-
 		if (!end) {
 			em = get_extent(inode, NULL, 0, off, max - off, 0);
 			if (!em)
@@ -2967,15 +3004,23 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			}
 			emflags = em->flags;
 		}
+
 		if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
 			flags |= FIEMAP_EXTENT_LAST;
 			end = 1;
 		}
 
-		ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
-					em_len, flags);
-		if (ret)
-			goto out_free;
+		if (em_start == last) {
+			flags |= FIEMAP_EXTENT_LAST;
+			end = 1;
+		}
+
+		if (!hole) {
+			ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+						em_len, flags);
+			if (ret)
+				goto out_free;
+		}
 	}
 out_free:
 	free_extent_map(em);
@@ -3836,8 +3881,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
 
 	spin_lock(&tree->buffer_lock);
 	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
-	if (!eb)
-		goto out;
+	if (!eb) {
+		spin_unlock(&tree->buffer_lock);
+		return ret;
+	}
 
 	if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
 		ret = 0;

+ 3 - 0
fs/btrfs/extent_io.h

@@ -310,4 +310,7 @@ int extent_clear_unlock_delalloc(struct inode *inode,
 				struct extent_io_tree *tree,
 				u64 start, u64 end, struct page *locked_page,
 				unsigned long op);
+struct bio *
+btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+		gfp_t gfp_flags);
 #endif

+ 7 - 0
fs/btrfs/file.c

@@ -1047,8 +1047,14 @@ out:
 
 		if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
 			trans = btrfs_start_transaction(root, 0);
+			if (IS_ERR(trans)) {
+				num_written = PTR_ERR(trans);
+				goto done;
+			}
+			mutex_lock(&inode->i_mutex);
 			ret = btrfs_log_dentry_safe(trans, root,
 						    file->f_dentry);
+			mutex_unlock(&inode->i_mutex);
 			if (ret == 0) {
 				ret = btrfs_sync_log(trans, root);
 				if (ret == 0)
@@ -1067,6 +1073,7 @@ out:
 			     (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
 		}
 	}
+done:
 	current->backing_dev_info = NULL;
 	return num_written ? num_written : err;
 }

+ 237 - 57
fs/btrfs/inode.c

@@ -4501,6 +4501,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	BTRFS_I(inode)->index_cnt = 2;
 	BTRFS_I(inode)->root = root;
 	BTRFS_I(inode)->generation = trans->transid;
+	inode->i_generation = BTRFS_I(inode)->generation;
 	btrfs_set_inode_space_info(root, inode);
 
 	if (mode & S_IFDIR)
@@ -4622,12 +4623,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 }
 
 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
-			    struct dentry *dentry, struct inode *inode,
-			    int backref, u64 index)
+			    struct inode *dir, struct dentry *dentry,
+			    struct inode *inode, int backref, u64 index)
 {
-	int err = btrfs_add_link(trans, dentry->d_parent->d_inode,
-				 inode, dentry->d_name.name,
-				 dentry->d_name.len, backref, index);
+	int err = btrfs_add_link(trans, dir, inode,
+				 dentry->d_name.name, dentry->d_name.len,
+				 backref, index);
 	if (!err) {
 		d_instantiate(dentry, inode);
 		return 0;
@@ -4668,8 +4669,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 	btrfs_set_trans_block_group(trans, dir);
 
 	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-				dentry->d_name.len,
-				dentry->d_parent->d_inode->i_ino, objectid,
+				dentry->d_name.len, dir->i_ino, objectid,
 				BTRFS_I(dir)->block_group, mode, &index);
 	err = PTR_ERR(inode);
 	if (IS_ERR(inode))
@@ -4682,7 +4682,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 	}
 
 	btrfs_set_trans_block_group(trans, inode);
-	err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
 	if (err)
 		drop_inode = 1;
 	else {
@@ -4730,10 +4730,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 	btrfs_set_trans_block_group(trans, dir);
 
 	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-				dentry->d_name.len,
-				dentry->d_parent->d_inode->i_ino,
-				objectid, BTRFS_I(dir)->block_group, mode,
-				&index);
+				dentry->d_name.len, dir->i_ino, objectid,
+				BTRFS_I(dir)->block_group, mode, &index);
 	err = PTR_ERR(inode);
 	if (IS_ERR(inode))
 		goto out_unlock;
@@ -4745,7 +4743,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 	}
 
 	btrfs_set_trans_block_group(trans, inode);
-	err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
 	if (err)
 		drop_inode = 1;
 	else {
@@ -4787,6 +4785,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 		return -EPERM;
 
 	btrfs_inc_nlink(inode);
+	inode->i_ctime = CURRENT_TIME;
 
 	err = btrfs_set_inode_index(dir, &index);
 	if (err)
@@ -4805,15 +4804,17 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 	btrfs_set_trans_block_group(trans, dir);
 	ihold(inode);
 
-	err = btrfs_add_nondir(trans, dentry, inode, 1, index);
+	err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
 
 	if (err) {
 		drop_inode = 1;
 	} else {
+		struct dentry *parent = dget_parent(dentry);
 		btrfs_update_inode_block_group(trans, dir);
 		err = btrfs_update_inode(trans, root, inode);
 		BUG_ON(err);
-		btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
+		btrfs_log_new_name(trans, inode, NULL, parent);
+		dput(parent);
 	}
 
 	nr = trans->blocks_used;
@@ -4853,8 +4854,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	btrfs_set_trans_block_group(trans, dir);
 
 	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-				dentry->d_name.len,
-				dentry->d_parent->d_inode->i_ino, objectid,
+				dentry->d_name.len, dir->i_ino, objectid,
 				BTRFS_I(dir)->block_group, S_IFDIR | mode,
 				&index);
 	if (IS_ERR(inode)) {
@@ -4877,9 +4877,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	if (err)
 		goto out_fail;
 
-	err = btrfs_add_link(trans, dentry->d_parent->d_inode,
-				 inode, dentry->d_name.name,
-				 dentry->d_name.len, 0, index);
+	err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
+			     dentry->d_name.len, 0, index);
 	if (err)
 		goto out_fail;
 
@@ -5535,13 +5534,21 @@ struct btrfs_dio_private {
 	u64 bytes;
 	u32 *csums;
 	void *private;
+
+	/* number of bios pending for this dio */
+	atomic_t pending_bios;
+
+	/* IO errors */
+	int errors;
+
+	struct bio *orig_bio;
 };
 
 static void btrfs_endio_direct_read(struct bio *bio, int err)
 {
+	struct btrfs_dio_private *dip = bio->bi_private;
 	struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
 	struct bio_vec *bvec = bio->bi_io_vec;
-	struct btrfs_dio_private *dip = bio->bi_private;
 	struct inode *inode = dip->inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	u64 start;
@@ -5595,15 +5602,18 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
 	struct btrfs_trans_handle *trans;
 	struct btrfs_ordered_extent *ordered = NULL;
 	struct extent_state *cached_state = NULL;
+	u64 ordered_offset = dip->logical_offset;
+	u64 ordered_bytes = dip->bytes;
 	int ret;
 
 	if (err)
 		goto out_done;
-
-	ret = btrfs_dec_test_ordered_pending(inode, &ordered,
-					     dip->logical_offset, dip->bytes);
+again:
+	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
+						   &ordered_offset,
+						   ordered_bytes);
 	if (!ret)
-		goto out_done;
+		goto out_test;
 
 	BUG_ON(!ordered);
 
@@ -5663,8 +5673,20 @@ out_unlock:
 out:
 	btrfs_delalloc_release_metadata(inode, ordered->len);
 	btrfs_end_transaction(trans, root);
+	ordered_offset = ordered->file_offset + ordered->len;
 	btrfs_put_ordered_extent(ordered);
 	btrfs_put_ordered_extent(ordered);
+
+out_test:
+	/*
+	 * our bio might span multiple ordered extents.  If we haven't
+	 * completed the accounting for the whole dio, go back and try again
+	 */
+	if (ordered_offset < dip->logical_offset + dip->bytes) {
+		ordered_bytes = dip->logical_offset + dip->bytes -
+			ordered_offset;
+		goto again;
+	}
 out_done:
 	bio->bi_private = dip->private;
 
@@ -5684,6 +5706,176 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
 	return 0;
 }
 
+static void btrfs_end_dio_bio(struct bio *bio, int err)
+{
+	struct btrfs_dio_private *dip = bio->bi_private;
+
+	if (err) {
+		printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
+		      "disk_bytenr %lu len %u err no %d\n",
+		      dip->inode->i_ino, bio->bi_rw, bio->bi_sector,
+		      bio->bi_size, err);
+		dip->errors = 1;
+
+		/*
+		 * before atomic variable goto zero, we must make sure
+		 * dip->errors is perceived to be set.
+		 */
+		smp_mb__before_atomic_dec();
+	}
+
+	/* if there are more bios still pending for this dio, just exit */
+	if (!atomic_dec_and_test(&dip->pending_bios))
+		goto out;
+
+	if (dip->errors)
+		bio_io_error(dip->orig_bio);
+	else {
+		set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
+		bio_endio(dip->orig_bio, 0);
+	}
+out:
+	bio_put(bio);
+}
+
+static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
+				       u64 first_sector, gfp_t gfp_flags)
+{
+	int nr_vecs = bio_get_nr_vecs(bdev);
+	return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
+}
+
+static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
+					 int rw, u64 file_offset, int skip_sum,
+					 u32 *csums)
+{
+	int write = rw & REQ_WRITE;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret;
+
+	bio_get(bio);
+	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+	if (ret)
+		goto err;
+
+	if (write && !skip_sum) {
+		ret = btrfs_wq_submit_bio(root->fs_info,
+				   inode, rw, bio, 0, 0,
+				   file_offset,
+				   __btrfs_submit_bio_start_direct_io,
+				   __btrfs_submit_bio_done);
+		goto err;
+	} else if (!skip_sum)
+		btrfs_lookup_bio_sums_dio(root, inode, bio,
+					  file_offset, csums);
+
+	ret = btrfs_map_bio(root, rw, bio, 0, 1);
+err:
+	bio_put(bio);
+	return ret;
+}
+
+static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
+				    int skip_sum)
+{
+	struct inode *inode = dip->inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+	struct bio *bio;
+	struct bio *orig_bio = dip->orig_bio;
+	struct bio_vec *bvec = orig_bio->bi_io_vec;
+	u64 start_sector = orig_bio->bi_sector;
+	u64 file_offset = dip->logical_offset;
+	u64 submit_len = 0;
+	u64 map_length;
+	int nr_pages = 0;
+	u32 *csums = dip->csums;
+	int ret = 0;
+
+	bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
+	if (!bio)
+		return -ENOMEM;
+	bio->bi_private = dip;
+	bio->bi_end_io = btrfs_end_dio_bio;
+	atomic_inc(&dip->pending_bios);
+
+	map_length = orig_bio->bi_size;
+	ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+			      &map_length, NULL, 0);
+	if (ret) {
+		bio_put(bio);
+		return -EIO;
+	}
+
+	while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
+		if (unlikely(map_length < submit_len + bvec->bv_len ||
+		    bio_add_page(bio, bvec->bv_page, bvec->bv_len,
+				 bvec->bv_offset) < bvec->bv_len)) {
+			/*
+			 * inc the count before we submit the bio so
+			 * we know the end IO handler won't happen before
+			 * we inc the count. Otherwise, the dip might get freed
+			 * before we're done setting it up
+			 */
+			atomic_inc(&dip->pending_bios);
+			ret = __btrfs_submit_dio_bio(bio, inode, rw,
+						     file_offset, skip_sum,
+						     csums);
+			if (ret) {
+				bio_put(bio);
+				atomic_dec(&dip->pending_bios);
+				goto out_err;
+			}
+
+			if (!skip_sum)
+				csums = csums + nr_pages;
+			start_sector += submit_len >> 9;
+			file_offset += submit_len;
+
+			submit_len = 0;
+			nr_pages = 0;
+
+			bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
+						  start_sector, GFP_NOFS);
+			if (!bio)
+				goto out_err;
+			bio->bi_private = dip;
+			bio->bi_end_io = btrfs_end_dio_bio;
+
+			map_length = orig_bio->bi_size;
+			ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+					      &map_length, NULL, 0);
+			if (ret) {
+				bio_put(bio);
+				goto out_err;
+			}
+		} else {
+			submit_len += bvec->bv_len;
+			nr_pages ++;
+			bvec++;
+		}
+	}
+
+	ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
+				     csums);
+	if (!ret)
+		return 0;
+
+	bio_put(bio);
+out_err:
+	dip->errors = 1;
+	/*
+	 * before atomic variable goto zero, we must
+	 * make sure dip->errors is perceived to be set.
+	 */
+	smp_mb__before_atomic_dec();
+	if (atomic_dec_and_test(&dip->pending_bios))
+		bio_io_error(dip->orig_bio);
+
+	/* bio_end_io() will handle error, so we needn't return it */
+	return 0;
+}
+
 static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
 				loff_t file_offset)
 {
@@ -5723,36 +5915,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
 
 	dip->disk_bytenr = (u64)bio->bi_sector << 9;
 	bio->bi_private = dip;
+	dip->errors = 0;
+	dip->orig_bio = bio;
+	atomic_set(&dip->pending_bios, 0);
 
 	if (write)
 		bio->bi_end_io = btrfs_endio_direct_write;
 	else
 		bio->bi_end_io = btrfs_endio_direct_read;
 
-	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
-	if (ret)
-		goto out_err;
-
-	if (write && !skip_sum) {
-		ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
-				   inode, rw, bio, 0, 0,
-				   dip->logical_offset,
-				   __btrfs_submit_bio_start_direct_io,
-				   __btrfs_submit_bio_done);
-		if (ret)
-			goto out_err;
+	ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
+	if (!ret)
 		return;
-	} else if (!skip_sum)
-		btrfs_lookup_bio_sums_dio(root, inode, bio,
-					  dip->logical_offset, dip->csums);
-
-	ret = btrfs_map_bio(root, rw, bio, 0, 1);
-	if (ret)
-		goto out_err;
-	return;
-out_err:
-	kfree(dip->csums);
-	kfree(dip);
 free_ordered:
 	/*
 	 * If this is a write, we need to clean up the reserved space and kill
@@ -6607,8 +6781,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	BUG_ON(ret);
 
 	if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
-		btrfs_log_new_name(trans, old_inode, old_dir,
-				   new_dentry->d_parent);
+		struct dentry *parent = dget_parent(new_dentry);
+		btrfs_log_new_name(trans, old_inode, old_dir, parent);
+		dput(parent);
 		btrfs_end_log_trans(root);
 	}
 out_fail:
@@ -6758,8 +6933,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 	btrfs_set_trans_block_group(trans, dir);
 
 	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-				dentry->d_name.len,
-				dentry->d_parent->d_inode->i_ino, objectid,
+				dentry->d_name.len, dir->i_ino, objectid,
 				BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
 				&index);
 	err = PTR_ERR(inode);
@@ -6773,7 +6947,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 	}
 
 	btrfs_set_trans_block_group(trans, inode);
-	err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
 	if (err)
 		drop_inode = 1;
 	else {
@@ -6844,6 +7018,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_key ins;
 	u64 cur_offset = start;
+	u64 i_size;
 	int ret = 0;
 	bool own_trans = true;
 
@@ -6885,11 +7060,11 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 		    (actual_len > inode->i_size) &&
 		    (cur_offset > inode->i_size)) {
 			if (cur_offset > actual_len)
-				i_size_write(inode, actual_len);
+				i_size = actual_len;
 			else
-				i_size_write(inode, cur_offset);
-			i_size_write(inode, cur_offset);
-			btrfs_ordered_update_i_size(inode, cur_offset, NULL);
+				i_size = cur_offset;
+			i_size_write(inode, i_size);
+			btrfs_ordered_update_i_size(inode, i_size, NULL);
 		}
 
 		ret = btrfs_update_inode(trans, root, inode);
@@ -6943,6 +7118,10 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 	btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
 
 	mutex_lock(&inode->i_mutex);
+	ret = inode_newsize_ok(inode, alloc_end);
+	if (ret)
+		goto out;
+
 	if (alloc_start > inode->i_size) {
 		ret = btrfs_cont_expand(inode, alloc_start);
 		if (ret)
@@ -7139,6 +7318,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
+	.getattr	= btrfs_getattr,
 	.permission	= btrfs_permission,
 	.setxattr	= btrfs_setxattr,
 	.getxattr	= btrfs_getxattr,

+ 21 - 10
fs/btrfs/ioctl.c

@@ -233,7 +233,8 @@ static noinline int create_subvol(struct btrfs_root *root,
 	struct btrfs_inode_item *inode_item;
 	struct extent_buffer *leaf;
 	struct btrfs_root *new_root;
-	struct inode *dir = dentry->d_parent->d_inode;
+	struct dentry *parent = dget_parent(dentry);
+	struct inode *dir;
 	int ret;
 	int err;
 	u64 objectid;
@@ -242,8 +243,13 @@ static noinline int create_subvol(struct btrfs_root *root,
 
 	ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root,
 				       0, &objectid);
-	if (ret)
+	if (ret) {
+		dput(parent);
 		return ret;
+	}
+
+	dir = parent->d_inode;
+
 	/*
 	 * 1 - inode item
 	 * 2 - refs
@@ -251,8 +257,10 @@ static noinline int create_subvol(struct btrfs_root *root,
 	 * 2 - dir items
 	 */
 	trans = btrfs_start_transaction(root, 6);
-	if (IS_ERR(trans))
+	if (IS_ERR(trans)) {
+		dput(parent);
 		return PTR_ERR(trans);
+	}
 
 	leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
 				      0, objectid, NULL, 0, 0, 0);
@@ -339,6 +347,7 @@ static noinline int create_subvol(struct btrfs_root *root,
 
 	d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
 fail:
+	dput(parent);
 	if (async_transid) {
 		*async_transid = trans->transid;
 		err = btrfs_commit_transaction_async(trans, root, 1);
@@ -354,6 +363,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
 			   char *name, int namelen, u64 *async_transid)
 {
 	struct inode *inode;
+	struct dentry *parent;
 	struct btrfs_pending_snapshot *pending_snapshot;
 	struct btrfs_trans_handle *trans;
 	int ret;
@@ -396,7 +406,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
 
 	btrfs_orphan_cleanup(pending_snapshot->snap);
 
-	inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
+	parent = dget_parent(dentry);
+	inode = btrfs_lookup_dentry(parent->d_inode, dentry);
+	dput(parent);
 	if (IS_ERR(inode)) {
 		ret = PTR_ERR(inode);
 		goto fail;
@@ -1669,12 +1681,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 		olen = len = src->i_size - off;
 	/* if we extend to eof, continue to block boundary */
 	if (off + len == src->i_size)
-		len = ((src->i_size + bs-1) & ~(bs-1))
-			- off;
+		len = ALIGN(src->i_size, bs) - off;
 
 	/* verify the end result is block aligned */
-	if ((off & (bs-1)) ||
-	    ((off + len) & (bs-1)))
+	if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
+	    !IS_ALIGNED(destoff, bs))
 		goto out_unlock;
 
 	/* do any pending delalloc/csum calc on src, one way or
@@ -1874,8 +1885,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 			 * but shouldn't round up the file size
 			 */
 			endoff = new_key.offset + datal;
-			if (endoff > off+olen)
-				endoff = off+olen;
+			if (endoff > destoff+olen)
+				endoff = destoff+olen;
 			if (endoff > inode->i_size)
 				btrfs_i_size_write(inode, endoff);
 

+ 67 - 0
fs/btrfs/ordered-data.c

@@ -248,6 +248,73 @@ int btrfs_add_ordered_sum(struct inode *inode,
 	return 0;
 }
 
+/*
+ * this is used to account for finished IO across a given range
+ * of the file.  The IO may span ordered extents.  If
+ * a given ordered_extent is completely done, 1 is returned, otherwise
+ * 0.
+ *
+ * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
+ * to make sure this function only returns 1 once for a given ordered extent.
+ *
+ * file_offset is updated to one byte past the range that is recorded as
+ * complete.  This allows you to walk forward in the file.
+ */
+int btrfs_dec_test_first_ordered_pending(struct inode *inode,
+				   struct btrfs_ordered_extent **cached,
+				   u64 *file_offset, u64 io_size)
+{
+	struct btrfs_ordered_inode_tree *tree;
+	struct rb_node *node;
+	struct btrfs_ordered_extent *entry = NULL;
+	int ret;
+	u64 dec_end;
+	u64 dec_start;
+	u64 to_dec;
+
+	tree = &BTRFS_I(inode)->ordered_tree;
+	spin_lock(&tree->lock);
+	node = tree_search(tree, *file_offset);
+	if (!node) {
+		ret = 1;
+		goto out;
+	}
+
+	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+	if (!offset_in_entry(entry, *file_offset)) {
+		ret = 1;
+		goto out;
+	}
+
+	dec_start = max(*file_offset, entry->file_offset);
+	dec_end = min(*file_offset + io_size, entry->file_offset +
+		      entry->len);
+	*file_offset = dec_end;
+	if (dec_start > dec_end) {
+		printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
+		       (unsigned long long)dec_start,
+		       (unsigned long long)dec_end);
+	}
+	to_dec = dec_end - dec_start;
+	if (to_dec > entry->bytes_left) {
+		printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
+		       (unsigned long long)entry->bytes_left,
+		       (unsigned long long)to_dec);
+	}
+	entry->bytes_left -= to_dec;
+	if (entry->bytes_left == 0)
+		ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+	else
+		ret = 1;
+out:
+	if (!ret && cached && entry) {
+		*cached = entry;
+		atomic_inc(&entry->refs);
+	}
+	spin_unlock(&tree->lock);
+	return ret == 0;
+}
+
 /*
  * this is used to account for finished IO across a given range
  * of the file.  The IO should not span ordered extents.  If

+ 3 - 0
fs/btrfs/ordered-data.h

@@ -141,6 +141,9 @@ int btrfs_remove_ordered_extent(struct inode *inode,
 int btrfs_dec_test_ordered_pending(struct inode *inode,
 				   struct btrfs_ordered_extent **cached,
 				   u64 file_offset, u64 io_size);
+int btrfs_dec_test_first_ordered_pending(struct inode *inode,
+				   struct btrfs_ordered_extent **cached,
+				   u64 *file_offset, u64 io_size);
 int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 			     u64 start, u64 len, u64 disk_len, int type);
 int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,

+ 38 - 3
fs/btrfs/super.c

@@ -244,6 +244,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 		case Opt_space_cache:
 			printk(KERN_INFO "btrfs: enabling disk space caching\n");
 			btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+			break;
 		case Opt_clear_cache:
 			printk(KERN_INFO "btrfs: force clearing of disk cache\n");
 			btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
@@ -562,12 +563,26 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 
 static int btrfs_test_super(struct super_block *s, void *data)
 {
-	struct btrfs_fs_devices *test_fs_devices = data;
+	struct btrfs_root *test_root = data;
 	struct btrfs_root *root = btrfs_sb(s);
 
-	return root->fs_info->fs_devices == test_fs_devices;
+	/*
+	 * If this super block is going away, return false as it
+	 * can't match as an existing super block.
+	 */
+	if (!atomic_read(&s->s_active))
+		return 0;
+	return root->fs_info->fs_devices == test_root->fs_info->fs_devices;
+}
+
+static int btrfs_set_super(struct super_block *s, void *data)
+{
+	s->s_fs_info = data;
+
+	return set_anon_super(s, data);
 }
 
+
 /*
  * Find a superblock for the given device / mount point.
  *
@@ -581,6 +596,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 	struct super_block *s;
 	struct dentry *root;
 	struct btrfs_fs_devices *fs_devices = NULL;
+	struct btrfs_root *tree_root = NULL;
+	struct btrfs_fs_info *fs_info = NULL;
 	fmode_t mode = FMODE_READ;
 	char *subvol_name = NULL;
 	u64 subvol_objectid = 0;
@@ -608,8 +625,24 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 		goto error_close_devices;
 	}
 
+	/*
+	 * Setup a dummy root and fs_info for test/set super.  This is because
+	 * we don't actually fill this stuff out until open_ctree, but we need
+	 * it for searching for existing supers, so this lets us do that and
+	 * then open_ctree will properly initialize everything later.
+	 */
+	fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
+	tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+	if (!fs_info || !tree_root) {
+		error = -ENOMEM;
+		goto error_close_devices;
+	}
+	fs_info->tree_root = tree_root;
+	fs_info->fs_devices = fs_devices;
+	tree_root->fs_info = fs_info;
+
 	bdev = fs_devices->latest_bdev;
-	s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices);
+	s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
 	if (IS_ERR(s))
 		goto error_s;
 
@@ -675,6 +708,8 @@ error_s:
 	error = PTR_ERR(s);
 error_close_devices:
 	btrfs_close_devices(fs_devices);
+	kfree(fs_info);
+	kfree(tree_root);
 error_free_subvol_name:
 	kfree(subvol_name);
 	return ERR_PTR(error);

+ 4 - 1
fs/btrfs/transaction.c

@@ -902,6 +902,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	struct btrfs_root *root = pending->root;
 	struct btrfs_root *parent_root;
 	struct inode *parent_inode;
+	struct dentry *parent;
 	struct dentry *dentry;
 	struct extent_buffer *tmp;
 	struct extent_buffer *old;
@@ -941,7 +942,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	trans->block_rsv = &pending->block_rsv;
 
 	dentry = pending->dentry;
-	parent_inode = dentry->d_parent->d_inode;
+	parent = dget_parent(dentry);
+	parent_inode = parent->d_inode;
 	parent_root = BTRFS_I(parent_inode)->root;
 	record_root_in_trans(trans, parent_root);
 
@@ -989,6 +991,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 				 parent_inode->i_ino, index,
 				 dentry->d_name.name, dentry->d_name.len);
 	BUG_ON(ret);
+	dput(parent);
 
 	key.offset = (u64)-1;
 	pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);

+ 17 - 4
fs/btrfs/tree-log.c

@@ -2869,6 +2869,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
 {
 	int ret = 0;
 	struct btrfs_root *root;
+	struct dentry *old_parent = NULL;
 
 	/*
 	 * for regular files, if its inode is already on disk, we don't
@@ -2910,10 +2911,13 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
 		if (IS_ROOT(parent))
 			break;
 
-		parent = parent->d_parent;
+		parent = dget_parent(parent);
+		dput(old_parent);
+		old_parent = parent;
 		inode = parent->d_inode;
 
 	}
+	dput(old_parent);
 out:
 	return ret;
 }
@@ -2945,6 +2949,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 {
 	int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
 	struct super_block *sb;
+	struct dentry *old_parent = NULL;
 	int ret = 0;
 	u64 last_committed = root->fs_info->last_trans_committed;
 
@@ -3016,10 +3021,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 		if (IS_ROOT(parent))
 			break;
 
-		parent = parent->d_parent;
+		parent = dget_parent(parent);
+		dput(old_parent);
+		old_parent = parent;
 	}
 	ret = 0;
 end_trans:
+	dput(old_parent);
 	if (ret < 0) {
 		BUG_ON(ret != -ENOSPC);
 		root->fs_info->last_trans_log_full_commit = trans->transid;
@@ -3039,8 +3047,13 @@ end_no_trans:
 int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, struct dentry *dentry)
 {
-	return btrfs_log_inode_parent(trans, root, dentry->d_inode,
-				      dentry->d_parent, 0);
+	struct dentry *parent = dget_parent(dentry);
+	int ret;
+
+	ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0);
+	dput(parent);
+
+	return ret;
 }
 
 /*

+ 8 - 7
fs/gfs2/quota.c

@@ -631,6 +631,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 			     struct fs_disk_quota *fdq)
 {
 	struct inode *inode = &ip->i_inode;
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct address_space *mapping = inode->i_mapping;
 	unsigned long index = loc >> PAGE_CACHE_SHIFT;
 	unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
@@ -658,11 +659,11 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 	qd->qd_qb.qb_value = qp->qu_value;
 	if (fdq) {
 		if (fdq->d_fieldmask & FS_DQ_BSOFT) {
-			qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit);
+			qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
 			qd->qd_qb.qb_warn = qp->qu_warn;
 		}
 		if (fdq->d_fieldmask & FS_DQ_BHARD) {
-			qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit);
+			qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
 			qd->qd_qb.qb_limit = qp->qu_limit;
 		}
 	}
@@ -1497,9 +1498,9 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
 	fdq->d_version = FS_DQUOT_VERSION;
 	fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
 	fdq->d_id = id;
-	fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit);
-	fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn);
-	fdq->d_bcount = be64_to_cpu(qlvb->qb_value);
+	fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift;
+	fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift;
+	fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift;
 
 	gfs2_glock_dq_uninit(&q_gh);
 out:
@@ -1566,10 +1567,10 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
 
 	/* If nothing has changed, this is a no-op */
 	if ((fdq->d_fieldmask & FS_DQ_BSOFT) &&
-	    (fdq->d_blk_softlimit == be64_to_cpu(qd->qd_qb.qb_warn)))
+	    ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn)))
 		fdq->d_fieldmask ^= FS_DQ_BSOFT;
 	if ((fdq->d_fieldmask & FS_DQ_BHARD) &&
-	    (fdq->d_blk_hardlimit == be64_to_cpu(qd->qd_qb.qb_limit)))
+	    ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit)))
 		fdq->d_fieldmask ^= FS_DQ_BHARD;
 	if (fdq->d_fieldmask == 0)
 		goto out_i;

+ 6 - 25
fs/ioprio.c

@@ -103,22 +103,15 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
 	}
 
 	ret = -ESRCH;
-	/*
-	 * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic",
-	 * so we can't use rcu_read_lock(). See re-copy of ->ioprio
-	 * in copy_process().
-	 */
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	switch (which) {
 		case IOPRIO_WHO_PROCESS:
-			rcu_read_lock();
 			if (!who)
 				p = current;
 			else
 				p = find_task_by_vpid(who);
 			if (p)
 				ret = set_task_ioprio(p, ioprio);
-			rcu_read_unlock();
 			break;
 		case IOPRIO_WHO_PGRP:
 			if (!who)
@@ -141,12 +134,7 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
 				break;
 
 			do_each_thread(g, p) {
-				int match;
-
-				rcu_read_lock();
-				match = __task_cred(p)->uid == who;
-				rcu_read_unlock();
-				if (!match)
+				if (__task_cred(p)->uid != who)
 					continue;
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
@@ -160,7 +148,7 @@ free_uid:
 			ret = -EINVAL;
 	}
 
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -204,17 +192,15 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
 	int ret = -ESRCH;
 	int tmpio;
 
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	switch (which) {
 		case IOPRIO_WHO_PROCESS:
-			rcu_read_lock();
 			if (!who)
 				p = current;
 			else
 				p = find_task_by_vpid(who);
 			if (p)
 				ret = get_task_ioprio(p);
-			rcu_read_unlock();
 			break;
 		case IOPRIO_WHO_PGRP:
 			if (!who)
@@ -241,12 +227,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
 				break;
 
 			do_each_thread(g, p) {
-				int match;
-
-				rcu_read_lock();
-				match = __task_cred(p)->uid == user->uid;
-				rcu_read_unlock();
-				if (!match)
+				if (__task_cred(p)->uid != user->uid)
 					continue;
 				tmpio = get_task_ioprio(p);
 				if (tmpio < 0)
@@ -264,6 +245,6 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
 			ret = -EINVAL;
 	}
 
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	return ret;
 }

+ 34 - 28
fs/nfs/dir.c

@@ -162,6 +162,7 @@ struct nfs_cache_array_entry {
 	u64 cookie;
 	u64 ino;
 	struct qstr string;
+	unsigned char d_type;
 };
 
 struct nfs_cache_array {
@@ -171,8 +172,6 @@ struct nfs_cache_array {
 	struct nfs_cache_array_entry array[0];
 };
 
-#define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry))
-
 typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 typedef struct {
 	struct file	*file;
@@ -257,13 +256,17 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 
 	if (IS_ERR(array))
 		return PTR_ERR(array);
+
+	cache_entry = &array->array[array->size];
+
+	/* Check that this entry lies within the page bounds */
 	ret = -ENOSPC;
-	if (array->size >= MAX_READDIR_ARRAY)
+	if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE)
 		goto out;
 
-	cache_entry = &array->array[array->size];
 	cache_entry->cookie = entry->prev_cookie;
 	cache_entry->ino = entry->ino;
+	cache_entry->d_type = entry->d_type;
 	ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
 	if (ret)
 		goto out;
@@ -466,8 +469,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 	struct xdr_stream stream;
 	struct xdr_buf buf;
 	__be32 *ptr = xdr_page;
-	int status;
 	struct nfs_cache_array *array;
+	unsigned int count = 0;
+	int status;
 
 	buf.head->iov_base = xdr_page;
 	buf.head->iov_len = buflen;
@@ -488,6 +492,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 			break;
 		}
 
+		count++;
+
 		if (desc->plus == 1)
 			nfs_prime_dcache(desc->file->f_path.dentry, entry);
 
@@ -496,13 +502,14 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 			break;
 	} while (!entry->eof);
 
-	if (status == -EBADCOOKIE && entry->eof) {
+	if (count == 0 || (status == -EBADCOOKIE && entry->eof == 1)) {
 		array = nfs_readdir_get_array(page);
 		if (!IS_ERR(array)) {
 			array->eof_index = array->size;
 			status = 0;
 			nfs_readdir_release_array(page);
-		}
+		} else
+			status = PTR_ERR(array);
 	}
 	return status;
 }
@@ -696,21 +703,23 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 	int i = 0;
 	int res = 0;
 	struct nfs_cache_array *array = NULL;
-	unsigned int d_type = DT_UNKNOWN;
-	struct dentry *dentry = NULL;
 
 	array = nfs_readdir_get_array(desc->page);
-	if (IS_ERR(array))
-		return PTR_ERR(array);
+	if (IS_ERR(array)) {
+		res = PTR_ERR(array);
+		goto out;
+	}
 
 	for (i = desc->cache_entry_index; i < array->size; i++) {
-		d_type = DT_UNKNOWN;
+		struct nfs_cache_array_entry *ent;
 
-		res = filldir(dirent, array->array[i].string.name,
-			array->array[i].string.len, file->f_pos,
-			nfs_compat_user_ino64(array->array[i].ino), d_type);
-		if (res < 0)
+		ent = &array->array[i];
+		if (filldir(dirent, ent->string.name, ent->string.len,
+		    file->f_pos, nfs_compat_user_ino64(ent->ino),
+		    ent->d_type) < 0) {
+			desc->eof = 1;
 			break;
+		}
 		file->f_pos++;
 		desc->cache_entry_index = i;
 		if (i < (array->size-1))
@@ -722,9 +731,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 		desc->eof = 1;
 
 	nfs_readdir_release_array(desc->page);
+out:
 	cache_page_release(desc);
-	if (dentry != NULL)
-		dput(dentry);
 	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
 			(unsigned long long)*desc->dir_cookie, res);
 	return res;
@@ -759,13 +767,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 		goto out;
 	}
 
-	if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) {
-		status = -EIO;
-		goto out_release;
-	}
-
 	desc->page_index = 0;
 	desc->page = page;
+
+	status = nfs_readdir_xdr_to_array(desc, page, inode);
+	if (status < 0)
+		goto out_release;
+
 	status = nfs_do_filldir(desc, dirent, filldir);
 
  out:
@@ -816,14 +824,14 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		res = readdir_search_pagecache(desc);
 
 		if (res == -EBADCOOKIE) {
+			res = 0;
 			/* This means either end of directory */
 			if (*desc->dir_cookie && desc->eof == 0) {
 				/* Or that the server has 'lost' a cookie */
 				res = uncached_readdir(desc, dirent, filldir);
-				if (res >= 0)
+				if (res == 0)
 					continue;
 			}
-			res = 0;
 			break;
 		}
 		if (res == -ETOOSMALL && desc->plus) {
@@ -838,10 +846,8 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 			break;
 
 		res = nfs_do_filldir(desc, dirent, filldir);
-		if (res < 0) {
-			res = 0;
+		if (res < 0)
 			break;
-		}
 	}
 out:
 	nfs_unblock_sillyrename(dentry);

+ 1 - 1
fs/nfs/direct.c

@@ -867,7 +867,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
 		goto out;
 	nfs_alloc_commit_data(dreq);
 
-	if (dreq->commit_data == NULL || count < wsize)
+	if (dreq->commit_data == NULL || count <= wsize)
 		sync = NFS_FILE_SYNC;
 
 	dreq->inode = inode;

Some files were not shown because too many files changed in this diff