Browse Source

Merge branch 'master' of ssh://rsync.linux-nfs.org/home/trondmy/www_sites/rsync.linux-nfs.org/pub/linux/nfs-2.6

Trond Myklebust 19 years ago
parent
commit
ba9b543d5b
76 changed files with 1340 additions and 979 deletions
  1. 7 0
      MAINTAINERS
  2. 3 1
      Makefile
  3. 8 7
      arch/arm/kernel/vmlinux.lds.S
  4. 15 5
      arch/arm/mach-l7200/core.c
  5. 19 1
      arch/arm/mach-pxa/corgi_lcd.c
  6. 5 0
      arch/arm/mach-pxa/generic.c
  7. 1 3
      arch/arm/mach-pxa/spitz.c
  8. 1 0
      arch/arm/mach-s3c2410/Kconfig
  9. 8 4
      arch/m32r/kernel/smp.c
  10. 17 16
      arch/mips/pci/fixup-tb0226.c
  11. 2 1
      arch/ppc64/kernel/pmac_setup.c
  12. 159 204
      arch/sparc64/kernel/pci_iommu.c
  13. 5 39
      arch/sparc64/kernel/pci_psycho.c
  14. 4 35
      arch/sparc64/kernel/pci_sabre.c
  15. 3 54
      arch/sparc64/kernel/pci_schizo.c
  16. 0 7
      arch/sparc64/kernel/smp.c
  17. 0 16
      arch/sparc64/mm/ultra.S
  18. 0 12
      arch/sparc64/prom/misc.c
  19. 2 3
      drivers/acpi/event.c
  20. 3 0
      drivers/char/mbcs.c
  21. 4 4
      drivers/char/n_r3964.c
  22. 1 1
      drivers/input/keyboard/Kconfig
  23. 1 1
      drivers/input/keyboard/spitzkbd.c
  24. 2 2
      drivers/input/misc/uinput.c
  25. 1 1
      drivers/media/radio/radio-cadet.c
  26. 20 12
      drivers/media/video/vpx3220.c
  27. 1 1
      drivers/net/wireless/Kconfig
  28. 2 2
      drivers/pci/quirks.c
  29. 6 8
      drivers/pcmcia/soc_common.c
  30. 5 0
      drivers/scsi/Kconfig
  31. 1 1
      drivers/scsi/aacraid/linit.c
  32. 35 4
      drivers/scsi/qlogicpti.c
  33. 2 0
      drivers/serial/8250_pnp.c
  34. 1 1
      drivers/serial/sh-sci.c
  35. 2 1
      drivers/usb/host/isp116x-hcd.c
  36. 0 3
      drivers/usb/input/hid-core.c
  37. 1 1
      drivers/usb/serial/generic.c
  38. 7 2
      drivers/video/console/vgacon.c
  39. 2 0
      drivers/video/sa1100fb.c
  40. 1 2
      drivers/w1/w1.c
  41. 1 25
      fs/aio.c
  42. 6 6
      fs/exec.c
  43. 25 17
      fs/locks.c
  44. 87 8
      fs/namei.c
  45. 5 1
      fs/nfs/delegation.c
  46. 15 1
      fs/nfs/delegation.h
  47. 24 20
      fs/nfs/dir.c
  48. 20 10
      fs/nfs/file.c
  49. 8 9
      fs/nfs/inode.c
  50. 1 1
      fs/nfs/nfs3proc.c
  51. 41 12
      fs/nfs/nfs4_fs.h
  52. 351 226
      fs/nfs/nfs4proc.c
  53. 124 41
      fs/nfs/nfs4state.c
  54. 37 25
      fs/nfs/nfs4xdr.c
  55. 1 1
      fs/nfs/proc.c
  56. 65 14
      fs/open.c
  57. 12 0
      fs/proc/base.c
  58. 1 0
      fs/proc/nommu.c
  59. 1 0
      include/asm-arm/arch-pxa/pxafb.h
  60. 42 16
      include/asm-arm/arch-s3c2410/io.h
  61. 9 21
      include/asm-sparc64/pbm.h
  62. 2 2
      include/linux/acct.h
  63. 6 1
      include/linux/aio.h
  64. 7 9
      include/linux/cpumask.h
  65. 22 17
      include/linux/list.h
  66. 8 0
      include/linux/namei.h
  67. 13 23
      include/linux/nfs_xdr.h
  68. 1 0
      include/linux/rcupdate.h
  69. 1 0
      include/linux/sunrpc/xprt.h
  70. 3 0
      kernel/posix-cpu-timers.c
  71. 12 1
      kernel/rcupdate.c
  72. 1 0
      kernel/time.c
  73. 9 4
      mm/vmscan.c
  74. 0 1
      net/ipv6/netfilter/ip6_tables.c
  75. 14 12
      net/sunrpc/clnt.c
  76. 8 0
      net/sunrpc/xprt.c

+ 7 - 0
MAINTAINERS

@@ -1618,6 +1618,13 @@ M:	vandrove@vc.cvut.cz
 L:	linux-fbdev-devel@lists.sourceforge.net
 S:	Maintained
 
+MEGARAID SCSI DRIVERS
+P:     Neela Syam Kolli
+M:     Neela.Kolli@engenio.com
+S:     linux-scsi@vger.kernel.org
+W:     http://megaraid.lsilogic.com
+S:     Maintained
+
 MEMORY TECHNOLOGY DEVICES
 P:	David Woodhouse
 M:	dwmw2@infradead.org

+ 3 - 1
Makefile

@@ -660,8 +660,10 @@ quiet_cmd_sysmap = SYSMAP
 # Link of vmlinux
 # If CONFIG_KALLSYMS is set .version is already updated
 # Generate System.map and verify that the content is consistent
-
+# Use + in front of the vmlinux_version rule to silent warning with make -j2
+# First command is ':' to allow us to use + in front of the rule
 define rule_vmlinux__
+	:
 	$(if $(CONFIG_KALLSYMS),,+$(call cmd,vmlinux_version))
 
 	$(call cmd,vmlinux__)

+ 8 - 7
arch/arm/kernel/vmlinux.lds.S

@@ -89,13 +89,6 @@ SECTIONS
 		*(.got)			/* Global offset table		*/
 	}
 
-	. = ALIGN(16);
-	__ex_table : {			/* Exception table		*/
-		__start___ex_table = .;
-			*(__ex_table)
-		__stop___ex_table = .;
-	}
-
 	RODATA
 
 	_etext = .;			/* End of text and rodata section */
@@ -137,6 +130,14 @@ SECTIONS
 		. = ALIGN(32);
 		*(.data.cacheline_aligned)
 
+		/*
+		 * The exception fixup table (might need resorting at runtime)
+		 */
+		. = ALIGN(32);
+		__start___ex_table = .;
+		*(__ex_table)
+		__stop___ex_table = .;
+
 		/*
 		 * and the usual data section
 		 */

+ 15 - 5
arch/arm/mach-l7200/core.c

@@ -7,11 +7,17 @@
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/device.h>
 
+#include <asm/types.h>
+#include <asm/irq.h>
+#include <asm/mach-types.h>
 #include <asm/hardware.h>
 #include <asm/page.h>
 
+#include <asm/mach/arch.h>
 #include <asm/mach/map.h>
+#include <asm/mach/irq.h>
 
 /*
  * IRQ base register
@@ -47,6 +53,12 @@ static void l7200_unmask_irq(unsigned int irq)
 {
 	IRQ_ENABLE = 1 << irq;
 }
+
+static struct irqchip l7200_irq_chip = {
+	.ack		= l7200_mask_irq,
+	.mask		= l7200_mask_irq,
+	.unmask		= l7200_unmask_irq
+};
  
 static void __init l7200_init_irq(void)
 {
@@ -56,11 +68,9 @@ static void __init l7200_init_irq(void)
 	FIQ_ENABLECLEAR = 0xffffffff;	/* clear all fast interrupt enables */
 
 	for (irq = 0; irq < NR_IRQS; irq++) {
-		irq_desc[irq].valid	= 1;
-		irq_desc[irq].probe_ok	= 1;
-		irq_desc[irq].mask_ack	= l7200_mask_irq;
-		irq_desc[irq].mask	= l7200_mask_irq;
-		irq_desc[irq].unmask	= l7200_unmask_irq;
+		set_irq_chip(irq, &l7200_irq_chip);
+		set_irq_flags(irq, IRQF_VALID);
+		set_irq_handler(irq, do_level_IRQ);
 	}
 
 	init_FIQ();

+ 19 - 1
arch/arm/mach-pxa/corgi_lcd.c

@@ -467,6 +467,7 @@ void corgi_put_hsync(void)
 {
 	if (get_hsync_time)
 		symbol_put(w100fb_get_hsynclen);
+	get_hsync_time = NULL;
 }
 
 void corgi_wait_hsync(void)
@@ -476,20 +477,37 @@ void corgi_wait_hsync(void)
 #endif
 
 #ifdef CONFIG_PXA_SHARP_Cxx00
+static struct device *spitz_pxafb_dev;
+
+static int is_pxafb_device(struct device * dev, void * data)
+{
+	struct platform_device *pdev = container_of(dev, struct platform_device, dev);
+
+	return (strncmp(pdev->name, "pxa2xx-fb", 9) == 0);
+}
+
 unsigned long spitz_get_hsync_len(void)
 {
+	if (!spitz_pxafb_dev) {
+		spitz_pxafb_dev = bus_find_device(&platform_bus_type, NULL, NULL, is_pxafb_device);
+		if (!spitz_pxafb_dev)
+			return 0;
+	}
 	if (!get_hsync_time)
 		get_hsync_time = symbol_get(pxafb_get_hsync_time);
 	if (!get_hsync_time)
 		return 0;
 
-	return pxafb_get_hsync_time(&pxafb_device.dev);
+	return pxafb_get_hsync_time(spitz_pxafb_dev);
 }
 
 void spitz_put_hsync(void)
 {
+	put_device(spitz_pxafb_dev);
 	if (get_hsync_time)
 		symbol_put(pxafb_get_hsync_time);
+	spitz_pxafb_dev = NULL;
+	get_hsync_time = NULL;
 }
 
 void spitz_wait_hsync(void)

+ 5 - 0
arch/arm/mach-pxa/generic.c

@@ -208,6 +208,11 @@ static struct platform_device pxafb_device = {
 	.resource	= pxafb_resources,
 };
 
+void __init set_pxa_fb_parent(struct device *parent_dev)
+{
+	pxafb_device.dev.parent = parent_dev;
+}
+
 static struct platform_device ffuart_device = {
 	.name		= "pxa2xx-uart",
 	.id		= 0,

+ 1 - 3
arch/arm/mach-pxa/spitz.c

@@ -36,7 +36,6 @@
 #include <asm/arch/irq.h>
 #include <asm/arch/mmc.h>
 #include <asm/arch/udc.h>
-#include <asm/arch/ohci.h>
 #include <asm/arch/pxafb.h>
 #include <asm/arch/akita.h>
 #include <asm/arch/spitz.h>
@@ -304,7 +303,6 @@ static struct platform_device *devices[] __initdata = {
 	&spitzkbd_device,
 	&spitzts_device,
 	&spitzbl_device,
-	&spitzbattery_device,
 };
 
 static void __init common_init(void)
@@ -328,7 +326,7 @@ static void __init common_init(void)
 
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 	pxa_set_mci_info(&spitz_mci_platform_data);
-	pxafb_device.dev.parent = &spitzssp_device.dev;
+	set_pxa_fb_parent(&spitzssp_device.dev);
 	set_pxa_fb_info(&spitz_pxafb_info);
 }
 

+ 1 - 0
arch/arm/mach-s3c2410/Kconfig

@@ -12,6 +12,7 @@ config MACH_ANUBIS
 config ARCH_BAST
 	bool "Simtec Electronics BAST (EB2410ITX)"
 	select CPU_S3C2410
+	select ISA
 	help
 	  Say Y here if you are using the Simtec Electronics EB2410ITX
 	  development board (also known as BAST)

+ 8 - 4
arch/m32r/kernel/smp.c

@@ -275,12 +275,14 @@ static void flush_tlb_all_ipi(void *info)
  *==========================================================================*/
 void smp_flush_tlb_mm(struct mm_struct *mm)
 {
-	int cpu_id = smp_processor_id();
+	int cpu_id;
 	cpumask_t cpu_mask;
-	unsigned long *mmc = &mm->context[cpu_id];
+	unsigned long *mmc;
 	unsigned long flags;
 
 	preempt_disable();
+	cpu_id = smp_processor_id();
+	mmc = &mm->context[cpu_id];
 	cpu_mask = mm->cpu_vm_mask;
 	cpu_clear(cpu_id, cpu_mask);
 
@@ -343,12 +345,14 @@ void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	int cpu_id = smp_processor_id();
+	int cpu_id;
 	cpumask_t cpu_mask;
-	unsigned long *mmc = &mm->context[cpu_id];
+	unsigned long *mmc;
 	unsigned long flags;
 
 	preempt_disable();
+	cpu_id = smp_processor_id();
+	mmc = &mm->context[cpu_id];
 	cpu_mask = mm->cpu_vm_mask;
 	cpu_clear(cpu_id, cpu_mask);
 

+ 17 - 16
arch/mips/pci/fixup-tb0226.c

@@ -1,7 +1,7 @@
 /*
  *  fixup-tb0226.c, The TANBAC TB0226 specific PCI fixups.
  *
- *  Copyright (C) 2002-2004  Yoichi Yuasa <yuasa@hh.iij4u.or.jp>
+ *  Copyright (C) 2002-2005  Yoichi Yuasa <yuasa@hh.iij4u.or.jp>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 
+#include <asm/vr41xx/giu.h>
 #include <asm/vr41xx/tb0226.h>
 
 int __init pcibios_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
@@ -29,42 +30,42 @@ int __init pcibios_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
 	switch (slot) {
 	case 12:
 		vr41xx_set_irq_trigger(GD82559_1_PIN,
-				       TRIGGER_LEVEL,
-				       SIGNAL_THROUGH);
-		vr41xx_set_irq_level(GD82559_1_PIN, LEVEL_LOW);
+				       IRQ_TRIGGER_LEVEL,
+				       IRQ_SIGNAL_THROUGH);
+		vr41xx_set_irq_level(GD82559_1_PIN, IRQ_LEVEL_LOW);
 		irq = GD82559_1_IRQ;
 		break;
 	case 13:
 		vr41xx_set_irq_trigger(GD82559_2_PIN,
-				       TRIGGER_LEVEL,
-				       SIGNAL_THROUGH);
-		vr41xx_set_irq_level(GD82559_2_PIN, LEVEL_LOW);
+				       IRQ_TRIGGER_LEVEL,
+				       IRQ_SIGNAL_THROUGH);
+		vr41xx_set_irq_level(GD82559_2_PIN, IRQ_LEVEL_LOW);
 		irq = GD82559_2_IRQ;
 		break;
 	case 14:
 		switch (pin) {
 		case 1:
 			vr41xx_set_irq_trigger(UPD720100_INTA_PIN,
-					       TRIGGER_LEVEL,
-					       SIGNAL_THROUGH);
+					       IRQ_TRIGGER_LEVEL,
+					       IRQ_SIGNAL_THROUGH);
 			vr41xx_set_irq_level(UPD720100_INTA_PIN,
-					     LEVEL_LOW);
+					     IRQ_LEVEL_LOW);
 			irq = UPD720100_INTA_IRQ;
 			break;
 		case 2:
 			vr41xx_set_irq_trigger(UPD720100_INTB_PIN,
-					       TRIGGER_LEVEL,
-					       SIGNAL_THROUGH);
+					       IRQ_TRIGGER_LEVEL,
+					       IRQ_SIGNAL_THROUGH);
 			vr41xx_set_irq_level(UPD720100_INTB_PIN,
-					     LEVEL_LOW);
+					     IRQ_LEVEL_LOW);
 			irq = UPD720100_INTB_IRQ;
 			break;
 		case 3:
 			vr41xx_set_irq_trigger(UPD720100_INTC_PIN,
-					       TRIGGER_LEVEL,
-					       SIGNAL_THROUGH);
+					       IRQ_TRIGGER_LEVEL,
+					       IRQ_SIGNAL_THROUGH);
 			vr41xx_set_irq_level(UPD720100_INTC_PIN,
-					     LEVEL_LOW);
+					     IRQ_LEVEL_LOW);
 			irq = UPD720100_INTC_IRQ;
 			break;
 		default:

+ 2 - 1
arch/ppc64/kernel/pmac_setup.c

@@ -115,7 +115,7 @@ static void __pmac pmac_show_cpuinfo(struct seq_file *m)
 	
 	/* find motherboard type */
 	seq_printf(m, "machine\t\t: ");
-	np = find_devices("device-tree");
+	np = of_find_node_by_path("/");
 	if (np != NULL) {
 		pp = (char *) get_property(np, "model", NULL);
 		if (pp != NULL)
@@ -133,6 +133,7 @@ static void __pmac pmac_show_cpuinfo(struct seq_file *m)
 			}
 			seq_printf(m, "\n");
 		}
+		of_node_put(np);
 	} else
 		seq_printf(m, "PowerMac\n");
 

+ 159 - 204
arch/sparc64/kernel/pci_iommu.c

@@ -49,12 +49,6 @@ static void __iommu_flushall(struct pci_iommu *iommu)
 
 	/* Ensure completion of previous PIO writes. */
 	(void) pci_iommu_read(iommu->write_complete_reg);
-
-	/* Now update everyone's flush point. */
-	for (entry = 0; entry < PBM_NCLUSTERS; entry++) {
-		iommu->alloc_info[entry].flush =
-			iommu->alloc_info[entry].next;
-	}
 }
 
 #define IOPTE_CONSISTENT(CTX) \
@@ -80,120 +74,117 @@ static void inline iopte_make_dummy(struct pci_iommu *iommu, iopte_t *iopte)
 	iopte_val(*iopte) = val;
 }
 
-void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize)
+/* Based largely upon the ppc64 iommu allocator.  */
+static long pci_arena_alloc(struct pci_iommu *iommu, unsigned long npages)
 {
-	int i;
-
-	tsbsize /= sizeof(iopte_t);
-
-	for (i = 0; i < tsbsize; i++)
-		iopte_make_dummy(iommu, &iommu->page_table[i]);
-}
-
-static iopte_t *alloc_streaming_cluster(struct pci_iommu *iommu, unsigned long npages)
-{
-	iopte_t *iopte, *limit, *first;
-	unsigned long cnum, ent, flush_point;
-
-	cnum = 0;
-	while ((1UL << cnum) < npages)
-		cnum++;
-	iopte  = (iommu->page_table +
-		  (cnum << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)));
-
-	if (cnum == 0)
-		limit = (iommu->page_table +
-			 iommu->lowest_consistent_map);
-	else
-		limit = (iopte +
-			 (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)));
-
-	iopte += ((ent = iommu->alloc_info[cnum].next) << cnum);
-	flush_point = iommu->alloc_info[cnum].flush;
-	
-	first = iopte;
-	for (;;) {
-		if (IOPTE_IS_DUMMY(iommu, iopte)) {
-			if ((iopte + (1 << cnum)) >= limit)
-				ent = 0;
-			else
-				ent = ent + 1;
-			iommu->alloc_info[cnum].next = ent;
-			if (ent == flush_point)
-				__iommu_flushall(iommu);
-			break;
+	struct pci_iommu_arena *arena = &iommu->arena;
+	unsigned long n, i, start, end, limit;
+	int pass;
+
+	limit = arena->limit;
+	start = arena->hint;
+	pass = 0;
+
+again:
+	n = find_next_zero_bit(arena->map, limit, start);
+	end = n + npages;
+	if (unlikely(end >= limit)) {
+		if (likely(pass < 1)) {
+			limit = start;
+			start = 0;
+			__iommu_flushall(iommu);
+			pass++;
+			goto again;
+		} else {
+			/* Scanned the whole thing, give up. */
+			return -1;
 		}
-		iopte += (1 << cnum);
-		ent++;
-		if (iopte >= limit) {
-			iopte = (iommu->page_table +
-				 (cnum <<
-				  (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)));
-			ent = 0;
+	}
+
+	for (i = n; i < end; i++) {
+		if (test_bit(i, arena->map)) {
+			start = i + 1;
+			goto again;
 		}
-		if (ent == flush_point)
-			__iommu_flushall(iommu);
-		if (iopte == first)
-			goto bad;
 	}
 
-	/* I've got your streaming cluster right here buddy boy... */
-	return iopte;
+	for (i = n; i < end; i++)
+		__set_bit(i, arena->map);
 
-bad:
-	printk(KERN_EMERG "pci_iommu: alloc_streaming_cluster of npages(%ld) failed!\n",
-	       npages);
-	return NULL;
+	arena->hint = end;
+
+	return n;
 }
 
-static void free_streaming_cluster(struct pci_iommu *iommu, dma_addr_t base,
-				   unsigned long npages, unsigned long ctx)
+static void pci_arena_free(struct pci_iommu_arena *arena, unsigned long base, unsigned long npages)
 {
-	unsigned long cnum, ent;
+	unsigned long i;
 
-	cnum = 0;
-	while ((1UL << cnum) < npages)
-		cnum++;
+	for (i = base; i < (base + npages); i++)
+		__clear_bit(i, arena->map);
+}
 
-	ent = (base << (32 - IO_PAGE_SHIFT + PBM_LOGCLUSTERS - iommu->page_table_sz_bits))
-		>> (32 + PBM_LOGCLUSTERS + cnum - iommu->page_table_sz_bits);
+void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask)
+{
+	unsigned long i, tsbbase, order, sz, num_tsb_entries;
+
+	num_tsb_entries = tsbsize / sizeof(iopte_t);
+
+	/* Setup initial software IOMMU state. */
+	spin_lock_init(&iommu->lock);
+	iommu->ctx_lowest_free = 1;
+	iommu->page_table_map_base = dma_offset;
+	iommu->dma_addr_mask = dma_addr_mask;
+
+	/* Allocate and initialize the free area map.  */
+	sz = num_tsb_entries / 8;
+	sz = (sz + 7UL) & ~7UL;
+	iommu->arena.map = kmalloc(sz, GFP_KERNEL);
+	if (!iommu->arena.map) {
+		prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
+		prom_halt();
+	}
+	memset(iommu->arena.map, 0, sz);
+	iommu->arena.limit = num_tsb_entries;
 
-	/* If the global flush might not have caught this entry,
-	 * adjust the flush point such that we will flush before
-	 * ever trying to reuse it.
+	/* Allocate and initialize the dummy page which we
+	 * set inactive IO PTEs to point to.
 	 */
-#define between(X,Y,Z)	(((Z) - (Y)) >= ((X) - (Y)))
-	if (between(ent, iommu->alloc_info[cnum].next, iommu->alloc_info[cnum].flush))
-		iommu->alloc_info[cnum].flush = ent;
-#undef between
+	iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0);
+	if (!iommu->dummy_page) {
+		prom_printf("PCI_IOMMU: Error, gfp(dummy_page) failed.\n");
+		prom_halt();
+	}
+	memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
+	iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
+
+	/* Now allocate and setup the IOMMU page table itself.  */
+	order = get_order(tsbsize);
+	tsbbase = __get_free_pages(GFP_KERNEL, order);
+	if (!tsbbase) {
+		prom_printf("PCI_IOMMU: Error, gfp(tsb) failed.\n");
+		prom_halt();
+	}
+	iommu->page_table = (iopte_t *)tsbbase;
+
+	for (i = 0; i < num_tsb_entries; i++)
+		iopte_make_dummy(iommu, &iommu->page_table[i]);
 }
 
-/* We allocate consistent mappings from the end of cluster zero. */
-static iopte_t *alloc_consistent_cluster(struct pci_iommu *iommu, unsigned long npages)
+static inline iopte_t *alloc_npages(struct pci_iommu *iommu, unsigned long npages)
 {
-	iopte_t *iopte;
+	long entry;
 
-	iopte = iommu->page_table + (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS));
-	while (iopte > iommu->page_table) {
-		iopte--;
-		if (IOPTE_IS_DUMMY(iommu, iopte)) {
-			unsigned long tmp = npages;
+	entry = pci_arena_alloc(iommu, npages);
+	if (unlikely(entry < 0))
+		return NULL;
 
-			while (--tmp) {
-				iopte--;
-				if (!IOPTE_IS_DUMMY(iommu, iopte))
-					break;
-			}
-			if (tmp == 0) {
-				u32 entry = (iopte - iommu->page_table);
+	return iommu->page_table + entry;
+}
 
-				if (entry < iommu->lowest_consistent_map)
-					iommu->lowest_consistent_map = entry;
-				return iopte;
-			}
-		}
-	}
-	return NULL;
+static inline void free_npages(struct pci_iommu *iommu, dma_addr_t base, unsigned long npages)
+{
+	pci_arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages);
 }
 
 static int iommu_alloc_ctx(struct pci_iommu *iommu)
@@ -233,7 +224,7 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad
 	struct pcidev_cookie *pcp;
 	struct pci_iommu *iommu;
 	iopte_t *iopte;
-	unsigned long flags, order, first_page, ctx;
+	unsigned long flags, order, first_page;
 	void *ret;
 	int npages;
 
@@ -251,9 +242,10 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad
 	iommu = pcp->pbm->iommu;
 
 	spin_lock_irqsave(&iommu->lock, flags);
-	iopte = alloc_consistent_cluster(iommu, size >> IO_PAGE_SHIFT);
-	if (iopte == NULL) {
-		spin_unlock_irqrestore(&iommu->lock, flags);
+	iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT);
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	if (unlikely(iopte == NULL)) {
 		free_pages(first_page, order);
 		return NULL;
 	}
@@ -262,31 +254,15 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad
 		      ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
 	ret = (void *) first_page;
 	npages = size >> IO_PAGE_SHIFT;
-	ctx = 0;
-	if (iommu->iommu_ctxflush)
-		ctx = iommu_alloc_ctx(iommu);
 	first_page = __pa(first_page);
 	while (npages--) {
-		iopte_val(*iopte) = (IOPTE_CONSISTENT(ctx) |
+		iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) |
 				     IOPTE_WRITE |
 				     (first_page & IOPTE_PAGE));
 		iopte++;
 		first_page += IO_PAGE_SIZE;
 	}
 
-	{
-		int i;
-		u32 daddr = *dma_addrp;
-
-		npages = size >> IO_PAGE_SHIFT;
-		for (i = 0; i < npages; i++) {
-			pci_iommu_write(iommu->iommu_flush, daddr);
-			daddr += IO_PAGE_SIZE;
-		}
-	}
-
-	spin_unlock_irqrestore(&iommu->lock, flags);
-
 	return ret;
 }
 
@@ -296,7 +272,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_
 	struct pcidev_cookie *pcp;
 	struct pci_iommu *iommu;
 	iopte_t *iopte;
-	unsigned long flags, order, npages, i, ctx;
+	unsigned long flags, order, npages;
 
 	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
 	pcp = pdev->sysdata;
@@ -306,46 +282,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_
 
 	spin_lock_irqsave(&iommu->lock, flags);
 
-	if ((iopte - iommu->page_table) ==
-	    iommu->lowest_consistent_map) {
-		iopte_t *walk = iopte + npages;
-		iopte_t *limit;
-
-		limit = (iommu->page_table +
-			 (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)));
-		while (walk < limit) {
-			if (!IOPTE_IS_DUMMY(iommu, walk))
-				break;
-			walk++;
-		}
-		iommu->lowest_consistent_map =
-			(walk - iommu->page_table);
-	}
-
-	/* Data for consistent mappings cannot enter the streaming
-	 * buffers, so we only need to update the TSB.  We flush
-	 * the IOMMU here as well to prevent conflicts with the
-	 * streaming mapping deferred tlb flush scheme.
-	 */
-
-	ctx = 0;
-	if (iommu->iommu_ctxflush)
-		ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
-
-	for (i = 0; i < npages; i++, iopte++)
-		iopte_make_dummy(iommu, iopte);
-
-	if (iommu->iommu_ctxflush) {
-		pci_iommu_write(iommu->iommu_ctxflush, ctx);
-	} else {
-		for (i = 0; i < npages; i++) {
-			u32 daddr = dvma + (i << IO_PAGE_SHIFT);
-
-			pci_iommu_write(iommu->iommu_flush, daddr);
-		}
-	}
-
-	iommu_free_ctx(iommu, ctx);
+	free_npages(iommu, dvma, npages);
 
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
@@ -372,25 +309,27 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct
 	iommu = pcp->pbm->iommu;
 	strbuf = &pcp->pbm->stc;
 
-	if (direction == PCI_DMA_NONE)
-		BUG();
+	if (unlikely(direction == PCI_DMA_NONE))
+		goto bad_no_ctx;
 
 	oaddr = (unsigned long)ptr;
 	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
 	npages >>= IO_PAGE_SHIFT;
 
 	spin_lock_irqsave(&iommu->lock, flags);
+	base = alloc_npages(iommu, npages);
+	ctx = 0;
+	if (iommu->iommu_ctxflush)
+		ctx = iommu_alloc_ctx(iommu);
+	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	base = alloc_streaming_cluster(iommu, npages);
-	if (base == NULL)
+	if (unlikely(!base))
 		goto bad;
+
 	bus_addr = (iommu->page_table_map_base +
 		    ((base - iommu->page_table) << IO_PAGE_SHIFT));
 	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
 	base_paddr = __pa(oaddr & IO_PAGE_MASK);
-	ctx = 0;
-	if (iommu->iommu_ctxflush)
-		ctx = iommu_alloc_ctx(iommu);
 	if (strbuf->strbuf_enabled)
 		iopte_protection = IOPTE_STREAMING(ctx);
 	else
@@ -401,12 +340,13 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct
 	for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE)
 		iopte_val(*base) = iopte_protection | base_paddr;
 
-	spin_unlock_irqrestore(&iommu->lock, flags);
-
 	return ret;
 
 bad:
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	iommu_free_ctx(iommu, ctx);
+bad_no_ctx:
+	if (printk_ratelimit())
+		WARN_ON(1);
 	return PCI_DMA_ERROR_CODE;
 }
 
@@ -481,10 +421,13 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int
 	struct pci_iommu *iommu;
 	struct pci_strbuf *strbuf;
 	iopte_t *base;
-	unsigned long flags, npages, ctx;
+	unsigned long flags, npages, ctx, i;
 
-	if (direction == PCI_DMA_NONE)
-		BUG();
+	if (unlikely(direction == PCI_DMA_NONE)) {
+		if (printk_ratelimit())
+			WARN_ON(1);
+		return;
+	}
 
 	pcp = pdev->sysdata;
 	iommu = pcp->pbm->iommu;
@@ -510,13 +453,14 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int
 
 	/* Step 1: Kick data out of streaming buffers if necessary. */
 	if (strbuf->strbuf_enabled)
-		pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
+		pci_strbuf_flush(strbuf, iommu, bus_addr, ctx,
+				 npages, direction);
 
-	/* Step 2: Clear out first TSB entry. */
-	iopte_make_dummy(iommu, base);
+	/* Step 2: Clear out TSB entries. */
+	for (i = 0; i < npages; i++)
+		iopte_make_dummy(iommu, base + i);
 
-	free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base,
-			       npages, ctx);
+	free_npages(iommu, bus_addr - iommu->page_table_map_base, npages);
 
 	iommu_free_ctx(iommu, ctx);
 
@@ -621,6 +565,8 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int
 			pci_map_single(pdev,
 				       (page_address(sglist->page) + sglist->offset),
 				       sglist->length, direction);
+		if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE))
+			return 0;
 		sglist->dma_length = sglist->length;
 		return 1;
 	}
@@ -629,21 +575,29 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int
 	iommu = pcp->pbm->iommu;
 	strbuf = &pcp->pbm->stc;
 	
-	if (direction == PCI_DMA_NONE)
-		BUG();
+	if (unlikely(direction == PCI_DMA_NONE))
+		goto bad_no_ctx;
 
 	/* Step 1: Prepare scatter list. */
 
 	npages = prepare_sg(sglist, nelems);
 
-	/* Step 2: Allocate a cluster. */
+	/* Step 2: Allocate a cluster and context, if necessary. */
 
 	spin_lock_irqsave(&iommu->lock, flags);
 
-	base = alloc_streaming_cluster(iommu, npages);
+	base = alloc_npages(iommu, npages);
+	ctx = 0;
+	if (iommu->iommu_ctxflush)
+		ctx = iommu_alloc_ctx(iommu);
+
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
 	if (base == NULL)
 		goto bad;
-	dma_base = iommu->page_table_map_base + ((base - iommu->page_table) << IO_PAGE_SHIFT);
+
+	dma_base = iommu->page_table_map_base +
+		((base - iommu->page_table) << IO_PAGE_SHIFT);
 
 	/* Step 3: Normalize DMA addresses. */
 	used = nelems;
@@ -656,30 +610,28 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int
 	}
 	used = nelems - used;
 
-	/* Step 4: Choose a context if necessary. */
-	ctx = 0;
-	if (iommu->iommu_ctxflush)
-		ctx = iommu_alloc_ctx(iommu);
-
-	/* Step 5: Create the mappings. */
+	/* Step 4: Create the mappings. */
 	if (strbuf->strbuf_enabled)
 		iopte_protection = IOPTE_STREAMING(ctx);
 	else
 		iopte_protection = IOPTE_CONSISTENT(ctx);
 	if (direction != PCI_DMA_TODEVICE)
 		iopte_protection |= IOPTE_WRITE;
-	fill_sg (base, sglist, used, nelems, iopte_protection);
+
+	fill_sg(base, sglist, used, nelems, iopte_protection);
+
 #ifdef VERIFY_SG
 	verify_sglist(sglist, nelems, base, npages);
 #endif
 
-	spin_unlock_irqrestore(&iommu->lock, flags);
-
 	return used;
 
 bad:
-	spin_unlock_irqrestore(&iommu->lock, flags);
-	return PCI_DMA_ERROR_CODE;
+	iommu_free_ctx(iommu, ctx);
+bad_no_ctx:
+	if (printk_ratelimit())
+		WARN_ON(1);
+	return 0;
 }
 
 /* Unmap a set of streaming mode DMA translations. */
@@ -692,8 +644,10 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
 	unsigned long flags, ctx, i, npages;
 	u32 bus_addr;
 
-	if (direction == PCI_DMA_NONE)
-		BUG();
+	if (unlikely(direction == PCI_DMA_NONE)) {
+		if (printk_ratelimit())
+			WARN_ON(1);
+	}
 
 	pcp = pdev->sysdata;
 	iommu = pcp->pbm->iommu;
@@ -705,7 +659,8 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
 		if (sglist[i].dma_length == 0)
 			break;
 	i--;
-	npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT;
+	npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
+		  bus_addr) >> IO_PAGE_SHIFT;
 
 	base = iommu->page_table +
 		((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
@@ -726,11 +681,11 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
 	if (strbuf->strbuf_enabled)
 		pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
 
-	/* Step 2: Clear out first TSB entry. */
-	iopte_make_dummy(iommu, base);
+	/* Step 2: Clear out the TSB entries. */
+	for (i = 0; i < npages; i++)
+		iopte_make_dummy(iommu, base + i);
 
-	free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base,
-			       npages, ctx);
+	free_npages(iommu, bus_addr - iommu->page_table_map_base, npages);
 
 	iommu_free_ctx(iommu, ctx);
 

+ 5 - 39
arch/sparc64/kernel/pci_psycho.c

@@ -1207,13 +1207,9 @@ static void psycho_scan_bus(struct pci_controller_info *p)
 static void psycho_iommu_init(struct pci_controller_info *p)
 {
 	struct pci_iommu *iommu = p->pbm_A.iommu;
-	unsigned long tsbbase, i;
+	unsigned long i;
 	u64 control;
 
-	/* Setup initial software IOMMU state. */
-	spin_lock_init(&iommu->lock);
-	iommu->ctx_lowest_free = 1;
-
 	/* Register addresses. */
 	iommu->iommu_control  = p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL;
 	iommu->iommu_tsbbase  = p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE;
@@ -1240,40 +1236,10 @@ static void psycho_iommu_init(struct pci_controller_info *p)
 	/* Leave diag mode enabled for full-flushing done
 	 * in pci_iommu.c
 	 */
+	pci_iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff);
 
-	iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0);
-	if (!iommu->dummy_page) {
-		prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n");
-		prom_halt();
-	}
-	memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
-	iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
-
-	/* Using assumed page size 8K with 128K entries we need 1MB iommu page
-	 * table (128K ioptes * 8 bytes per iopte).  This is
-	 * page order 7 on UltraSparc.
-	 */
-	tsbbase = __get_free_pages(GFP_KERNEL, get_order(IO_TSB_SIZE));
-	if (!tsbbase) {
-		prom_printf("PSYCHO_IOMMU: Error, gfp(tsb) failed.\n");
-		prom_halt();
-	}
-	iommu->page_table = (iopte_t *)tsbbase;
-	iommu->page_table_sz_bits = 17;
-	iommu->page_table_map_base = 0xc0000000;
-	iommu->dma_addr_mask = 0xffffffff;
-	pci_iommu_table_init(iommu, IO_TSB_SIZE);
-
-	/* We start with no consistent mappings. */
-	iommu->lowest_consistent_map =
-		1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS);
-
-	for (i = 0; i < PBM_NCLUSTERS; i++) {
-		iommu->alloc_info[i].flush = 0;
-		iommu->alloc_info[i].next = 0;
-	}
-
-	psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE, __pa(tsbbase));
+	psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE,
+		     __pa(iommu->page_table));
 
 	control = psycho_read(p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL);
 	control &= ~(PSYCHO_IOMMU_CTRL_TSBSZ | PSYCHO_IOMMU_CTRL_TBWSZ);
@@ -1281,7 +1247,7 @@ static void psycho_iommu_init(struct pci_controller_info *p)
 	psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL, control);
 
 	/* If necessary, hook us up for starfire IRQ translations. */
-	if(this_is_starfire)
+	if (this_is_starfire)
 		p->starfire_cookie = starfire_hookup(p->pbm_A.portid);
 	else
 		p->starfire_cookie = NULL;

+ 4 - 35
arch/sparc64/kernel/pci_sabre.c

@@ -1267,13 +1267,9 @@ static void sabre_iommu_init(struct pci_controller_info *p,
 			     u32 dma_mask)
 {
 	struct pci_iommu *iommu = p->pbm_A.iommu;
-	unsigned long tsbbase, i, order;
+	unsigned long i;
 	u64 control;
 
-	/* Setup initial software IOMMU state. */
-	spin_lock_init(&iommu->lock);
-	iommu->ctx_lowest_free = 1;
-
 	/* Register addresses. */
 	iommu->iommu_control  = p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL;
 	iommu->iommu_tsbbase  = p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE;
@@ -1295,26 +1291,10 @@ static void sabre_iommu_init(struct pci_controller_info *p,
 	/* Leave diag mode enabled for full-flushing done
 	 * in pci_iommu.c
 	 */
+	pci_iommu_table_init(iommu, tsbsize * 1024 * 8, dvma_offset, dma_mask);
 
-	iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0);
-	if (!iommu->dummy_page) {
-		prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n");
-		prom_halt();
-	}
-	memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
-	iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
-
-	tsbbase = __get_free_pages(GFP_KERNEL, order = get_order(tsbsize * 1024 * 8));
-	if (!tsbbase) {
-		prom_printf("SABRE_IOMMU: Error, gfp(tsb) failed.\n");
-		prom_halt();
-	}
-	iommu->page_table = (iopte_t *)tsbbase;
-	iommu->page_table_map_base = dvma_offset;
-	iommu->dma_addr_mask = dma_mask;
-	pci_iommu_table_init(iommu, PAGE_SIZE << order);
-
-	sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE, __pa(tsbbase));
+	sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE,
+		    __pa(iommu->page_table));
 
 	control = sabre_read(p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL);
 	control &= ~(SABRE_IOMMUCTRL_TSBSZ | SABRE_IOMMUCTRL_TBWSZ);
@@ -1322,11 +1302,9 @@ static void sabre_iommu_init(struct pci_controller_info *p,
 	switch(tsbsize) {
 	case 64:
 		control |= SABRE_IOMMU_TSBSZ_64K;
-		iommu->page_table_sz_bits = 16;
 		break;
 	case 128:
 		control |= SABRE_IOMMU_TSBSZ_128K;
-		iommu->page_table_sz_bits = 17;
 		break;
 	default:
 		prom_printf("iommu_init: Illegal TSB size %d\n", tsbsize);
@@ -1334,15 +1312,6 @@ static void sabre_iommu_init(struct pci_controller_info *p,
 		break;
 	}
 	sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL, control);
-
-	/* We start with no consistent mappings. */
-	iommu->lowest_consistent_map =
-		1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS);
-
-	for (i = 0; i < PBM_NCLUSTERS; i++) {
-		iommu->alloc_info[i].flush = 0;
-		iommu->alloc_info[i].next = 0;
-	}
 }
 
 static void pbm_register_toplevel_resources(struct pci_controller_info *p,

+ 3 - 54
arch/sparc64/kernel/pci_schizo.c

@@ -1765,7 +1765,7 @@ static void schizo_pbm_strbuf_init(struct pci_pbm_info *pbm)
 static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
 {
 	struct pci_iommu *iommu = pbm->iommu;
-	unsigned long tsbbase, i, tagbase, database, order;
+	unsigned long i, tagbase, database;
 	u32 vdma[2], dma_mask;
 	u64 control;
 	int err, tsbsize;
@@ -1800,10 +1800,6 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
 			prom_halt();
 	};
 
-	/* Setup initial software IOMMU state. */
-	spin_lock_init(&iommu->lock);
-	iommu->ctx_lowest_free = 1;
-
 	/* Register addresses, SCHIZO has iommu ctx flushing. */
 	iommu->iommu_control  = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL;
 	iommu->iommu_tsbbase  = pbm->pbm_regs + SCHIZO_IOMMU_TSBBASE;
@@ -1832,56 +1828,9 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
 	/* Leave diag mode enabled for full-flushing done
 	 * in pci_iommu.c
 	 */
+	pci_iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask);
 
-	iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0);
-	if (!iommu->dummy_page) {
-		prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n");
-		prom_halt();
-	}
-	memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
-	iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
-
-	/* Using assumed page size 8K with 128K entries we need 1MB iommu page
-	 * table (128K ioptes * 8 bytes per iopte).  This is
-	 * page order 7 on UltraSparc.
-	 */
-	order = get_order(tsbsize * 8 * 1024);
-	tsbbase = __get_free_pages(GFP_KERNEL, order);
-	if (!tsbbase) {
-		prom_printf("%s: Error, gfp(tsb) failed.\n", pbm->name);
-		prom_halt();
-	}
-
-	iommu->page_table = (iopte_t *)tsbbase;
-	iommu->page_table_map_base = vdma[0];
-	iommu->dma_addr_mask = dma_mask;
-	pci_iommu_table_init(iommu, PAGE_SIZE << order);
-
-	switch (tsbsize) {
-	case 64:
-		iommu->page_table_sz_bits = 16;
-		break;
-
-	case 128:
-		iommu->page_table_sz_bits = 17;
-		break;
-
-	default:
-		prom_printf("iommu_init: Illegal TSB size %d\n", tsbsize);
-		prom_halt();
-		break;
-	};
-
-	/* We start with no consistent mappings. */
-	iommu->lowest_consistent_map =
-		1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS);
-
-	for (i = 0; i < PBM_NCLUSTERS; i++) {
-		iommu->alloc_info[i].flush = 0;
-		iommu->alloc_info[i].next = 0;
-	}
-
-	schizo_write(iommu->iommu_tsbbase, __pa(tsbbase));
+	schizo_write(iommu->iommu_tsbbase, __pa(iommu->page_table));
 
 	control = schizo_read(iommu->iommu_control);
 	control &= ~(SCHIZO_IOMMU_CTRL_TSBSZ | SCHIZO_IOMMU_CTRL_TBWSZ);

+ 0 - 7
arch/sparc64/kernel/smp.c

@@ -1001,13 +1001,6 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
 	preempt_enable();
 }
 
-extern unsigned long xcall_promstop;
-
-void smp_promstop_others(void)
-{
-	smp_cross_call(&xcall_promstop, 0, 0, 0);
-}
-
 #define prof_multiplier(__cpu)		cpu_data(__cpu).multiplier
 #define prof_counter(__cpu)		cpu_data(__cpu).counter
 

+ 0 - 16
arch/sparc64/mm/ultra.S

@@ -453,22 +453,6 @@ xcall_flush_dcache_page_spitfire: /* %g1 == physical page address
 	nop
 	nop
 
-	.globl		xcall_promstop
-xcall_promstop:
-	rdpr		%pstate, %g2
-	wrpr		%g2, PSTATE_IG | PSTATE_AG, %pstate
-	rdpr		%pil, %g2
-	wrpr		%g0, 15, %pil
-	sethi		%hi(109f), %g7
-	b,pt		%xcc, etrap_irq
-109:	 or		%g7, %lo(109b), %g7
-	flushw
-	call		prom_stopself
-	 nop
-	/* We should not return, just spin if we do... */
-1:	b,a,pt		%xcc, 1b
-	nop
-
 	.data
 
 errata32_hwbug:

+ 0 - 12
arch/sparc64/prom/misc.c

@@ -68,19 +68,11 @@ void prom_cmdline(void)
 	local_irq_restore(flags);
 }
 
-#ifdef CONFIG_SMP
-extern void smp_promstop_others(void);
-#endif
-
 /* Drop into the prom, but completely terminate the program.
  * No chance of continuing.
  */
 void prom_halt(void)
 {
-#ifdef CONFIG_SMP
-	smp_promstop_others();
-	udelay(8000);
-#endif
 again:
 	p1275_cmd("exit", P1275_INOUT(0, 0));
 	goto again; /* PROM is out to get me -DaveM */
@@ -88,10 +80,6 @@ again:
 
 void prom_halt_power_off(void)
 {
-#ifdef CONFIG_SMP
-	smp_promstop_others();
-	udelay(8000);
-#endif
 	p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0));
 
 	/* if nothing else helps, we just halt */

+ 2 - 3
drivers/acpi/event.c

@@ -58,9 +58,8 @@ acpi_system_read_event(struct file *file, char __user * buffer, size_t count,
 			return_VALUE(-EAGAIN);
 
 		result = acpi_bus_receive_event(&event);
-		if (result) {
-			return_VALUE(-EIO);
-		}
+		if (result)
+			return_VALUE(result);
 
 		chars_remaining = sprintf(str, "%s %s %08x %08x\n",
 					  event.device_class ? event.

+ 3 - 0
drivers/char/mbcs.c

@@ -830,6 +830,9 @@ static int __init mbcs_init(void)
 {
 	int rv;
 
+	if (!ia64_platform_is("sn2"))
+		return -ENODEV;
+
 	// Put driver into chrdevs[].  Get major number.
 	rv = register_chrdev(mbcs_major, DEVICE_NAME, &mbcs_ops);
 	if (rv < 0) {

+ 4 - 4
drivers/char/n_r3964.c

@@ -695,7 +695,7 @@ static void receive_char(struct r3964_info *pInfo, const unsigned char c)
             {
                TRACE_PE("IDLE - got STX but no space in rx_queue!");
                pInfo->state=R3964_WAIT_FOR_RX_BUF;
-	       mod_timer(&pInfo->tmr, R3964_TO_NO_BUF);
+	       mod_timer(&pInfo->tmr, jiffies + R3964_TO_NO_BUF);
                break;
             }
 start_receiving:
@@ -705,7 +705,7 @@ start_receiving:
             pInfo->last_rx = 0;
             pInfo->flags &= ~R3964_ERROR;
             pInfo->state=R3964_RECEIVING;
-	    mod_timer(&pInfo->tmr, R3964_TO_ZVZ);
+	    mod_timer(&pInfo->tmr, jiffies + R3964_TO_ZVZ);
 	    pInfo->nRetry = 0;
             put_char(pInfo, DLE);
             flush(pInfo);
@@ -732,7 +732,7 @@ start_receiving:
                if(pInfo->flags & R3964_BCC)
                {
                   pInfo->state = R3964_WAIT_FOR_BCC;
-		  mod_timer(&pInfo->tmr, R3964_TO_ZVZ);
+		  mod_timer(&pInfo->tmr, jiffies + R3964_TO_ZVZ);
                }
                else 
                {
@@ -744,7 +744,7 @@ start_receiving:
                pInfo->last_rx = c;
 char_to_buf:
                pInfo->rx_buf[pInfo->rx_position++] = c;
-	       mod_timer(&pInfo->tmr, R3964_TO_ZVZ);
+	       mod_timer(&pInfo->tmr, jiffies + R3964_TO_ZVZ);
             }
          }
         /* else: overflow-msg? BUF_SIZE>MTU; should not happen? */ 

+ 1 - 1
drivers/input/keyboard/Kconfig

@@ -93,7 +93,7 @@ config KEYBOARD_LKKBD
 
 config KEYBOARD_LOCOMO
 	tristate "LoCoMo Keyboard Support"
-	depends on SHARP_LOCOMO
+	depends on SHARP_LOCOMO && INPUT_KEYBOARD
 	help
 	  Say Y here if you are running Linux on a Sharp Zaurus Collie or Poodle based PDA
 

+ 1 - 1
drivers/input/keyboard/spitzkbd.c

@@ -53,7 +53,7 @@ static unsigned char spitzkbd_keycode[NR_SCANCODES] = {
 	KEY_LEFTCTRL, KEY_1, KEY_3, KEY_5, KEY_6, KEY_7, KEY_9, KEY_0, KEY_BACKSPACE, SPITZ_KEY_EXOK, SPITZ_KEY_EXCANCEL, 0, 0, 0, 0, 0,  /* 1-16 */
 	0, KEY_2, KEY_4, KEY_R, KEY_Y, KEY_8, KEY_I, KEY_O, KEY_P, SPITZ_KEY_EXJOGDOWN, SPITZ_KEY_EXJOGUP, 0, 0, 0, 0, 0, /* 17-32 */
 	KEY_TAB, KEY_Q, KEY_E, KEY_T, KEY_G, KEY_U, KEY_J, KEY_K, 0, 0, 0, 0, 0, 0, 0, 0,                                 /* 33-48 */
-	SPITZ_KEY_CALENDER, KEY_W, KEY_S, KEY_F, KEY_V, KEY_H, KEY_M, KEY_L, 0, 0, KEY_RIGHTSHIFT, 0, 0, 0, 0, 0,         /* 49-64 */
+	SPITZ_KEY_CALENDER, KEY_W, KEY_S, KEY_F, KEY_V, KEY_H, KEY_M, KEY_L, 0, KEY_RIGHTSHIFT, 0, 0, 0, 0, 0, 0,         /* 49-64 */
 	SPITZ_KEY_ADDRESS, KEY_A, KEY_D, KEY_C, KEY_B, KEY_N, KEY_DOT, 0, KEY_ENTER, KEY_LEFTSHIFT, 0, 0, 0, 0, 0, 0,	  /* 65-80 */
 	SPITZ_KEY_MAIL, KEY_Z, KEY_X, KEY_MINUS, KEY_SPACE, KEY_COMMA, 0, KEY_UP, 0, 0, SPITZ_KEY_FN, 0, 0, 0, 0, 0,      /* 81-96 */
 	KEY_SYSRQ, SPITZ_KEY_JAP1, SPITZ_KEY_JAP2, SPITZ_KEY_CANCEL, SPITZ_KEY_OK, SPITZ_KEY_MENU, KEY_LEFT, KEY_DOWN, KEY_RIGHT, 0, 0, 0, 0, 0, 0, 0  /* 97-112 */

+ 2 - 2
drivers/input/misc/uinput.c

@@ -90,11 +90,11 @@ static inline int uinput_request_reserve_slot(struct uinput_device *udev, struct
 
 static void uinput_request_done(struct uinput_device *udev, struct uinput_request *request)
 {
-	complete(&request->done);
-
 	/* Mark slot as available */
 	udev->requests[request->id] = NULL;
 	wake_up_interruptible(&udev->requests_waitq);
+
+	complete(&request->done);
 }
 
 static int uinput_request_submit(struct input_dev *dev, struct uinput_request *request)

+ 1 - 1
drivers/media/radio/radio-cadet.c

@@ -543,7 +543,7 @@ static int cadet_probe(void)
 
 	for(i=0;i<8;i++) {
 	        io=iovals[i];
-	        if(request_region(io,2, "cadet-probe")>=0) {
+		if (request_region(io, 2, "cadet-probe")) {
 		        cadet_setfreq(1410);
 			if(cadet_getfreq()==1410) {
 				release_region(io, 2);

+ 20 - 12
drivers/media/video/vpx3220.c

@@ -203,7 +203,7 @@ static const unsigned short init_ntsc[] = {
 	0x8c, 640,		/* Horizontal length */
 	0x8d, 640,		/* Number of pixels */
 	0x8f, 0xc00,		/* Disable window 2 */
-	0xf0, 0x173,		/* 13.5 MHz transport, Forced
+	0xf0, 0x73,		/* 13.5 MHz transport, Forced
 				 * mode, latch windows */
 	0xf2, 0x13,		/* NTSC M, composite input */
 	0xe7, 0x1e1,		/* Enable vertical standard
@@ -212,38 +212,36 @@ static const unsigned short init_ntsc[] = {
 
 static const unsigned short init_pal[] = {
 	0x88, 23,		/* Window 1 vertical begin */
-	0x89, 288 + 16,		/* Vertical lines in (16 lines
+	0x89, 288,		/* Vertical lines in (16 lines
 				 * skipped by the VFE) */
-	0x8a, 288 + 16,		/* Vertical lines out (16 lines
+	0x8a, 288,		/* Vertical lines out (16 lines
 				 * skipped by the VFE) */
 	0x8b, 16,		/* Horizontal begin */
 	0x8c, 768,		/* Horizontal length */
 	0x8d, 784, 		/* Number of pixels
 				 * Must be >= Horizontal begin + Horizontal length */
 	0x8f, 0xc00,		/* Disable window 2 */
-	0xf0, 0x177,		/* 13.5 MHz transport, Forced
+	0xf0, 0x77,		/* 13.5 MHz transport, Forced
 				 * mode, latch windows */
 	0xf2, 0x3d1,		/* PAL B,G,H,I, composite input */
-	0xe7, 0x261,		/* PAL/SECAM set to 288 + 16 lines 
-				 * change to 0x241 for 288 lines */
+	0xe7, 0x241,		/* PAL/SECAM set to 288 lines */
 };
 
 static const unsigned short init_secam[] = {
-	0x88, 23  - 16,		/* Window 1 vertical begin */
-	0x89, 288 + 16,		/* Vertical lines in (16 lines
+	0x88, 23,		/* Window 1 vertical begin */
+	0x89, 288,		/* Vertical lines in (16 lines
 				 * skipped by the VFE) */
-	0x8a, 288 + 16,		/* Vertical lines out (16 lines
+	0x8a, 288,		/* Vertical lines out (16 lines
 				 * skipped by the VFE) */
 	0x8b, 16,		/* Horizontal begin */
 	0x8c, 768,		/* Horizontal length */
 	0x8d, 784,		/* Number of pixels
 				 * Must be >= Horizontal begin + Horizontal length */
 	0x8f, 0xc00,		/* Disable window 2 */
-	0xf0, 0x177,		/* 13.5 MHz transport, Forced
+	0xf0, 0x77,		/* 13.5 MHz transport, Forced
 				 * mode, latch windows */
 	0xf2, 0x3d5,		/* SECAM, composite input */
-	0xe7, 0x261,		/* PAL/SECAM set to 288 + 16 lines 
-				 * change to 0x241 for 288 lines */
+	0xe7, 0x241,		/* PAL/SECAM set to 288 lines */
 };
 
 static const unsigned char init_common[] = {
@@ -410,6 +408,12 @@ vpx3220_command (struct i2c_client *client,
 	case DECODER_SET_NORM:
 	{
 		int *iarg = arg, data;
+		int temp_input;
+
+		/* Here we back up the input selection because it gets
+		   overwritten when we fill the registers with the
+                   choosen video norm */
+		temp_input = vpx3220_fp_read(client, 0xf2);
 
 		dprintk(1, KERN_DEBUG "%s: DECODER_SET_NORM %d\n",
 			I2C_NAME(client), *iarg);
@@ -449,6 +453,10 @@ vpx3220_command (struct i2c_client *client,
 
 		}
 		decoder->norm = *iarg;
+
+		/* And here we set the backed up video input again */
+		vpx3220_fp_write(client, 0xf2, temp_input | 0x0010);
+		udelay(10);
 	}
 		break;
 

+ 1 - 1
drivers/net/wireless/Kconfig

@@ -243,7 +243,7 @@ config IPW_DEBUG
 
 config AIRO
 	tristate "Cisco/Aironet 34X/35X/4500/4800 ISA and PCI cards"
-	depends on NET_RADIO && ISA && (PCI || BROKEN)
+	depends on NET_RADIO && ISA_DMA_API && (PCI || BROKEN)
 	---help---
 	  This is the standard Linux driver to support Cisco/Aironet ISA and
 	  PCI 802.11 wireless cards.

+ 2 - 2
drivers/pci/quirks.c

@@ -1233,7 +1233,7 @@ static void __init quirk_alder_ioapic(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_EESSC,	quirk_alder_ioapic );
 #endif
 
-#ifdef CONFIG_SCSI_SATA
+#ifdef CONFIG_SCSI_SATA_INTEL_COMBINED
 static void __devinit quirk_intel_ide_combined(struct pci_dev *pdev)
 {
 	u8 prog, comb, tmp;
@@ -1310,7 +1310,7 @@ static void __devinit quirk_intel_ide_combined(struct pci_dev *pdev)
 		request_region(0x170, 8, "libata");	/* port 1 */
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,    PCI_ANY_ID,	  quirk_intel_ide_combined );
-#endif /* CONFIG_SCSI_SATA */
+#endif /* CONFIG_SCSI_SATA_INTEL_COMBINED */
 
 
 int pcie_mch_quirk;

+ 6 - 8
drivers/pcmcia/soc_common.c

@@ -66,7 +66,7 @@ void soc_pcmcia_debug(struct soc_pcmcia_socket *skt, const char *func,
 	if (pc_debug > lvl) {
 		printk(KERN_DEBUG "skt%u: %s: ", skt->nr, func);
 		va_start(args, fmt);
-		printk(fmt, args);
+		vprintk(fmt, args);
 		va_end(args);
 	}
 }
@@ -321,8 +321,6 @@ soc_common_pcmcia_get_socket(struct pcmcia_socket *sock, socket_state_t *state)
  * less punt all of this work and let the kernel handle the details
  * of power configuration, reset, &c. We also record the value of
  * `state' in order to regurgitate it to the PCMCIA core later.
- *
- * Returns: 0
  */
 static int
 soc_common_pcmcia_set_socket(struct pcmcia_socket *sock, socket_state_t *state)
@@ -407,7 +405,7 @@ soc_common_pcmcia_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *m
  * the map speed as requested, but override the address ranges
  * supplied by Card Services.
  *
- * Returns: 0 on success, -1 on error
+ * Returns: 0 on success, -ERRNO on error
  */
 static int
 soc_common_pcmcia_set_mem_map(struct pcmcia_socket *sock, struct pccard_mem_map *map)
@@ -655,8 +653,8 @@ static void soc_pcmcia_cpufreq_unregister(void)
 }
 
 #else
-#define soc_pcmcia_cpufreq_register()
-#define soc_pcmcia_cpufreq_unregister()
+static int soc_pcmcia_cpufreq_register(void) { return 0; }
+static void soc_pcmcia_cpufreq_unregister(void) {}
 #endif
 
 int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, int first, int nr)
@@ -738,7 +736,7 @@ int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops
 			goto out_err_5;
 		}
 
-		if ( list_empty(&soc_pcmcia_sockets) )
+		if (list_empty(&soc_pcmcia_sockets))
 			soc_pcmcia_cpufreq_register();
 
 		list_add(&skt->node, &soc_pcmcia_sockets);
@@ -839,7 +837,7 @@ int soc_common_drv_pcmcia_remove(struct device *dev)
 		release_resource(&skt->res_io);
 		release_resource(&skt->res_skt);
 	}
-	if ( list_empty(&soc_pcmcia_sockets) )
+	if (list_empty(&soc_pcmcia_sockets))
 		soc_pcmcia_cpufreq_unregister();
 
 	up(&soc_pcmcia_sockets_lock);

+ 5 - 0
drivers/scsi/Kconfig

@@ -553,6 +553,11 @@ config SCSI_SATA_VITESSE
 
 	  If unsure, say N.
 
+config SCSI_SATA_INTEL_COMBINED
+	bool
+	depends on IDE=y && !BLK_DEV_IDE_SATA && (SCSI_SATA_AHCI || SCSI_ATA_PIIX)
+	default y
+
 config SCSI_BUSLOGIC
 	tristate "BusLogic SCSI support"
 	depends on (PCI || ISA || MCA) && SCSI && ISA_DMA_API

+ 1 - 1
drivers/scsi/aacraid/linit.c

@@ -453,9 +453,9 @@ static int aac_eh_reset(struct scsi_cmnd* cmd)
 		/*
 		 * We can exit If all the commands are complete
 		 */
+		spin_unlock_irq(host->host_lock);
 		if (active == 0)
 			return SUCCESS;
-		spin_unlock_irq(host->host_lock);
 		ssleep(1);
 		spin_lock_irq(host->host_lock);
 	}

+ 35 - 4
drivers/scsi/qlogicpti.c

@@ -1119,6 +1119,36 @@ static inline void update_can_queue(struct Scsi_Host *host, u_int in_ptr, u_int
 	host->sg_tablesize = QLOGICPTI_MAX_SG(num_free);
 }
 
+static unsigned int scsi_rbuf_get(struct scsi_cmnd *cmd, unsigned char **buf_out)
+{
+	unsigned char *buf;
+	unsigned int buflen;
+
+	if (cmd->use_sg) {
+		struct scatterlist *sg;
+
+		sg = (struct scatterlist *) cmd->request_buffer;
+		buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+		buflen = sg->length;
+	} else {
+		buf = cmd->request_buffer;
+		buflen = cmd->request_bufflen;
+	}
+
+	*buf_out = buf;
+	return buflen;
+}
+
+static void scsi_rbuf_put(struct scsi_cmnd *cmd, unsigned char *buf)
+{
+	if (cmd->use_sg) {
+		struct scatterlist *sg;
+
+		sg = (struct scatterlist *) cmd->request_buffer;
+		kunmap_atomic(buf - sg->offset, KM_IRQ0);
+	}
+}
+
 /*
  * Until we scan the entire bus with inquiries, go throught this fella...
  */
@@ -1145,11 +1175,9 @@ static void ourdone(struct scsi_cmnd *Cmnd)
 		int ok = host_byte(Cmnd->result) == DID_OK;
 		if (Cmnd->cmnd[0] == 0x12 && ok) {
 			unsigned char *iqd;
+			unsigned int iqd_len;
 
-			if (Cmnd->use_sg != 0)
-				BUG();
-
-			iqd = ((unsigned char *)Cmnd->buffer);
+			iqd_len = scsi_rbuf_get(Cmnd, &iqd);
 
 			/* tags handled in midlayer */
 			/* enable sync mode? */
@@ -1163,6 +1191,9 @@ static void ourdone(struct scsi_cmnd *Cmnd)
 			if (iqd[7] & 0x20) {
 				qpti->dev_param[tgt].device_flags |= 0x20;
 			}
+
+			scsi_rbuf_put(Cmnd, iqd);
+
 			qpti->sbits |= (1 << tgt);
 		} else if (!ok) {
 			qpti->sbits |= (1 << tgt);

+ 2 - 0
drivers/serial/8250_pnp.c

@@ -272,6 +272,8 @@ static const struct pnp_device_id pnp_dev_table[] = {
 	{	"SUP1421",		0	},
 	/* SupraExpress 33.6 Data/Fax PnP modem */
 	{	"SUP1590",		0	},
+	/* SupraExpress 336i Sp ASVD */
+	{	"SUP1620",		0	},
 	/* SupraExpress 33.6 Data/Fax PnP modem */
 	{	"SUP1760",		0	},
 	/* Phoebe Micro */

+ 1 - 1
drivers/serial/sh-sci.c

@@ -967,7 +967,7 @@ static int sci_startup(struct uart_port *port)
 #endif
 
 	sci_request_irq(s);
-	sci_start_tx(port, 1);
+	sci_start_tx(port);
 	sci_start_rx(port, 1);
 
 	return 0;

+ 2 - 1
drivers/usb/host/isp116x-hcd.c

@@ -326,7 +326,8 @@ static void postproc_atl_queue(struct isp116x *isp116x)
 					usb_settoggle(udev, ep->epnum,
 						      ep->nextpid ==
 						      USB_PID_OUT,
-						      PTD_GET_TOGGLE(ptd) ^ 1);
+						      PTD_GET_TOGGLE(ptd));
+				urb->actual_length += PTD_GET_COUNT(ptd);
 				urb->status = cc_to_error[TD_DATAUNDERRUN];
 				spin_unlock(&urb->lock);
 				continue;

+ 0 - 3
drivers/usb/input/hid-core.c

@@ -1702,10 +1702,7 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf)
 		if ((endpoint->bmAttributes & 3) != 3)		/* Not an interrupt endpoint */
 			continue;
 
-		/* handle potential highspeed HID correctly */
 		interval = endpoint->bInterval;
-		if (dev->speed == USB_SPEED_HIGH)
-			interval = 1 << (interval - 1);
 
 		/* Change the polling interval of mice. */
 		if (hid->collection->usage == HID_GD_MOUSE && hid_mousepoll_interval > 0)

+ 1 - 1
drivers/usb/serial/generic.c

@@ -223,7 +223,7 @@ int usb_serial_generic_write_room (struct usb_serial_port *port)
 	dbg("%s - port %d", __FUNCTION__, port->number);
 
 	if (serial->num_bulk_out) {
-		if (port->write_urb_busy)
+		if (!(port->write_urb_busy))
 			room = port->bulk_out_size;
 	}
 

+ 7 - 2
drivers/video/console/vgacon.c

@@ -565,7 +565,11 @@ static int vgacon_switch(struct vc_data *c)
 		scr_memcpyw((u16 *) c->vc_origin, (u16 *) c->vc_screenbuf,
 			    c->vc_screenbuf_size > vga_vram_size ?
 				vga_vram_size : c->vc_screenbuf_size);
-		vgacon_doresize(c, c->vc_cols, c->vc_rows);
+		if (!(vga_video_num_columns % 2) &&
+		    vga_video_num_columns <= ORIG_VIDEO_COLS &&
+		    vga_video_num_lines <= (ORIG_VIDEO_LINES *
+			vga_default_font_height) / c->vc_font.height)
+			vgacon_doresize(c, c->vc_cols, c->vc_rows);
 	}
 
 	return 0;		/* Redrawing not needed */
@@ -1023,7 +1027,8 @@ static int vgacon_resize(struct vc_data *c, unsigned int width,
 	if (width % 2 || width > ORIG_VIDEO_COLS ||
 	    height > (ORIG_VIDEO_LINES * vga_default_font_height)/
 	    c->vc_font.height)
-		return -EINVAL;
+		/* let svgatextmode tinker with video timings */
+		return 0;
 
 	if (CON_IS_VISIBLE(c) && !vga_is_gfx) /* who knows */
 		vgacon_doresize(c, width, height);

+ 2 - 0
drivers/video/sa1100fb.c

@@ -592,6 +592,7 @@ sa1100fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 	return ret;
 }
 
+#ifdef CONFIG_CPU_FREQ
 /*
  *  sa1100fb_display_dma_period()
  *    Calculate the minimum period (in picoseconds) between two DMA
@@ -606,6 +607,7 @@ static inline unsigned int sa1100fb_display_dma_period(struct fb_var_screeninfo
 	 */
 	return var->pixclock * 8 * 16 / var->bits_per_pixel;
 }
+#endif
 
 /*
  *  sa1100fb_check_var():

+ 1 - 2
drivers/w1/w1.c

@@ -77,8 +77,7 @@ static void w1_master_release(struct device *dev)
 
 	dev_dbg(dev, "%s: Releasing %s.\n", __func__, md->name);
 
-	if (md->nls && md->nls->sk_socket)
-		sock_release(md->nls->sk_socket);
+	dev_fini_netlink(md);
 	memset(md, 0, sizeof(struct w1_master) + sizeof(struct w1_bus_master));
 	kfree(md);
 }

+ 1 - 25
fs/aio.c

@@ -398,7 +398,7 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx)
 	if (unlikely(!req))
 		return NULL;
 
-	req->ki_flags = 1 << KIF_LOCKED;
+	req->ki_flags = 0;
 	req->ki_users = 2;
 	req->ki_key = 0;
 	req->ki_ctx = ctx;
@@ -547,25 +547,6 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id)
 	return ioctx;
 }
 
-static int lock_kiocb_action(void *param)
-{
-	schedule();
-	return 0;
-}
-
-static inline void lock_kiocb(struct kiocb *iocb)
-{
-	wait_on_bit_lock(&iocb->ki_flags, KIF_LOCKED, lock_kiocb_action,
-			 TASK_UNINTERRUPTIBLE);
-}
-
-static inline void unlock_kiocb(struct kiocb *iocb)
-{
-	kiocbClearLocked(iocb);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&iocb->ki_flags, KIF_LOCKED);
-}
-
 /*
  * use_mm
  *	Makes the calling kernel thread take on the specified
@@ -796,9 +777,7 @@ static int __aio_run_iocbs(struct kioctx *ctx)
 		 * Hold an extra reference while retrying i/o.
 		 */
 		iocb->ki_users++;       /* grab extra reference */
-		lock_kiocb(iocb);
 		aio_run_iocb(iocb);
-		unlock_kiocb(iocb);
 		if (__aio_put_req(ctx, iocb))  /* drop extra ref */
 			put_ioctx(ctx);
  	}
@@ -1542,7 +1521,6 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 
 	spin_lock_irq(&ctx->ctx_lock);
 	aio_run_iocb(req);
-	unlock_kiocb(req);
 	if (!list_empty(&ctx->run_list)) {
 		/* drain the run list */
 		while (__aio_run_iocbs(ctx))
@@ -1674,7 +1652,6 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
 	if (NULL != cancel) {
 		struct io_event tmp;
 		pr_debug("calling cancel\n");
-		lock_kiocb(kiocb);
 		memset(&tmp, 0, sizeof(tmp));
 		tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user;
 		tmp.data = kiocb->ki_user_data;
@@ -1686,7 +1663,6 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
 			if (copy_to_user(result, &tmp, sizeof(tmp)))
 				ret = -EFAULT;
 		}
-		unlock_kiocb(kiocb);
 	} else
 		ret = -EINVAL;
 

+ 6 - 6
fs/exec.c

@@ -126,8 +126,7 @@ asmlinkage long sys_uselib(const char __user * library)
 	struct nameidata nd;
 	int error;
 
-	nd.intent.open.flags = FMODE_READ;
-	error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
+	error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ);
 	if (error)
 		goto out;
 
@@ -139,7 +138,7 @@ asmlinkage long sys_uselib(const char __user * library)
 	if (error)
 		goto exit;
 
-	file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+	file = nameidata_to_filp(&nd, O_RDONLY);
 	error = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out;
@@ -167,6 +166,7 @@ asmlinkage long sys_uselib(const char __user * library)
 out:
   	return error;
 exit:
+	release_open_intent(&nd);
 	path_release(&nd);
 	goto out;
 }
@@ -490,8 +490,7 @@ struct file *open_exec(const char *name)
 	int err;
 	struct file *file;
 
-	nd.intent.open.flags = FMODE_READ;
-	err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
+	err = path_lookup_open(name, LOOKUP_FOLLOW, &nd, FMODE_READ);
 	file = ERR_PTR(err);
 
 	if (!err) {
@@ -504,7 +503,7 @@ struct file *open_exec(const char *name)
 				err = -EACCES;
 			file = ERR_PTR(err);
 			if (!err) {
-				file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+				file = nameidata_to_filp(&nd, O_RDONLY);
 				if (!IS_ERR(file)) {
 					err = deny_write_access(file);
 					if (err) {
@@ -516,6 +515,7 @@ out:
 				return file;
 			}
 		}
+		release_open_intent(&nd);
 		path_release(&nd);
 	}
 	goto out;

+ 25 - 17
fs/locks.c

@@ -316,21 +316,22 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
 	/* POSIX-1996 leaves the case l->l_len < 0 undefined;
 	   POSIX-2001 defines it. */
 	start += l->l_start;
-	end = start + l->l_len - 1;
-	if (l->l_len < 0) {
+	if (start < 0)
+		return -EINVAL;
+	fl->fl_end = OFFSET_MAX;
+	if (l->l_len > 0) {
+		end = start + l->l_len - 1;
+		fl->fl_end = end;
+	} else if (l->l_len < 0) {
 		end = start - 1;
+		fl->fl_end = end;
 		start += l->l_len;
+		if (start < 0)
+			return -EINVAL;
 	}
-
-	if (start < 0)
-		return -EINVAL;
-	if (l->l_len > 0 && end < 0)
-		return -EOVERFLOW;
-
 	fl->fl_start = start;	/* we record the absolute position */
-	fl->fl_end = end;
-	if (l->l_len == 0)
-		fl->fl_end = OFFSET_MAX;
+	if (fl->fl_end < fl->fl_start)
+		return -EOVERFLOW;
 	
 	fl->fl_owner = current->files;
 	fl->fl_pid = current->tgid;
@@ -362,14 +363,21 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
 		return -EINVAL;
 	}
 
-	if (((start += l->l_start) < 0) || (l->l_len < 0))
+	start += l->l_start;
+	if (start < 0)
 		return -EINVAL;
-	fl->fl_end = start + l->l_len - 1;
-	if (l->l_len > 0 && fl->fl_end < 0)
-		return -EOVERFLOW;
+	fl->fl_end = OFFSET_MAX;
+	if (l->l_len > 0) {
+		fl->fl_end = start + l->l_len - 1;
+	} else if (l->l_len < 0) {
+		fl->fl_end = start - 1;
+		start += l->l_len;
+		if (start < 0)
+			return -EINVAL;
+	}
 	fl->fl_start = start;	/* we record the absolute position */
-	if (l->l_len == 0)
-		fl->fl_end = OFFSET_MAX;
+	if (fl->fl_end < fl->fl_start)
+		return -EOVERFLOW;
 	
 	fl->fl_owner = current->files;
 	fl->fl_pid = current->tgid;

+ 87 - 8
fs/namei.c

@@ -28,6 +28,7 @@
 #include <linux/syscalls.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
+#include <linux/file.h>
 #include <asm/namei.h>
 #include <asm/uaccess.h>
 
@@ -317,6 +318,18 @@ void path_release_on_umount(struct nameidata *nd)
 	mntput_no_expire(nd->mnt);
 }
 
+/**
+ * release_open_intent - free up open intent resources
+ * @nd: pointer to nameidata
+ */
+void release_open_intent(struct nameidata *nd)
+{
+	if (nd->intent.open.file->f_dentry == NULL)
+		put_filp(nd->intent.open.file);
+	else
+		fput(nd->intent.open.file);
+}
+
 /*
  * Internal lookup() using the new generic dcache.
  * SMP-safe
@@ -750,6 +763,7 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
 		struct qstr this;
 		unsigned int c;
 
+		nd->flags |= LOOKUP_CONTINUE;
 		err = exec_permission_lite(inode, nd);
 		if (err == -EAGAIN) { 
 			err = permission(inode, MAY_EXEC, nd);
@@ -802,7 +816,6 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
 			if (err < 0)
 				break;
 		}
-		nd->flags |= LOOKUP_CONTINUE;
 		/* This does the actual lookups.. */
 		err = do_lookup(nd, &this, &next);
 		if (err)
@@ -1052,6 +1065,70 @@ out:
 	return retval;
 }
 
+static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags, int create_mode)
+{
+	struct file *filp = get_empty_filp();
+	int err;
+
+	if (filp == NULL)
+		return -ENFILE;
+	nd->intent.open.file = filp;
+	nd->intent.open.flags = open_flags;
+	nd->intent.open.create_mode = create_mode;
+	err = path_lookup(name, lookup_flags|LOOKUP_OPEN, nd);
+	if (IS_ERR(nd->intent.open.file)) {
+		if (err == 0) {
+			err = PTR_ERR(nd->intent.open.file);
+			path_release(nd);
+		}
+	} else if (err != 0)
+		release_open_intent(nd);
+	return err;
+}
+
+/**
+ * path_lookup_open - lookup a file path with open intent
+ * @name: pointer to file name
+ * @lookup_flags: lookup intent flags
+ * @nd: pointer to nameidata
+ * @open_flags: open intent flags
+ */
+int path_lookup_open(const char *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags)
+{
+	return __path_lookup_intent_open(name, lookup_flags, nd,
+			open_flags, 0);
+}
+
+/**
+ * path_lookup_create - lookup a file path with open + create intent
+ * @name: pointer to file name
+ * @lookup_flags: lookup intent flags
+ * @nd: pointer to nameidata
+ * @open_flags: open intent flags
+ * @create_mode: create intent flags
+ */
+int path_lookup_create(const char *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags, int create_mode)
+{
+	return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd,
+			open_flags, create_mode);
+}
+
+int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags)
+{
+	char *tmp = getname(name);
+	int err = PTR_ERR(tmp);
+
+	if (!IS_ERR(tmp)) {
+		err = __path_lookup_intent_open(tmp, lookup_flags, nd, open_flags, 0);
+		putname(tmp);
+	}
+	return err;
+}
+
 /*
  * Restricted form of lookup. Doesn't follow links, single-component only,
  * needs parent already locked. Doesn't follow mounts.
@@ -1416,27 +1493,27 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
  */
 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 {
-	int acc_mode, error = 0;
+	int acc_mode, error;
 	struct path path;
 	struct dentry *dir;
 	int count = 0;
 
 	acc_mode = ACC_MODE(flag);
 
+	/* O_TRUNC implies we need access checks for write permissions */
+	if (flag & O_TRUNC)
+		acc_mode |= MAY_WRITE;
+
 	/* Allow the LSM permission hook to distinguish append 
 	   access from general write access. */
 	if (flag & O_APPEND)
 		acc_mode |= MAY_APPEND;
 
-	/* Fill in the open() intent data */
-	nd->intent.open.flags = flag;
-	nd->intent.open.create_mode = mode;
-
 	/*
 	 * The simplest case - just a plain lookup.
 	 */
 	if (!(flag & O_CREAT)) {
-		error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd);
+		error = path_lookup_open(pathname, lookup_flags(flag), nd, flag);
 		if (error)
 			return error;
 		goto ok;
@@ -1445,7 +1522,7 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 	/*
 	 * Create - we need to know the parent.
 	 */
-	error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
+	error = path_lookup_create(pathname, LOOKUP_PARENT, nd, flag, mode);
 	if (error)
 		return error;
 
@@ -1520,6 +1597,8 @@ ok:
 exit_dput:
 	dput_path(&path, nd);
 exit:
+	if (!IS_ERR(nd->intent.open.file))
+		release_open_intent(nd);
 	path_release(nd);
 	return error;
 

+ 5 - 1
fs/nfs/delegation.c

@@ -85,6 +85,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	struct nfs_delegation *delegation;
 	int status = 0;
 
+	/* Ensure we first revalidate the attributes and page cache! */
+	if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)))
+		__nfs_revalidate_inode(NFS_SERVER(inode), inode);
+
 	delegation = nfs_alloc_delegation();
 	if (delegation == NULL)
 		return -ENOMEM;
@@ -138,7 +142,7 @@ static void nfs_msync_inode(struct inode *inode)
 /*
  * Basic procedure for returning a delegation to the server
  */
-int nfs_inode_return_delegation(struct inode *inode)
+int __nfs_inode_return_delegation(struct inode *inode)
 {
 	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
 	struct nfs_inode *nfsi = NFS_I(inode);

+ 15 - 1
fs/nfs/delegation.h

@@ -25,7 +25,7 @@ struct nfs_delegation {
 
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
-int nfs_inode_return_delegation(struct inode *inode);
+int __nfs_inode_return_delegation(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 
 struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
@@ -47,11 +47,25 @@ static inline int nfs_have_delegation(struct inode *inode, int flags)
 		return 1;
 	return 0;
 }
+
+static inline int nfs_inode_return_delegation(struct inode *inode)
+{
+	int err = 0;
+
+	if (NFS_I(inode)->delegation != NULL)
+		err = __nfs_inode_return_delegation(inode);
+	return err;
+}
 #else
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
 	return 0;
 }
+
+static inline int nfs_inode_return_delegation(struct inode *inode)
+{
+	return 0;
+}
 #endif
 
 #endif

+ 24 - 20
fs/nfs/dir.c

@@ -801,6 +801,7 @@ static int nfs_dentry_delete(struct dentry *dentry)
  */
 static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
 {
+	nfs_inode_return_delegation(inode);
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
 		lock_kernel();
 		inode->i_nlink--;
@@ -914,7 +915,6 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd)
 static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct dentry *res = NULL;
-	struct inode *inode = NULL;
 	int error;
 
 	/* Check that we are indeed trying to open this file */
@@ -928,8 +928,10 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
 
 	/* Let vfs_create() deal with O_EXCL */
-	if (nd->intent.open.flags & O_EXCL)
-		goto no_entry;
+	if (nd->intent.open.flags & O_EXCL) {
+		d_add(dentry, NULL);
+		goto out;
+	}
 
 	/* Open the file on the server */
 	lock_kernel();
@@ -943,32 +945,30 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 
 	if (nd->intent.open.flags & O_CREAT) {
 		nfs_begin_data_update(dir);
-		inode = nfs4_atomic_open(dir, dentry, nd);
+		res = nfs4_atomic_open(dir, dentry, nd);
 		nfs_end_data_update(dir);
 	} else
-		inode = nfs4_atomic_open(dir, dentry, nd);
+		res = nfs4_atomic_open(dir, dentry, nd);
 	unlock_kernel();
-	if (IS_ERR(inode)) {
-		error = PTR_ERR(inode);
+	if (IS_ERR(res)) {
+		error = PTR_ERR(res);
 		switch (error) {
 			/* Make a negative dentry */
 			case -ENOENT:
-				inode = NULL;
-				break;
+				res = NULL;
+				goto out;
 			/* This turned out not to be a regular file */
+			case -EISDIR:
+			case -ENOTDIR:
+				goto no_open;
 			case -ELOOP:
 				if (!(nd->intent.open.flags & O_NOFOLLOW))
 					goto no_open;
-			/* case -EISDIR: */
 			/* case -EINVAL: */
 			default:
-				res = ERR_PTR(error);
 				goto out;
 		}
-	}
-no_entry:
-	res = d_add_unique(dentry, inode);
-	if (res != NULL)
+	} else if (res != NULL)
 		dentry = res;
 	nfs_renew_times(dentry);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -1012,7 +1012,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
 	 */
 	lock_kernel();
 	verifier = nfs_save_change_attribute(dir);
-	ret = nfs4_open_revalidate(dir, dentry, openflags);
+	ret = nfs4_open_revalidate(dir, dentry, openflags, nd);
 	if (!ret)
 		nfs_set_verifier(dentry, verifier);
 	unlock_kernel();
@@ -1135,7 +1135,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
 
 	lock_kernel();
 	nfs_begin_data_update(dir);
-	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
+	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
 	nfs_end_data_update(dir);
 	if (error != 0)
 		goto out_err;
@@ -1330,6 +1330,7 @@ static int nfs_safe_remove(struct dentry *dentry)
 
 	nfs_begin_data_update(dir);
 	if (inode != NULL) {
+		nfs_inode_return_delegation(inode);
 		nfs_begin_data_update(inode);
 		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
 		/* The VFS may want to delete this inode */
@@ -1510,9 +1511,11 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 */
 	if (!new_inode)
 		goto go_ahead;
-	if (S_ISDIR(new_inode->i_mode))
-		goto out;
-	else if (atomic_read(&new_dentry->d_count) > 2) {
+	if (S_ISDIR(new_inode->i_mode)) {
+		error = -EISDIR;
+		if (!S_ISDIR(old_inode->i_mode))
+			goto out;
+	} else if (atomic_read(&new_dentry->d_count) > 2) {
 		int err;
 		/* copy the target dentry's name */
 		dentry = d_alloc(new_dentry->d_parent,
@@ -1548,6 +1551,7 @@ go_ahead:
 		nfs_wb_all(old_inode);
 		shrink_dcache_parent(old_dentry);
 	}
+	nfs_inode_return_delegation(old_inode);
 
 	if (new_inode)
 		d_delete(new_dentry);

+ 20 - 10
fs/nfs/file.c

@@ -137,7 +137,8 @@ static int nfs_revalidate_file(struct inode *inode, struct file *filp)
 	struct nfs_inode *nfsi = NFS_I(inode);
 	int retval = 0;
 
-	if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode))
+	if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR))
+			|| nfs_attribute_timeout(inode))
 		retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	nfs_revalidate_mapping(inode, filp->f_mapping);
 	return 0;
@@ -375,22 +376,31 @@ out_swapfile:
 
 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
 {
+	struct file_lock *cfl;
 	struct inode *inode = filp->f_mapping->host;
 	int status = 0;
 
 	lock_kernel();
-	/* Use local locking if mounted with "-onolock" */
-	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
-		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
-	else {
-		struct file_lock *cfl = posix_test_lock(filp, fl);
-
-		fl->fl_type = F_UNLCK;
-		if (cfl != NULL)
-			memcpy(fl, cfl, sizeof(*fl));
+	/* Try local locking first */
+	cfl = posix_test_lock(filp, fl);
+	if (cfl != NULL) {
+		locks_copy_lock(fl, cfl);
+		goto out;
 	}
+
+	if (nfs_have_delegation(inode, FMODE_READ))
+		goto out_noconflict;
+
+	if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
+		goto out_noconflict;
+
+	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+out:
 	unlock_kernel();
 	return status;
+out_noconflict:
+	fl->fl_type = F_UNLCK;
+	goto out;
 }
 
 static int do_vfs_lock(struct file *file, struct file_lock *fl)

+ 8 - 9
fs/nfs/inode.c

@@ -853,6 +853,11 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 			filemap_fdatawait(inode->i_mapping);
 		nfs_wb_all(inode);
 	}
+	/*
+	 * Return any delegations if we're going to change ACLs
+	 */
+	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
+		nfs_inode_return_delegation(inode);
 	error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
 	if (error == 0)
 		nfs_refresh_inode(inode, &fattr);
@@ -909,12 +914,10 @@ static int nfs_wait_on_inode(struct inode *inode)
 	sigset_t oldmask;
 	int error;
 
-	atomic_inc(&inode->i_count);
 	rpc_clnt_sigmask(clnt, &oldmask);
 	error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING,
 					nfs_wait_schedule, TASK_INTERRUPTIBLE);
 	rpc_clnt_sigunmask(clnt, &oldmask);
-	iput(inode);
 
 	return error;
 }
@@ -1258,10 +1261,6 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 	loff_t cur_size, new_isize;
 	int data_unstable;
 
-	/* Do we hold a delegation? */
-	if (nfs_have_delegation(inode, FMODE_READ))
-		return 0;
-
 	spin_lock(&inode->i_lock);
 
 	/* Are we in the process of updating data on the server? */
@@ -1382,7 +1381,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 	nfsi->read_cache_jiffies = fattr->timestamp;
 
 	/* Are we racing with known updates of the metadata on the server? */
-	data_unstable = ! nfs_verify_change_attribute(inode, verifier);
+	data_unstable = ! (nfs_verify_change_attribute(inode, verifier) ||
+		(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE));
 
 	/* Check if our cached file size is stale */
  	new_isize = nfs_size_to_loff_t(fattr->size);
@@ -1676,8 +1676,7 @@ static void nfs4_clear_inode(struct inode *inode)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	/* If we are holding a delegation, return it! */
-	if (nfsi->delegation != NULL)
-		nfs_inode_return_delegation(inode);
+	nfs_inode_return_delegation(inode);
 	/* First call standard NFS clear_inode() code */
 	nfs_clear_inode(inode);
 	/* Now clear out any remaining state */

+ 1 - 1
fs/nfs/nfs3proc.c

@@ -299,7 +299,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata)
  */
 static int
 nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-		 int flags)
+		 int flags, struct nameidata *nd)
 {
 	struct nfs_fh		fhandle;
 	struct nfs_fattr	fattr;

+ 41 - 12
fs/nfs/nfs4_fs.h

@@ -92,26 +92,51 @@ struct nfs4_client {
 	unsigned char		cl_id_uniquifier;
 };
 
+/*
+ * struct rpc_sequence ensures that RPC calls are sent in the exact
+ * order that they appear on the list.
+ */
+struct rpc_sequence {
+	struct rpc_wait_queue	wait;	/* RPC call delay queue */
+	spinlock_t lock;		/* Protects the list */
+	struct list_head list;		/* Defines sequence of RPC calls */
+};
+
+#define NFS_SEQID_CONFIRMED 1
+struct nfs_seqid_counter {
+	struct rpc_sequence *sequence;
+	int flags;
+	u32 counter;
+};
+
+struct nfs_seqid {
+	struct list_head list;
+	struct nfs_seqid_counter *sequence;
+	struct rpc_task *task;
+};
+
+static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status)
+{
+	if (seqid_mutating_err(-status))
+		seqid->flags |= NFS_SEQID_CONFIRMED;
+}
+
 /*
  * NFS4 state_owners and lock_owners are simply labels for ordered
  * sequences of RPC calls. Their sole purpose is to provide once-only
  * semantics by allowing the server to identify replayed requests.
- *
- * The ->so_sema is held during all state_owner seqid-mutating operations:
- * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize
- * so_seqid.
  */
 struct nfs4_state_owner {
 	struct list_head     so_list;	 /* per-clientid list of state_owners */
 	struct nfs4_client   *so_client;
 	u32                  so_id;      /* 32-bit identifier, unique */
-	struct semaphore     so_sema;
-	u32                  so_seqid;   /* protected by so_sema */
 	atomic_t	     so_count;
 
 	struct rpc_cred	     *so_cred;	 /* Associated cred */
 	struct list_head     so_states;
 	struct list_head     so_delegations;
+	struct nfs_seqid_counter so_seqid;
+	struct rpc_sequence  so_sequence;
 };
 
 /*
@@ -132,7 +157,7 @@ struct nfs4_lock_state {
 	fl_owner_t		ls_owner;	/* POSIX lock owner */
 #define NFS_LOCK_INITIALIZED 1
 	int			ls_flags;
-	u32			ls_seqid;
+	struct nfs_seqid_counter	ls_seqid;
 	u32			ls_id;
 	nfs4_stateid		ls_stateid;
 	atomic_t		ls_count;
@@ -153,7 +178,6 @@ struct nfs4_state {
 	struct inode *inode;		/* Pointer to the inode */
 
 	unsigned long flags;		/* Do we hold any locks? */
-	struct semaphore lock_sema;	/* Serializes file locking operations */
 	spinlock_t state_lock;		/* Protects the lock_states list */
 
 	nfs4_stateid stateid;
@@ -191,8 +215,8 @@ extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
 extern int nfs4_proc_async_renew(struct nfs4_client *);
 extern int nfs4_proc_renew(struct nfs4_client *);
 extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
-extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
-extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
+extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
+extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
 
 extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
 extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops;
@@ -224,12 +248,17 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, mode_t);
 extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
-extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
 extern void nfs4_schedule_state_recovery(struct nfs4_client *);
+extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
-extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
 
+extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter);
+extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
+extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
+extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
+extern void nfs_free_seqid(struct nfs_seqid *seqid);
+
 extern const nfs4_stateid zero_stateid;
 
 /* nfs4xdr.c */

+ 351 - 226
fs/nfs/nfs4proc.c

@@ -47,6 +47,7 @@
 #include <linux/nfs_page.h>
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
+#include <linux/mount.h>
 
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -56,10 +57,11 @@
 #define NFS4_POLL_RETRY_MIN	(1*HZ)
 #define NFS4_POLL_RETRY_MAX	(15*HZ)
 
+static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
-static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *);
+static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
-static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
+static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
 extern struct rpc_procinfo nfs4_procedures[];
 
@@ -189,6 +191,21 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf
 		nfsi->change_attr = cinfo->after;
 }
 
+/* Helper for asynchronous RPC calls */
+static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action tk_begin,
+		rpc_action tk_exit, void *calldata)
+{
+	struct rpc_task *task;
+
+	if (!(task = rpc_new_task(clnt, tk_exit, RPC_TASK_ASYNC)))
+		return -ENOMEM;
+
+	task->tk_calldata = calldata;
+	task->tk_action = tk_begin;
+	rpc_execute(task);
+	return 0;
+}
+
 static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
 {
 	struct inode *inode = state->inode;
@@ -209,7 +226,6 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid,
 /*
  * OPEN_RECLAIM:
  * 	reclaim state on the server after a reboot.
- * 	Assumes caller is holding the sp->so_sem
  */
 static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
 {
@@ -218,7 +234,6 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st
 	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
 	struct nfs_openargs o_arg = {
 		.fh = NFS_FH(inode),
-		.seqid = sp->so_seqid,
 		.id = sp->so_id,
 		.open_flags = state->state,
 		.clientid = server->nfs4_state->cl_clientid,
@@ -245,8 +260,13 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st
 		}
 		o_arg.u.delegation_type = delegation->type;
 	}
+	o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+	if (o_arg.seqid == NULL)
+		return -ENOMEM;
 	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_seqid(status, sp);
+	/* Confirm the sequence as being established */
+	nfs_confirm_seqid(&sp->so_seqid, status);
+	nfs_increment_open_seqid(status, o_arg.seqid);
 	if (status == 0) {
 		memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
 		if (o_res.delegation_type != 0) {
@@ -256,6 +276,7 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st
 				nfs_async_inode_return_delegation(inode, &o_res.stateid);
 		}
 	}
+	nfs_free_seqid(o_arg.seqid);
 	clear_bit(NFS_DELEGATED_STATE, &state->flags);
 	/* Ensure we update the inode attributes */
 	NFS_CACHEINV(inode);
@@ -302,23 +323,35 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state
 	};
 	int status = 0;
 
-	down(&sp->so_sema);
 	if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
 		goto out;
 	if (state->state == 0)
 		goto out;
-	arg.seqid = sp->so_seqid;
+	arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+	status = -ENOMEM;
+	if (arg.seqid == NULL)
+		goto out;
 	arg.open_flags = state->state;
 	memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data));
 	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_seqid(status, sp);
+	nfs_increment_open_seqid(status, arg.seqid);
+	if (status != 0)
+		goto out_free;
+	if(res.rflags & NFS4_OPEN_RESULT_CONFIRM) {
+		status = _nfs4_proc_open_confirm(server->client, NFS_FH(inode),
+				sp, &res.stateid, arg.seqid);
+		if (status != 0)
+			goto out_free;
+	}
+	nfs_confirm_seqid(&sp->so_seqid, 0);
 	if (status >= 0) {
 		memcpy(state->stateid.data, res.stateid.data,
 				sizeof(state->stateid.data));
 		clear_bit(NFS_DELEGATED_STATE, &state->flags);
 	}
+out_free:
+	nfs_free_seqid(arg.seqid);
 out:
-	up(&sp->so_sema);
 	dput(parent);
 	return status;
 }
@@ -345,11 +378,11 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
 	return err;
 }
 
-static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid)
+static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid)
 {
 	struct nfs_open_confirmargs arg = {
 		.fh             = fh,
-		.seqid          = sp->so_seqid,
+		.seqid          = seqid,
 		.stateid	= *stateid,
 	};
 	struct nfs_open_confirmres res;
@@ -362,7 +395,9 @@ static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nf
 	int status;
 
 	status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_seqid(status, sp);
+	/* Confirm the sequence as being established */
+	nfs_confirm_seqid(&sp->so_seqid, status);
+	nfs_increment_open_seqid(status, seqid);
 	if (status >= 0)
 		memcpy(stateid, &res.stateid, sizeof(*stateid));
 	return status;
@@ -380,21 +415,37 @@ static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner  *sp, stru
 	int status;
 
 	/* Update sequence id. The caller must serialize! */
-	o_arg->seqid = sp->so_seqid;
 	o_arg->id = sp->so_id;
 	o_arg->clientid = sp->so_client->cl_clientid;
 
 	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_seqid(status, sp);
+	if (status == 0) {
+		/* OPEN on anything except a regular file is disallowed in NFSv4 */
+		switch (o_res->f_attr->mode & S_IFMT) {
+			case S_IFREG:
+				break;
+			case S_IFLNK:
+				status = -ELOOP;
+				break;
+			case S_IFDIR:
+				status = -EISDIR;
+				break;
+			default:
+				status = -ENOTDIR;
+		}
+	}
+
+	nfs_increment_open_seqid(status, o_arg->seqid);
 	if (status != 0)
 		goto out;
 	update_changeattr(dir, &o_res->cinfo);
 	if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
 		status = _nfs4_proc_open_confirm(server->client, &o_res->fh,
-				sp, &o_res->stateid);
+				sp, &o_res->stateid, o_arg->seqid);
 		if (status != 0)
 			goto out;
 	}
+	nfs_confirm_seqid(&sp->so_seqid, 0);
 	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
 		status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
 out:
@@ -465,6 +516,10 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
 		set_bit(NFS_DELEGATED_STATE, &state->flags);
 		goto out;
 	}
+	o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+	status = -ENOMEM;
+	if (o_arg.seqid == NULL)
+		goto out;
 	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
 	if (status != 0)
 		goto out_nodeleg;
@@ -490,6 +545,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
 			nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res);
 	}
 out_nodeleg:
+	nfs_free_seqid(o_arg.seqid);
 	clear_bit(NFS_DELEGATED_STATE, &state->flags);
 out:
 	dput(parent);
@@ -564,7 +620,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
 		dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
 		goto out_err;
 	}
-	down(&sp->so_sema);
 	state = nfs4_get_open_state(inode, sp);
 	if (state == NULL)
 		goto out_err;
@@ -589,7 +644,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
 	set_bit(NFS_DELEGATED_STATE, &state->flags);
 	update_open_stateid(state, &delegation->stateid, open_flags);
 out_ok:
-	up(&sp->so_sema);
 	nfs4_put_state_owner(sp);
 	up_read(&nfsi->rwsem);
 	up_read(&clp->cl_sem);
@@ -600,11 +654,12 @@ out_err:
 	if (sp != NULL) {
 		if (state != NULL)
 			nfs4_put_open_state(state);
-		up(&sp->so_sema);
 		nfs4_put_state_owner(sp);
 	}
 	up_read(&nfsi->rwsem);
 	up_read(&clp->cl_sem);
+	if (err != -EACCES)
+		nfs_inode_return_delegation(inode);
 	return err;
 }
 
@@ -665,8 +720,10 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	} else
 		o_arg.u.attrs = sattr;
 	/* Serialization for the sequence id */
-	down(&sp->so_sema);
 
+	o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+	if (o_arg.seqid == NULL)
+		return -ENOMEM;
 	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
 	if (status != 0)
 		goto out_err;
@@ -681,7 +738,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	update_open_stateid(state, &o_res.stateid, flags);
 	if (o_res.delegation_type != 0)
 		nfs_inode_set_delegation(inode, cred, &o_res);
-	up(&sp->so_sema);
+	nfs_free_seqid(o_arg.seqid);
 	nfs4_put_state_owner(sp);
 	up_read(&clp->cl_sem);
 	*res = state;
@@ -690,7 +747,7 @@ out_err:
 	if (sp != NULL) {
 		if (state != NULL)
 			nfs4_put_open_state(state);
-		up(&sp->so_sema);
+		nfs_free_seqid(o_arg.seqid);
 		nfs4_put_state_owner(sp);
 	}
 	/* Note: clp->cl_sem must be released before nfs4_put_open_state()! */
@@ -718,7 +775,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
 		 * It is actually a sign of a bug on the client or on the server.
 		 *
 		 * If we receive a BAD_SEQID error in the particular case of
-		 * doing an OPEN, we assume that nfs4_increment_seqid() will
+		 * doing an OPEN, we assume that nfs_increment_open_seqid() will
 		 * have unhashed the old state_owner for us, and that we can
 		 * therefore safely retry using a new one. We should still warn
 		 * the user though...
@@ -728,6 +785,16 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
 			exception.retry = 1;
 			continue;
 		}
+		/*
+		 * BAD_STATEID on OPEN means that the server cancelled our
+		 * state before it received the OPEN_CONFIRM.
+		 * Recover by retrying the request as per the discussion
+		 * on Page 181 of RFC3530.
+		 */
+		if (status == -NFS4ERR_BAD_STATEID) {
+			exception.retry = 1;
+			continue;
+		}
 		res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
 					status, &exception));
 	} while (exception.retry);
@@ -789,17 +856,27 @@ struct nfs4_closedata {
 	struct nfs_closeres res;
 };
 
+static void nfs4_free_closedata(struct nfs4_closedata *calldata)
+{
+	struct nfs4_state *state = calldata->state;
+	struct nfs4_state_owner *sp = state->owner;
+
+	nfs4_put_open_state(calldata->state);
+	nfs_free_seqid(calldata->arg.seqid);
+	nfs4_put_state_owner(sp);
+	kfree(calldata);
+}
+
 static void nfs4_close_done(struct rpc_task *task)
 {
 	struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
 	struct nfs4_state *state = calldata->state;
-	struct nfs4_state_owner *sp = state->owner;
 	struct nfs_server *server = NFS_SERVER(calldata->inode);
 
         /* hmm. we are done with the inode, and in the process of freeing
 	 * the state_owner. we keep this around to process errors
 	 */
-	nfs4_increment_seqid(task->tk_status, sp);
+	nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
 	switch (task->tk_status) {
 		case 0:
 			memcpy(&state->stateid, &calldata->res.stateid,
@@ -817,24 +894,46 @@ static void nfs4_close_done(struct rpc_task *task)
 			}
 	}
 	state->state = calldata->arg.open_flags;
-	nfs4_put_open_state(state);
-	up(&sp->so_sema);
-	nfs4_put_state_owner(sp);
-	up_read(&server->nfs4_state->cl_sem);
-	kfree(calldata);
+	nfs4_free_closedata(calldata);
 }
 
-static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata)
+static void nfs4_close_begin(struct rpc_task *task)
 {
+	struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
+	struct nfs4_state *state = calldata->state;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
 		.rpc_argp = &calldata->arg,
 		.rpc_resp = &calldata->res,
-		.rpc_cred = calldata->state->owner->so_cred,
+		.rpc_cred = state->owner->so_cred,
 	};
-	if (calldata->arg.open_flags != 0)
+	int mode = 0;
+	int status;
+
+	status = nfs_wait_on_sequence(calldata->arg.seqid, task);
+	if (status != 0)
+		return;
+	/* Don't reorder reads */
+	smp_rmb();
+	/* Recalculate the new open mode in case someone reopened the file
+	 * while we were waiting in line to be scheduled.
+	 */
+	if (state->nreaders != 0)
+		mode |= FMODE_READ;
+	if (state->nwriters != 0)
+		mode |= FMODE_WRITE;
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
+		state->state = mode;
+	if (mode == state->state) {
+		nfs4_free_closedata(calldata);
+		task->tk_exit = NULL;
+		rpc_exit(task, 0);
+		return;
+	}
+	if (mode != 0)
 		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
-	return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata);
+	calldata->arg.open_flags = mode;
+	rpc_call_setup(task, &msg, 0);
 }
 
 /* 
@@ -851,39 +950,52 @@ static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *
 int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) 
 {
 	struct nfs4_closedata *calldata;
-	int status;
+	int status = -ENOMEM;
 
-	/* Tell caller we're done */
-	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
-		state->state = mode;
-		return 0;
-	}
-	calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL);
+	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
 	if (calldata == NULL)
-		return -ENOMEM;
+		goto out;
 	calldata->inode = inode;
 	calldata->state = state;
 	calldata->arg.fh = NFS_FH(inode);
+	calldata->arg.stateid = &state->stateid;
 	/* Serialization for the sequence id */
-	calldata->arg.seqid = state->owner->so_seqid;
-	calldata->arg.open_flags = mode;
-	memcpy(&calldata->arg.stateid, &state->stateid,
-			sizeof(calldata->arg.stateid));
-	status = nfs4_close_call(NFS_SERVER(inode)->client, calldata);
-	/*
-	 * Return -EINPROGRESS on success in order to indicate to the
-	 * caller that an asynchronous RPC call has been launched, and
-	 * that it will release the semaphores on completion.
-	 */
-	return (status == 0) ? -EINPROGRESS : status;
+	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
+	if (calldata->arg.seqid == NULL)
+		goto out_free_calldata;
+
+	status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_close_begin,
+			nfs4_close_done, calldata);
+	if (status == 0)
+		goto out;
+
+	nfs_free_seqid(calldata->arg.seqid);
+out_free_calldata:
+	kfree(calldata);
+out:
+	return status;
+}
+
+static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
+{
+	struct file *filp;
+
+	filp = lookup_instantiate_filp(nd, dentry, NULL);
+	if (!IS_ERR(filp)) {
+		struct nfs_open_context *ctx;
+		ctx = (struct nfs_open_context *)filp->private_data;
+		ctx->state = state;
+	} else
+		nfs4_close_state(state, nd->intent.open.flags);
 }
 
-struct inode *
+struct dentry *
 nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct iattr attr;
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
+	struct dentry *res;
 
 	if (nd->flags & LOOKUP_CREATE) {
 		attr.ia_mode = nd->intent.open.create_mode;
@@ -897,16 +1009,23 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 
 	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
 	if (IS_ERR(cred))
-		return (struct inode *)cred;
+		return (struct dentry *)cred;
 	state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
 	put_rpccred(cred);
-	if (IS_ERR(state))
-		return (struct inode *)state;
-	return state->inode;
+	if (IS_ERR(state)) {
+		if (PTR_ERR(state) == -ENOENT)
+			d_add(dentry, NULL);
+		return (struct dentry *)state;
+	}
+	res = d_add_unique(dentry, state->inode);
+	if (res != NULL)
+		dentry = res;
+	nfs4_intent_set_file(nd, dentry, state);
+	return res;
 }
 
 int
-nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
+nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
 {
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
@@ -919,18 +1038,30 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
 	if (IS_ERR(state))
 		state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
 	put_rpccred(cred);
-	if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0)
-		return 1;
-	if (IS_ERR(state))
-		return 0;
+	if (IS_ERR(state)) {
+		switch (PTR_ERR(state)) {
+			case -EPERM:
+			case -EACCES:
+			case -EDQUOT:
+			case -ENOSPC:
+			case -EROFS:
+				lookup_instantiate_filp(nd, (struct dentry *)state, NULL);
+				return 1;
+			case -ENOENT:
+				if (dentry->d_inode == NULL)
+					return 1;
+		}
+		goto out_drop;
+	}
 	inode = state->inode;
+	iput(inode);
 	if (inode == dentry->d_inode) {
-		iput(inode);
+		nfs4_intent_set_file(nd, dentry, state);
 		return 1;
 	}
-	d_drop(dentry);
 	nfs4_close_state(state, openflags);
-	iput(inode);
+out_drop:
+	d_drop(dentry);
 	return 0;
 }
 
@@ -1431,7 +1562,7 @@ static int nfs4_proc_commit(struct nfs_write_data *cdata)
 
 static int
 nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-                 int flags)
+                 int flags, struct nameidata *nd)
 {
 	struct nfs4_state *state;
 	struct rpc_cred *cred;
@@ -1453,13 +1584,13 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		struct nfs_fattr fattr;
 		status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
 		                     NFS_FH(state->inode), sattr, state);
-		if (status == 0) {
+		if (status == 0)
 			nfs_setattr_update_inode(state->inode, sattr);
-			goto out;
-		}
-	} else if (flags != 0)
-		goto out;
-	nfs4_close_state(state, flags);
+	}
+	if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
+		nfs4_intent_set_file(nd, dentry, state);
+	else
+		nfs4_close_state(state, flags);
 out:
 	return status;
 }
@@ -2106,65 +2237,6 @@ nfs4_proc_renew(struct nfs4_client *clp)
 	return 0;
 }
 
-/*
- * We will need to arrange for the VFS layer to provide an atomic open.
- * Until then, this open method is prone to inefficiency and race conditions
- * due to the lookup, potential create, and open VFS calls from sys_open()
- * placed on the wire.
- */
-static int
-nfs4_proc_file_open(struct inode *inode, struct file *filp)
-{
-	struct dentry *dentry = filp->f_dentry;
-	struct nfs_open_context *ctx;
-	struct nfs4_state *state = NULL;
-	struct rpc_cred *cred;
-	int status = -ENOMEM;
-
-	dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n",
-	                       (int)dentry->d_parent->d_name.len,
-	                       dentry->d_parent->d_name.name,
-	                       (int)dentry->d_name.len, dentry->d_name.name);
-
-
-	/* Find our open stateid */
-	cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
-	if (IS_ERR(cred))
-		return PTR_ERR(cred);
-	ctx = alloc_nfs_open_context(dentry, cred);
-	put_rpccred(cred);
-	if (unlikely(ctx == NULL))
-		return -ENOMEM;
-	status = -EIO; /* ERACE actually */
-	state = nfs4_find_state(inode, cred, filp->f_mode);
-	if (unlikely(state == NULL))
-		goto no_state;
-	ctx->state = state;
-	nfs4_close_state(state, filp->f_mode);
-	ctx->mode = filp->f_mode;
-	nfs_file_set_open_context(filp, ctx);
-	put_nfs_open_context(ctx);
-	if (filp->f_mode & FMODE_WRITE)
-		nfs_begin_data_update(inode);
-	return 0;
-no_state:
-	printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__);
-	put_nfs_open_context(ctx);
-	return status;
-}
-
-/*
- * Release our state
- */
-static int
-nfs4_proc_file_release(struct inode *inode, struct file *filp)
-{
-	if (filp->f_mode & FMODE_WRITE)
-		nfs_end_data_update(inode);
-	nfs_file_clear_open_context(filp);
-	return 0;
-}
-
 static inline int nfs4_server_supports_acls(struct nfs_server *server)
 {
 	return (server->caps & NFS_CAP_ACLS)
@@ -2345,6 +2417,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 
 	if (!nfs4_server_supports_acls(server))
 		return -EOPNOTSUPP;
+	nfs_inode_return_delegation(inode);
 	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
 	ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
 	if (ret == 0)
@@ -2353,7 +2426,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 }
 
 static int
-nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
+nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 {
 	struct nfs4_client *clp = server->nfs4_state;
 
@@ -2431,7 +2504,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
 /* This is the error handling routine for processes that are allowed
  * to sleep.
  */
-int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
+int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
 {
 	struct nfs4_client *clp = server->nfs4_state;
 	int ret = errorcode;
@@ -2632,7 +2705,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 
 	down_read(&clp->cl_sem);
 	nlo.clientid = clp->cl_clientid;
-	down(&state->lock_sema);
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
 		goto out;
@@ -2659,7 +2731,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 		status = 0;
 	}
 out:
-	up(&state->lock_sema);
 	up_read(&clp->cl_sem);
 	return status;
 }
@@ -2696,79 +2767,149 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
 	return res;
 }
 
-static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
+struct nfs4_unlockdata {
+	struct nfs_lockargs arg;
+	struct nfs_locku_opargs luargs;
+	struct nfs_lockres res;
+	struct nfs4_lock_state *lsp;
+	struct nfs_open_context *ctx;
+	atomic_t refcount;
+	struct completion completion;
+};
+
+static void nfs4_locku_release_calldata(struct nfs4_unlockdata *calldata)
 {
-	struct inode *inode = state->inode;
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_client *clp = server->nfs4_state;
-	struct nfs_lockargs arg = {
-		.fh = NFS_FH(inode),
-		.type = nfs4_lck_type(cmd, request),
-		.offset = request->fl_start,
-		.length = nfs4_lck_length(request),
-	};
-	struct nfs_lockres res = {
-		.server = server,
-	};
+	if (atomic_dec_and_test(&calldata->refcount)) {
+		nfs_free_seqid(calldata->luargs.seqid);
+		nfs4_put_lock_state(calldata->lsp);
+		put_nfs_open_context(calldata->ctx);
+		kfree(calldata);
+	}
+}
+
+static void nfs4_locku_complete(struct nfs4_unlockdata *calldata)
+{
+	complete(&calldata->completion);
+	nfs4_locku_release_calldata(calldata);
+}
+
+static void nfs4_locku_done(struct rpc_task *task)
+{
+	struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata;
+
+	nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid);
+	switch (task->tk_status) {
+		case 0:
+			memcpy(calldata->lsp->ls_stateid.data,
+					calldata->res.u.stateid.data,
+					sizeof(calldata->lsp->ls_stateid.data));
+			break;
+		case -NFS4ERR_STALE_STATEID:
+		case -NFS4ERR_EXPIRED:
+			nfs4_schedule_state_recovery(calldata->res.server->nfs4_state);
+			break;
+		default:
+			if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) {
+				rpc_restart_call(task);
+				return;
+			}
+	}
+	nfs4_locku_complete(calldata);
+}
+
+static void nfs4_locku_begin(struct rpc_task *task)
+{
+	struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata;
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
-		.rpc_argp       = &arg,
-		.rpc_resp       = &res,
-		.rpc_cred	= state->owner->so_cred,
+		.rpc_argp       = &calldata->arg,
+		.rpc_resp       = &calldata->res,
+		.rpc_cred	= calldata->lsp->ls_state->owner->so_cred,
 	};
+	int status;
+
+	status = nfs_wait_on_sequence(calldata->luargs.seqid, task);
+	if (status != 0)
+		return;
+	if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) {
+		nfs4_locku_complete(calldata);
+		task->tk_exit = NULL;
+		rpc_exit(task, 0);
+		return;
+	}
+	rpc_call_setup(task, &msg, 0);
+}
+
+static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+	struct nfs4_unlockdata *calldata;
+	struct inode *inode = state->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_lock_state *lsp;
-	struct nfs_locku_opargs luargs;
 	int status;
-			
-	down_read(&clp->cl_sem);
-	down(&state->lock_sema);
+
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
-		goto out;
+		return status;
 	lsp = request->fl_u.nfs4_fl.owner;
 	/* We might have lost the locks! */
 	if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0)
-		goto out;
-	luargs.seqid = lsp->ls_seqid;
-	memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid));
-	arg.u.locku = &luargs;
-	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_lock_seqid(status, lsp);
-
-	if (status == 0)
-		memcpy(&lsp->ls_stateid,  &res.u.stateid, 
-				sizeof(lsp->ls_stateid));
-out:
-	up(&state->lock_sema);
+		return 0;
+	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
+	if (calldata == NULL)
+		return -ENOMEM;
+	calldata->luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+	if (calldata->luargs.seqid == NULL) {
+		kfree(calldata);
+		return -ENOMEM;
+	}
+	calldata->luargs.stateid = &lsp->ls_stateid;
+	calldata->arg.fh = NFS_FH(inode);
+	calldata->arg.type = nfs4_lck_type(cmd, request);
+	calldata->arg.offset = request->fl_start;
+	calldata->arg.length = nfs4_lck_length(request);
+	calldata->arg.u.locku = &calldata->luargs;
+	calldata->res.server = server;
+	calldata->lsp = lsp;
+	atomic_inc(&lsp->ls_count);
+
+	/* Ensure we don't close file until we're done freeing locks! */
+	calldata->ctx = get_nfs_open_context((struct nfs_open_context*)request->fl_file->private_data);
+
+	atomic_set(&calldata->refcount, 2);
+	init_completion(&calldata->completion);
+
+	status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin,
+			nfs4_locku_done, calldata);
 	if (status == 0)
-		do_vfs_lock(request->fl_file, request);
-	up_read(&clp->cl_sem);
+		wait_for_completion_interruptible(&calldata->completion);
+	do_vfs_lock(request->fl_file, request);
+	nfs4_locku_release_calldata(calldata);
 	return status;
 }
 
-static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
-{
-	struct nfs4_exception exception = { };
-	int err;
-
-	do {
-		err = nfs4_handle_exception(NFS_SERVER(state->inode),
-				_nfs4_proc_unlck(state, cmd, request),
-				&exception);
-	} while (exception.retry);
-	return err;
-}
-
 static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim)
 {
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
+	struct nfs_lock_opargs largs = {
+		.lock_stateid = &lsp->ls_stateid,
+		.open_stateid = &state->stateid,
+		.lock_owner = {
+			.clientid = server->nfs4_state->cl_clientid,
+			.id = lsp->ls_id,
+		},
+		.reclaim = reclaim,
+	};
 	struct nfs_lockargs arg = {
 		.fh = NFS_FH(inode),
 		.type = nfs4_lck_type(cmd, request),
 		.offset = request->fl_start,
 		.length = nfs4_lck_length(request),
+		.u = {
+			.lock = &largs,
+		},
 	};
 	struct nfs_lockres res = {
 		.server = server,
@@ -2779,53 +2920,39 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 		.rpc_resp       = &res,
 		.rpc_cred	= state->owner->so_cred,
 	};
-	struct nfs_lock_opargs largs = {
-		.reclaim = reclaim,
-		.new_lock_owner = 0,
-	};
-	int status;
+	int status = -ENOMEM;
 
-	if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) {
+	largs.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+	if (largs.lock_seqid == NULL)
+		return -ENOMEM;
+	if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) {
 		struct nfs4_state_owner *owner = state->owner;
-		struct nfs_open_to_lock otl = {
-			.lock_owner = {
-				.clientid = server->nfs4_state->cl_clientid,
-			},
-		};
-
-		otl.lock_seqid = lsp->ls_seqid;
-		otl.lock_owner.id = lsp->ls_id;
-		memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid));
-		largs.u.open_lock = &otl;
+
+		largs.open_seqid = nfs_alloc_seqid(&owner->so_seqid);
+		if (largs.open_seqid == NULL)
+			goto out;
 		largs.new_lock_owner = 1;
-		arg.u.lock = &largs;
-		down(&owner->so_sema);
-		otl.open_seqid = owner->so_seqid;
 		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-		/* increment open_owner seqid on success, and 
-		* seqid mutating errors */
-		nfs4_increment_seqid(status, owner);
-		up(&owner->so_sema);
-		if (status == 0) {
-			lsp->ls_flags |= NFS_LOCK_INITIALIZED;
-			lsp->ls_seqid++;
+		/* increment open seqid on success, and seqid mutating errors */
+		if (largs.new_lock_owner != 0) {
+			nfs_increment_open_seqid(status, largs.open_seqid);
+			if (status == 0)
+				nfs_confirm_seqid(&lsp->ls_seqid, 0);
 		}
-	} else {
-		struct nfs_exist_lock el = {
-			.seqid = lsp->ls_seqid,
-		};
-		memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid));
-		largs.u.exist_lock = &el;
-		arg.u.lock = &largs;
+		nfs_free_seqid(largs.open_seqid);
+	} else
 		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-		/* increment seqid on success, and * seqid mutating errors*/
-		nfs4_increment_lock_seqid(status, lsp);
-	}
+	/* increment lock seqid on success, and seqid mutating errors*/
+	nfs_increment_lock_seqid(status, largs.lock_seqid);
 	/* save the returned stateid. */
-	if (status == 0)
-		memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid));
-	else if (status == -NFS4ERR_DENIED)
+	if (status == 0) {
+		memcpy(lsp->ls_stateid.data, res.u.stateid.data,
+				sizeof(lsp->ls_stateid.data));
+		lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+	} else if (status == -NFS4ERR_DENIED)
 		status = -EAGAIN;
+out:
+	nfs_free_seqid(largs.lock_seqid);
 	return status;
 }
 
@@ -2865,11 +2992,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 	int status;
 
 	down_read(&clp->cl_sem);
-	down(&state->lock_sema);
 	status = nfs4_set_lock_state(state, request);
 	if (status == 0)
 		status = _nfs4_do_setlk(state, cmd, request, 0);
-	up(&state->lock_sema);
 	if (status == 0) {
 		/* Note: we always want to sleep here! */
 		request->fl_flags |= FL_SLEEP;
@@ -3024,8 +3149,8 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.read_setup	= nfs4_proc_read_setup,
 	.write_setup	= nfs4_proc_write_setup,
 	.commit_setup	= nfs4_proc_commit_setup,
-	.file_open      = nfs4_proc_file_open,
-	.file_release   = nfs4_proc_file_release,
+	.file_open      = nfs_open,
+	.file_release   = nfs_release,
 	.lock		= nfs4_proc_lock,
 	.clear_acl_cache = nfs4_zap_acl_attr,
 };

+ 124 - 41
fs/nfs/nfs4state.c

@@ -264,13 +264,15 @@ nfs4_alloc_state_owner(void)
 {
 	struct nfs4_state_owner *sp;
 
-	sp = kmalloc(sizeof(*sp),GFP_KERNEL);
+	sp = kzalloc(sizeof(*sp),GFP_KERNEL);
 	if (!sp)
 		return NULL;
-	init_MUTEX(&sp->so_sema);
-	sp->so_seqid = 0;                 /* arbitrary */
 	INIT_LIST_HEAD(&sp->so_states);
 	INIT_LIST_HEAD(&sp->so_delegations);
+	rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
+	sp->so_seqid.sequence = &sp->so_sequence;
+	spin_lock_init(&sp->so_sequence.lock);
+	INIT_LIST_HEAD(&sp->so_sequence.list);
 	atomic_set(&sp->so_count, 1);
 	return sp;
 }
@@ -359,7 +361,6 @@ nfs4_alloc_open_state(void)
 	memset(state->stateid.data, 0, sizeof(state->stateid.data));
 	atomic_set(&state->count, 1);
 	INIT_LIST_HEAD(&state->lock_states);
-	init_MUTEX(&state->lock_sema);
 	spin_lock_init(&state->state_lock);
 	return state;
 }
@@ -441,7 +442,6 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
 	state = __nfs4_find_state_byowner(inode, owner);
 	if (state == NULL && new != NULL) {
 		state = new;
-		/* Caller *must* be holding owner->so_sem */
 		/* Note: The reclaim code dictates that we add stateless
 		 * and read-only stateids to the end of the list */
 		list_add_tail(&state->open_states, &owner->so_states);
@@ -461,7 +461,7 @@ out:
 
 /*
  * Beware! Caller must be holding exactly one
- * reference to clp->cl_sem and owner->so_sema!
+ * reference to clp->cl_sem!
  */
 void nfs4_put_open_state(struct nfs4_state *state)
 {
@@ -481,19 +481,15 @@ void nfs4_put_open_state(struct nfs4_state *state)
 }
 
 /*
- * Beware! Caller must be holding no references to clp->cl_sem!
- * of owner->so_sema!
+ * Close the current file.
  */
 void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 {
 	struct inode *inode = state->inode;
 	struct nfs4_state_owner *owner = state->owner;
-	struct nfs4_client *clp = owner->so_client;
 	int newstate;
 
 	atomic_inc(&owner->so_count);
-	down_read(&clp->cl_sem);
-	down(&owner->so_sema);
 	/* Protect against nfs4_find_state() */
 	spin_lock(&inode->i_lock);
 	if (mode & FMODE_READ)
@@ -515,14 +511,16 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 			newstate |= FMODE_WRITE;
 		if (state->state == newstate)
 			goto out;
-		if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS)
+		if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+			state->state = newstate;
+			goto out;
+		}
+		if (nfs4_do_close(inode, state, newstate) == 0)
 			return;
 	}
 out:
 	nfs4_put_open_state(state);
-	up(&owner->so_sema);
 	nfs4_put_state_owner(owner);
-	up_read(&clp->cl_sem);
 }
 
 /*
@@ -546,19 +544,16 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
  * Return a compatible lock_state. If no initialized lock_state structure
  * exists, return an uninitialized one.
  *
- * The caller must be holding state->lock_sema
  */
 static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 {
 	struct nfs4_lock_state *lsp;
 	struct nfs4_client *clp = state->owner->so_client;
 
-	lsp = kmalloc(sizeof(*lsp), GFP_KERNEL);
+	lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
 	if (lsp == NULL)
 		return NULL;
-	lsp->ls_flags = 0;
-	lsp->ls_seqid = 0;	/* arbitrary */
-	memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data));
+	lsp->ls_seqid.sequence = &state->owner->so_sequence;
 	atomic_set(&lsp->ls_count, 1);
 	lsp->ls_owner = fl_owner;
 	spin_lock(&clp->cl_lock);
@@ -572,7 +567,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
  * Return a compatible lock_state. If no initialized lock_state structure
  * exists, return an uninitialized one.
  *
- * The caller must be holding state->lock_sema and clp->cl_sem
+ * The caller must be holding clp->cl_sem
  */
 static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
 {
@@ -605,7 +600,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
  * Release reference to lock_state, and free it if we see that
  * it is no longer in use
  */
-static void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
+void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
 {
 	struct nfs4_state *state;
 
@@ -673,29 +668,98 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f
 	nfs4_put_lock_state(lsp);
 }
 
-/*
-* Called with state->lock_sema and clp->cl_sem held.
-*/
-void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
+struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
+{
+	struct rpc_sequence *sequence = counter->sequence;
+	struct nfs_seqid *new;
+
+	new = kmalloc(sizeof(*new), GFP_KERNEL);
+	if (new != NULL) {
+		new->sequence = counter;
+		new->task = NULL;
+		spin_lock(&sequence->lock);
+		list_add_tail(&new->list, &sequence->list);
+		spin_unlock(&sequence->lock);
+	}
+	return new;
+}
+
+void nfs_free_seqid(struct nfs_seqid *seqid)
 {
-	if (status == NFS_OK || seqid_mutating_err(-status))
-		lsp->ls_seqid++;
+	struct rpc_sequence *sequence = seqid->sequence->sequence;
+	struct rpc_task *next = NULL;
+
+	spin_lock(&sequence->lock);
+	list_del(&seqid->list);
+	if (!list_empty(&sequence->list)) {
+		next = list_entry(sequence->list.next, struct nfs_seqid, list)->task;
+		if (next)
+			rpc_wake_up_task(next);
+	}
+	spin_unlock(&sequence->lock);
+	kfree(seqid);
 }
 
 /*
-* Called with sp->so_sema and clp->cl_sem held.
-*
-* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
-* failed with a seqid incrementing error -
-* see comments nfs_fs.h:seqid_mutating_error()
-*/
-void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp)
-{
-	if (status == NFS_OK || seqid_mutating_err(-status))
-		sp->so_seqid++;
-	/* If the server returns BAD_SEQID, unhash state_owner here */
-	if (status == -NFS4ERR_BAD_SEQID)
+ * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
+ * failed with a seqid incrementing error -
+ * see comments nfs_fs.h:seqid_mutating_error()
+ */
+static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
+{
+	switch (status) {
+		case 0:
+			break;
+		case -NFS4ERR_BAD_SEQID:
+		case -NFS4ERR_STALE_CLIENTID:
+		case -NFS4ERR_STALE_STATEID:
+		case -NFS4ERR_BAD_STATEID:
+		case -NFS4ERR_BADXDR:
+		case -NFS4ERR_RESOURCE:
+		case -NFS4ERR_NOFILEHANDLE:
+			/* Non-seqid mutating errors */
+			return;
+	};
+	/*
+	 * Note: no locking needed as we are guaranteed to be first
+	 * on the sequence list
+	 */
+	seqid->sequence->counter++;
+}
+
+void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
+{
+	if (status == -NFS4ERR_BAD_SEQID) {
+		struct nfs4_state_owner *sp = container_of(seqid->sequence,
+				struct nfs4_state_owner, so_seqid);
 		nfs4_drop_state_owner(sp);
+	}
+	return nfs_increment_seqid(status, seqid);
+}
+
+/*
+ * Increment the seqid if the LOCK/LOCKU succeeded, or
+ * failed with a seqid incrementing error -
+ * see comments nfs_fs.h:seqid_mutating_error()
+ */
+void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
+{
+	return nfs_increment_seqid(status, seqid);
+}
+
+int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
+{
+	struct rpc_sequence *sequence = seqid->sequence->sequence;
+	int status = 0;
+
+	spin_lock(&sequence->lock);
+	if (sequence->list.next != &seqid->list) {
+		seqid->task = task;
+		rpc_sleep_on(&sequence->wait, task, NULL, NULL);
+		status = -EAGAIN;
+	}
+	spin_unlock(&sequence->lock);
+	return status;
 }
 
 static int reclaimer(void *);
@@ -791,8 +855,6 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n
 		if (state->state == 0)
 			continue;
 		status = ops->recover_open(sp, state);
-		list_for_each_entry(lock, &state->lock_states, ls_locks)
-			lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
 		if (status >= 0) {
 			status = nfs4_reclaim_locks(ops, state);
 			if (status < 0)
@@ -831,6 +893,26 @@ out_err:
 	return status;
 }
 
+static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
+{
+	struct nfs4_state_owner *sp;
+	struct nfs4_state *state;
+	struct nfs4_lock_state *lock;
+
+	/* Reset all sequence ids to zero */
+	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+		sp->so_seqid.counter = 0;
+		sp->so_seqid.flags = 0;
+		list_for_each_entry(state, &sp->so_states, open_states) {
+			list_for_each_entry(lock, &state->lock_states, ls_locks) {
+				lock->ls_seqid.counter = 0;
+				lock->ls_seqid.flags = 0;
+				lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
+			}
+		}
+	}
+}
+
 static int reclaimer(void *ptr)
 {
 	struct reclaimer_args *args = (struct reclaimer_args *)ptr;
@@ -864,6 +946,7 @@ restart_loop:
 		default:
 			ops = &nfs4_network_partition_recovery_ops;
 	};
+	nfs4_state_mark_reclaim(clp);
 	status = __nfs4_init_client(clp);
 	if (status)
 		goto out_error;

+ 37 - 25
fs/nfs/nfs4xdr.c

@@ -602,10 +602,10 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
 {
 	uint32_t *p;
 
-	RESERVE_SPACE(8+sizeof(arg->stateid.data));
+	RESERVE_SPACE(8+sizeof(arg->stateid->data));
 	WRITE32(OP_CLOSE);
-	WRITE32(arg->seqid);
-	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+	WRITE32(arg->seqid->sequence->counter);
+	WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data));
 	
 	return 0;
 }
@@ -729,22 +729,18 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
 	WRITE64(arg->length);
 	WRITE32(opargs->new_lock_owner);
 	if (opargs->new_lock_owner){
-		struct nfs_open_to_lock *ol = opargs->u.open_lock;
-
 		RESERVE_SPACE(40);
-		WRITE32(ol->open_seqid);
-		WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid));
-		WRITE32(ol->lock_seqid);
-		WRITE64(ol->lock_owner.clientid);
+		WRITE32(opargs->open_seqid->sequence->counter);
+		WRITEMEM(opargs->open_stateid->data, sizeof(opargs->open_stateid->data));
+		WRITE32(opargs->lock_seqid->sequence->counter);
+		WRITE64(opargs->lock_owner.clientid);
 		WRITE32(4);
-		WRITE32(ol->lock_owner.id);
+		WRITE32(opargs->lock_owner.id);
 	}
 	else {
-		struct nfs_exist_lock *el = opargs->u.exist_lock;
-
 		RESERVE_SPACE(20);
-		WRITEMEM(&el->stateid, sizeof(el->stateid));
-		WRITE32(el->seqid);
+		WRITEMEM(opargs->lock_stateid->data, sizeof(opargs->lock_stateid->data));
+		WRITE32(opargs->lock_seqid->sequence->counter);
 	}
 
 	return 0;
@@ -775,8 +771,8 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
 	RESERVE_SPACE(44);
 	WRITE32(OP_LOCKU);
 	WRITE32(arg->type);
-	WRITE32(opargs->seqid);
-	WRITEMEM(&opargs->stateid, sizeof(opargs->stateid));
+	WRITE32(opargs->seqid->sequence->counter);
+	WRITEMEM(opargs->stateid->data, sizeof(opargs->stateid->data));
 	WRITE64(arg->offset);
 	WRITE64(arg->length);
 
@@ -826,7 +822,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
  */
 	RESERVE_SPACE(8);
 	WRITE32(OP_OPEN);
-	WRITE32(arg->seqid);
+	WRITE32(arg->seqid->sequence->counter);
 	encode_share_access(xdr, arg->open_flags);
 	RESERVE_SPACE(16);
 	WRITE64(arg->clientid);
@@ -941,7 +937,7 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con
 	RESERVE_SPACE(8+sizeof(arg->stateid.data));
 	WRITE32(OP_OPEN_CONFIRM);
 	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
-	WRITE32(arg->seqid);
+	WRITE32(arg->seqid->sequence->counter);
 
 	return 0;
 }
@@ -950,10 +946,10 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea
 {
 	uint32_t *p;
 
-	RESERVE_SPACE(8+sizeof(arg->stateid.data));
+	RESERVE_SPACE(8+sizeof(arg->stateid->data));
 	WRITE32(OP_OPEN_DOWNGRADE);
-	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
-	WRITE32(arg->seqid);
+	WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data));
+	WRITE32(arg->seqid->sequence->counter);
 	encode_share_access(xdr, arg->open_flags);
 	return 0;
 }
@@ -1437,6 +1433,9 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena
 	};
 	int status;
 
+	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
+	if (status != 0)
+		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1464,6 +1463,9 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct n
 	};
 	int status;
 
+	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
+	if (status != 0)
+		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1485,6 +1487,9 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nf
 	};
 	int status;
 
+	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
+	if (status != 0)
+		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1525,8 +1530,15 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_locka
 	struct compound_hdr hdr = {
 		.nops   = 2,
 	};
+	struct nfs_lock_opargs *opargs = args->u.lock;
 	int status;
 
+	status = nfs_wait_on_sequence(opargs->lock_seqid, req->rq_task);
+	if (status != 0)
+		goto out;
+	/* Do we need to do an open_to_lock_owner? */
+	if (opargs->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)
+		opargs->new_lock_owner = 0;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -2890,8 +2902,8 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res)
 
 	status = decode_op_hdr(xdr, OP_LOCK);
 	if (status == 0) {
-		READ_BUF(sizeof(nfs4_stateid));
-		COPYMEM(&res->u.stateid, sizeof(res->u.stateid));
+		READ_BUF(sizeof(res->u.stateid.data));
+		COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data));
 	} else if (status == -NFS4ERR_DENIED)
 		return decode_lock_denied(xdr, &res->u.denied);
 	return status;
@@ -2913,8 +2925,8 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res)
 
 	status = decode_op_hdr(xdr, OP_LOCKU);
 	if (status == 0) {
-		READ_BUF(sizeof(nfs4_stateid));
-		COPYMEM(&res->u.stateid, sizeof(res->u.stateid));
+		READ_BUF(sizeof(res->u.stateid.data));
+		COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data));
 	}
 	return status;
 }

+ 1 - 1
fs/nfs/proc.c

@@ -216,7 +216,7 @@ static int nfs_proc_write(struct nfs_write_data *wdata)
 
 static int
 nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-		int flags)
+		int flags, struct nameidata *nd)
 {
 	struct nfs_fh		fhandle;
 	struct nfs_fattr	fattr;

+ 65 - 14
fs/open.c

@@ -739,7 +739,8 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
 }
 
 static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
-					int flags, struct file *f)
+					int flags, struct file *f,
+					int (*open)(struct inode *, struct file *))
 {
 	struct inode *inode;
 	int error;
@@ -761,11 +762,14 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 	f->f_op = fops_get(inode->i_fop);
 	file_move(f, &inode->i_sb->s_files);
 
-	if (f->f_op && f->f_op->open) {
-		error = f->f_op->open(inode,f);
+	if (!open && f->f_op)
+		open = f->f_op->open;
+	if (open) {
+		error = open(inode, f);
 		if (error)
 			goto cleanup_all;
 	}
+
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
@@ -814,28 +818,75 @@ struct file *filp_open(const char * filename, int flags, int mode)
 {
 	int namei_flags, error;
 	struct nameidata nd;
-	struct file *f;
 
 	namei_flags = flags;
 	if ((namei_flags+1) & O_ACCMODE)
 		namei_flags++;
-	if (namei_flags & O_TRUNC)
-		namei_flags |= 2;
-
-	error = -ENFILE;
-	f = get_empty_filp();
-	if (f == NULL)
-		return ERR_PTR(error);
 
 	error = open_namei(filename, namei_flags, mode, &nd);
 	if (!error)
-		return __dentry_open(nd.dentry, nd.mnt, flags, f);
+		return nameidata_to_filp(&nd, flags);
 
-	put_filp(f);
 	return ERR_PTR(error);
 }
 EXPORT_SYMBOL(filp_open);
 
+/**
+ * lookup_instantiate_filp - instantiates the open intent filp
+ * @nd: pointer to nameidata
+ * @dentry: pointer to dentry
+ * @open: open callback
+ *
+ * Helper for filesystems that want to use lookup open intents and pass back
+ * a fully instantiated struct file to the caller.
+ * This function is meant to be called from within a filesystem's
+ * lookup method.
+ * Note that in case of error, nd->intent.open.file is destroyed, but the
+ * path information remains valid.
+ * If the open callback is set to NULL, then the standard f_op->open()
+ * filesystem callback is substituted.
+ */
+struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
+		int (*open)(struct inode *, struct file *))
+{
+	if (IS_ERR(nd->intent.open.file))
+		goto out;
+	if (IS_ERR(dentry))
+		goto out_err;
+	nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt),
+					     nd->intent.open.flags - 1,
+					     nd->intent.open.file,
+					     open);
+out:
+	return nd->intent.open.file;
+out_err:
+	release_open_intent(nd);
+	nd->intent.open.file = (struct file *)dentry;
+	goto out;
+}
+EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
+
+/**
+ * nameidata_to_filp - convert a nameidata to an open filp.
+ * @nd: pointer to nameidata
+ * @flags: open flags
+ *
+ * Note that this function destroys the original nameidata
+ */
+struct file *nameidata_to_filp(struct nameidata *nd, int flags)
+{
+	struct file *filp;
+
+	/* Pick up the filp from the open intent */
+	filp = nd->intent.open.file;
+	/* Has the filesystem initialised the file for us? */
+	if (filp->f_dentry == NULL)
+		filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL);
+	else
+		path_release(nd);
+	return filp;
+}
+
 struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
 {
 	int error;
@@ -846,7 +897,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
 	if (f == NULL)
 		return ERR_PTR(error);
 
-	return __dentry_open(dentry, mnt, flags, f);
+	return __dentry_open(dentry, mnt, flags, f, NULL);
 }
 EXPORT_SYMBOL(dentry_open);
 

+ 12 - 0
fs/proc/base.c

@@ -103,7 +103,9 @@ enum pid_directory_inos {
 	PROC_TGID_NUMA_MAPS,
 	PROC_TGID_MOUNTS,
 	PROC_TGID_WCHAN,
+#ifdef CONFIG_MMU
 	PROC_TGID_SMAPS,
+#endif
 #ifdef CONFIG_SCHEDSTATS
 	PROC_TGID_SCHEDSTAT,
 #endif
@@ -141,7 +143,9 @@ enum pid_directory_inos {
 	PROC_TID_NUMA_MAPS,
 	PROC_TID_MOUNTS,
 	PROC_TID_WCHAN,
+#ifdef CONFIG_MMU
 	PROC_TID_SMAPS,
+#endif
 #ifdef CONFIG_SCHEDSTATS
 	PROC_TID_SCHEDSTAT,
 #endif
@@ -195,7 +199,9 @@ static struct pid_entry tgid_base_stuff[] = {
 	E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO),
+#ifdef CONFIG_MMU
 	E(PROC_TGID_SMAPS,     "smaps",   S_IFREG|S_IRUGO),
+#endif
 #ifdef CONFIG_SECURITY
 	E(PROC_TGID_ATTR,      "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
 #endif
@@ -235,7 +241,9 @@ static struct pid_entry tid_base_stuff[] = {
 	E(PROC_TID_ROOT,       "root",    S_IFLNK|S_IRWXUGO),
 	E(PROC_TID_EXE,        "exe",     S_IFLNK|S_IRWXUGO),
 	E(PROC_TID_MOUNTS,     "mounts",  S_IFREG|S_IRUGO),
+#ifdef CONFIG_MMU
 	E(PROC_TID_SMAPS,      "smaps",   S_IFREG|S_IRUGO),
+#endif
 #ifdef CONFIG_SECURITY
 	E(PROC_TID_ATTR,       "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
 #endif
@@ -630,6 +638,7 @@ static struct file_operations proc_numa_maps_operations = {
 };
 #endif
 
+#ifdef CONFIG_MMU
 extern struct seq_operations proc_pid_smaps_op;
 static int smaps_open(struct inode *inode, struct file *file)
 {
@@ -648,6 +657,7 @@ static struct file_operations proc_smaps_operations = {
 	.llseek		= seq_lseek,
 	.release	= seq_release,
 };
+#endif
 
 extern struct seq_operations mounts_op;
 static int mounts_open(struct inode *inode, struct file *file)
@@ -1681,10 +1691,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
 		case PROC_TGID_MOUNTS:
 			inode->i_fop = &proc_mounts_operations;
 			break;
+#ifdef CONFIG_MMU
 		case PROC_TID_SMAPS:
 		case PROC_TGID_SMAPS:
 			inode->i_fop = &proc_smaps_operations;
 			break;
+#endif
 #ifdef CONFIG_SECURITY
 		case PROC_TID_ATTR:
 			inode->i_nlink = 2;

+ 1 - 0
fs/proc/nommu.c

@@ -91,6 +91,7 @@ static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos)
 			next = _rb;
 			break;
 		}
+		pos--;
 	}
 
 	return next;

+ 1 - 0
include/asm-arm/arch-pxa/pxafb.h

@@ -66,4 +66,5 @@ struct pxafb_mach_info {
 
 };
 void set_pxa_fb_info(struct pxafb_mach_info *hard_pxa_fb_info);
+void set_pxa_fb_parent(struct device *parent_dev);
 unsigned long pxafb_get_hsync_time(struct device *dev);

+ 42 - 16
include/asm-arm/arch-s3c2410/io.h

@@ -9,7 +9,7 @@
  *  06-Dec-1997	RMK	Created.
  *  02-Sep-2003 BJD	Modified for S3C2410
  *  10-Mar-2005 LCVR	Changed S3C2410_VA to S3C24XX_VA
- *
+ *  13-Oct-2005 BJD	Fixed problems with LDRH/STRH offset range
  */
 
 #ifndef __ASM_ARM_ARCH_IO_H
@@ -97,7 +97,7 @@ DECLARE_IO(int,l,"")
 	else								\
 		__asm__ __volatile__(					\
 		"strb	%0, [%1, #0]	@ outbc"			\
-		: : "r" (value), "r" ((port)));		\
+		: : "r" (value), "r" ((port)));				\
 })
 
 #define __inbc(port)							\
@@ -110,35 +110,61 @@ DECLARE_IO(int,l,"")
 	else								\
 		__asm__ __volatile__(					\
 		"ldrb	%0, [%1, #0]	@ inbc"				\
-		: "=r" (result) : "r" ((port)));	\
+		: "=r" (result) : "r" ((port)));			\
 	result;								\
 })
 
 #define __outwc(value,port)						\
 ({									\
 	unsigned long v = value;					\
-	if (__PORT_PCIO((port)))					\
-		__asm__ __volatile__(					\
-		"strh	%0, [%1, %2]	@ outwc"			\
-		: : "r" (v), "r" (PCIO_BASE), "Jr" ((port)));	\
-	else								\
+	if (__PORT_PCIO((port))) {					\
+		if ((port) < 256 && (port) > -256)			\
+			__asm__ __volatile__(				\
+			"strh	%0, [%1, %2]	@ outwc"		\
+			: : "r" (v), "r" (PCIO_BASE), "Jr" ((port)));	\
+		else if ((port) > 0)					\
+			__asm__ __volatile__(				\
+			"strh	%0, [%1, %2]	@ outwc"		\
+			: : "r" (v),					\
+			    "r" (PCIO_BASE + ((port) & ~0xff)),		\
+			     "Jr" (((port) & 0xff)));			\
+		else							\
+			__asm__ __volatile__(				\
+			"strh	%0, [%1, #0]	@ outwc"		\
+			: : "r" (v),					\
+			    "r" (PCIO_BASE + (port)));			\
+	} else								\
 		__asm__ __volatile__(					\
 		"strh	%0, [%1, #0]	@ outwc"			\
-		: : "r" (v), "r" ((port)));	\
+		: : "r" (v), "r" ((port)));				\
 })
 
 #define __inwc(port)							\
 ({									\
 	unsigned short result;						\
-	if (__PORT_PCIO((port)))					\
-		__asm__ __volatile__(					\
-		"ldrh	%0, [%1, %2]	@ inwc"				\
-		: "=r" (result) : "r" (PCIO_BASE), "Jr" ((port)));	\
-	else								\
+	if (__PORT_PCIO((port))) {					\
+		if ((port) < 256 && (port) > -256 )			\
+			__asm__ __volatile__(				\
+			"ldrh	%0, [%1, %2]	@ inwc"			\
+			: "=r" (result)					\
+			: "r" (PCIO_BASE),				\
+			  "Jr" ((port)));				\
+		else if ((port) > 0)					\
+			__asm__ __volatile__(				\
+			"ldrh	%0, [%1, %2]	@ inwc"			\
+			: "=r" (result)					\
+			: "r" (PCIO_BASE + ((port) & ~0xff)),		\
+			  "Jr" (((port) & 0xff)));			\
+		else							\
+			__asm__ __volatile__(				\
+			"ldrh	%0, [%1, #0]	@ inwc"			\
+			: "=r" (result)					\
+			: "r" (PCIO_BASE + ((port))));			\
+	} else								\
 		__asm__ __volatile__(					\
 		"ldrh	%0, [%1, #0]	@ inwc"				\
-		: "=r" (result) : "r" ((port)));		\
-	result;						\
+		: "=r" (result) : "r" ((port)));			\
+	result;								\
 })
 
 #define __outlc(value,port)						\

+ 9 - 21
include/asm-sparc64/pbm.h

@@ -27,23 +27,27 @@
  * PCI bus.
  */
 
-#define PBM_LOGCLUSTERS 3
-#define PBM_NCLUSTERS (1 << PBM_LOGCLUSTERS)
-
 struct pci_controller_info;
 
 /* This contains the software state necessary to drive a PCI
  * controller's IOMMU.
  */
+struct pci_iommu_arena {
+	unsigned long	*map;
+	unsigned int	hint;
+	unsigned int	limit;
+};
+
 struct pci_iommu {
 	/* This protects the controller's IOMMU and all
 	 * streaming buffers underneath.
 	 */
 	spinlock_t	lock;
 
+	struct pci_iommu_arena arena;
+
 	/* IOMMU page table, a linear array of ioptes. */
 	iopte_t		*page_table;		/* The page table itself. */
-	int		page_table_sz_bits;	/* log2 of ow many pages does it map? */
 
 	/* Base PCI memory space address where IOMMU mappings
 	 * begin.
@@ -62,12 +66,6 @@ struct pci_iommu {
 	 */
 	unsigned long	write_complete_reg;
 
-	/* The lowest used consistent mapping entry.  Since
-	 * we allocate consistent maps out of cluster 0 this
-	 * is relative to the beginning of closter 0.
-	 */
-	u32		lowest_consistent_map;
-
 	/* In order to deal with some buggy third-party PCI bridges that
 	 * do wrong prefetching, we never mark valid mappings as invalid.
 	 * Instead we point them at this dummy page.
@@ -75,16 +73,6 @@ struct pci_iommu {
 	unsigned long	dummy_page;
 	unsigned long	dummy_page_pa;
 
-	/* If PBM_NCLUSTERS is ever decreased to 4 or lower,
-	 * or if largest supported page_table_sz * 8K goes above
-	 * 2GB, you must increase the size of the type of
-	 * these counters.  You have been duly warned. -DaveM
-	 */
-	struct {
-		u16	next;
-		u16	flush;
-	} alloc_info[PBM_NCLUSTERS];
-
 	/* CTX allocation. */
 	unsigned long ctx_lowest_free;
 	unsigned long ctx_bitmap[IOMMU_NUM_CTXS / (sizeof(unsigned long) * 8)];
@@ -102,7 +90,7 @@ struct pci_iommu {
 	u32 dma_addr_mask;
 };
 
-extern void pci_iommu_table_init(struct pci_iommu *, int);
+extern void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask);
 
 /* This describes a PCI bus module's streaming buffer. */
 struct pci_strbuf {

+ 2 - 2
include/linux/acct.h

@@ -162,13 +162,13 @@ typedef struct acct acct_t;
 #ifdef __KERNEL__
 /*
  * Yet another set of HZ to *HZ helper functions.
- * See <linux/times.h> for the original.
+ * See <linux/jiffies.h> for the original.
  */
 
 static inline u32 jiffies_to_AHZ(unsigned long x)
 {
 #if (TICK_NSEC % (NSEC_PER_SEC / AHZ)) == 0
-	return x / (HZ / USER_HZ);
+	return x / (HZ / AHZ);
 #else
         u64 tmp = (u64)x * TICK_NSEC;
         do_div(tmp, (NSEC_PER_SEC / AHZ));

+ 6 - 1
include/linux/aio.h

@@ -24,7 +24,12 @@ struct kioctx;
 #define KIOCB_SYNC_KEY		(~0U)
 
 /* ki_flags bits */
-#define KIF_LOCKED		0
+/*
+ * This may be used for cancel/retry serialization in the future, but
+ * for now it's unused and we probably don't want modules to even
+ * think they can use it.
+ */
+/* #define KIF_LOCKED		0 */
 #define KIF_KICKED		1
 #define KIF_CANCELLED		2
 

+ 7 - 9
include/linux/cpumask.h

@@ -393,15 +393,13 @@ extern cpumask_t cpu_present_map;
 #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map)
 
 /* Find the highest possible smp_processor_id() */
-static inline unsigned int highest_possible_processor_id(void)
-{
-	unsigned int cpu, highest = 0;
-
-	for_each_cpu_mask(cpu, cpu_possible_map)
-		highest = cpu;
-
-	return highest;
-}
+#define highest_possible_processor_id() \
+({ \
+	unsigned int cpu, highest = 0; \
+	for_each_cpu_mask(cpu, cpu_possible_map) \
+		highest = cpu; \
+	highest; \
+})
 
 
 #endif /* __LINUX_CPUMASK_H */

+ 22 - 17
include/linux/list.h

@@ -442,12 +442,14 @@ static inline void list_splice_init(struct list_head *list,
  * as long as the traversal is guarded by rcu_read_lock().
  */
 #define list_for_each_rcu(pos, head) \
-	for (pos = (head)->next; prefetch(pos->next), pos != (head); \
-        	pos = rcu_dereference(pos->next))
+	for (pos = (head)->next; \
+		prefetch(rcu_dereference(pos)->next), pos != (head); \
+        	pos = pos->next)
 
 #define __list_for_each_rcu(pos, head) \
-	for (pos = (head)->next; pos != (head); \
-        	pos = rcu_dereference(pos->next))
+	for (pos = (head)->next; \
+		rcu_dereference(pos) != (head); \
+        	pos = pos->next)
 
 /**
  * list_for_each_safe_rcu	-	iterate over an rcu-protected list safe
@@ -461,8 +463,9 @@ static inline void list_splice_init(struct list_head *list,
  * as long as the traversal is guarded by rcu_read_lock().
  */
 #define list_for_each_safe_rcu(pos, n, head) \
-	for (pos = (head)->next, n = pos->next; pos != (head); \
-		pos = rcu_dereference(n), n = pos->next)
+	for (pos = (head)->next; \
+		n = rcu_dereference(pos)->next, pos != (head); \
+		pos = n)
 
 /**
  * list_for_each_entry_rcu	-	iterate over rcu list of given type
@@ -474,11 +477,11 @@ static inline void list_splice_init(struct list_head *list,
  * the _rcu list-mutation primitives such as list_add_rcu()
  * as long as the traversal is guarded by rcu_read_lock().
  */
-#define list_for_each_entry_rcu(pos, head, member)			\
-	for (pos = list_entry((head)->next, typeof(*pos), member);	\
-	     prefetch(pos->member.next), &pos->member != (head); 	\
-	     pos = rcu_dereference(list_entry(pos->member.next, 	\
-					typeof(*pos), member)))
+#define list_for_each_entry_rcu(pos, head, member) \
+	for (pos = list_entry((head)->next, typeof(*pos), member); \
+		prefetch(rcu_dereference(pos)->member.next), \
+			&pos->member != (head); \
+		pos = list_entry(pos->member.next, typeof(*pos), member))
 
 
 /**
@@ -492,8 +495,9 @@ static inline void list_splice_init(struct list_head *list,
  * as long as the traversal is guarded by rcu_read_lock().
  */
 #define list_for_each_continue_rcu(pos, head) \
-	for ((pos) = (pos)->next; prefetch((pos)->next), (pos) != (head); \
-        	(pos) = rcu_dereference((pos)->next))
+	for ((pos) = (pos)->next; \
+		prefetch(rcu_dereference((pos))->next), (pos) != (head); \
+        	(pos) = (pos)->next)
 
 /*
  * Double linked lists with a single pointer list head.
@@ -696,8 +700,9 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 	     pos = n)
 
 #define hlist_for_each_rcu(pos, head) \
-	for ((pos) = (head)->first; pos && ({ prefetch((pos)->next); 1; }); \
-		(pos) = rcu_dereference((pos)->next))
+	for ((pos) = (head)->first; \
+		rcu_dereference((pos)) && ({ prefetch((pos)->next); 1; }); \
+		(pos) = (pos)->next)
 
 /**
  * hlist_for_each_entry	- iterate over list of given type
@@ -762,9 +767,9 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
  */
 #define hlist_for_each_entry_rcu(tpos, pos, head, member)		 \
 	for (pos = (head)->first;					 \
-	     pos && ({ prefetch(pos->next); 1;}) &&			 \
+	     rcu_dereference(pos) && ({ prefetch(pos->next); 1;}) &&	 \
 		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = rcu_dereference(pos->next))
+	     pos = pos->next)
 
 #else
 #warning "don't include kernel headers in userspace"

+ 8 - 0
include/linux/namei.h

@@ -8,6 +8,7 @@ struct vfsmount;
 struct open_intent {
 	int	flags;
 	int	create_mode;
+	struct file *file;
 };
 
 enum { MAX_NESTED_LINKS = 5 };
@@ -65,6 +66,13 @@ extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
 extern void path_release(struct nameidata *);
 extern void path_release_on_umount(struct nameidata *);
 
+extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags);
+extern int path_lookup_open(const char *, unsigned lookup_flags, struct nameidata *, int open_flags);
+extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
+		int (*open)(struct inode *, struct file *));
+extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
+extern void release_open_intent(struct nameidata *);
+
 extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
 

+ 13 - 23
include/linux/nfs_xdr.h

@@ -96,12 +96,13 @@ struct nfs4_change_info {
 	u64			after;
 };
 
+struct nfs_seqid;
 /*
  * Arguments to the open call.
  */
 struct nfs_openargs {
 	const struct nfs_fh *	fh;
-	__u32                   seqid;
+	struct nfs_seqid *	seqid;
 	int			open_flags;
 	__u64                   clientid;
 	__u32                   id;
@@ -136,7 +137,7 @@ struct nfs_openres {
 struct nfs_open_confirmargs {
 	const struct nfs_fh *	fh;
 	nfs4_stateid            stateid;
-	__u32                   seqid;
+	struct nfs_seqid *	seqid;
 };
 
 struct nfs_open_confirmres {
@@ -148,8 +149,8 @@ struct nfs_open_confirmres {
  */
 struct nfs_closeargs {
 	struct nfs_fh *         fh;
-	nfs4_stateid            stateid;
-	__u32                   seqid;
+	nfs4_stateid *		stateid;
+	struct nfs_seqid *	seqid;
 	int			open_flags;
 };
 
@@ -164,30 +165,19 @@ struct nfs_lowner {
 	u32                     id;
 };
 
-struct nfs_open_to_lock {
-	__u32                   open_seqid;
-	nfs4_stateid            open_stateid;
-	__u32                   lock_seqid;
-	struct nfs_lowner       lock_owner;
-};
-
-struct nfs_exist_lock {
-	nfs4_stateid            stateid;
-	__u32                   seqid;
-};
-
 struct nfs_lock_opargs {
+	struct nfs_seqid *	lock_seqid;
+	nfs4_stateid *		lock_stateid;
+	struct nfs_seqid *	open_seqid;
+	nfs4_stateid *		open_stateid;
+	struct nfs_lowner       lock_owner;
 	__u32                   reclaim;
 	__u32                   new_lock_owner;
-	union {
-		struct nfs_open_to_lock *open_lock;
-		struct nfs_exist_lock   *exist_lock;
-	} u;
 };
 
 struct nfs_locku_opargs {
-	__u32                   seqid;
-	nfs4_stateid            stateid;
+	struct nfs_seqid *	seqid;
+	nfs4_stateid *		stateid;
 };
 
 struct nfs_lockargs {
@@ -722,7 +712,7 @@ struct nfs_rpc_ops {
 	int	(*write)   (struct nfs_write_data *);
 	int	(*commit)  (struct nfs_write_data *);
 	int	(*create)  (struct inode *, struct dentry *,
-			    struct iattr *, int);
+			    struct iattr *, int, struct nameidata *);
 	int	(*remove)  (struct inode *, struct qstr *);
 	int	(*unlink_setup)  (struct rpc_message *,
 			    struct dentry *, struct qstr *);

+ 1 - 0
include/linux/rcupdate.h

@@ -94,6 +94,7 @@ struct rcu_data {
 	long  	       	batch;           /* Batch # for current RCU batch */
 	struct rcu_head *nxtlist;
 	struct rcu_head **nxttail;
+	long            count; /* # of queued items */
 	struct rcu_head *curlist;
 	struct rcu_head **curtail;
 	struct rcu_head *donelist;

+ 1 - 0
include/linux/sunrpc/xprt.h

@@ -211,6 +211,7 @@ int			xprt_reserve_xprt(struct rpc_task *task);
 int			xprt_reserve_xprt_cong(struct rpc_task *task);
 int			xprt_prepare_transmit(struct rpc_task *task);
 void			xprt_transmit(struct rpc_task *task);
+void			xprt_abort_transmit(struct rpc_task *task);
 int			xprt_adjust_timeout(struct rpc_rqst *req);
 void			xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
 void			xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);

+ 3 - 0
kernel/posix-cpu-timers.c

@@ -424,6 +424,7 @@ static void cleanup_timers(struct list_head *head,
 	cputime_t ptime = cputime_add(utime, stime);
 
 	list_for_each_entry_safe(timer, next, head, entry) {
+		put_task_struct(timer->task);
 		timer->task = NULL;
 		list_del_init(&timer->entry);
 		if (cputime_lt(timer->expires.cpu, ptime)) {
@@ -436,6 +437,7 @@ static void cleanup_timers(struct list_head *head,
 
 	++head;
 	list_for_each_entry_safe(timer, next, head, entry) {
+		put_task_struct(timer->task);
 		timer->task = NULL;
 		list_del_init(&timer->entry);
 		if (cputime_lt(timer->expires.cpu, utime)) {
@@ -448,6 +450,7 @@ static void cleanup_timers(struct list_head *head,
 
 	++head;
 	list_for_each_entry_safe(timer, next, head, entry) {
+		put_task_struct(timer->task);
 		timer->task = NULL;
 		list_del_init(&timer->entry);
 		if (timer->expires.sched < sched_time) {

+ 12 - 1
kernel/rcupdate.c

@@ -71,7 +71,7 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
 
 /* Fake initialization required by compiler */
 static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
-static int maxbatch = 10;
+static int maxbatch = 10000;
 
 #ifndef __HAVE_ARCH_CMPXCHG
 /*
@@ -109,6 +109,10 @@ void fastcall call_rcu(struct rcu_head *head,
 	rdp = &__get_cpu_var(rcu_data);
 	*rdp->nxttail = head;
 	rdp->nxttail = &head->next;
+
+	if (unlikely(++rdp->count > 10000))
+		set_need_resched();
+
 	local_irq_restore(flags);
 }
 
@@ -140,6 +144,12 @@ void fastcall call_rcu_bh(struct rcu_head *head,
 	rdp = &__get_cpu_var(rcu_bh_data);
 	*rdp->nxttail = head;
 	rdp->nxttail = &head->next;
+	rdp->count++;
+/*
+ *  Should we directly call rcu_do_batch() here ?
+ *  if (unlikely(rdp->count > 10000))
+ *      rcu_do_batch(rdp);
+ */
 	local_irq_restore(flags);
 }
 
@@ -157,6 +167,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
 		next = rdp->donelist = list->next;
 		list->func(list);
 		list = next;
+		rdp->count--;
 		if (++count >= maxbatch)
 			break;
 	}

+ 1 - 0
kernel/time.c

@@ -570,6 +570,7 @@ void getnstimeofday(struct timespec *tv)
 	tv->tv_sec = x.tv_sec;
 	tv->tv_nsec = x.tv_usec * NSEC_PER_USEC;
 }
+EXPORT_SYMBOL_GPL(getnstimeofday);
 #endif
 
 #if (BITS_PER_LONG < 64)

+ 9 - 4
mm/vmscan.c

@@ -511,10 +511,11 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 		 * PageDirty _after_ making sure that the page is freeable and
 		 * not in use by anybody. 	(pagecache + us == 2)
 		 */
-		if (page_count(page) != 2 || PageDirty(page)) {
-			write_unlock_irq(&mapping->tree_lock);
-			goto keep_locked;
-		}
+		if (unlikely(page_count(page) != 2))
+			goto cannot_free;
+		smp_rmb();
+		if (unlikely(PageDirty(page)))
+			goto cannot_free;
 
 #ifdef CONFIG_SWAP
 		if (PageSwapCache(page)) {
@@ -538,6 +539,10 @@ free_it:
 			__pagevec_release_nonlru(&freed_pvec);
 		continue;
 
+cannot_free:
+		write_unlock_irq(&mapping->tree_lock);
+		goto keep_locked;
+
 activate_locked:
 		SetPageActive(page);
 		pgactivate++;

+ 0 - 1
net/ipv6/netfilter/ip6_tables.c

@@ -975,7 +975,6 @@ replace_table(struct ip6t_table *table,
 		struct ip6t_entry *table_base;
 		unsigned int i;
 
-		for (i = 0; i < num_possible_cpus(); i++) {
 		for_each_cpu(i) {
 			table_base =
 				(void *)newinfo->entries

+ 14 - 12
net/sunrpc/clnt.c

@@ -678,13 +678,11 @@ call_allocate(struct rpc_task *task)
 static void
 call_encode(struct rpc_task *task)
 {
-	struct rpc_clnt	*clnt = task->tk_client;
 	struct rpc_rqst	*req = task->tk_rqstp;
 	struct xdr_buf *sndbuf = &req->rq_snd_buf;
 	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
 	unsigned int	bufsiz;
 	kxdrproc_t	encode;
-	int		status;
 	u32		*p;
 
 	dprintk("RPC: %4d call_encode (status %d)\n", 
@@ -712,12 +710,9 @@ call_encode(struct rpc_task *task)
 		rpc_exit(task, -EIO);
 		return;
 	}
-	if (encode && (status = rpcauth_wrap_req(task, encode, req, p,
-						 task->tk_msg.rpc_argp)) < 0) {
-		printk(KERN_WARNING "%s: can't encode arguments: %d\n",
-				clnt->cl_protname, -status);
-		rpc_exit(task, status);
-	}
+	if (encode != NULL)
+		task->tk_status = rpcauth_wrap_req(task, encode, req, p,
+				task->tk_msg.rpc_argp);
 }
 
 /*
@@ -759,7 +754,8 @@ call_bind_status(struct rpc_task *task)
 	case -EACCES:
 		dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n",
 				task->tk_pid);
-		break;
+		rpc_delay(task, 3*HZ);
+		goto retry_bind;
 	case -ETIMEDOUT:
 		dprintk("RPC: %4d rpcbind request timed out\n",
 				task->tk_pid);
@@ -864,10 +860,12 @@ call_transmit(struct rpc_task *task)
 	if (task->tk_status != 0)
 		return;
 	/* Encode here so that rpcsec_gss can use correct sequence number. */
-	if (!task->tk_rqstp->rq_bytes_sent)
+	if (task->tk_rqstp->rq_bytes_sent == 0) {
 		call_encode(task);
-	if (task->tk_status < 0)
-		return;
+		/* Did the encode result in an error condition? */
+		if (task->tk_status != 0)
+			goto out_nosend;
+	}
 	xprt_transmit(task);
 	if (task->tk_status < 0)
 		return;
@@ -875,6 +873,10 @@ call_transmit(struct rpc_task *task)
 		task->tk_action = NULL;
 		rpc_wake_up_task(task);
 	}
+	return;
+out_nosend:
+	/* release socket write lock before attempting to handle error */
+	xprt_abort_transmit(task);
 }
 
 /*

+ 8 - 0
net/sunrpc/xprt.c

@@ -709,6 +709,14 @@ out_unlock:
 	return err;
 }
 
+void
+xprt_abort_transmit(struct rpc_task *task)
+{
+	struct rpc_xprt	*xprt = task->tk_xprt;
+
+	xprt_release_write(xprt, task);
+}
+
 /**
  * xprt_transmit - send an RPC request on a transport
  * @task: controlling RPC task