
Merge branch 'master' into upstream

Jeff Garzik 19 years ago
parent commit 70f05366b7

+ 3 - 3
Makefile

@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
-SUBLEVEL = 17
-EXTRAVERSION =
+SUBLEVEL = 18
+EXTRAVERSION = -rc1
 NAME=Crazed Snow-Weasel
 
 # *DOCUMENTATION*
@@ -528,7 +528,7 @@ export MODLIB
 
 ifdef INSTALL_MOD_STRIP
 ifeq ($(INSTALL_MOD_STRIP),1)
-mod_strip_cmd = $STRIP) --strip-debug
+mod_strip_cmd = $(STRIP) --strip-debug
 else
 mod_strip_cmd = $(STRIP) $(INSTALL_MOD_STRIP)
 endif # INSTALL_MOD_STRIP=1
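
(The second hunk fixes a make expansion bug: "$STRIP)" expands the single-letter variable "$S" followed by the literal text "TRIP)", so stripping modules via "make INSTALL_MOD_STRIP=1 modules_install" produced a bogus command line; "$(STRIP)" restores the intended variable reference.)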

+ 45 - 0
arch/arm/mach-at91rm9200/at91rm9200.c

@@ -107,3 +107,48 @@ void __init at91rm9200_map_io(void)
 	iotable_init(at91rm9200_io_desc, ARRAY_SIZE(at91rm9200_io_desc));
 }
 
+/*
+ * The default interrupt priority levels (0 = lowest, 7 = highest).
+ */
+static unsigned int at91rm9200_default_irq_priority[NR_AIC_IRQS] __initdata = {
+	7,	/* Advanced Interrupt Controller (FIQ) */
+	7,	/* System Peripherals */
+	0,	/* Parallel IO Controller A */
+	0,	/* Parallel IO Controller B */
+	0,	/* Parallel IO Controller C */
+	0,	/* Parallel IO Controller D */
+	6,	/* USART 0 */
+	6,	/* USART 1 */
+	6,	/* USART 2 */
+	6,	/* USART 3 */
+	0,	/* Multimedia Card Interface */
+	4,	/* USB Device Port */
+	0,	/* Two-Wire Interface */
+	6,	/* Serial Peripheral Interface */
+	5,	/* Serial Synchronous Controller 0 */
+	5,	/* Serial Synchronous Controller 1 */
+	5,	/* Serial Synchronous Controller 2 */
+	0,	/* Timer Counter 0 */
+	0,	/* Timer Counter 1 */
+	0,	/* Timer Counter 2 */
+	0,	/* Timer Counter 3 */
+	0,	/* Timer Counter 4 */
+	0,	/* Timer Counter 5 */
+	3,	/* USB Host port */
+	3,	/* Ethernet MAC */
+	0,	/* Advanced Interrupt Controller (IRQ0) */
+	0,	/* Advanced Interrupt Controller (IRQ1) */
+	0,	/* Advanced Interrupt Controller (IRQ2) */
+	0,	/* Advanced Interrupt Controller (IRQ3) */
+	0,	/* Advanced Interrupt Controller (IRQ4) */
+	0,	/* Advanced Interrupt Controller (IRQ5) */
+	0	/* Advanced Interrupt Controller (IRQ6) */
+};
+
+void __init at91rm9200_init_irq(unsigned int priority[NR_AIC_IRQS])
+{
+	if (!priority)
+		priority = at91rm9200_default_irq_priority;
+
+	at91_aic_init(priority);
+}
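
A minimal sketch of how a board file is expected to consume this hook (the board name and table below are hypothetical, not part of the patch; passing NULL selects the default table above):

static unsigned int myboard_irq_priority[NR_AIC_IRQS] __initdata = {
	7,	/* AIC (FIQ) */
	7,	/* System Peripherals */
	/* ... remaining slots; unlisted entries default to 0 (lowest) */
};

static void __init myboard_init_irq(void)
{
	at91rm9200_init_irq(myboard_irq_priority);	/* or NULL for defaults */
}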

+ 7 - 1
arch/arm/mach-at91rm9200/generic.h

@@ -8,13 +8,19 @@
  * published by the Free Software Foundation.
  */
 
-void at91_gpio_irq_setup(unsigned banks);
+ /* Interrupts */
+extern void __init at91rm9200_init_irq(unsigned int priority[]);
+extern void __init at91_aic_init(unsigned int priority[]);
+extern void __init at91_gpio_irq_setup(unsigned banks);
 
+ /* Timer */
 struct sys_timer;
 extern struct sys_timer at91rm9200_timer;
 
+ /* Memory Map */
 extern void __init at91rm9200_map_io(void);
 
+ /* Clocks */
 extern int __init at91_clock_init(unsigned long main_clock);
 struct device;
 extern void __init at91_clock_associate(const char *id, struct device *dev, const char *func);

+ 14 - 56
arch/arm/mach-at91rm9200/irq.c

@@ -36,58 +36,20 @@
 
 #include "generic.h"
 
-/*
- * The default interrupt priority levels (0 = lowest, 7 = highest).
- */
-static unsigned int at91rm9200_default_irq_priority[NR_AIC_IRQS] __initdata = {
-	7,	/* Advanced Interrupt Controller */
-	7,	/* System Peripheral */
-	0,	/* Parallel IO Controller A */
-	0,	/* Parallel IO Controller B */
-	0,	/* Parallel IO Controller C */
-	0,	/* Parallel IO Controller D */
-	6,	/* USART 0 */
-	6,	/* USART 1 */
-	6,	/* USART 2 */
-	6,	/* USART 3 */
-	0,	/* Multimedia Card Interface */
-	4,	/* USB Device Port */
-	0,	/* Two-Wire Interface */
-	6,	/* Serial Peripheral Interface */
-	5,	/* Serial Synchronous Controller */
-	5,	/* Serial Synchronous Controller */
-	5,	/* Serial Synchronous Controller */
-	0,	/* Timer Counter 0 */
-	0,	/* Timer Counter 1 */
-	0,	/* Timer Counter 2 */
-	0,	/* Timer Counter 3 */
-	0,	/* Timer Counter 4 */
-	0,	/* Timer Counter 5 */
-	3,	/* USB Host port */
-	3,	/* Ethernet MAC */
-	0,	/* Advanced Interrupt Controller */
-	0,	/* Advanced Interrupt Controller */
-	0,	/* Advanced Interrupt Controller */
-	0,	/* Advanced Interrupt Controller */
-	0,	/* Advanced Interrupt Controller */
-	0,	/* Advanced Interrupt Controller */
-	0	/* Advanced Interrupt Controller */
-};
 
-
-static void at91rm9200_mask_irq(unsigned int irq)
+static void at91_aic_mask_irq(unsigned int irq)
 {
 	/* Disable interrupt on AIC */
 	at91_sys_write(AT91_AIC_IDCR, 1 << irq);
 }
 
-static void at91rm9200_unmask_irq(unsigned int irq)
+static void at91_aic_unmask_irq(unsigned int irq)
 {
 	/* Enable interrupt on AIC */
 	at91_sys_write(AT91_AIC_IECR, 1 << irq);
 }
 
-static int at91rm9200_irq_type(unsigned irq, unsigned type)
+static int at91_aic_set_type(unsigned irq, unsigned type)
 {
 	unsigned int smr, srctype;
 
@@ -122,7 +84,7 @@ static int at91rm9200_irq_type(unsigned irq, unsigned type)
 static u32 wakeups;
 static u32 backups;
 
-static int at91rm9200_irq_set_wake(unsigned irq, unsigned value)
+static int at91_aic_set_wake(unsigned irq, unsigned value)
 {
 	if (unlikely(irq >= 32))
 		return -EINVAL;
@@ -149,28 +111,24 @@ void at91_irq_resume(void)
 }
 
 #else
-#define at91rm9200_irq_set_wake	NULL
+#define at91_aic_set_wake	NULL
 #endif
 
-static struct irqchip at91rm9200_irq_chip = {
-	.ack		= at91rm9200_mask_irq,
-	.mask		= at91rm9200_mask_irq,
-	.unmask		= at91rm9200_unmask_irq,
-	.set_type	= at91rm9200_irq_type,
-	.set_wake	= at91rm9200_irq_set_wake,
+static struct irqchip at91_aic_chip = {
+	.ack		= at91_aic_mask_irq,
+	.mask		= at91_aic_mask_irq,
+	.unmask		= at91_aic_unmask_irq,
+	.set_type	= at91_aic_set_type,
+	.set_wake	= at91_aic_set_wake,
 };
 
 /*
  * Initialize the AIC interrupt controller.
  */
-void __init at91rm9200_init_irq(unsigned int priority[NR_AIC_IRQS])
+void __init at91_aic_init(unsigned int priority[NR_AIC_IRQS])
 {
 	unsigned int i;
 
-	/* No priority list specified for this board -> use defaults */
-	if (priority == NULL)
-		priority = at91rm9200_default_irq_priority;
-
 	/*
 	 * The IVR is used by macro get_irqnr_and_base to read and verify.
 	 * The irq number is NR_AIC_IRQS when a spurious interrupt has occurred.
@@ -178,10 +136,10 @@ void __init at91rm9200_init_irq(unsigned int priority[NR_AIC_IRQS])
 	for (i = 0; i < NR_AIC_IRQS; i++) {
 		/* Put irq number in Source Vector Register: */
 		at91_sys_write(AT91_AIC_SVR(i), i);
-		/* Store the Source Mode Register as defined in table above */
+		/* Active Low interrupt, with the specified priority */
 		at91_sys_write(AT91_AIC_SMR(i), AT91_AIC_SRCTYPE_LOW | priority[i]);
 
-		set_irq_chip(i, &at91rm9200_irq_chip);
+		set_irq_chip(i, &at91_aic_chip);
 		set_irq_handler(i, do_level_IRQ);
 		set_irq_flags(i, IRQF_VALID | IRQF_PROBE);
 

+ 0 - 2
arch/arm/mach-pnx4008/core.c

@@ -27,7 +27,6 @@
 #include <linux/spi/spi.h>
 
 #include <asm/hardware.h>
-#include <asm/irq.h>
 #include <asm/io.h>
 #include <asm/setup.h>
 #include <asm/mach-types.h>
@@ -36,7 +35,6 @@
 #include <asm/system.h>
 
 #include <asm/mach/arch.h>
-#include <asm/mach/irq.h>
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
 

+ 0 - 1
arch/arm/mach-pnx4008/dma.c

@@ -23,7 +23,6 @@
 #include <linux/clk.h>
 
 #include <asm/system.h>
-#include <asm/irq.h>
 #include <asm/hardware.h>
 #include <asm/dma.h>
 #include <asm/dma-mapping.h>

+ 10 - 12
arch/arm/mach-pnx4008/irq.c

@@ -22,8 +22,8 @@
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/device.h>
+#include <linux/irq.h>
 #include <asm/hardware.h>
-#include <asm/irq.h>
 #include <asm/io.h>
 #include <asm/setup.h>
 #include <asm/mach-types.h>
@@ -96,26 +96,24 @@ void __init pnx4008_init_irq(void)
 {
 	unsigned int i;
 
-	/* configure and enable IRQ 0,1,30,31 (cascade interrupts) mask all others */
+	/* configure IRQs */
+	for (i = 0; i < NR_IRQS; i++) {
+		set_irq_flags(i, IRQF_VALID);
+		set_irq_chip(i, &pnx4008_irq_chip);
+		pnx4008_set_irq_type(i, pnx4008_irq_type[i]);
+	}
+
+	/* configure and enable IRQ 0,1,30,31 (cascade interrupts) */
 	pnx4008_set_irq_type(SUB1_IRQ_N, pnx4008_irq_type[SUB1_IRQ_N]);
 	pnx4008_set_irq_type(SUB2_IRQ_N, pnx4008_irq_type[SUB2_IRQ_N]);
 	pnx4008_set_irq_type(SUB1_FIQ_N, pnx4008_irq_type[SUB1_FIQ_N]);
 	pnx4008_set_irq_type(SUB2_FIQ_N, pnx4008_irq_type[SUB2_FIQ_N]);
 
+	/* mask all others */
 	__raw_writel((1 << SUB2_FIQ_N) | (1 << SUB1_FIQ_N) |
 			(1 << SUB2_IRQ_N) | (1 << SUB1_IRQ_N),
 		INTC_ER(MAIN_BASE_INT));
 	__raw_writel(0, INTC_ER(SIC1_BASE_INT));
 	__raw_writel(0, INTC_ER(SIC2_BASE_INT));
-
-	/* configure all other IRQ's */
-	for (i = 0; i < NR_IRQS; i++) {
-		if (i == SUB2_FIQ_N || i == SUB1_FIQ_N ||
-			i == SUB2_IRQ_N || i == SUB1_IRQ_N)
-			continue;
-		set_irq_flags(i, IRQF_VALID);
-		set_irq_chip(i, &pnx4008_irq_chip);
-		pnx4008_set_irq_type(i, pnx4008_irq_type[i]);
-	}
 }
 

+ 3 - 5
arch/arm/mach-pnx4008/time.c

@@ -20,17 +20,15 @@
 #include <linux/spinlock.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <linux/irq.h>
 
 #include <asm/system.h>
 #include <asm/hardware.h>
 #include <asm/io.h>
 #include <asm/leds.h>
-#include <asm/irq.h>
-#include <asm/mach/irq.h>
 #include <asm/mach/time.h>
-
-#include <linux/time.h>
-#include <linux/timex.h>
 #include <asm/errno.h>
 
 /*! Note: all timers are UPCOUNTING */

+ 29 - 10
arch/powerpc/sysdev/mpic.c

@@ -405,20 +405,22 @@ static void mpic_unmask_irq(unsigned int irq)
 	unsigned int loops = 100000;
 	struct mpic *mpic = mpic_from_irq(irq);
 	unsigned int src = mpic_irq_to_hw(irq);
+	unsigned long flags;
 
 	DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, irq, src);
 
+	spin_lock_irqsave(&mpic_lock, flags);
 	mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI,
 		       mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) &
 		       ~MPIC_VECPRI_MASK);
-
 	/* make sure mask gets to controller before we return to user */
 	do {
 		if (!loops--) {
 			printk(KERN_ERR "mpic_enable_irq timeout\n");
 			break;
 		}
-	} while(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK);	
+	} while(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK);
+	spin_unlock_irqrestore(&mpic_lock, flags);
 }
 
 static void mpic_mask_irq(unsigned int irq)
@@ -426,9 +428,11 @@ static void mpic_mask_irq(unsigned int irq)
 	unsigned int loops = 100000;
 	struct mpic *mpic = mpic_from_irq(irq);
 	unsigned int src = mpic_irq_to_hw(irq);
+	unsigned long flags;
 
 	DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src);
 
+	spin_lock_irqsave(&mpic_lock, flags);
 	mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI,
 		       mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) |
 		       MPIC_VECPRI_MASK);
@@ -440,6 +444,7 @@ static void mpic_mask_irq(unsigned int irq)
 			break;
 		}
 	} while(!(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK));
+	spin_unlock_irqrestore(&mpic_lock, flags);
 }
 
 static void mpic_end_irq(unsigned int irq)
@@ -624,9 +629,10 @@ static int mpic_host_map(struct irq_host *h, unsigned int virq,
 	struct irq_desc *desc = get_irq_desc(virq);
 	struct irq_chip *chip;
 	struct mpic *mpic = h->host_data;
-	unsigned int vecpri = MPIC_VECPRI_SENSE_LEVEL |
+	u32 v, vecpri = MPIC_VECPRI_SENSE_LEVEL |
 		MPIC_VECPRI_POLARITY_NEGATIVE;
 	int level;
+	unsigned long iflags;
 
 	pr_debug("mpic: map virq %d, hwirq 0x%lx, flags: 0x%x\n",
 		 virq, hw, flags);
@@ -668,11 +674,21 @@ static int mpic_host_map(struct irq_host *h, unsigned int virq,
 	}
 #endif
 
-	/* Reconfigure irq */
-	vecpri |= MPIC_VECPRI_MASK | hw | (8 << MPIC_VECPRI_PRIORITY_SHIFT);
-	mpic_irq_write(hw, MPIC_IRQ_VECTOR_PRI, vecpri);
-
-	pr_debug("mpic: mapping as IRQ\n");
+	/* Reconfigure irq. We must preserve the mask bit as we can be called
+	 * while the interrupt is still active (this may change in the future,
+	 * but for now it is the case).
+	 */
+	spin_lock_irqsave(&mpic_lock, iflags);
+	v = mpic_irq_read(hw, MPIC_IRQ_VECTOR_PRI);
+	vecpri = (v &
+		~(MPIC_VECPRI_POLARITY_MASK | MPIC_VECPRI_SENSE_MASK)) |
+		vecpri;
+	if (vecpri != v)
+		mpic_irq_write(hw, MPIC_IRQ_VECTOR_PRI, vecpri);
+	spin_unlock_irqrestore(&mpic_lock, iflags);
+
+	pr_debug("mpic: mapping as IRQ, vecpri = 0x%08x (was 0x%08x)\n",
+		 vecpri, v);
 
 	set_irq_chip_data(virq, mpic);
 	set_irq_chip_and_handler(virq, chip, handle_fasteoi_irq);
@@ -904,8 +920,8 @@ void __init mpic_init(struct mpic *mpic)
 		
 		/* do senses munging */
 		if (mpic->senses && i < mpic->senses_count)
-			vecpri = mpic_flags_to_vecpri(mpic->senses[i],
-						      &level);
+			vecpri |= mpic_flags_to_vecpri(mpic->senses[i],
+						       &level);
 		else
 			vecpri |= MPIC_VECPRI_SENSE_LEVEL;
 
@@ -955,14 +971,17 @@ void __init mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio)
 
 void __init mpic_set_serial_int(struct mpic *mpic, int enable)
 {
+	unsigned long flags;
 	u32 v;
 
+	spin_lock_irqsave(&mpic_lock, flags);
 	v = mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1);
 	if (enable)
 		v |= MPIC_GREG_GLOBAL_CONF_1_SIE;
 	else
 		v &= ~MPIC_GREG_GLOBAL_CONF_1_SIE;
 	mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_1, v);
+	spin_unlock_irqrestore(&mpic_lock, flags);
 }
 
 void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
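
The common thread in the hunks above is wrapping every read-modify-write of an MPIC register in mpic_lock so that concurrent mask/unmask/map calls cannot lose each other's updates. As a sketch, the protected pattern boils down to the following (mpic_rmw_vecpri is a hypothetical helper name, not part of the patch):

static void mpic_rmw_vecpri(struct mpic *mpic, unsigned int src,
			    u32 clear, u32 set)
{
	unsigned long flags;
	u32 v;

	/* serialize the read-modify-write against other CPUs */
	spin_lock_irqsave(&mpic_lock, flags);
	v = mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI);
	mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI, (v & ~clear) | set);
	spin_unlock_irqrestore(&mpic_lock, flags);
}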

+ 2 - 0
arch/sparc64/kernel/prom.c

@@ -1032,7 +1032,9 @@ static void sun4v_vdev_irq_trans_init(struct device_node *dp)
 static void irq_trans_init(struct device_node *dp)
 {
 	const char *model;
+#ifdef CONFIG_PCI
 	int i;
+#endif
 
 	model = of_get_property(dp, "model", NULL);
 	if (!model)

+ 5 - 5
arch/sparc64/kernel/sparc64_ksyms.c

@@ -124,11 +124,6 @@ EXPORT_SYMBOL(__write_lock);
 EXPORT_SYMBOL(__write_unlock);
 EXPORT_SYMBOL(__write_trylock);
 
-#if defined(CONFIG_MCOUNT)
-extern void _mcount(void);
-EXPORT_SYMBOL(_mcount);
-#endif
-
 /* CPU online map and active count.  */
 EXPORT_SYMBOL(cpu_online_map);
 EXPORT_SYMBOL(phys_cpu_present_map);
@@ -136,6 +131,11 @@ EXPORT_SYMBOL(phys_cpu_present_map);
 EXPORT_SYMBOL(smp_call_function);
 #endif /* CONFIG_SMP */
 
+#if defined(CONFIG_MCOUNT)
+extern void _mcount(void);
+EXPORT_SYMBOL(_mcount);
+#endif
+
 EXPORT_SYMBOL(sparc64_get_clock_tick);
 
 /* semaphores */

+ 4 - 1
arch/sparc64/kernel/time.c

@@ -788,12 +788,15 @@ static int __devinit clock_probe(struct of_device *op, const struct of_device_id
 	if (!regs)
 		return -ENOMEM;
 
+#ifdef CONFIG_PCI
 	if (!strcmp(model, "ds1287") ||
 	    !strcmp(model, "m5819") ||
 	    !strcmp(model, "m5819p") ||
 	    !strcmp(model, "m5823")) {
 		ds1287_regs = (unsigned long) regs;
-	} else if (model[5] == '0' && model[6] == '2') {
+	} else
+#endif
+	if (model[5] == '0' && model[6] == '2') {
 		mstk48t02_regs = regs;
 	} else if(model[5] == '0' && model[6] == '8') {
 		mstk48t08_regs = regs;

+ 12 - 5
drivers/scsi/ahci.c

@@ -1052,7 +1052,7 @@ static void ahci_thaw(struct ata_port *ap)
 
 static void ahci_error_handler(struct ata_port *ap)
 {
-	if (!(ap->flags & ATA_FLAG_FROZEN)) {
+	if (!(ap->pflags & ATA_PFLAG_FROZEN)) {
 		/* restart engine */
 		ahci_stop_engine(ap);
 		ahci_start_engine(ap);
@@ -1323,6 +1323,17 @@ static int ahci_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!printed_version++)
 		dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n");
 
+	/* JMicron-specific fixup: make sure we're in AHCI mode */
+	/* This is protected from races with ata_jmicron by the pci probe
+	   locking */
+	if (pdev->vendor == PCI_VENDOR_ID_JMICRON) {
+		/* AHCI enable, AHCI on function 0 */
+		pci_write_config_byte(pdev, 0x41, 0xa1);
+		/* Function 1 is the PATA controller */
+		if (PCI_FUNC(pdev->devfn))
+			return -ENODEV;
+	}
+
 	rc = pci_enable_device(pdev);
 	if (rc)
 		return rc;
@@ -1378,10 +1389,6 @@ static int ahci_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (have_msi)
 		hpriv->flags |= AHCI_FLAG_MSI;
 
-	/* JMicron-specific fixup: make sure we're in AHCI mode */
-	if (pdev->vendor == 0x197b)
-		pci_write_config_byte(pdev, 0x41, 0xa1);
-
 	/* initialize adapter */
 	rc = ahci_host_init(probe_ent);
 	if (rc)
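
(Note the fixup now runs at the very top of probe: on the PATA function (function 1) the driver bails out with -ENODEV before the device is even enabled, leaving that function to the PATA driver, and the 0x41 programming happens under the PCI probe serialization mentioned in the comment.)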

+ 185 - 104
drivers/scsi/libata-core.c

@@ -61,9 +61,9 @@
 #include "libata.h"
 
 /* debounce timing parameters in msecs { interval, duration, timeout } */
-const unsigned long sata_deb_timing_boot[]		= {   5,  100, 2000 };
-const unsigned long sata_deb_timing_eh[]		= {  25,  500, 2000 };
-const unsigned long sata_deb_timing_before_fsrst[]	= { 100, 2000, 5000 };
+const unsigned long sata_deb_timing_normal[]		= {   5,  100, 2000 };
+const unsigned long sata_deb_timing_hotplug[]		= {  25,  500, 2000 };
+const unsigned long sata_deb_timing_long[]		= { 100, 2000, 5000 };
 
 static unsigned int ata_dev_init_params(struct ata_device *dev,
 					u16 heads, u16 sectors);
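
The renamed tables are consumed as { interval, duration, timeout } in msecs by the phy debounce logic: sample SStatus every "interval" ms, succeed once the DET value has held steady for "duration" ms, and give up after "timeout" ms. A simplified sketch of that consumption (hypothetical helper name, not the patch's code, error handling trimmed):

static int debounce_sketch(struct ata_port *ap, const unsigned long *d)
{
	unsigned long interval = d[0];
	unsigned long stable = jiffies + msecs_to_jiffies(d[1]);
	unsigned long deadline = jiffies + msecs_to_jiffies(d[2]);
	u32 last = 0xffffffff, cur;

	while (1) {
		msleep(interval);
		if (sata_scr_read(ap, SCR_STATUS, &cur))
			return -EOPNOTSUPP;
		cur &= 0xf;			/* DET field only */
		if (cur != last) {		/* value moved: restart window */
			last = cur;
			stable = jiffies + msecs_to_jiffies(d[1]);
		} else if (time_after(jiffies, stable))
			return 0;		/* stable long enough */
		if (time_after(jiffies, deadline))
			return -EBUSY;		/* never settled */
	}
}
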
@@ -907,7 +907,7 @@ void ata_port_queue_task(struct ata_port *ap, void (*fn)(void *), void *data,
 {
 	int rc;
 
-	if (ap->flags & ATA_FLAG_FLUSH_PORT_TASK)
+	if (ap->pflags & ATA_PFLAG_FLUSH_PORT_TASK)
 		return;
 
 	PREPARE_WORK(&ap->port_task, fn, data);
@@ -938,7 +938,7 @@ void ata_port_flush_task(struct ata_port *ap)
 	DPRINTK("ENTER\n");
 
 	spin_lock_irqsave(ap->lock, flags);
-	ap->flags |= ATA_FLAG_FLUSH_PORT_TASK;
+	ap->pflags |= ATA_PFLAG_FLUSH_PORT_TASK;
 	spin_unlock_irqrestore(ap->lock, flags);
 
 	DPRINTK("flush #1\n");
@@ -957,7 +957,7 @@ void ata_port_flush_task(struct ata_port *ap)
 	}
 
 	spin_lock_irqsave(ap->lock, flags);
-	ap->flags &= ~ATA_FLAG_FLUSH_PORT_TASK;
+	ap->pflags &= ~ATA_PFLAG_FLUSH_PORT_TASK;
 	spin_unlock_irqrestore(ap->lock, flags);
 
 	if (ata_msg_ctl(ap))
@@ -1009,7 +1009,7 @@ unsigned ata_exec_internal(struct ata_device *dev,
 	spin_lock_irqsave(ap->lock, flags);
 
 	/* no internal command while frozen */
-	if (ap->flags & ATA_FLAG_FROZEN) {
+	if (ap->pflags & ATA_PFLAG_FROZEN) {
 		spin_unlock_irqrestore(ap->lock, flags);
 		return AC_ERR_SYSTEM;
 	}
@@ -1325,6 +1325,19 @@ static void ata_dev_config_ncq(struct ata_device *dev,
 		snprintf(desc, desc_sz, "NCQ (depth %d/%d)", hdepth, ddepth);
 }
 
+static void ata_set_port_max_cmd_len(struct ata_port *ap)
+{
+	int i;
+
+	if (ap->host) {
+		ap->host->max_cmd_len = 0;
+		for (i = 0; i < ATA_MAX_DEVICES; i++)
+			ap->host->max_cmd_len = max_t(unsigned int,
+						      ap->host->max_cmd_len,
+						      ap->device[i].cdb_len);
+	}
+}
+
 /**
  *	ata_dev_configure - Configure the specified ATA/ATAPI device
  *	@dev: Target device to configure
@@ -1344,7 +1357,7 @@ int ata_dev_configure(struct ata_device *dev, int print_info)
 	struct ata_port *ap = dev->ap;
 	const u16 *id = dev->id;
 	unsigned int xfer_mask;
-	int i, rc;
+	int rc;
 
 	if (!ata_dev_enabled(dev) && ata_msg_info(ap)) {
 		ata_dev_printk(dev, KERN_INFO,
@@ -1404,7 +1417,7 @@ int ata_dev_configure(struct ata_device *dev, int print_info)
 			ata_dev_config_ncq(dev, ncq_desc, sizeof(ncq_desc));
 
 			/* print device info to dmesg */
-			if (ata_msg_info(ap))
+			if (ata_msg_drv(ap) && print_info)
 				ata_dev_printk(dev, KERN_INFO, "ATA-%d, "
 					"max %s, %Lu sectors: %s %s\n",
 					ata_id_major_version(id),
@@ -1427,7 +1440,7 @@ int ata_dev_configure(struct ata_device *dev, int print_info)
 			}
 
 			/* print device info to dmesg */
-			if (ata_msg_info(ap))
+			if (ata_msg_drv(ap) && print_info)
 				ata_dev_printk(dev, KERN_INFO, "ATA-%d, "
 					"max %s, %Lu sectors: CHS %u/%u/%u\n",
 					ata_id_major_version(id),
@@ -1439,7 +1452,7 @@ int ata_dev_configure(struct ata_device *dev, int print_info)
 
 		if (dev->id[59] & 0x100) {
 			dev->multi_count = dev->id[59] & 0xff;
-			if (ata_msg_info(ap))
+			if (ata_msg_drv(ap) && print_info)
 				ata_dev_printk(dev, KERN_INFO,
 					"ata%u: dev %u multi count %u\n",
 					ap->id, dev->devno, dev->multi_count);
@@ -1468,21 +1481,17 @@ int ata_dev_configure(struct ata_device *dev, int print_info)
 		}
 
 		/* print device info to dmesg */
-		if (ata_msg_info(ap))
+		if (ata_msg_drv(ap) && print_info)
 			ata_dev_printk(dev, KERN_INFO, "ATAPI, max %s%s\n",
 				       ata_mode_string(xfer_mask),
 				       cdb_intr_string);
 	}
 
-	ap->host->max_cmd_len = 0;
-	for (i = 0; i < ATA_MAX_DEVICES; i++)
-		ap->host->max_cmd_len = max_t(unsigned int,
-					      ap->host->max_cmd_len,
-					      ap->device[i].cdb_len);
+	ata_set_port_max_cmd_len(ap);
 
 	/* limit bridge transfers to udma5, 200 sectors */
 	if (ata_dev_knobble(dev)) {
-		if (ata_msg_info(ap))
+		if (ata_msg_drv(ap) && print_info)
 			ata_dev_printk(dev, KERN_INFO,
 				       "applying bridge limits\n");
 		dev->udma_mask &= ATA_UDMA5;
@@ -2137,7 +2146,7 @@ int ata_set_mode(struct ata_port *ap, struct ata_device **r_failed_dev)
 		 * return error code and failing device on failure.
 		 */
 		for (i = 0; i < ATA_MAX_DEVICES; i++) {
-			if (ata_dev_enabled(&ap->device[i])) {
+			if (ata_dev_ready(&ap->device[i])) {
 				ap->ops->set_mode(ap);
 				break;
 			}
@@ -2203,7 +2212,8 @@ int ata_set_mode(struct ata_port *ap, struct ata_device **r_failed_dev)
 	for (i = 0; i < ATA_MAX_DEVICES; i++) {
 		dev = &ap->device[i];
 
-		if (!ata_dev_enabled(dev))
+		/* don't update suspended devices' xfer mode */
+		if (!ata_dev_ready(dev))
 			continue;
 
 		rc = ata_dev_set_mode(dev);
@@ -2579,7 +2589,7 @@ static void ata_wait_spinup(struct ata_port *ap)
 
 	/* first, debounce phy if SATA */
 	if (ap->cbl == ATA_CBL_SATA) {
-		rc = sata_phy_debounce(ap, sata_deb_timing_eh);
+		rc = sata_phy_debounce(ap, sata_deb_timing_hotplug);
 
 		/* if debounced successfully and offline, no need to wait */
 		if ((rc == 0 || rc == -EOPNOTSUPP) && ata_port_offline(ap))
@@ -2615,16 +2625,17 @@ static void ata_wait_spinup(struct ata_port *ap)
 int ata_std_prereset(struct ata_port *ap)
 {
 	struct ata_eh_context *ehc = &ap->eh_context;
-	const unsigned long *timing;
+	const unsigned long *timing = sata_ehc_deb_timing(ehc);
 	int rc;
 
-	/* hotplug? */
-	if (ehc->i.flags & ATA_EHI_HOTPLUGGED) {
-		if (ap->flags & ATA_FLAG_HRST_TO_RESUME)
-			ehc->i.action |= ATA_EH_HARDRESET;
-		if (ap->flags & ATA_FLAG_SKIP_D2H_BSY)
-			ata_wait_spinup(ap);
-	}
+	/* handle link resume & hotplug spinup */
+	if ((ehc->i.flags & ATA_EHI_RESUME_LINK) &&
+	    (ap->flags & ATA_FLAG_HRST_TO_RESUME))
+		ehc->i.action |= ATA_EH_HARDRESET;
+
+	if ((ehc->i.flags & ATA_EHI_HOTPLUGGED) &&
+	    (ap->flags & ATA_FLAG_SKIP_D2H_BSY))
+		ata_wait_spinup(ap);
 
 	/* if we're about to do hardreset, nothing more to do */
 	if (ehc->i.action & ATA_EH_HARDRESET)
@@ -2632,11 +2643,6 @@ int ata_std_prereset(struct ata_port *ap)
 
 	/* if SATA, resume phy */
 	if (ap->cbl == ATA_CBL_SATA) {
-		if (ap->flags & ATA_FLAG_LOADING)
-			timing = sata_deb_timing_boot;
-		else
-			timing = sata_deb_timing_eh;
-
 		rc = sata_phy_resume(ap, timing);
 		if (rc && rc != -EOPNOTSUPP) {
 			/* phy resume failed */
@@ -2724,6 +2730,8 @@ int ata_std_softreset(struct ata_port *ap, unsigned int *classes)
  */
 int sata_std_hardreset(struct ata_port *ap, unsigned int *class)
 {
+	struct ata_eh_context *ehc = &ap->eh_context;
+	const unsigned long *timing = sata_ehc_deb_timing(ehc);
 	u32 scontrol;
 	int rc;
 
@@ -2761,7 +2769,7 @@ int sata_std_hardreset(struct ata_port *ap, unsigned int *class)
 	msleep(1);
 
 	/* bring phy back */
-	sata_phy_resume(ap, sata_deb_timing_eh);
+	sata_phy_resume(ap, timing);
 
 	/* TODO: phy layer with polling, timeouts, etc. */
 	if (ata_port_offline(ap)) {
@@ -4285,7 +4293,7 @@ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
 	unsigned int i;
 
 	/* no command while frozen */
-	if (unlikely(ap->flags & ATA_FLAG_FROZEN))
+	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
 		return NULL;
 
 	/* the last tag is reserved for internal command. */
@@ -4407,7 +4415,7 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
 	 * taken care of.
 	 */
 	if (ap->ops->error_handler) {
-		WARN_ON(ap->flags & ATA_FLAG_FROZEN);
+		WARN_ON(ap->pflags & ATA_PFLAG_FROZEN);
 
 		if (unlikely(qc->err_mask))
 			qc->flags |= ATA_QCFLAG_FAILED;
@@ -5001,86 +5009,120 @@ int ata_flush_cache(struct ata_device *dev)
 	return 0;
 }
 
-static int ata_standby_drive(struct ata_device *dev)
+static int ata_host_set_request_pm(struct ata_host_set *host_set,
+				   pm_message_t mesg, unsigned int action,
+				   unsigned int ehi_flags, int wait)
 {
-	unsigned int err_mask;
+	unsigned long flags;
+	int i, rc;
 
-	err_mask = ata_do_simple_cmd(dev, ATA_CMD_STANDBYNOW1);
-	if (err_mask) {
-		ata_dev_printk(dev, KERN_ERR, "failed to standby drive "
-			       "(err_mask=0x%x)\n", err_mask);
-		return -EIO;
-	}
+	for (i = 0; i < host_set->n_ports; i++) {
+		struct ata_port *ap = host_set->ports[i];
 
-	return 0;
-}
+		/* Previous resume operation might still be in
+		 * progress.  Wait for PM_PENDING to clear.
+		 */
+		if (ap->pflags & ATA_PFLAG_PM_PENDING) {
+			ata_port_wait_eh(ap);
+			WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING);
+		}
 
-static int ata_start_drive(struct ata_device *dev)
-{
-	unsigned int err_mask;
+		/* request PM ops to EH */
+		spin_lock_irqsave(ap->lock, flags);
 
-	err_mask = ata_do_simple_cmd(dev, ATA_CMD_IDLEIMMEDIATE);
-	if (err_mask) {
-		ata_dev_printk(dev, KERN_ERR, "failed to start drive "
-			       "(err_mask=0x%x)\n", err_mask);
-		return -EIO;
+		ap->pm_mesg = mesg;
+		if (wait) {
+			rc = 0;
+			ap->pm_result = &rc;
+		}
+
+		ap->pflags |= ATA_PFLAG_PM_PENDING;
+		ap->eh_info.action |= action;
+		ap->eh_info.flags |= ehi_flags;
+
+		ata_port_schedule_eh(ap);
+
+		spin_unlock_irqrestore(ap->lock, flags);
+
+		/* wait and check result */
+		if (wait) {
+			ata_port_wait_eh(ap);
+			WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING);
+			if (rc)
+				return rc;
+		}
 	}
 
 	return 0;
 }
 
 /**
- *	ata_device_resume - wakeup a previously suspended devices
- *	@dev: the device to resume
+ *	ata_host_set_suspend - suspend host_set
+ *	@host_set: host_set to suspend
+ *	@mesg: PM message
  *
- *	Kick the drive back into action, by sending it an idle immediate
- *	command and making sure its transfer mode matches between drive
- *	and host.
+ *	Suspend @host_set.  Actual operation is performed by EH.  This
+ *	function requests EH to perform PM operations and waits for EH
+ *	to finish.
  *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ *
+ *	RETURNS:
+ *	0 on success, -errno on failure.
  */
-int ata_device_resume(struct ata_device *dev)
+int ata_host_set_suspend(struct ata_host_set *host_set, pm_message_t mesg)
 {
-	struct ata_port *ap = dev->ap;
+	int i, j, rc;
 
-	if (ap->flags & ATA_FLAG_SUSPENDED) {
-		struct ata_device *failed_dev;
+	rc = ata_host_set_request_pm(host_set, mesg, 0, ATA_EHI_QUIET, 1);
+	if (rc)
+		goto fail;
 
-		ata_busy_sleep(ap, ATA_TMOUT_BOOT_QUICK, ATA_TMOUT_BOOT);
-		ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 200000);
+	/* EH is quiescent now.  Fail if we have any ready device.
+	 * This happens if hotplug occurs between completion of device
+	 * suspension and here.
+	 */
+	for (i = 0; i < host_set->n_ports; i++) {
+		struct ata_port *ap = host_set->ports[i];
 
-		ap->flags &= ~ATA_FLAG_SUSPENDED;
-		while (ata_set_mode(ap, &failed_dev))
-			ata_dev_disable(failed_dev);
+		for (j = 0; j < ATA_MAX_DEVICES; j++) {
+			struct ata_device *dev = &ap->device[j];
+
+			if (ata_dev_ready(dev)) {
+				ata_port_printk(ap, KERN_WARNING,
+						"suspend failed, device %d "
+						"still active\n", dev->devno);
+				rc = -EBUSY;
+				goto fail;
+			}
+		}
 	}
-	if (!ata_dev_enabled(dev))
-		return 0;
-	if (dev->class == ATA_DEV_ATA)
-		ata_start_drive(dev);
 
+	host_set->dev->power.power_state = mesg;
 	return 0;
+
+ fail:
+	ata_host_set_resume(host_set);
+	return rc;
 }
 
 /**
- *	ata_device_suspend - prepare a device for suspend
- *	@dev: the device to suspend
- *	@state: target power management state
+ *	ata_host_set_resume - resume host_set
+ *	@host_set: host_set to resume
+ *
+ *	Resume @host_set.  Actual operation is performed by EH.  This
+ *	function requests EH to perform PM operations and returns.
+ *	Note that all resume operations are performed in parallel.
  *
- *	Flush the cache on the drive, if appropriate, then issue a
- *	standbynow command.
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
  */
-int ata_device_suspend(struct ata_device *dev, pm_message_t state)
+void ata_host_set_resume(struct ata_host_set *host_set)
 {
-	struct ata_port *ap = dev->ap;
-
-	if (!ata_dev_enabled(dev))
-		return 0;
-	if (dev->class == ATA_DEV_ATA)
-		ata_flush_cache(dev);
-
-	if (state.event != PM_EVENT_FREEZE)
-		ata_standby_drive(dev);
-	ap->flags |= ATA_FLAG_SUSPENDED;
-	return 0;
+	ata_host_set_request_pm(host_set, PMSG_ON, ATA_EH_SOFTRESET,
+				ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET, 0);
+	host_set->dev->power.power_state = PMSG_ON;
 }
 
 /**
@@ -5440,6 +5482,7 @@ int ata_device_add(const struct ata_probe_ent *ent)
 		}
 
 		if (ap->ops->error_handler) {
+			struct ata_eh_info *ehi = &ap->eh_info;
 			unsigned long flags;
 
 			ata_port_probe(ap);
@@ -5447,10 +5490,11 @@ int ata_device_add(const struct ata_probe_ent *ent)
 			/* kick EH for boot probing */
 			spin_lock_irqsave(ap->lock, flags);
 
-			ap->eh_info.probe_mask = (1 << ATA_MAX_DEVICES) - 1;
-			ap->eh_info.action |= ATA_EH_SOFTRESET;
+			ehi->probe_mask = (1 << ATA_MAX_DEVICES) - 1;
+			ehi->action |= ATA_EH_SOFTRESET;
+			ehi->flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET;
 
-			ap->flags |= ATA_FLAG_LOADING;
+			ap->pflags |= ATA_PFLAG_LOADING;
 			ata_port_schedule_eh(ap);
 
 			spin_unlock_irqrestore(ap->lock, flags);
@@ -5518,7 +5562,7 @@ void ata_port_detach(struct ata_port *ap)
 
 	/* tell EH we're leaving & flush EH */
 	spin_lock_irqsave(ap->lock, flags);
-	ap->flags |= ATA_FLAG_UNLOADING;
+	ap->pflags |= ATA_PFLAG_UNLOADING;
 	spin_unlock_irqrestore(ap->lock, flags);
 
 	ata_port_wait_eh(ap);
@@ -5723,20 +5767,55 @@ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits)
 	return (tmp == bits->val) ? 1 : 0;
 }
 
-int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state)
+void ata_pci_device_do_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	pci_save_state(pdev);
-	pci_disable_device(pdev);
-	pci_set_power_state(pdev, PCI_D3hot);
-	return 0;
+
+	if (state.event == PM_EVENT_SUSPEND) {
+		pci_disable_device(pdev);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
 }
 
-int ata_pci_device_resume(struct pci_dev *pdev)
+void ata_pci_device_do_resume(struct pci_dev *pdev)
 {
 	pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
 	pci_enable_device(pdev);
 	pci_set_master(pdev);
+}
+
+int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev);
+	int rc = 0;
+
+	rc = ata_host_set_suspend(host_set, state);
+	if (rc)
+		return rc;
+
+	if (host_set->next) {
+		rc = ata_host_set_suspend(host_set->next, state);
+		if (rc) {
+			ata_host_set_resume(host_set);
+			return rc;
+		}
+	}
+
+	ata_pci_device_do_suspend(pdev, state);
+
+	return 0;
+}
+
+int ata_pci_device_resume(struct pci_dev *pdev)
+{
+	struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev);
+
+	ata_pci_device_do_resume(pdev);
+	ata_host_set_resume(host_set);
+	if (host_set->next)
+		ata_host_set_resume(host_set->next);
+
 	return 0;
 }
 #endif /* CONFIG_PCI */
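
The split into do_ helpers exists so a LLDD that must reprogram controller state on wake can interleave its own work between the PCI bring-up and the EH-driven port resume; sata_sil later in this series does exactly that. A sketch with hypothetical mydrv_ names:

static int mydrv_pci_device_resume(struct pci_dev *pdev)
{
	struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev);

	ata_pci_device_do_resume(pdev);		/* power state + PCI config */
	mydrv_reinit_controller(host_set);	/* hypothetical HW re-setup */
	ata_host_set_resume(host_set);		/* schedule EH resume on ports */
	return 0;
}
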
@@ -5842,9 +5921,9 @@ u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val,
  * Do not depend on ABI/API stability.
  */
 
-EXPORT_SYMBOL_GPL(sata_deb_timing_boot);
-EXPORT_SYMBOL_GPL(sata_deb_timing_eh);
-EXPORT_SYMBOL_GPL(sata_deb_timing_before_fsrst);
+EXPORT_SYMBOL_GPL(sata_deb_timing_normal);
+EXPORT_SYMBOL_GPL(sata_deb_timing_hotplug);
+EXPORT_SYMBOL_GPL(sata_deb_timing_long);
 EXPORT_SYMBOL_GPL(ata_std_bios_param);
 EXPORT_SYMBOL_GPL(ata_std_ports);
 EXPORT_SYMBOL_GPL(ata_device_add);
@@ -5916,6 +5995,8 @@ EXPORT_SYMBOL_GPL(sata_scr_write);
 EXPORT_SYMBOL_GPL(sata_scr_write_flush);
 EXPORT_SYMBOL_GPL(ata_port_online);
 EXPORT_SYMBOL_GPL(ata_port_offline);
+EXPORT_SYMBOL_GPL(ata_host_set_suspend);
+EXPORT_SYMBOL_GPL(ata_host_set_resume);
 EXPORT_SYMBOL_GPL(ata_id_string);
 EXPORT_SYMBOL_GPL(ata_id_c_string);
 EXPORT_SYMBOL_GPL(ata_scsi_simulate);
@@ -5930,14 +6011,14 @@ EXPORT_SYMBOL_GPL(ata_pci_host_stop);
 EXPORT_SYMBOL_GPL(ata_pci_init_native_mode);
 EXPORT_SYMBOL_GPL(ata_pci_init_one);
 EXPORT_SYMBOL_GPL(ata_pci_remove_one);
+EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend);
+EXPORT_SYMBOL_GPL(ata_pci_device_do_resume);
 EXPORT_SYMBOL_GPL(ata_pci_device_suspend);
 EXPORT_SYMBOL_GPL(ata_pci_device_resume);
 EXPORT_SYMBOL_GPL(ata_pci_default_filter);
 EXPORT_SYMBOL_GPL(ata_pci_clear_simplex);
 #endif /* CONFIG_PCI */
 
-EXPORT_SYMBOL_GPL(ata_device_suspend);
-EXPORT_SYMBOL_GPL(ata_device_resume);
 EXPORT_SYMBOL_GPL(ata_scsi_device_suspend);
 EXPORT_SYMBOL_GPL(ata_scsi_device_resume);
 

+ 357 - 48
drivers/scsi/libata-eh.c

@@ -47,6 +47,8 @@
 
 static void __ata_port_freeze(struct ata_port *ap);
 static void ata_eh_finish(struct ata_port *ap);
+static void ata_eh_handle_port_suspend(struct ata_port *ap);
+static void ata_eh_handle_port_resume(struct ata_port *ap);
 
 static void ata_ering_record(struct ata_ering *ering, int is_io,
 			     unsigned int err_mask)
@@ -190,7 +192,6 @@ enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
 void ata_scsi_error(struct Scsi_Host *host)
 {
 	struct ata_port *ap = ata_shost_to_port(host);
-	spinlock_t *ap_lock = ap->lock;
 	int i, repeat_cnt = ATA_EH_MAX_REPEAT;
 	unsigned long flags;
 
@@ -217,7 +218,7 @@ void ata_scsi_error(struct Scsi_Host *host)
 		struct scsi_cmnd *scmd, *tmp;
 		int nr_timedout = 0;
 
-		spin_lock_irqsave(ap_lock, flags);
+		spin_lock_irqsave(ap->lock, flags);
 
 		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
 			struct ata_queued_cmd *qc;
@@ -256,43 +257,49 @@ void ata_scsi_error(struct Scsi_Host *host)
 		if (nr_timedout)
 			__ata_port_freeze(ap);
 
-		spin_unlock_irqrestore(ap_lock, flags);
+		spin_unlock_irqrestore(ap->lock, flags);
 	} else
-		spin_unlock_wait(ap_lock);
+		spin_unlock_wait(ap->lock);
 
  repeat:
 	/* invoke error handler */
 	if (ap->ops->error_handler) {
+		/* process port resume request */
+		ata_eh_handle_port_resume(ap);
+
 		/* fetch & clear EH info */
-		spin_lock_irqsave(ap_lock, flags);
+		spin_lock_irqsave(ap->lock, flags);
 
 		memset(&ap->eh_context, 0, sizeof(ap->eh_context));
 		ap->eh_context.i = ap->eh_info;
 		memset(&ap->eh_info, 0, sizeof(ap->eh_info));
 
-		ap->flags |= ATA_FLAG_EH_IN_PROGRESS;
-		ap->flags &= ~ATA_FLAG_EH_PENDING;
+		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
+		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
 
-		spin_unlock_irqrestore(ap_lock, flags);
+		spin_unlock_irqrestore(ap->lock, flags);
 
-		/* invoke EH.  if unloading, just finish failed qcs */
-		if (!(ap->flags & ATA_FLAG_UNLOADING))
+		/* invoke EH, skip if unloading or suspended */
+		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
 			ap->ops->error_handler(ap);
 		else
 			ata_eh_finish(ap);
 
+		/* process port suspend request */
+		ata_eh_handle_port_suspend(ap);
+
 		/* Exception might have happened after ->error_handler
 		 * recovered the port but before this point.  Repeat
 		 * EH in such case.
 		 */
-		spin_lock_irqsave(ap_lock, flags);
+		spin_lock_irqsave(ap->lock, flags);
 
-		if (ap->flags & ATA_FLAG_EH_PENDING) {
+		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
 			if (--repeat_cnt) {
 				ata_port_printk(ap, KERN_INFO,
 					"EH pending after completion, "
 					"repeating EH (cnt=%d)\n", repeat_cnt);
-				spin_unlock_irqrestore(ap_lock, flags);
+				spin_unlock_irqrestore(ap->lock, flags);
 				goto repeat;
 			}
 			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
@@ -302,14 +309,14 @@ void ata_scsi_error(struct Scsi_Host *host)
 		/* this run is complete, make sure EH info is clear */
 		memset(&ap->eh_info, 0, sizeof(ap->eh_info));
 
-		/* Clear host_eh_scheduled while holding ap_lock such
+		/* Clear host_eh_scheduled while holding ap->lock such
 		 * that if exception occurs after this point but
 		 * before EH completion, SCSI midlayer will
 		 * re-initiate EH.
 		 */
 		host->host_eh_scheduled = 0;
 
-		spin_unlock_irqrestore(ap_lock, flags);
+		spin_unlock_irqrestore(ap->lock, flags);
 	} else {
 		WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
 		ap->ops->eng_timeout(ap);
@@ -321,24 +328,23 @@ void ata_scsi_error(struct Scsi_Host *host)
 	scsi_eh_flush_done_q(&ap->eh_done_q);
 
 	/* clean up */
-	spin_lock_irqsave(ap_lock, flags);
+	spin_lock_irqsave(ap->lock, flags);
 
-	if (ap->flags & ATA_FLAG_LOADING) {
-		ap->flags &= ~ATA_FLAG_LOADING;
-	} else {
-		if (ap->flags & ATA_FLAG_SCSI_HOTPLUG)
-			queue_work(ata_aux_wq, &ap->hotplug_task);
-		if (ap->flags & ATA_FLAG_RECOVERED)
-			ata_port_printk(ap, KERN_INFO, "EH complete\n");
-	}
+	if (ap->pflags & ATA_PFLAG_LOADING)
+		ap->pflags &= ~ATA_PFLAG_LOADING;
+	else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
+		queue_work(ata_aux_wq, &ap->hotplug_task);
+
+	if (ap->pflags & ATA_PFLAG_RECOVERED)
+		ata_port_printk(ap, KERN_INFO, "EH complete\n");
 
-	ap->flags &= ~(ATA_FLAG_SCSI_HOTPLUG | ATA_FLAG_RECOVERED);
+	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);
 
 	/* tell wait_eh that we're done */
-	ap->flags &= ~ATA_FLAG_EH_IN_PROGRESS;
+	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
 	wake_up_all(&ap->eh_wait_q);
 
-	spin_unlock_irqrestore(ap_lock, flags);
+	spin_unlock_irqrestore(ap->lock, flags);
 
 	DPRINTK("EXIT\n");
 }
@@ -360,7 +366,7 @@ void ata_port_wait_eh(struct ata_port *ap)
  retry:
 	spin_lock_irqsave(ap->lock, flags);
 
-	while (ap->flags & (ATA_FLAG_EH_PENDING | ATA_FLAG_EH_IN_PROGRESS)) {
+	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
 		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
 		spin_unlock_irqrestore(ap->lock, flags);
 		schedule();
@@ -489,7 +495,7 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
 	WARN_ON(!ap->ops->error_handler);
 
 	qc->flags |= ATA_QCFLAG_FAILED;
-	qc->ap->flags |= ATA_FLAG_EH_PENDING;
+	qc->ap->pflags |= ATA_PFLAG_EH_PENDING;
 
 	/* The following will fail if timeout has already expired.
 	 * ata_scsi_error() takes care of such scmds on EH entry.
@@ -513,7 +519,7 @@ void ata_port_schedule_eh(struct ata_port *ap)
 {
 	WARN_ON(!ap->ops->error_handler);
 
-	ap->flags |= ATA_FLAG_EH_PENDING;
+	ap->pflags |= ATA_PFLAG_EH_PENDING;
 	scsi_schedule_eh(ap->host);
 
 	DPRINTK("port EH scheduled\n");
@@ -578,7 +584,7 @@ static void __ata_port_freeze(struct ata_port *ap)
 	if (ap->ops->freeze)
 		ap->ops->freeze(ap);
 
-	ap->flags |= ATA_FLAG_FROZEN;
+	ap->pflags |= ATA_PFLAG_FROZEN;
 
 	DPRINTK("ata%u port frozen\n", ap->id);
 }
@@ -646,7 +652,7 @@ void ata_eh_thaw_port(struct ata_port *ap)
 
 	spin_lock_irqsave(ap->lock, flags);
 
-	ap->flags &= ~ATA_FLAG_FROZEN;
+	ap->pflags &= ~ATA_PFLAG_FROZEN;
 
 	if (ap->ops->thaw)
 		ap->ops->thaw(ap);
@@ -731,7 +737,7 @@ static void ata_eh_detach_dev(struct ata_device *dev)
 
 	if (ata_scsi_offline_dev(dev)) {
 		dev->flags |= ATA_DFLAG_DETACHED;
-		ap->flags |= ATA_FLAG_SCSI_HOTPLUG;
+		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
 	}
 
 	/* clear per-dev EH actions */
@@ -760,8 +766,12 @@ static void ata_eh_about_to_do(struct ata_port *ap, struct ata_device *dev,
 	unsigned long flags;
 
 	spin_lock_irqsave(ap->lock, flags);
+
 	ata_eh_clear_action(dev, &ap->eh_info, action);
-	ap->flags |= ATA_FLAG_RECOVERED;
+
+	if (!(ap->eh_context.i.flags & ATA_EHI_QUIET))
+		ap->pflags |= ATA_PFLAG_RECOVERED;
+
 	spin_unlock_irqrestore(ap->lock, flags);
 }
 
@@ -1027,7 +1037,7 @@ static void ata_eh_analyze_ncq_error(struct ata_port *ap)
 	int tag, rc;
 
 	/* if frozen, we can't do much */
-	if (ap->flags & ATA_FLAG_FROZEN)
+	if (ap->pflags & ATA_PFLAG_FROZEN)
 		return;
 
 	/* is it NCQ device error? */
@@ -1275,6 +1285,9 @@ static void ata_eh_autopsy(struct ata_port *ap)
 
 	DPRINTK("ENTER\n");
 
+	if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
+		return;
+
 	/* obtain and analyze SError */
 	rc = sata_scr_read(ap, SCR_ERROR, &serror);
 	if (rc == 0) {
@@ -1327,7 +1340,7 @@ static void ata_eh_autopsy(struct ata_port *ap)
 	}
 
 	/* enforce default EH actions */
-	if (ap->flags & ATA_FLAG_FROZEN ||
+	if (ap->pflags & ATA_PFLAG_FROZEN ||
 	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
 		action |= ATA_EH_SOFTRESET;
 	else if (all_err_mask)
@@ -1346,7 +1359,7 @@ static void ata_eh_autopsy(struct ata_port *ap)
 
 	/* record autopsy result */
 	ehc->i.dev = failed_dev;
-	ehc->i.action = action;
+	ehc->i.action |= action;
 
 	DPRINTK("EXIT\n");
 }
@@ -1385,7 +1398,7 @@ static void ata_eh_report(struct ata_port *ap)
 		return;
 
 	frozen = "";
-	if (ap->flags & ATA_FLAG_FROZEN)
+	if (ap->pflags & ATA_PFLAG_FROZEN)
 		frozen = " frozen";
 
 	if (ehc->i.dev) {
@@ -1465,7 +1478,7 @@ static int ata_eh_reset(struct ata_port *ap, int classify,
 	struct ata_eh_context *ehc = &ap->eh_context;
 	unsigned int *classes = ehc->classes;
 	int tries = ATA_EH_RESET_TRIES;
-	int verbose = !(ap->flags & ATA_FLAG_LOADING);
+	int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
 	unsigned int action;
 	ata_reset_fn_t reset;
 	int i, did_followup_srst, rc;
@@ -1605,7 +1618,7 @@ static int ata_eh_revalidate_and_attach(struct ata_port *ap,
 		dev = &ap->device[i];
 		action = ata_eh_dev_action(dev);
 
-		if (action & ATA_EH_REVALIDATE && ata_dev_enabled(dev)) {
+		if (action & ATA_EH_REVALIDATE && ata_dev_ready(dev)) {
 			if (ata_port_offline(ap)) {
 				rc = -EIO;
 				break;
@@ -1636,7 +1649,7 @@ static int ata_eh_revalidate_and_attach(struct ata_port *ap,
 			}
 
 			spin_lock_irqsave(ap->lock, flags);
-			ap->flags |= ATA_FLAG_SCSI_HOTPLUG;
+			ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
 			spin_unlock_irqrestore(ap->lock, flags);
 		}
 	}
@@ -1648,6 +1661,164 @@ static int ata_eh_revalidate_and_attach(struct ata_port *ap,
 	return rc;
 }
 
+/**
+ *	ata_eh_suspend - handle suspend EH action
+ *	@ap: target host port
+ *	@r_failed_dev: result parameter to indicate failing device
+ *
+ *	Handle suspend EH action.  Disk devices are spun down and
+ *	other types of devices are just marked suspended.  Once
+ *	suspended, no EH action to the device is allowed until it is
+ *	resumed.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ *
+ *	RETURNS:
+ *	0 on success, -errno otherwise
+ */
+static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev)
+{
+	struct ata_device *dev;
+	int i, rc = 0;
+
+	DPRINTK("ENTER\n");
+
+	for (i = 0; i < ATA_MAX_DEVICES; i++) {
+		unsigned long flags;
+		unsigned int action, err_mask;
+
+		dev = &ap->device[i];
+		action = ata_eh_dev_action(dev);
+
+		if (!ata_dev_enabled(dev) || !(action & ATA_EH_SUSPEND))
+			continue;
+
+		WARN_ON(dev->flags & ATA_DFLAG_SUSPENDED);
+
+		ata_eh_about_to_do(ap, dev, ATA_EH_SUSPEND);
+
+		if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
+			/* flush cache */
+			rc = ata_flush_cache(dev);
+			if (rc)
+				break;
+
+			/* spin down */
+			err_mask = ata_do_simple_cmd(dev, ATA_CMD_STANDBYNOW1);
+			if (err_mask) {
+				ata_dev_printk(dev, KERN_ERR, "failed to "
+					       "spin down (err_mask=0x%x)\n",
+					       err_mask);
+				rc = -EIO;
+				break;
+			}
+		}
+
+		spin_lock_irqsave(ap->lock, flags);
+		dev->flags |= ATA_DFLAG_SUSPENDED;
+		spin_unlock_irqrestore(ap->lock, flags);
+
+		ata_eh_done(ap, dev, ATA_EH_SUSPEND);
+	}
+
+	if (rc)
+		*r_failed_dev = dev;
+
+	DPRINTK("EXIT\n");
+	return 0;
+}
+
+/**
+ *	ata_eh_prep_resume - prep for resume EH action
+ *	@ap: target host port
+ *
+ *	Clear SUSPENDED in preparation for scheduled resume actions.
+ *	This allows other parts of EH to access the devices being
+ *	resumed.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ */
+static void ata_eh_prep_resume(struct ata_port *ap)
+{
+	struct ata_device *dev;
+	unsigned long flags;
+	int i;
+
+	DPRINTK("ENTER\n");
+
+	for (i = 0; i < ATA_MAX_DEVICES; i++) {
+		unsigned int action;
+
+		dev = &ap->device[i];
+		action = ata_eh_dev_action(dev);
+
+		if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
+			continue;
+
+		spin_lock_irqsave(ap->lock, flags);
+		dev->flags &= ~ATA_DFLAG_SUSPENDED;
+		spin_unlock_irqrestore(ap->lock, flags);
+	}
+
+	DPRINTK("EXIT\n");
+}
+
+/**
+ *	ata_eh_resume - handle resume EH action
+ *	@ap: target host port
+ *	@r_failed_dev: result parameter to indicate failing device
+ *
+ *	Handle resume EH action.  Target devices are already reset and
+ *	revalidated.  Spinning up is the only operation left.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ *
+ *	RETURNS:
+ *	0 on success, -errno otherwise
+ */
+static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev)
+{
+	struct ata_device *dev;
+	int i, rc = 0;
+
+	DPRINTK("ENTER\n");
+
+	for (i = 0; i < ATA_MAX_DEVICES; i++) {
+		unsigned int action, err_mask;
+
+		dev = &ap->device[i];
+		action = ata_eh_dev_action(dev);
+
+		if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
+			continue;
+
+		ata_eh_about_to_do(ap, dev, ATA_EH_RESUME);
+
+		if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
+			err_mask = ata_do_simple_cmd(dev,
+						     ATA_CMD_IDLEIMMEDIATE);
+			if (err_mask) {
+				ata_dev_printk(dev, KERN_ERR, "failed to "
+					       "spin up (err_mask=0x%x)\n",
+					       err_mask);
+				rc = -EIO;
+				break;
+			}
+		}
+
+		ata_eh_done(ap, dev, ATA_EH_RESUME);
+	}
+
+	if (rc)
+		*r_failed_dev = dev;
+
+	DPRINTK("EXIT\n");
+	return 0;
+}
+
 static int ata_port_nr_enabled(struct ata_port *ap)
 {
 	int i, cnt = 0;
@@ -1673,7 +1844,19 @@ static int ata_eh_skip_recovery(struct ata_port *ap)
 	struct ata_eh_context *ehc = &ap->eh_context;
 	int i;
 
-	if (ap->flags & ATA_FLAG_FROZEN || ata_port_nr_enabled(ap))
+	/* skip if all possible devices are suspended */
+	for (i = 0; i < ata_port_max_devices(ap); i++) {
+		struct ata_device *dev = &ap->device[i];
+
+		if (ata_dev_absent(dev) || ata_dev_ready(dev))
+			break;
+	}
+
+	if (i == ata_port_max_devices(ap))
+		return 1;
+
+	/* always thaw frozen port and recover failed devices */
+	if (ap->pflags & ATA_PFLAG_FROZEN || ata_port_nr_enabled(ap))
 		return 0;
 
 	/* skip if class codes for all vacant slots are ATA_DEV_NONE */
@@ -1744,9 +1927,12 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 	rc = 0;
 
 	/* if UNLOADING, finish immediately */
-	if (ap->flags & ATA_FLAG_UNLOADING)
+	if (ap->pflags & ATA_PFLAG_UNLOADING)
 		goto out;
 
+	/* prep for resume */
+	ata_eh_prep_resume(ap);
+
 	/* skip EH if possible. */
 	if (ata_eh_skip_recovery(ap))
 		ehc->i.action = 0;
@@ -1774,6 +1960,11 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 	if (rc)
 		goto dev_fail;
 
+	/* resume devices */
+	rc = ata_eh_resume(ap, &dev);
+	if (rc)
+		goto dev_fail;
+
 	/* configure transfer mode if the port has been reset */
 	if (ehc->i.flags & ATA_EHI_DID_RESET) {
 		rc = ata_set_mode(ap, &dev);
@@ -1783,6 +1974,11 @@ static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 		}
 	}
 
+	/* suspend devices */
+	rc = ata_eh_suspend(ap, &dev);
+	if (rc)
+		goto dev_fail;
+
 	goto out;
 
  dev_fail:
@@ -1908,11 +2104,124 @@ void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
 	       ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
 	       ata_postreset_fn_t postreset)
 {
-	if (!(ap->flags & ATA_FLAG_LOADING)) {
-		ata_eh_autopsy(ap);
-		ata_eh_report(ap);
-	}
-
+	ata_eh_autopsy(ap);
+	ata_eh_report(ap);
 	ata_eh_recover(ap, prereset, softreset, hardreset, postreset);
 	ata_eh_finish(ap);
 }
+
+/**
+ *	ata_eh_handle_port_suspend - perform port suspend operation
+ *	@ap: port to suspend
+ *
+ *	Suspend @ap.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ */
+static void ata_eh_handle_port_suspend(struct ata_port *ap)
+{
+	unsigned long flags;
+	int rc = 0;
+
+	/* are we suspending? */
+	spin_lock_irqsave(ap->lock, flags);
+	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
+	    ap->pm_mesg.event == PM_EVENT_ON) {
+		spin_unlock_irqrestore(ap->lock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(ap->lock, flags);
+
+	WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);
+
+	/* suspend */
+	ata_eh_freeze_port(ap);
+
+	if (ap->ops->port_suspend)
+		rc = ap->ops->port_suspend(ap, ap->pm_mesg);
+
+	/* report result */
+	spin_lock_irqsave(ap->lock, flags);
+
+	ap->pflags &= ~ATA_PFLAG_PM_PENDING;
+	if (rc == 0)
+		ap->pflags |= ATA_PFLAG_SUSPENDED;
+	else
+		ata_port_schedule_eh(ap);
+
+	if (ap->pm_result) {
+		*ap->pm_result = rc;
+		ap->pm_result = NULL;
+	}
+
+	spin_unlock_irqrestore(ap->lock, flags);
+
+	return;
+}
+
+/**
+ *	ata_eh_handle_port_resume - perform port resume operation
+ *	@ap: port to resume
+ *
+ *	Resume @ap.
+ *
+ *	This function also waits up to one second until all devices
+ *	hanging off this port request resume EH action.  This is to
+ *	prevent invoking EH and thus resetting multiple times on resume.
+ *
+ *	On DPM resume, where some of the devices might not be resumed
+ *	together, this may delay port resume by up to one second, but such
+ *	DPM resumes are rare and 1 sec delay isn't too bad.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ */
+static void ata_eh_handle_port_resume(struct ata_port *ap)
+{
+	unsigned long timeout;
+	unsigned long flags;
+	int i, rc = 0;
+
+	/* are we resuming? */
+	spin_lock_irqsave(ap->lock, flags);
+	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
+	    ap->pm_mesg.event != PM_EVENT_ON) {
+		spin_unlock_irqrestore(ap->lock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(ap->lock, flags);
+
+	/* spurious? */
+	if (!(ap->pflags & ATA_PFLAG_SUSPENDED))
+		goto done;
+
+	if (ap->ops->port_resume)
+		rc = ap->ops->port_resume(ap);
+
+	/* give devices time to request EH */
+	timeout = jiffies + HZ; /* 1s max */
+	while (1) {
+		for (i = 0; i < ATA_MAX_DEVICES; i++) {
+			struct ata_device *dev = &ap->device[i];
+			unsigned int action = ata_eh_dev_action(dev);
+
+			if ((dev->flags & ATA_DFLAG_SUSPENDED) &&
+			    !(action & ATA_EH_RESUME))
+				break;
+		}
+
+		if (i == ATA_MAX_DEVICES || time_after(jiffies, timeout))
+			break;
+		msleep(10);
+	}
+
+ done:
+	spin_lock_irqsave(ap->lock, flags);
+	ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
+	if (ap->pm_result) {
+		*ap->pm_result = rc;
+		ap->pm_result = NULL;
+	}
+	spin_unlock_irqrestore(ap->lock, flags);
+}

+ 117 - 7
drivers/scsi/libata-scsi.c

@@ -397,20 +397,129 @@ void ata_dump_status(unsigned id, struct ata_taskfile *tf)
 	}
 }
 
-int ata_scsi_device_resume(struct scsi_device *sdev)
+/**
+ *	ata_scsi_device_suspend - suspend ATA device associated with sdev
+ *	@sdev: the SCSI device to suspend
+ *	@state: target power management state
+ *
+ *	Request suspend EH action on the ATA device associated with
+ *	@sdev and wait for the operation to complete.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ *
+ *	RETURNS:
+ *	0 on success, -errno otherwise.
+ */
+int ata_scsi_device_suspend(struct scsi_device *sdev, pm_message_t state)
 {
 	struct ata_port *ap = ata_shost_to_port(sdev->host);
-	struct ata_device *dev = __ata_scsi_find_dev(ap, sdev);
+	struct ata_device *dev = ata_scsi_find_dev(ap, sdev);
+	unsigned long flags;
+	unsigned int action;
+	int rc = 0;
+
+	if (!dev)
+		goto out;
+
+	spin_lock_irqsave(ap->lock, flags);
+
+	/* wait for the previous resume to complete */
+	while (dev->flags & ATA_DFLAG_SUSPENDED) {
+		spin_unlock_irqrestore(ap->lock, flags);
+		ata_port_wait_eh(ap);
+		spin_lock_irqsave(ap->lock, flags);
+	}
+
+	/* if @sdev is already detached, nothing to do */
+	if (sdev->sdev_state == SDEV_OFFLINE ||
+	    sdev->sdev_state == SDEV_CANCEL || sdev->sdev_state == SDEV_DEL)
+		goto out_unlock;
+
+	/* request suspend */
+	action = ATA_EH_SUSPEND;
+	if (state.event != PM_EVENT_SUSPEND)
+		action |= ATA_EH_PM_FREEZE;
+	ap->eh_info.dev_action[dev->devno] |= action;
+	ap->eh_info.flags |= ATA_EHI_QUIET;
+	ata_port_schedule_eh(ap);
+
+	spin_unlock_irqrestore(ap->lock, flags);
+
+	/* wait for EH to do the job */
+	ata_port_wait_eh(ap);
+
+	spin_lock_irqsave(ap->lock, flags);
+
+	/* If @sdev is still attached but the associated ATA device
+	 * isn't suspended, the operation failed.
+	 */
+	if (sdev->sdev_state != SDEV_OFFLINE &&
+	    sdev->sdev_state != SDEV_CANCEL && sdev->sdev_state != SDEV_DEL &&
+	    !(dev->flags & ATA_DFLAG_SUSPENDED))
+		rc = -EIO;
 
-	return ata_device_resume(dev);
+ out_unlock:
+	spin_unlock_irqrestore(ap->lock, flags);
+ out:
+	if (rc == 0)
+		sdev->sdev_gendev.power.power_state = state;
+	return rc;
 }
 
-int ata_scsi_device_suspend(struct scsi_device *sdev, pm_message_t state)
+/**
+ *	ata_scsi_device_resume - resume ATA device associated with sdev
+ *	@sdev: the SCSI device to resume
+ *
+ *	Request resume EH action on the ATA device associated with
+ *	@sdev and return immediately.  This enables parallel
+ *	wakeup/spinup of devices.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ *
+ *	RETURNS:
+ *	0.
+ */
+int ata_scsi_device_resume(struct scsi_device *sdev)
 {
 	struct ata_port *ap = ata_shost_to_port(sdev->host);
-	struct ata_device *dev = __ata_scsi_find_dev(ap, sdev);
+	struct ata_device *dev = ata_scsi_find_dev(ap, sdev);
+	struct ata_eh_info *ehi = &ap->eh_info;
+	unsigned long flags;
+	unsigned int action;
+
+	if (!dev)
+		goto out;
+
+	spin_lock_irqsave(ap->lock, flags);
+
+	/* if @sdev is already detached, nothing to do */
+	if (sdev->sdev_state == SDEV_OFFLINE ||
+	    sdev->sdev_state == SDEV_CANCEL || sdev->sdev_state == SDEV_DEL)
+		goto out_unlock;
 
-	return ata_device_suspend(dev, state);
+	/* request resume */
+	action = ATA_EH_RESUME;
+	if (sdev->sdev_gendev.power.power_state.event == PM_EVENT_SUSPEND)
+		__ata_ehi_hotplugged(ehi);
+	else
+		action |= ATA_EH_PM_FREEZE | ATA_EH_SOFTRESET;
+	ehi->dev_action[dev->devno] |= action;
+
+	/* We don't want autopsy and verbose EH messages.  Disable
+	 * those if we're the only device on this link.
+	 */
+	if (ata_port_max_devices(ap) == 1)
+		ehi->flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET;
+
+	ata_port_schedule_eh(ap);
+
+ out_unlock:
+	spin_unlock_irqrestore(ap->lock, flags);
+ out:
+	sdev->sdev_gendev.power.power_state = PMSG_ON;
+	return 0;
 }
 
 /**
@@ -2930,7 +3039,7 @@ void ata_scsi_hotplug(void *data)
 	struct ata_port *ap = data;
 	int i;
 
-	if (ap->flags & ATA_FLAG_UNLOADING) {
+	if (ap->pflags & ATA_PFLAG_UNLOADING) {
 		DPRINTK("ENTER/EXIT - unloading\n");
 		return;
 	}
@@ -3011,6 +3120,7 @@ static int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
 		if (dev) {
 			ap->eh_info.probe_mask |= 1 << dev->devno;
 			ap->eh_info.action |= ATA_EH_SOFTRESET;
+			ap->eh_info.flags |= ATA_EHI_RESUME_LINK;
 		} else
 			rc = -EINVAL;
 	}

+ 66 - 39
drivers/scsi/sata_sil.c

@@ -109,6 +109,7 @@ enum {
 };
 
 static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
+static int sil_pci_device_resume(struct pci_dev *pdev);
 static void sil_dev_config(struct ata_port *ap, struct ata_device *dev);
 static u32 sil_scr_read (struct ata_port *ap, unsigned int sc_reg);
 static void sil_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val);
@@ -160,6 +161,8 @@ static struct pci_driver sil_pci_driver = {
 	.id_table		= sil_pci_tbl,
 	.probe			= sil_init_one,
 	.remove			= ata_pci_remove_one,
+	.suspend		= ata_pci_device_suspend,
+	.resume			= sil_pci_device_resume,
 };
 
 static struct scsi_host_template sil_sht = {
@@ -178,6 +181,8 @@ static struct scsi_host_template sil_sht = {
 	.slave_configure	= ata_scsi_slave_config,
 	.slave_destroy		= ata_scsi_slave_destroy,
 	.bios_param		= ata_std_bios_param,
+	.suspend		= ata_scsi_device_suspend,
+	.resume			= ata_scsi_device_resume,
 };
 
 static const struct ata_port_operations sil_ops = {
@@ -370,7 +375,7 @@ static void sil_host_intr(struct ata_port *ap, u32 bmdma2)
 		 * during hardreset makes controllers with broken SIEN
 		 * repeat probing needlessly.
 		 */
-		if (!(ap->flags & ATA_FLAG_FROZEN)) {
+		if (!(ap->pflags & ATA_PFLAG_FROZEN)) {
 			ata_ehi_hotplugged(&ap->eh_info);
 			ap->eh_info.serror |= serror;
 		}
@@ -561,6 +566,52 @@ static void sil_dev_config(struct ata_port *ap, struct ata_device *dev)
 	}
 }
 
+static void sil_init_controller(struct pci_dev *pdev,
+				int n_ports, unsigned long host_flags,
+				void __iomem *mmio_base)
+{
+	u8 cls;
+	u32 tmp;
+	int i;
+
+	/* Initialize FIFO PCI bus arbitration */
+	cls = sil_get_device_cache_line(pdev);
+	if (cls) {
+		cls >>= 3;
+		cls++;  /* cls = (line_size/8)+1 */
+		for (i = 0; i < n_ports; i++)
+			writew(cls << 8 | cls,
+			       mmio_base + sil_port[i].fifo_cfg);
+	} else
+		dev_printk(KERN_WARNING, &pdev->dev,
+			   "cache line size not set.  Driver may not function\n");
+
+	/* Apply R_ERR on DMA activate FIS errata workaround */
+	if (host_flags & SIL_FLAG_RERR_ON_DMA_ACT) {
+		int cnt;
+
+		for (i = 0, cnt = 0; i < n_ports; i++) {
+			tmp = readl(mmio_base + sil_port[i].sfis_cfg);
+			if ((tmp & 0x3) != 0x01)
+				continue;
+			if (!cnt)
+				dev_printk(KERN_INFO, &pdev->dev,
+					   "Applying R_ERR on DMA activate "
+					   "FIS errata fix\n");
+			writel(tmp & ~0x3, mmio_base + sil_port[i].sfis_cfg);
+			cnt++;
+		}
+	}
+
+	if (n_ports == 4) {
+		/* flip the magic "make 4 ports work" bit */
+		tmp = readl(mmio_base + sil_port[2].bmdma);
+		if ((tmp & SIL_INTR_STEERING) == 0)
+			writel(tmp | SIL_INTR_STEERING,
+			       mmio_base + sil_port[2].bmdma);
+	}
+}
+
 static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	static int printed_version;
@@ -570,8 +621,6 @@ static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 	int rc;
 	unsigned int i;
 	int pci_dev_busy = 0;
-	u32 tmp;
-	u8 cls;
 
 	if (!printed_version++)
 		dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n");
@@ -630,42 +679,8 @@ static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 		ata_std_ports(&probe_ent->port[i]);
 	}
 
-	/* Initialize FIFO PCI bus arbitration */
-	cls = sil_get_device_cache_line(pdev);
-	if (cls) {
-		cls >>= 3;
-		cls++;  /* cls = (line_size/8)+1 */
-		for (i = 0; i < probe_ent->n_ports; i++)
-			writew(cls << 8 | cls,
-			       mmio_base + sil_port[i].fifo_cfg);
-	} else
-		dev_printk(KERN_WARNING, &pdev->dev,
-			   "cache line size not set.  Driver may not function\n");
-
-	/* Apply R_ERR on DMA activate FIS errata workaround */
-	if (probe_ent->host_flags & SIL_FLAG_RERR_ON_DMA_ACT) {
-		int cnt;
-
-		for (i = 0, cnt = 0; i < probe_ent->n_ports; i++) {
-			tmp = readl(mmio_base + sil_port[i].sfis_cfg);
-			if ((tmp & 0x3) != 0x01)
-				continue;
-			if (!cnt)
-				dev_printk(KERN_INFO, &pdev->dev,
-					   "Applying R_ERR on DMA activate "
-					   "FIS errata fix\n");
-			writel(tmp & ~0x3, mmio_base + sil_port[i].sfis_cfg);
-			cnt++;
-		}
-	}
-
-	if (ent->driver_data == sil_3114) {
-		/* flip the magic "make 4 ports work" bit */
-		tmp = readl(mmio_base + sil_port[2].bmdma);
-		if ((tmp & SIL_INTR_STEERING) == 0)
-			writel(tmp | SIL_INTR_STEERING,
-			       mmio_base + sil_port[2].bmdma);
-	}
+	sil_init_controller(pdev, probe_ent->n_ports, probe_ent->host_flags,
+			    mmio_base);
 
 	pci_set_master(pdev);
 
@@ -685,6 +700,18 @@ err_out:
 	return rc;
 }
 
+static int sil_pci_device_resume(struct pci_dev *pdev)
+{
+	struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev);
+
+	ata_pci_device_do_resume(pdev);
+	sil_init_controller(pdev, host_set->n_ports, host_set->ports[0]->flags,
+			    host_set->mmio_base);
+	ata_host_set_resume(host_set);
+
+	return 0;
+}
+
 static int __init sil_init(void)
 {
 	return pci_module_init(&sil_pci_driver);

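Note: the point of factoring sil_init_controller() out of sil_init_one()
is that resume must replay exactly the probe-time register programming,
since a suspended chip comes back at reset defaults. The resulting driver
shape, sketched with hypothetical foo_* names (the generic helpers are the
ones this merge adds to libata):

        static int foo_pci_device_resume(struct pci_dev *pdev)
        {
                struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev);

                ata_pci_device_do_resume(pdev); /* restore PCI state/power */
                foo_init_controller(host_set);  /* replay probe-time setup */
                ata_host_set_resume(host_set);  /* EH revalidates devices */
                return 0;
        }
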
+ 88 - 46
drivers/scsi/sata_sil24.c

@@ -92,6 +92,7 @@ enum {
 	HOST_CTRL_STOP		= (1 << 18), /* latched PCI STOP */
 	HOST_CTRL_DEVSEL	= (1 << 19), /* latched PCI DEVSEL */
 	HOST_CTRL_REQ64		= (1 << 20), /* latched PCI REQ64 */
+	HOST_CTRL_GLOBAL_RST	= (1 << 31), /* global reset */
 
 	/*
 	 * Port registers
@@ -338,6 +339,7 @@ static int sil24_port_start(struct ata_port *ap);
 static void sil24_port_stop(struct ata_port *ap);
 static void sil24_host_stop(struct ata_host_set *host_set);
 static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
+static int sil24_pci_device_resume(struct pci_dev *pdev);
 
 static const struct pci_device_id sil24_pci_tbl[] = {
 	{ 0x1095, 0x3124, PCI_ANY_ID, PCI_ANY_ID, 0, 0, BID_SIL3124 },
@@ -353,6 +355,8 @@ static struct pci_driver sil24_pci_driver = {
 	.id_table		= sil24_pci_tbl,
 	.probe			= sil24_init_one,
 	.remove			= ata_pci_remove_one, /* safe? */
+	.suspend		= ata_pci_device_suspend,
+	.resume			= sil24_pci_device_resume,
 };
 
 static struct scsi_host_template sil24_sht = {
@@ -372,6 +376,8 @@ static struct scsi_host_template sil24_sht = {
 	.slave_configure	= ata_scsi_slave_config,
 	.slave_destroy		= ata_scsi_slave_destroy,
 	.bios_param		= ata_std_bios_param,
+	.suspend		= ata_scsi_device_suspend,
+	.resume			= ata_scsi_device_resume,
 };
 
 static const struct ata_port_operations sil24_ops = {
@@ -607,7 +613,7 @@ static int sil24_hardreset(struct ata_port *ap, unsigned int *class)
 	/* SStatus oscillates between zero and valid status after
 	 * DEV_RST, debounce it.
 	 */
-	rc = sata_phy_debounce(ap, sata_deb_timing_before_fsrst);
+	rc = sata_phy_debounce(ap, sata_deb_timing_long);
 	if (rc) {
 		reason = "PHY debouncing failed";
 		goto err;
@@ -988,6 +994,64 @@ static void sil24_host_stop(struct ata_host_set *host_set)
 	kfree(hpriv);
 }
 
+static void sil24_init_controller(struct pci_dev *pdev, int n_ports,
+				  unsigned long host_flags,
+				  void __iomem *host_base,
+				  void __iomem *port_base)
+{
+	u32 tmp;
+	int i;
+
+	/* GPIO off */
+	writel(0, host_base + HOST_FLASH_CMD);
+
+	/* clear global reset & mask interrupts during initialization */
+	writel(0, host_base + HOST_CTRL);
+
+	/* init ports */
+	for (i = 0; i < n_ports; i++) {
+		void __iomem *port = port_base + i * PORT_REGS_SIZE;
+
+		/* Initial PHY setting */
+		writel(0x20c, port + PORT_PHY_CFG);
+
+		/* Clear port RST */
+		tmp = readl(port + PORT_CTRL_STAT);
+		if (tmp & PORT_CS_PORT_RST) {
+			writel(PORT_CS_PORT_RST, port + PORT_CTRL_CLR);
+			tmp = ata_wait_register(port + PORT_CTRL_STAT,
+						PORT_CS_PORT_RST,
+						PORT_CS_PORT_RST, 10, 100);
+			if (tmp & PORT_CS_PORT_RST)
+				dev_printk(KERN_ERR, &pdev->dev,
+				           "failed to clear port RST\n");
+		}
+
+		/* Configure IRQ WoC */
+		if (host_flags & SIL24_FLAG_PCIX_IRQ_WOC)
+			writel(PORT_CS_IRQ_WOC, port + PORT_CTRL_STAT);
+		else
+			writel(PORT_CS_IRQ_WOC, port + PORT_CTRL_CLR);
+
+		/* Zero error counters. */
+		writel(0x8000, port + PORT_DECODE_ERR_THRESH);
+		writel(0x8000, port + PORT_CRC_ERR_THRESH);
+		writel(0x8000, port + PORT_HSHK_ERR_THRESH);
+		writel(0x0000, port + PORT_DECODE_ERR_CNT);
+		writel(0x0000, port + PORT_CRC_ERR_CNT);
+		writel(0x0000, port + PORT_HSHK_ERR_CNT);
+
+		/* Always use 64bit activation */
+		writel(PORT_CS_32BIT_ACTV, port + PORT_CTRL_CLR);
+
+		/* Clear port multiplier enable and resume bits */
+		writel(PORT_CS_PM_EN | PORT_CS_RESUME, port + PORT_CTRL_CLR);
+	}
+
+	/* Turn on interrupts */
+	writel(IRQ_STAT_4PORTS, host_base + HOST_CTRL);
+}
+
 static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	static int printed_version = 0;
@@ -1076,9 +1140,6 @@ static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		}
 	}
 
-	/* GPIO off */
-	writel(0, host_base + HOST_FLASH_CMD);
-
 	/* Apply workaround for completion IRQ loss on PCI-X errata */
 	if (probe_ent->host_flags & SIL24_FLAG_PCIX_IRQ_WOC) {
 		tmp = readl(host_base + HOST_CTRL);
@@ -1090,56 +1151,18 @@ static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			probe_ent->host_flags &= ~SIL24_FLAG_PCIX_IRQ_WOC;
 	}
 
-	/* clear global reset & mask interrupts during initialization */
-	writel(0, host_base + HOST_CTRL);
-
 	for (i = 0; i < probe_ent->n_ports; i++) {
-		void __iomem *port = port_base + i * PORT_REGS_SIZE;
-		unsigned long portu = (unsigned long)port;
+		unsigned long portu =
+			(unsigned long)port_base + i * PORT_REGS_SIZE;
 
 		probe_ent->port[i].cmd_addr = portu;
 		probe_ent->port[i].scr_addr = portu + PORT_SCONTROL;
 
 		ata_std_ports(&probe_ent->port[i]);
-
-		/* Initial PHY setting */
-		writel(0x20c, port + PORT_PHY_CFG);
-
-		/* Clear port RST */
-		tmp = readl(port + PORT_CTRL_STAT);
-		if (tmp & PORT_CS_PORT_RST) {
-			writel(PORT_CS_PORT_RST, port + PORT_CTRL_CLR);
-			tmp = ata_wait_register(port + PORT_CTRL_STAT,
-						PORT_CS_PORT_RST,
-						PORT_CS_PORT_RST, 10, 100);
-			if (tmp & PORT_CS_PORT_RST)
-				dev_printk(KERN_ERR, &pdev->dev,
-				           "failed to clear port RST\n");
-		}
-
-		/* Configure IRQ WoC */
-		if (probe_ent->host_flags & SIL24_FLAG_PCIX_IRQ_WOC)
-			writel(PORT_CS_IRQ_WOC, port + PORT_CTRL_STAT);
-		else
-			writel(PORT_CS_IRQ_WOC, port + PORT_CTRL_CLR);
-
-		/* Zero error counters. */
-		writel(0x8000, port + PORT_DECODE_ERR_THRESH);
-		writel(0x8000, port + PORT_CRC_ERR_THRESH);
-		writel(0x8000, port + PORT_HSHK_ERR_THRESH);
-		writel(0x0000, port + PORT_DECODE_ERR_CNT);
-		writel(0x0000, port + PORT_CRC_ERR_CNT);
-		writel(0x0000, port + PORT_HSHK_ERR_CNT);
-
-		/* Always use 64bit activation */
-		writel(PORT_CS_32BIT_ACTV, port + PORT_CTRL_CLR);
-
-		/* Clear port multiplier enable and resume bits */
-		writel(PORT_CS_PM_EN | PORT_CS_RESUME, port + PORT_CTRL_CLR);
 	}
 
-	/* Turn on interrupts */
-	writel(IRQ_STAT_4PORTS, host_base + HOST_CTRL);
+	sil24_init_controller(pdev, probe_ent->n_ports, probe_ent->host_flags,
+			      host_base, port_base);
 
 	pci_set_master(pdev);
 
@@ -1162,6 +1185,25 @@ static int sil24_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	return rc;
 }
 
+static int sil24_pci_device_resume(struct pci_dev *pdev)
+{
+	struct ata_host_set *host_set = dev_get_drvdata(&pdev->dev);
+	struct sil24_host_priv *hpriv = host_set->private_data;
+
+	ata_pci_device_do_resume(pdev);
+
+	if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND)
+		writel(HOST_CTRL_GLOBAL_RST, hpriv->host_base + HOST_CTRL);
+
+	sil24_init_controller(pdev, host_set->n_ports,
+			      host_set->ports[0]->flags,
+			      hpriv->host_base, hpriv->port_base);
+
+	ata_host_set_resume(host_set);
+
+	return 0;
+}
+
 static int __init sil24_init(void)
 {
 	return pci_module_init(&sil24_pci_driver);

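Note: sil24 additionally pulses HOST_CTRL_GLOBAL_RST on resume, but only
when the saved power state shows a real suspend; a freeze-type event
leaves the controller powered and programmed, so a global reset would only
slow the resume down. The discriminating check, as used in the hunk:

        /* the controller lost its state only on a real suspend */
        if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND)
                writel(HOST_CTRL_GLOBAL_RST, hpriv->host_base + HOST_CTRL);
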
+ 1 - 1
drivers/scsi/sata_vsc.c

@@ -297,7 +297,7 @@ static const struct ata_port_operations vsc_sata_ops = {
 	.bmdma_status		= ata_bmdma_status,
 	.qc_prep		= ata_qc_prep,
 	.qc_issue		= ata_qc_issue_prot,
-	.data_xfer		= ata_pio_data_xfer,
+	.data_xfer		= ata_mmio_data_xfer,
 	.freeze			= ata_bmdma_freeze,
 	.thaw			= ata_bmdma_thaw,
 	.error_handler		= ata_bmdma_error_handler,

+ 3 - 2
drivers/serial/at91_serial.c

@@ -41,6 +41,7 @@
 #include <asm/mach/serial_at91.h>
 #include <asm/arch/board.h>
 #include <asm/arch/system.h>
+#include <asm/arch/gpio.h>
 
 #if defined(CONFIG_SERIAL_AT91_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
 #define SUPPORT_SYSRQ
@@ -140,9 +141,9 @@ static void at91_set_mctrl(struct uart_port *port, u_int mctrl)
 		 */
 		if (port->mapbase == AT91_BASE_US0) {
 			if (mctrl & TIOCM_RTS)
-				at91_sys_write(AT91_PIOA + PIO_CODR, AT91_PA21_RTS0);
+				at91_set_gpio_value(AT91_PIN_PA21, 0);
 			else
-				at91_sys_write(AT91_PIOA + PIO_SODR, AT91_PA21_RTS0);
+				at91_set_gpio_value(AT91_PIN_PA21, 1);
 		}
 	}
 

+ 17 - 9
fs/lockd/clntproc.c

@@ -454,7 +454,7 @@ static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *ho
 	fl->fl_ops = &nlmclnt_lock_ops;
 }
 
-static void do_vfs_lock(struct file_lock *fl)
+static int do_vfs_lock(struct file_lock *fl)
 {
 	int res = 0;
 	switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
@@ -467,9 +467,7 @@ static void do_vfs_lock(struct file_lock *fl)
 		default:
 			BUG();
 	}
-	if (res < 0)
-		printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n",
-				__FUNCTION__);
+	return res;
 }
 
 /*
@@ -498,6 +496,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 	struct nlm_host	*host = req->a_host;
 	struct nlm_res	*resp = &req->a_res;
 	struct nlm_wait *block = NULL;
+	unsigned char fl_flags = fl->fl_flags;
 	int status = -ENOLCK;
 
 	if (!host->h_monitored && nsm_monitor(host) < 0) {
@@ -505,6 +504,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 					host->h_name);
 		goto out;
 	}
+	fl->fl_flags |= FL_ACCESS;
+	status = do_vfs_lock(fl);
+	if (status < 0)
+		goto out;
 
 	block = nlmclnt_prepare_block(host, fl);
 again:
@@ -539,9 +542,10 @@ again:
 			up_read(&host->h_rwsem);
 			goto again;
 		}
-		fl->fl_flags |= FL_SLEEP;
 		/* Ensure the resulting lock will get added to granted list */
-		do_vfs_lock(fl);
+		fl->fl_flags = fl_flags | FL_SLEEP;
+		if (do_vfs_lock(fl) < 0)
+			printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__);
 		up_read(&host->h_rwsem);
 	}
 	status = nlm_stat_to_errno(resp->status);
@@ -552,6 +556,7 @@ out_unblock:
 		nlmclnt_cancel(host, req->a_args.block, fl);
 out:
 	nlm_release_call(req);
+	fl->fl_flags = fl_flags;
 	return status;
 }
 
@@ -606,15 +611,19 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 {
 	struct nlm_host	*host = req->a_host;
 	struct nlm_res	*resp = &req->a_res;
-	int		status;
+	int status = 0;
 
 	/*
 	 * Note: the server is supposed to either grant us the unlock
 	 * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either
 	 * case, we want to unlock.
 	 */
+	fl->fl_flags |= FL_EXISTS;
 	down_read(&host->h_rwsem);
-	do_vfs_lock(fl);
+	if (do_vfs_lock(fl) == -ENOENT) {
+		up_read(&host->h_rwsem);
+		goto out;
+	}
 	up_read(&host->h_rwsem);
 
 	if (req->a_flags & RPC_TASK_ASYNC)
@@ -624,7 +633,6 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	if (status < 0)
 		goto out;
 
-	status = 0;
 	if (resp->status == NLM_LCK_GRANTED)
 		goto out;
 

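Note: the lockd rework leans on the new FL_ACCESS and FL_EXISTS flags (see
the fs/locks.c and include/linux/fs.h hunks below). FL_ACCESS turns
do_vfs_lock() into a pure conflict probe that installs nothing, letting
nlmclnt_lock() fail fast on a local conflict before any RPC is sent. The
probe idiom, sketched (the hunk restores fl_flags on its common exit path):

        unsigned char fl_flags = fl->fl_flags;

        fl->fl_flags |= FL_ACCESS;      /* just looking, install nothing */
        status = do_vfs_lock(fl);
        fl->fl_flags = fl_flags;        /* restore for the real request */
        if (status < 0)
                return status;          /* local conflict: skip the wire */
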
+ 21 - 2
fs/locks.c

@@ -725,6 +725,10 @@ next_task:
 /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
  * at the head of the list, but that's secret knowledge known only to
  * flock_lock_file and posix_lock_file.
+ *
+ * Note that if called with an FL_EXISTS argument, the caller may determine
+ * whether or not a lock was successfully freed by testing the return
+ * value for -ENOENT.
  */
 static int flock_lock_file(struct file *filp, struct file_lock *request)
 {
@@ -735,6 +739,8 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 	int found = 0;
 
 	lock_kernel();
+	if (request->fl_flags & FL_ACCESS)
+		goto find_conflict;
 	for_each_lock(inode, before) {
 		struct file_lock *fl = *before;
 		if (IS_POSIX(fl))
@@ -750,8 +756,11 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 		break;
 	}
 
-	if (request->fl_type == F_UNLCK)
+	if (request->fl_type == F_UNLCK) {
+		if ((request->fl_flags & FL_EXISTS) && !found)
+			error = -ENOENT;
 		goto out;
+	}
 
 	error = -ENOMEM;
 	new_fl = locks_alloc_lock();
@@ -764,6 +773,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 	if (found)
 		cond_resched();
 
+find_conflict:
 	for_each_lock(inode, before) {
 		struct file_lock *fl = *before;
 		if (IS_POSIX(fl))
@@ -777,6 +787,8 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 			locks_insert_block(fl, request);
 		goto out;
 	}
+	if (request->fl_flags & FL_ACCESS)
+		goto out;
 	locks_copy_lock(new_fl, request);
 	locks_insert_lock(&inode->i_flock, new_fl);
 	new_fl = NULL;
@@ -948,8 +960,11 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
 
 	error = 0;
 	if (!added) {
-		if (request->fl_type == F_UNLCK)
+		if (request->fl_type == F_UNLCK) {
+			if (request->fl_flags & FL_EXISTS)
+				error = -ENOENT;
 			goto out;
+		}
 
 		if (!new_fl) {
 			error = -ENOLCK;
@@ -996,6 +1011,10 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
  * Add a POSIX style lock to a file.
  * We merge adjacent & overlapping locks whenever possible.
  * POSIX locks are sorted by owner task, then by starting address
+ *
+ * Note that if called with an FL_EXISTS argument, the caller may determine
+ * whether or not a lock was successfully freed by testing the return
+ * value for -ENOENT.
  */
 int posix_lock_file(struct file *filp, struct file_lock *fl)
 {

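Note: FL_EXISTS gives unlock paths a way to tell "nothing to free" apart
from "lock freed": with the flag set, an F_UNLCK request that matches no
existing lock returns -ENOENT instead of silently succeeding. A typical
caller, sketched:

        fl->fl_type = F_UNLCK;
        fl->fl_flags |= FL_EXISTS;
        error = posix_lock_file(filp, fl);
        if (error == -ENOENT) {
                /* nothing was held locally, so e.g. lockd can skip
                 * the over-the-wire UNLOCK entirely */
        }
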
+ 3 - 1
fs/nfs/dir.c

@@ -690,7 +690,9 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
 			goto out_force;
 		/* This is an open(2) */
 		if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 &&
-				!(server->flags & NFS_MOUNT_NOCTO))
+				!(server->flags & NFS_MOUNT_NOCTO) &&
+				(S_ISREG(inode->i_mode) ||
+				 S_ISDIR(inode->i_mode)))
 			goto out_force;
 	}
 	return nfs_revalidate_inode(server, inode);

+ 201 - 234
fs/nfs/direct.c

@@ -67,25 +67,19 @@ struct nfs_direct_req {
 	struct kref		kref;		/* release manager */
 
 	/* I/O parameters */
-	struct list_head	list,		/* nfs_read/write_data structs */
-				rewrite_list;	/* saved nfs_write_data structs */
 	struct nfs_open_context	*ctx;		/* file open context info */
 	struct kiocb *		iocb;		/* controlling i/o request */
 	struct inode *		inode;		/* target file of i/o */
-	unsigned long		user_addr;	/* location of user's buffer */
-	size_t			user_count;	/* total bytes to move */
-	loff_t			pos;		/* starting offset in file */
-	struct page **		pages;		/* pages in our buffer */
-	unsigned int		npages;		/* count of pages */
 
 	/* completion state */
+	atomic_t		io_count;	/* i/os we're waiting for */
 	spinlock_t		lock;		/* protect completion state */
-	int			outstanding;	/* i/os we're waiting for */
 	ssize_t			count,		/* bytes actually processed */
 				error;		/* any reported error */
 	struct completion	completion;	/* wait for i/o completion */
 
 	/* commit state */
+	struct list_head	rewrite_list;	/* saved nfs_write_data structs */
 	struct nfs_write_data *	commit_data;	/* special write_data for commits */
 	int			flags;
 #define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
@@ -93,8 +87,37 @@ struct nfs_direct_req {
 	struct nfs_writeverf	verf;		/* unstable write verifier */
 };
 
-static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync);
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
+static const struct rpc_call_ops nfs_write_direct_ops;
+
+static inline void get_dreq(struct nfs_direct_req *dreq)
+{
+	atomic_inc(&dreq->io_count);
+}
+
+static inline int put_dreq(struct nfs_direct_req *dreq)
+{
+	return atomic_dec_and_test(&dreq->io_count);
+}
+
+/*
+ * "size" is never larger than rsize or wsize.
+ */
+static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
+{
+	int page_count;
+
+	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	page_count -= user_addr >> PAGE_SHIFT;
+	BUG_ON(page_count < 0);
+
+	return page_count;
+}
+
+static inline unsigned int nfs_max_pages(unsigned int size)
+{
+	return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+}
 
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
@@ -118,50 +141,21 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
 	return -EINVAL;
 }
 
-static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+static void nfs_direct_dirty_pages(struct page **pages, int npages)
 {
 	int i;
 	for (i = 0; i < npages; i++) {
 		struct page *page = pages[i];
-		if (do_dirty && !PageCompound(page))
+		if (!PageCompound(page))
 			set_page_dirty_lock(page);
-		page_cache_release(page);
 	}
-	kfree(pages);
 }
 
-static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
+static void nfs_direct_release_pages(struct page **pages, int npages)
 {
-	int result = -ENOMEM;
-	unsigned long page_count;
-	size_t array_size;
-
-	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	page_count -= user_addr >> PAGE_SHIFT;
-
-	array_size = (page_count * sizeof(struct page *));
-	*pages = kmalloc(array_size, GFP_KERNEL);
-	if (*pages) {
-		down_read(&current->mm->mmap_sem);
-		result = get_user_pages(current, current->mm, user_addr,
-					page_count, (rw == READ), 0,
-					*pages, NULL);
-		up_read(&current->mm->mmap_sem);
-		if (result != page_count) {
-			/*
-			 * If we got fewer pages than expected from
-			 * get_user_pages(), the user buffer runs off the
-			 * end of a mapping; return EFAULT.
-			 */
-			if (result >= 0) {
-				nfs_free_user_pages(*pages, result, 0);
-				result = -EFAULT;
-			} else
-				kfree(*pages);
-			*pages = NULL;
-		}
-	}
-	return result;
+	int i;
+	for (i = 0; i < npages; i++)
+		page_cache_release(pages[i]);
 }
 
 static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
@@ -173,13 +167,13 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 		return NULL;
 
 	kref_init(&dreq->kref);
+	kref_get(&dreq->kref);
 	init_completion(&dreq->completion);
-	INIT_LIST_HEAD(&dreq->list);
 	INIT_LIST_HEAD(&dreq->rewrite_list);
 	dreq->iocb = NULL;
 	dreq->ctx = NULL;
 	spin_lock_init(&dreq->lock);
-	dreq->outstanding = 0;
+	atomic_set(&dreq->io_count, 0);
 	dreq->count = 0;
 	dreq->error = 0;
 	dreq->flags = 0;
@@ -220,18 +214,11 @@ out:
 }
 
 /*
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete.  This could be long *after* we are woken up in
- * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
- *
- * In addition, synchronous I/O uses a stack-allocated iocb.  Thus we
- * can't trust the iocb is still valid here if this is a synchronous
- * request.  If the waiter is woken prematurely, the iocb is long gone.
+ * Synchronous I/O uses a stack-allocated iocb.  Thus we can't trust
+ * the iocb is still valid here if this is a synchronous request.
  */
 static void nfs_direct_complete(struct nfs_direct_req *dreq)
 {
-	nfs_free_user_pages(dreq->pages, dreq->npages, 1);
-
 	if (dreq->iocb) {
 		long res = (long) dreq->error;
 		if (!res)
@@ -244,48 +231,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 }
 
 /*
- * Note we also set the number of requests we have in the dreq when we are
- * done.  This prevents races with I/O completion so we will always wait
- * until all requests have been dispatched and completed.
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete.  This could be long *after* we are woken up in
+ * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
  */
-static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
-{
-	struct list_head *list;
-	struct nfs_direct_req *dreq;
-	unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-	dreq = nfs_direct_req_alloc();
-	if (!dreq)
-		return NULL;
-
-	list = &dreq->list;
-	for(;;) {
-		struct nfs_read_data *data = nfs_readdata_alloc(rpages);
-
-		if (unlikely(!data)) {
-			while (!list_empty(list)) {
-				data = list_entry(list->next,
-						  struct nfs_read_data, pages);
-				list_del(&data->pages);
-				nfs_readdata_free(data);
-			}
-			kref_put(&dreq->kref, nfs_direct_req_release);
-			return NULL;
-		}
-
-		INIT_LIST_HEAD(&data->pages);
-		list_add(&data->pages, list);
-
-		data->req = (struct nfs_page *) dreq;
-		dreq->outstanding++;
-		if (nbytes <= rsize)
-			break;
-		nbytes -= rsize;
-	}
-	kref_get(&dreq->kref);
-	return dreq;
-}
-
 static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;
@@ -294,6 +243,9 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 	if (nfs_readpage_result(task, data) != 0)
 		return;
 
+	nfs_direct_dirty_pages(data->pagevec, data->npages);
+	nfs_direct_release_pages(data->pagevec, data->npages);
+
 	spin_lock(&dreq->lock);
 
 	if (likely(task->tk_status >= 0))
@@ -301,13 +253,10 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 	else
 		dreq->error = task->tk_status;
 
-	if (--dreq->outstanding) {
-		spin_unlock(&dreq->lock);
-		return;
-	}
-
 	spin_unlock(&dreq->lock);
-	nfs_direct_complete(dreq);
+
+	if (put_dreq(dreq))
+		nfs_direct_complete(dreq);
 }
 
 static const struct rpc_call_ops nfs_read_direct_ops = {
@@ -316,41 +265,60 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
 };
 
 /*
- * For each nfs_read_data struct that was allocated on the list, dispatch
- * an NFS READ operation
+ * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
+ * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
+ * bail and stop sending more reads.  Read length accounting is
+ * handled automatically by nfs_direct_read_result().  Otherwise, if
+ * no requests have been sent, just return an error.
  */
-static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
+static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
-	struct list_head *list = &dreq->list;
-	struct page **pages = dreq->pages;
-	size_t count = dreq->user_count;
-	loff_t pos = dreq->pos;
 	size_t rsize = NFS_SERVER(inode)->rsize;
-	unsigned int curpage, pgbase;
+	unsigned int rpages = nfs_max_pages(rsize);
+	unsigned int pgbase;
+	int result;
+	ssize_t started = 0;
+
+	get_dreq(dreq);
 
-	curpage = 0;
-	pgbase = dreq->user_addr & ~PAGE_MASK;
+	pgbase = user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_read_data *data;
 		size_t bytes;
 
+		result = -ENOMEM;
+		data = nfs_readdata_alloc(rpages);
+		if (unlikely(!data))
+			break;
+
 		bytes = rsize;
 		if (count < rsize)
 			bytes = count;
 
-		BUG_ON(list_empty(list));
-		data = list_entry(list->next, struct nfs_read_data, pages);
-		list_del_init(&data->pages);
+		data->npages = nfs_direct_count_pages(user_addr, bytes);
+		down_read(&current->mm->mmap_sem);
+		result = get_user_pages(current, current->mm, user_addr,
+					data->npages, 1, 0, data->pagevec, NULL);
+		up_read(&current->mm->mmap_sem);
+		if (unlikely(result < data->npages)) {
+			if (result > 0)
+				nfs_direct_release_pages(data->pagevec, result);
+			nfs_readdata_release(data);
+			break;
+		}
+
+		get_dreq(dreq);
 
+		data->req = (struct nfs_page *) dreq;
 		data->inode = inode;
 		data->cred = ctx->cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
-		data->args.pages = &pages[curpage];
+		data->args.pages = data->pagevec;
 		data->args.count = bytes;
 		data->res.fattr = &data->fattr;
 		data->res.eof = 0;
@@ -373,33 +341,35 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
 				bytes,
 				(unsigned long long)data->args.offset);
 
+		started += bytes;
+		user_addr += bytes;
 		pos += bytes;
 		pgbase += bytes;
-		curpage += pgbase >> PAGE_SHIFT;
 		pgbase &= ~PAGE_MASK;
 
 		count -= bytes;
 	} while (count != 0);
-	BUG_ON(!list_empty(list));
+
+	if (put_dreq(dreq))
+		nfs_direct_complete(dreq);
+
+	if (started)
+		return 0;
+	return result < 0 ? (ssize_t) result : -EFAULT;
 }
 
-static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
+static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
 {
-	ssize_t result;
+	ssize_t result = 0;
 	sigset_t oldset;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
 
-	dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
+	dreq = nfs_direct_req_alloc();
 	if (!dreq)
 		return -ENOMEM;
 
-	dreq->user_addr = user_addr;
-	dreq->user_count = count;
-	dreq->pos = pos;
-	dreq->pages = pages;
-	dreq->npages = nr_pages;
 	dreq->inode = inode;
 	dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
 	if (!is_sync_kiocb(iocb))
@@ -407,8 +377,9 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 
 	nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
 	rpc_clnt_sigmask(clnt, &oldset);
-	nfs_direct_read_schedule(dreq);
-	result = nfs_direct_wait(dreq);
+	result = nfs_direct_read_schedule(dreq, user_addr, count, pos);
+	if (!result)
+		result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
 	return result;
@@ -416,10 +387,10 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 
 static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 {
-	list_splice_init(&dreq->rewrite_list, &dreq->list);
-	while (!list_empty(&dreq->list)) {
-		struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
+	while (!list_empty(&dreq->rewrite_list)) {
+		struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
 		list_del(&data->pages);
+		nfs_direct_release_pages(data->pagevec, data->npages);
 		nfs_writedata_release(data);
 	}
 }
@@ -427,14 +398,51 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 {
-	struct list_head *pos;
+	struct inode *inode = dreq->inode;
+	struct list_head *p;
+	struct nfs_write_data *data;
 
-	list_splice_init(&dreq->rewrite_list, &dreq->list);
-	list_for_each(pos, &dreq->list)
-		dreq->outstanding++;
 	dreq->count = 0;
+	get_dreq(dreq);
+
+	list_for_each(p, &dreq->rewrite_list) {
+		data = list_entry(p, struct nfs_write_data, pages);
+
+		get_dreq(dreq);
+
+		/*
+		 * Reset data->res.
+		 */
+		nfs_fattr_init(&data->fattr);
+		data->res.count = data->args.count;
+		memset(&data->verf, 0, sizeof(data->verf));
+
+		/*
+		 * Reuse data->task; data->args should not have changed
+		 * since the original request was sent.
+		 */
+		rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+				&nfs_write_direct_ops, data);
+		NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE);
+
+		data->task.tk_priority = RPC_PRIORITY_NORMAL;
+		data->task.tk_cookie = (unsigned long) inode;
+
+		/*
+		 * We're called via an RPC callback, so BKL is already held.
+		 */
+		rpc_execute(&data->task);
+
+		dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+				data->task.tk_pid,
+				inode->i_sb->s_id,
+				(long long)NFS_FILEID(inode),
+				data->args.count,
+				(unsigned long long)data->args.offset);
+	}
 
-	nfs_direct_write_schedule(dreq, FLUSH_STABLE);
+	if (put_dreq(dreq))
+		nfs_direct_write_complete(dreq, inode);
 }
 
 static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
@@ -471,8 +479,8 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	data->cred = dreq->ctx->cred;
 
 	data->args.fh = NFS_FH(data->inode);
-	data->args.offset = dreq->pos;
-	data->args.count = dreq->user_count;
+	data->args.offset = 0;
+	data->args.count = 0;
 	data->res.count = 0;
 	data->res.fattr = &data->fattr;
 	data->res.verf = &data->verf;
@@ -534,47 +542,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 }
 #endif
 
-static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
-{
-	struct list_head *list;
-	struct nfs_direct_req *dreq;
-	unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-	dreq = nfs_direct_req_alloc();
-	if (!dreq)
-		return NULL;
-
-	list = &dreq->list;
-	for(;;) {
-		struct nfs_write_data *data = nfs_writedata_alloc(wpages);
-
-		if (unlikely(!data)) {
-			while (!list_empty(list)) {
-				data = list_entry(list->next,
-						  struct nfs_write_data, pages);
-				list_del(&data->pages);
-				nfs_writedata_free(data);
-			}
-			kref_put(&dreq->kref, nfs_direct_req_release);
-			return NULL;
-		}
-
-		INIT_LIST_HEAD(&data->pages);
-		list_add(&data->pages, list);
-
-		data->req = (struct nfs_page *) dreq;
-		dreq->outstanding++;
-		if (nbytes <= wsize)
-			break;
-		nbytes -= wsize;
-	}
-
-	nfs_alloc_commit_data(dreq);
-
-	kref_get(&dreq->kref);
-	return dreq;
-}
-
 static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
@@ -604,8 +571,6 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 				}
 		}
 	}
-	/* In case we have to resend */
-	data->args.stable = NFS_FILE_SYNC;
 
 	spin_unlock(&dreq->lock);
 }
@@ -619,14 +584,8 @@ static void nfs_direct_write_release(void *calldata)
 	struct nfs_write_data *data = calldata;
 	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
 
-	spin_lock(&dreq->lock);
-	if (--dreq->outstanding) {
-		spin_unlock(&dreq->lock);
-		return;
-	}
-	spin_unlock(&dreq->lock);
-
-	nfs_direct_write_complete(dreq, data->inode);
+	if (put_dreq(dreq))
+		nfs_direct_write_complete(dreq, data->inode);
 }
 
 static const struct rpc_call_ops nfs_write_direct_ops = {
@@ -635,41 +594,62 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
 };
 
 /*
- * For each nfs_write_data struct that was allocated on the list, dispatch
- * an NFS WRITE operation
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
+ * bail and stop sending more writes.  Write length accounting is
+ * handled automatically by nfs_direct_write_result().  Otherwise, if
+ * no requests have been sent, just return an error.
  */
-static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
+static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
-	struct list_head *list = &dreq->list;
-	struct page **pages = dreq->pages;
-	size_t count = dreq->user_count;
-	loff_t pos = dreq->pos;
 	size_t wsize = NFS_SERVER(inode)->wsize;
-	unsigned int curpage, pgbase;
+	unsigned int wpages = nfs_max_pages(wsize);
+	unsigned int pgbase;
+	int result;
+	ssize_t started = 0;
 
-	curpage = 0;
-	pgbase = dreq->user_addr & ~PAGE_MASK;
+	get_dreq(dreq);
+
+	pgbase = user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_write_data *data;
 		size_t bytes;
 
+		result = -ENOMEM;
+		data = nfs_writedata_alloc(wpages);
+		if (unlikely(!data))
+			break;
+
 		bytes = wsize;
 		if (count < wsize)
 			bytes = count;
 
-		BUG_ON(list_empty(list));
-		data = list_entry(list->next, struct nfs_write_data, pages);
+		data->npages = nfs_direct_count_pages(user_addr, bytes);
+		down_read(&current->mm->mmap_sem);
+		result = get_user_pages(current, current->mm, user_addr,
+					data->npages, 0, 0, data->pagevec, NULL);
+		up_read(&current->mm->mmap_sem);
+		if (unlikely(result < data->npages)) {
+			if (result > 0)
+				nfs_direct_release_pages(data->pagevec, result);
+			nfs_writedata_release(data);
+			break;
+		}
+
+		get_dreq(dreq);
+
 		list_move_tail(&data->pages, &dreq->rewrite_list);
 
+		data->req = (struct nfs_page *) dreq;
 		data->inode = inode;
 		data->cred = ctx->cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
-		data->args.pages = &pages[curpage];
+		data->args.pages = data->pagevec;
 		data->args.count = bytes;
 		data->res.fattr = &data->fattr;
 		data->res.count = bytes;
@@ -693,19 +673,26 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
 				bytes,
 				(unsigned long long)data->args.offset);
 
+		started += bytes;
+		user_addr += bytes;
 		pos += bytes;
 		pgbase += bytes;
-		curpage += pgbase >> PAGE_SHIFT;
 		pgbase &= ~PAGE_MASK;
 
 		count -= bytes;
 	} while (count != 0);
-	BUG_ON(!list_empty(list));
+
+	if (put_dreq(dreq))
+		nfs_direct_write_complete(dreq, inode);
+
+	if (started)
+		return 0;
+	return result < 0 ? (ssize_t) result : -EFAULT;
 }
 
-static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
+static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
 {
-	ssize_t result;
+	ssize_t result = 0;
 	sigset_t oldset;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
@@ -713,17 +700,14 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	size_t wsize = NFS_SERVER(inode)->wsize;
 	int sync = 0;
 
-	dreq = nfs_direct_write_alloc(count, wsize);
+	dreq = nfs_direct_req_alloc();
 	if (!dreq)
 		return -ENOMEM;
+	nfs_alloc_commit_data(dreq);
+
 	if (dreq->commit_data == NULL || count < wsize)
 		sync = FLUSH_STABLE;
 
-	dreq->user_addr = user_addr;
-	dreq->user_count = count;
-	dreq->pos = pos;
-	dreq->pages = pages;
-	dreq->npages = nr_pages;
 	dreq->inode = inode;
 	dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
 	if (!is_sync_kiocb(iocb))
@@ -734,8 +718,9 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	nfs_begin_data_update(inode);
 
 	rpc_clnt_sigmask(clnt, &oldset);
-	nfs_direct_write_schedule(dreq, sync);
-	result = nfs_direct_wait(dreq);
+	result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync);
+	if (!result)
+		result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
 	return result;
@@ -765,8 +750,6 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
 {
 	ssize_t retval = -EINVAL;
-	int page_count;
-	struct page **pages;
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 
@@ -788,14 +771,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count,
 	if (retval)
 		goto out;
 
-	retval = nfs_get_user_pages(READ, (unsigned long) buf,
-						count, &pages);
-	if (retval < 0)
-		goto out;
-	page_count = retval;
-
-	retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
-						pages, page_count);
+	retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos);
 	if (retval > 0)
 		iocb->ki_pos = pos + retval;
 
@@ -831,8 +807,6 @@ out:
 ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
 {
 	ssize_t retval;
-	int page_count;
-	struct page **pages;
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 
@@ -860,14 +834,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t
 	if (retval)
 		goto out;
 
-	retval = nfs_get_user_pages(WRITE, (unsigned long) buf,
-						count, &pages);
-	if (retval < 0)
-		goto out;
-	page_count = retval;
-
-	retval = nfs_direct_write(iocb, (unsigned long) buf, count,
-					pos, pages, page_count);
+	retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos);
 
 	/*
 	 * XXX: nfs_end_data_update() already ensures this file's

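Note: the rewritten direct-I/O engine drops the pre-allocated request list
and the lock-protected "outstanding" counter in favor of an atomic
reference count used in the classic bias-reference style: the scheduling
loop holds one extra reference while it is still dispatching, so the
completion path cannot fire early however quickly individual RPCs finish.
The shape of it, sketched:

        get_dreq(dreq);                 /* bias ref: still dispatching */
        do {
                /* allocate nfs_read/write_data, pin the user pages ... */
                get_dreq(dreq);         /* one ref per in-flight RPC */
                rpc_execute(&data->task);
        } while (count != 0);
        if (put_dreq(dreq))             /* drop the bias ref */
                nfs_direct_complete(dreq);      /* last one out completes */
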
+ 41 - 33
fs/nfs/nfs4proc.c

@@ -3144,9 +3144,6 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
 		default:
 			BUG();
 	}
-	if (res < 0)
-		printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n",
-				__FUNCTION__);
 	return res;
 }
 
@@ -3258,8 +3255,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 		return ERR_PTR(-ENOMEM);
 	}
 
-	/* Unlock _before_ we do the RPC call */
-	do_vfs_lock(fl->fl_file, fl);
 	return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
 }
 
@@ -3270,30 +3265,28 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
 	struct rpc_task *task;
 	int status = 0;
 
-	/* Is this a delegated lock? */
-	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
-		goto out_unlock;
-	/* Is this open_owner holding any locks on the server? */
-	if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
-		goto out_unlock;
-
 	status = nfs4_set_lock_state(state, request);
+	/* Unlock _before_ we do the RPC call */
+	request->fl_flags |= FL_EXISTS;
+	if (do_vfs_lock(request->fl_file, request) == -ENOENT)
+		goto out;
 	if (status != 0)
-		goto out_unlock;
+		goto out;
+	/* Is this a delegated lock? */
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
+		goto out;
 	lsp = request->fl_u.nfs4_fl.owner;
-	status = -ENOMEM;
 	seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+	status = -ENOMEM;
 	if (seqid == NULL)
-		goto out_unlock;
+		goto out;
 	task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid);
 	status = PTR_ERR(task);
 	if (IS_ERR(task))
-		goto out_unlock;
+		goto out;
 	status = nfs4_wait_for_completion_rpc_task(task);
 	rpc_release_task(task);
-	return status;
-out_unlock:
-	do_vfs_lock(request->fl_file, request);
+out:
 	return status;
 }
 
@@ -3461,10 +3454,10 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
 	struct nfs4_exception exception = { };
 	int err;
 
-	/* Cache the lock if possible... */
-	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
-		return 0;
 	do {
+		/* Cache the lock if possible... */
+		if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
+			return 0;
 		err = _nfs4_do_setlk(state, F_SETLK, request, 1);
 		if (err != -NFS4ERR_DELAY)
 			break;
@@ -3483,6 +3476,8 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
 	if (err != 0)
 		return err;
 	do {
+		if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
+			return 0;
 		err = _nfs4_do_setlk(state, F_SETLK, request, 0);
 		if (err != -NFS4ERR_DELAY)
 			break;
@@ -3494,29 +3489,42 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
 	struct nfs4_client *clp = state->owner->so_client;
+	unsigned char fl_flags = request->fl_flags;
 	int status;
 
 	/* Is this a delegated open? */
-	if (NFS_I(state->inode)->delegation_state != 0) {
-		/* Yes: cache locks! */
-		status = do_vfs_lock(request->fl_file, request);
-		/* ...but avoid races with delegation recall... */
-		if (status < 0 || test_bit(NFS_DELEGATED_STATE, &state->flags))
-			return status;
-	}
-	down_read(&clp->cl_sem);
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
 		goto out;
+	request->fl_flags |= FL_ACCESS;
+	status = do_vfs_lock(request->fl_file, request);
+	if (status < 0)
+		goto out;
+	down_read(&clp->cl_sem);
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+		struct nfs_inode *nfsi = NFS_I(state->inode);
+		/* Yes: cache locks! */
+		down_read(&nfsi->rwsem);
+		/* ...but avoid races with delegation recall... */
+		if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+			request->fl_flags = fl_flags & ~FL_SLEEP;
+			status = do_vfs_lock(request->fl_file, request);
+			up_read(&nfsi->rwsem);
+			goto out_unlock;
+		}
+		up_read(&nfsi->rwsem);
+	}
 	status = _nfs4_do_setlk(state, cmd, request, 0);
 	if (status != 0)
-		goto out;
+		goto out_unlock;
 	/* Note: we always want to sleep here! */
-	request->fl_flags |= FL_SLEEP;
+	request->fl_flags = fl_flags | FL_SLEEP;
 	if (do_vfs_lock(request->fl_file, request) < 0)
 		printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__);
-out:
+out_unlock:
 	up_read(&clp->cl_sem);
+out:
+	request->fl_flags = fl_flags;
 	return status;
 }
 

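Note: _nfs4_proc_setlk() now probes locally with FL_ACCESS first, matching
the lockd change, and closes a race with delegation recall by re-testing
NFS_DELEGATED_STATE under nfsi->rwsem; the unlocked test is only an
optimistic hint. The double-checked section, condensed from the hunk:

        if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
                down_read(&nfsi->rwsem);
                /* a recall may have raced in; re-test under the lock */
                if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
                        /* still delegated: cache the lock locally */
                        status = do_vfs_lock(request->fl_file, request);
                        up_read(&nfsi->rwsem);
                        goto out_unlock;
                }
                up_read(&nfsi->rwsem);
        }
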
+ 17 - 3
fs/nfs/write.c

@@ -578,7 +578,7 @@ static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, un
 	return ret;
 }
 
-static void nfs_cancel_requests(struct list_head *head)
+static void nfs_cancel_dirty_list(struct list_head *head)
 {
 	struct nfs_page *req;
 	while(!list_empty(head)) {
@@ -589,6 +589,19 @@ static void nfs_cancel_requests(struct list_head *head)
 	}
 }
 
+static void nfs_cancel_commit_list(struct list_head *head)
+{
+	struct nfs_page *req;
+
+	while(!list_empty(head)) {
+		req = nfs_list_entry(head->next);
+		nfs_list_remove_request(req);
+		nfs_inode_remove_request(req);
+		nfs_clear_page_writeback(req);
+		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+	}
+}
+
 /*
  * nfs_scan_dirty - Scan an inode for dirty requests
  * @inode: NFS inode to scan
@@ -1381,6 +1394,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 		nfs_list_remove_request(req);
 		nfs_mark_request_commit(req);
 		nfs_clear_page_writeback(req);
+		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 	}
 	return -ENOMEM;
 }
@@ -1499,7 +1513,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
 		if (pages != 0) {
 			spin_unlock(&nfsi->req_lock);
 			if (how & FLUSH_INVALIDATE)
-				nfs_cancel_requests(&head);
+				nfs_cancel_dirty_list(&head);
 			else
 				ret = nfs_flush_list(inode, &head, pages, how);
 			spin_lock(&nfsi->req_lock);
@@ -1512,7 +1526,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
 			break;
 		if (how & FLUSH_INVALIDATE) {
 			spin_unlock(&nfsi->req_lock);
-			nfs_cancel_requests(&head);
+			nfs_cancel_commit_list(&head);
 			spin_lock(&nfsi->req_lock);
 			continue;
 		}

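Note: splitting nfs_cancel_requests() in two is an accounting fix as much
as a rename: requests sitting on the commit list have been counted as
unstable pages, so cancelling them must also drop NR_UNSTABLE_NFS, which
the old shared helper did not do. The commit-side teardown, condensed:

        nfs_list_remove_request(req);
        nfs_inode_remove_request(req);
        nfs_clear_page_writeback(req);
        dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
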
+ 0 - 8
include/asm-arm/arch-at91rm9200/irqs.h

@@ -39,12 +39,4 @@
  */
 #define	NR_IRQS		(NR_AIC_IRQS + (4 * 32))
 
-
-#ifndef __ASSEMBLY__
-/*
- * Initialize the IRQ controller.
- */
-extern void at91rm9200_init_irq(unsigned int priority[]);
-#endif
-
 #endif

+ 18 - 0
include/asm-powerpc/cputime.h

@@ -43,6 +43,7 @@ typedef u64 cputime64_t;
 
 #define cputime64_zero			((cputime64_t)0)
 #define cputime64_add(__a, __b)		((__a) + (__b))
+#define cputime64_sub(__a, __b)		((__a) - (__b))
 #define cputime_to_cputime64(__ct)	(__ct)
 
 #ifdef __KERNEL__
@@ -74,6 +75,23 @@ static inline cputime_t jiffies_to_cputime(const unsigned long jif)
 	return ct;
 }
 
+static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
+{
+	cputime_t ct;
+	u64 sec;
+
+	/* have to be a little careful about overflow */
+	ct = jif % HZ;
+	sec = jif / HZ;
+	if (ct) {
+		ct *= tb_ticks_per_sec;
+		do_div(ct, HZ);
+	}
+	if (sec)
+		ct += (cputime_t) sec * tb_ticks_per_sec;
+	return ct;
+}
+
 static inline u64 cputime64_to_jiffies64(const cputime_t ct)
 {
 	return mulhdu(ct, __cputime_jiffies_factor);

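Note: the "careful about overflow" comment is the whole reason for the
shape of jiffies64_to_cputime64(). The obvious one-liner

        ct = jif * tb_ticks_per_sec / HZ;       /* wraps u64 too early */

multiplies first: for a timebase in the hundreds of MHz the product
exceeds 2^64 after very roughly a year's worth of jiffies at HZ=1000.
Splitting jif into jif / HZ whole seconds plus a jif % HZ remainder keeps
every intermediate product in range, at the cost of one extra multiply
and a do_div().
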
+ 14 - 0
include/asm-sparc64/dma-mapping.h

@@ -160,6 +160,20 @@ static inline void dma_free_coherent(struct device *dev, size_t size,
 	BUG();
 }
 
+static inline void
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+			enum dma_data_direction direction)
+{
+	BUG();
+}
+
+static inline void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+			   enum dma_data_direction direction)
+{
+	BUG();
+}
+
 #endif /* PCI */
 
 

+ 1 - 0
include/linux/fs.h

@@ -716,6 +716,7 @@ extern spinlock_t files_lock;
 #define FL_POSIX	1
 #define FL_FLOCK	2
 #define FL_ACCESS	8	/* not trying to lock, just looking */
+#define FL_EXISTS	16	/* when unlocking, test for existence */
 #define FL_LEASE	32	/* lease held on this file */
 #define FL_CLOSE	64	/* unlock on close */
 #define FL_SLEEP	128	/* A blocking lock */

+ 64 - 21
include/linux/libata.h

@@ -131,6 +131,7 @@ enum {
 	ATA_DFLAG_CFG_MASK	= (1 << 8) - 1,
 
 	ATA_DFLAG_PIO		= (1 << 8), /* device currently in PIO mode */
+	ATA_DFLAG_SUSPENDED	= (1 << 9), /* device suspended */
 	ATA_DFLAG_INIT_MASK	= (1 << 16) - 1,
 
 	ATA_DFLAG_DETACH	= (1 << 16),
@@ -160,22 +161,28 @@ enum {
 	ATA_FLAG_HRST_TO_RESUME	= (1 << 11), /* hardreset to resume phy */
 	ATA_FLAG_SKIP_D2H_BSY	= (1 << 12), /* can't wait for the first D2H
 					      * Register FIS clearing BSY */
-
 	ATA_FLAG_DEBUGMSG	= (1 << 13),
-	ATA_FLAG_FLUSH_PORT_TASK = (1 << 14), /* flush port task */
 
-	ATA_FLAG_EH_PENDING	= (1 << 15), /* EH pending */
-	ATA_FLAG_EH_IN_PROGRESS	= (1 << 16), /* EH in progress */
-	ATA_FLAG_FROZEN		= (1 << 17), /* port is frozen */
-	ATA_FLAG_RECOVERED	= (1 << 18), /* recovery action performed */
-	ATA_FLAG_LOADING	= (1 << 19), /* boot/loading probe */
-	ATA_FLAG_UNLOADING	= (1 << 20), /* module is unloading */
-	ATA_FLAG_SCSI_HOTPLUG	= (1 << 21), /* SCSI hotplug scheduled */
+	/* The following flag belongs to ap->pflags but is kept in
+	 * ap->flags because it's referenced in many LLDs and will be
+	 * removed in not-too-distant future.
+	 */
+	ATA_FLAG_DISABLED	= (1 << 23), /* port is disabled, ignore it */
+
+	/* bits 24:31 of ap->flags are reserved for LLD specific flags */
 
-	ATA_FLAG_DISABLED	= (1 << 22), /* port is disabled, ignore it */
-	ATA_FLAG_SUSPENDED	= (1 << 23), /* port is suspended (power) */
+	/* struct ata_port pflags */
+	ATA_PFLAG_EH_PENDING	= (1 << 0), /* EH pending */
+	ATA_PFLAG_EH_IN_PROGRESS = (1 << 1), /* EH in progress */
+	ATA_PFLAG_FROZEN	= (1 << 2), /* port is frozen */
+	ATA_PFLAG_RECOVERED	= (1 << 3), /* recovery action performed */
+	ATA_PFLAG_LOADING	= (1 << 4), /* boot/loading probe */
+	ATA_PFLAG_UNLOADING	= (1 << 5), /* module is unloading */
+	ATA_PFLAG_SCSI_HOTPLUG	= (1 << 6), /* SCSI hotplug scheduled */
 
-	/* bits 24:31 of ap->flags are reserved for LLDD specific flags */
+	ATA_PFLAG_FLUSH_PORT_TASK = (1 << 16), /* flush port task */
+	ATA_PFLAG_SUSPENDED	= (1 << 17), /* port is suspended (power) */
+	ATA_PFLAG_PM_PENDING	= (1 << 18), /* PM operation pending */
 
 	/* struct ata_queued_cmd flags */
 	ATA_QCFLAG_ACTIVE	= (1 << 0), /* cmd not yet ack'd to scsi lyer */
@@ -248,12 +255,19 @@ enum {
 	ATA_EH_REVALIDATE	= (1 << 0),
 	ATA_EH_SOFTRESET	= (1 << 1),
 	ATA_EH_HARDRESET	= (1 << 2),
+	ATA_EH_SUSPEND		= (1 << 3),
+	ATA_EH_RESUME		= (1 << 4),
+	ATA_EH_PM_FREEZE	= (1 << 5),
 
 	ATA_EH_RESET_MASK	= ATA_EH_SOFTRESET | ATA_EH_HARDRESET,
-	ATA_EH_PERDEV_MASK	= ATA_EH_REVALIDATE,
+	ATA_EH_PERDEV_MASK	= ATA_EH_REVALIDATE | ATA_EH_SUSPEND |
+				  ATA_EH_RESUME | ATA_EH_PM_FREEZE,
 
 	/* ata_eh_info->flags */
 	ATA_EHI_HOTPLUGGED	= (1 << 0),  /* could have been hotplugged */
+	ATA_EHI_RESUME_LINK	= (1 << 1),  /* need to resume link */
+	ATA_EHI_NO_AUTOPSY	= (1 << 2),  /* no autopsy */
+	ATA_EHI_QUIET		= (1 << 3),  /* be quiet */
 
 	ATA_EHI_DID_RESET	= (1 << 16), /* already reset this port */
 
@@ -486,6 +500,7 @@ struct ata_port {
 	const struct ata_port_operations *ops;
 	spinlock_t		*lock;
 	unsigned long		flags;	/* ATA_FLAG_xxx */
+	unsigned int		pflags; /* ATA_PFLAG_xxx */
 	unsigned int		id;	/* unique id req'd by scsi midlyr */
 	unsigned int		port_no; /* unique port #; from zero */
 	unsigned int		hard_port_no;	/* hardware port #; from zero */
@@ -535,6 +550,9 @@ struct ata_port {
 	struct list_head	eh_done_q;
 	wait_queue_head_t	eh_wait_q;
 
+	pm_message_t		pm_mesg;
+	int			*pm_result;
+
 	void			*private_data;
 
 	u8			sector_buf[ATA_SECT_SIZE]; /* owned by EH */
@@ -589,6 +607,9 @@ struct ata_port_operations {
 	void (*scr_write) (struct ata_port *ap, unsigned int sc_reg,
 			   u32 val);
 
+	int (*port_suspend) (struct ata_port *ap, pm_message_t mesg);
+	int (*port_resume) (struct ata_port *ap);
+
 	int (*port_start) (struct ata_port *ap);
 	void (*port_stop) (struct ata_port *ap);
 
@@ -622,9 +643,18 @@ struct ata_timing {
 
 #define FIT(v,vmin,vmax)	max_t(short,min_t(short,v,vmax),vmin)
 
-extern const unsigned long sata_deb_timing_boot[];
-extern const unsigned long sata_deb_timing_eh[];
-extern const unsigned long sata_deb_timing_before_fsrst[];
+extern const unsigned long sata_deb_timing_normal[];
+extern const unsigned long sata_deb_timing_hotplug[];
+extern const unsigned long sata_deb_timing_long[];
+
+static inline const unsigned long *
+sata_ehc_deb_timing(struct ata_eh_context *ehc)
+{
+	if (ehc->i.flags & ATA_EHI_HOTPLUGGED)
+		return sata_deb_timing_hotplug;
+	else
+		return sata_deb_timing_normal;
+}
 
 extern void ata_port_probe(struct ata_port *);
 extern void __sata_phy_reset(struct ata_port *ap);
@@ -644,6 +674,8 @@ extern void ata_std_ports(struct ata_ioports *ioaddr);
 extern int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info,
 			     unsigned int n_ports);
 extern void ata_pci_remove_one (struct pci_dev *pdev);
+extern void ata_pci_device_do_suspend(struct pci_dev *pdev, pm_message_t state);
+extern void ata_pci_device_do_resume(struct pci_dev *pdev);
 extern int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state);
 extern int ata_pci_device_resume(struct pci_dev *pdev);
 extern int ata_pci_clear_simplex(struct pci_dev *pdev);
@@ -664,8 +696,9 @@ extern int ata_port_online(struct ata_port *ap);
 extern int ata_port_offline(struct ata_port *ap);
 extern int ata_scsi_device_resume(struct scsi_device *);
 extern int ata_scsi_device_suspend(struct scsi_device *, pm_message_t state);
-extern int ata_device_resume(struct ata_device *);
-extern int ata_device_suspend(struct ata_device *, pm_message_t state);
+extern int ata_host_set_suspend(struct ata_host_set *host_set,
+				pm_message_t mesg);
+extern void ata_host_set_resume(struct ata_host_set *host_set);
 extern int ata_ratelimit(void);
 extern unsigned int ata_busy_sleep(struct ata_port *ap,
 				   unsigned long timeout_pat,
@@ -825,19 +858,24 @@ extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
 	(ehi)->desc_len = 0; \
 } while (0)
 
-static inline void ata_ehi_hotplugged(struct ata_eh_info *ehi)
+static inline void __ata_ehi_hotplugged(struct ata_eh_info *ehi)
 {
 	if (ehi->flags & ATA_EHI_HOTPLUGGED)
 		return;
 
-	ehi->flags |= ATA_EHI_HOTPLUGGED;
+	ehi->flags |= ATA_EHI_HOTPLUGGED | ATA_EHI_RESUME_LINK;
 	ehi->hotplug_timestamp = jiffies;
 
-	ehi->err_mask |= AC_ERR_ATA_BUS;
 	ehi->action |= ATA_EH_SOFTRESET;
 	ehi->probe_mask |= (1 << ATA_MAX_DEVICES) - 1;
 }
 
+static inline void ata_ehi_hotplugged(struct ata_eh_info *ehi)
+{
+	__ata_ehi_hotplugged(ehi);
+	ehi->err_mask |= AC_ERR_ATA_BUS;
+}
+
 /*
  * qc helpers
  */
@@ -921,6 +959,11 @@ static inline unsigned int ata_dev_absent(const struct ata_device *dev)
 	return ata_class_absent(dev->class);
 }
 
+static inline unsigned int ata_dev_ready(const struct ata_device *dev)
+{
+	return ata_dev_enabled(dev) && !(dev->flags & ATA_DFLAG_SUSPENDED);
+}
+
 /*
  * port helpers
  */

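Note: two independent API changes land in this header. First, volatile
port state moves out of ap->flags (now essentially static, set at probe)
into the new ap->pflags, which is why the sil and sil24 hunks above test
ATA_PFLAG_FROZEN and ATA_PFLAG_UNLOADING where they used to test
ATA_FLAG_*. Second, the SATA debounce tables are renamed and gain a
context-sensitive selector; a reset method would typically pick its
timing like this (a sketch using the names added above):

        const unsigned long *timing = sata_ehc_deb_timing(&ap->eh_context);
        int rc;

        /* hotplug events get the longer, more conservative debounce */
        rc = sata_phy_debounce(ap, timing);
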
+ 2 - 0
include/linux/nfs_xdr.h

@@ -729,6 +729,7 @@ struct nfs_read_data {
 	struct list_head	pages;	/* Coalesced read requests */
 	struct nfs_page		*req;	/* multi ops per nfs_page */
 	struct page		**pagevec;
+	unsigned int		npages;	/* active pages in pagevec */
 	struct nfs_readargs args;
 	struct nfs_readres  res;
 #ifdef CONFIG_NFS_V4
@@ -747,6 +748,7 @@ struct nfs_write_data {
 	struct list_head	pages;		/* Coalesced requests we wish to flush */
 	struct nfs_page		*req;		/* multi ops per nfs_page */
 	struct page		**pagevec;
+	unsigned int		npages;		/* active pages in pagevec */
 	struct nfs_writeargs	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
 #ifdef CONFIG_NFS_V4

+ 7 - 0
include/linux/pci_ids.h

@@ -2019,6 +2019,13 @@
 #define PCI_VENDOR_ID_TDI               0x192E
 #define PCI_DEVICE_ID_TDI_EHCI          0x0101
 
+#define PCI_VENDOR_ID_JMICRON		0x197B
+#define PCI_DEVICE_ID_JMICRON_JMB360	0x2360
+#define PCI_DEVICE_ID_JMICRON_JMB361	0x2361
+#define PCI_DEVICE_ID_JMICRON_JMB363	0x2363
+#define PCI_DEVICE_ID_JMICRON_JMB365	0x2365
+#define PCI_DEVICE_ID_JMICRON_JMB366	0x2366
+#define PCI_DEVICE_ID_JMICRON_JMB368	0x2368
 
 #define PCI_VENDOR_ID_TEKRAM		0x1de1
 #define PCI_DEVICE_ID_TEKRAM_DC290	0xdc29

+ 10 - 8
net/sched/act_api.c

@@ -250,15 +250,17 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
 		RTA_PUT(skb, a->order, 0, NULL);
 		err = tcf_action_dump_1(skb, a, bind, ref);
 		if (err < 0)
-			goto rtattr_failure;
+			goto errout;
 		r->rta_len = skb->tail - (u8*)r;
 	}
 
 	return 0;
 
 rtattr_failure:
+	err = -EINVAL;
+errout:
 	skb_trim(skb, b - skb->data);
-	return -err;
+	return err;
 }
 
 struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
@@ -305,6 +307,7 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
 			goto err_mod;
 		}
 #endif
+		*err = -ENOENT;
 		goto err_out;
 	}
 
@@ -776,7 +779,7 @@ replay:
 	return ret;
 }
 
-static char *
+static struct rtattr *
 find_dump_kind(struct nlmsghdr *n)
 {
 	struct rtattr *tb1, *tb2[TCA_ACT_MAX+1];
@@ -804,7 +807,7 @@ find_dump_kind(struct nlmsghdr *n)
 		return NULL;
 	kind = tb2[TCA_ACT_KIND-1];
 
-	return (char *) RTA_DATA(kind);
+	return kind;
 }
 
 static int
@@ -817,16 +820,15 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct tc_action a;
 	int ret = 0;
 	struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
-	char *kind = find_dump_kind(cb->nlh);
+	struct rtattr *kind = find_dump_kind(cb->nlh);
 
 	if (kind == NULL) {
 		printk("tc_dump_action: action bad kind\n");
 		return 0;
 	}
 
-	a_o = tc_lookup_action_n(kind);
+	a_o = tc_lookup_action(kind);
 	if (a_o == NULL) {
-		printk("failed to find %s\n", kind);
 		return 0;
 	}
 
@@ -834,7 +836,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	a.ops = a_o;
 
 	if (a_o->walk == NULL) {
-		printk("tc_dump_action: %s !capable of dumping table\n", kind);
+		printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind);
 		goto rtattr_failure;
 	}
 

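Note: the tcf_action_dump() hunk fixes a real sign bug rather than style:
err already holds a negative errno when tcf_action_dump_1() fails, so the
old "return -err" flipped it positive and callers saw nonsense. After the
change, condensed:

        rtattr_failure:                 /* RTA_PUT ran out of skb room */
                err = -EINVAL;
        errout:                         /* err is already a negative errno */
                skb_trim(skb, b - skb->data);
                return err;

The find_dump_kind() change follows the same defensive theme: returning
the rtattr itself instead of RTA_DATA(kind) lets tc_dump_action() use the
length-aware tc_lookup_action() and avoids printk'ing a possibly
unterminated attribute string.
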
+ 1 - 2
net/sunrpc/xdr.c

@@ -191,7 +191,6 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
 	do {
 		/* Are any pointers crossing a page boundary? */
 		if (pgto_base == 0) {
-			flush_dcache_page(*pgto);
 			pgto_base = PAGE_CACHE_SIZE;
 			pgto--;
 		}
@@ -211,11 +210,11 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
 		vto = kmap_atomic(*pgto, KM_USER0);
 		vfrom = kmap_atomic(*pgfrom, KM_USER1);
 		memmove(vto + pgto_base, vfrom + pgfrom_base, copy);
+		flush_dcache_page(*pgto);
 		kunmap_atomic(vfrom, KM_USER1);
 		kunmap_atomic(vto, KM_USER0);
 
 	} while ((len -= copy) != 0);
-	flush_dcache_page(*pgto);
 }
 
 /*