瀏覽代碼

Merge rsync://rsync.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6

Greg KH 20 年之前
父節點
當前提交
8644d2a42b
共有 100 個文件被更改,包括 4305 次插入2746 次删除
  1. 18 6
      Documentation/Changes
  2. 176 0
      Documentation/block/ioprio.txt
  3. 1 0
      Documentation/cciss.txt
  4. 1 1
      Documentation/kernel-parameters.txt
  5. 64 0
      Documentation/pcmcia/devicetable.txt
  6. 51 0
      Documentation/pcmcia/driver-changes.txt
  7. 1 1
      MAINTAINERS
  8. 4 1
      arch/arm/kernel/process.c
  9. 7 3
      arch/arm/kernel/time.c
  10. 1 0
      arch/arm/mach-aaec2000/Makefile.boot
  11. 4 4
      arch/arm/mach-omap/usb.c
  12. 65 6
      arch/arm/mm/init.c
  13. 7 80
      arch/arm/mm/mm-armv.c
  14. 66 3
      arch/arm/tools/mach-types
  15. 70 63
      arch/i386/kernel/kprobes.c
  16. 29 0
      arch/i386/kernel/process.c
  17. 2 0
      arch/i386/kernel/syscall_table.S
  18. 2 2
      arch/ia64/kernel/entry.S
  19. 124 4
      arch/ia64/kernel/kprobes.c
  20. 16 0
      arch/ia64/kernel/process.c
  21. 6 1
      arch/ia64/kernel/vmlinux.lds.S
  22. 1 1
      arch/mips/kernel/signal.c
  23. 2 0
      arch/ppc/kernel/misc.S
  24. 12 2
      arch/ppc/mm/init.c
  25. 2 2
      arch/ppc/platforms/pmac_sleep.S
  26. 4 4
      arch/ppc/platforms/pmac_time.c
  27. 1 0
      arch/ppc/platforms/sandpoint.c
  28. 19 7
      arch/ppc/syslib/open_pic.c
  29. 121 4
      arch/ppc64/kernel/kprobes.c
  30. 1 0
      arch/ppc64/kernel/ppc_ksyms.c
  31. 4 0
      arch/ppc64/kernel/process.c
  32. 1 0
      arch/ppc64/kernel/time.c
  33. 1 1
      arch/sparc64/kernel/auxio.c
  34. 4 112
      arch/sparc64/kernel/entry.S
  35. 50 121
      arch/sparc64/kernel/irq.c
  36. 8 4
      arch/sparc64/kernel/semaphore.c
  37. 0 1
      arch/sparc64/kernel/sparc64_ksyms.c
  38. 2 1
      arch/sparc64/kernel/trampoline.S
  39. 53 50
      arch/sparc64/lib/U1memcpy.S
  40. 13 2
      arch/sparc64/lib/VISsave.S
  41. 26 16
      arch/sparc64/lib/atomic.S
  42. 20 11
      arch/sparc64/lib/bitops.S
  43. 4 2
      arch/sparc64/lib/debuglocks.c
  44. 4 2
      arch/sparc64/lib/dec_and_lock.S
  45. 10 5
      arch/sparc64/lib/rwsem.S
  46. 4 2
      arch/sparc64/mm/init.c
  47. 2 1
      arch/sparc64/mm/ultra.S
  48. 66 170
      arch/x86_64/kernel/kprobes.c
  49. 29 0
      arch/x86_64/kernel/process.c
  50. 3 2
      drivers/block/as-iosched.c
  51. 7 10
      drivers/block/cciss.c
  52. 691 369
      drivers/block/cfq-iosched.c
  53. 2 1
      drivers/block/deadline-iosched.c
  54. 5 4
      drivers/block/elevator.c
  55. 36 23
      drivers/block/ll_rw_blk.c
  56. 9 1
      drivers/block/swim3.c
  57. 4 3
      drivers/block/sx8.c
  58. 9 0
      drivers/bluetooth/bluecard_cs.c
  59. 7 0
      drivers/bluetooth/bt3c_cs.c
  60. 7 0
      drivers/bluetooth/btuart_cs.c
  61. 8 0
      drivers/bluetooth/dtl1_cs.c
  62. 0 3
      drivers/char/misc.c
  63. 8 1
      drivers/char/pcmcia/synclink_cs.c
  64. 6 0
      drivers/ide/Kconfig
  65. 4 0
      drivers/ide/ide-disk.c
  66. 0 1
      drivers/ide/ide-dma.c
  67. 2 1
      drivers/ide/ide-iops.c
  68. 35 0
      drivers/ide/legacy/ide-cs.c
  69. 1 0
      drivers/ide/pci/Makefile
  70. 48 25
      drivers/ide/pci/generic.c
  71. 228 242
      drivers/ide/pci/hpt366.c
  72. 812 0
      drivers/ide/pci/it821x.c
  73. 5 5
      drivers/ide/pci/serverworks.c
  74. 4 4
      drivers/ide/ppc/pmac.c
  75. 5 5
      drivers/ieee1394/ohci1394.c
  76. 2 2
      drivers/infiniband/core/packer.c
  77. 13 5
      drivers/infiniband/core/sa_query.c
  78. 1 0
      drivers/infiniband/hw/mthca/mthca_av.c
  79. 227 304
      drivers/infiniband/hw/mthca/mthca_cmd.c
  80. 28 20
      drivers/infiniband/hw/mthca/mthca_cmd.h
  81. 49 52
      drivers/infiniband/hw/mthca/mthca_cq.c
  82. 10 2
      drivers/infiniband/hw/mthca/mthca_dev.h
  83. 1 0
      drivers/infiniband/hw/mthca/mthca_doorbell.h
  84. 27 31
      drivers/infiniband/hw/mthca/mthca_eq.c
  85. 12 20
      drivers/infiniband/hw/mthca/mthca_main.c
  86. 32 31
      drivers/infiniband/hw/mthca/mthca_mcg.c
  87. 9 1
      drivers/infiniband/hw/mthca/mthca_memfree.c
  88. 182 185
      drivers/infiniband/hw/mthca/mthca_mr.c
  89. 3 1
      drivers/infiniband/hw/mthca/mthca_provider.c
  90. 7 7
      drivers/infiniband/hw/mthca/mthca_provider.h
  91. 109 30
      drivers/infiniband/hw/mthca/mthca_qp.c
  92. 327 80
      drivers/input/evdev.c
  93. 0 14
      drivers/input/gameport/Kconfig
  94. 0 2
      drivers/input/gameport/Makefile
  95. 0 322
      drivers/input/gameport/cs461x.c
  96. 12 19
      drivers/input/gameport/gameport.c
  97. 6 6
      drivers/input/gameport/ns558.c
  98. 0 186
      drivers/input/gameport/vortex.c
  99. 32 5
      drivers/input/input.c
  100. 91 25
      drivers/input/joydev.c

+ 18 - 6
Documentation/Changes

@@ -44,9 +44,9 @@ running, the suggested command should tell you.
 
 Again, keep in mind that this list assumes you are already
 functionally running a Linux 2.4 kernel.  Also, not all tools are
-necessary on all systems; obviously, if you don't have any PCMCIA (PC
-Card) hardware, for example, you probably needn't concern yourself
-with pcmcia-cs.
+necessary on all systems; obviously, if you don't have any ISDN
+hardware, for example, you probably needn't concern yourself with
+isdn4k-utils.
 
 o  Gnu C                  2.95.3                  # gcc --version
 o  Gnu make               3.79.1                  # make --version
@@ -57,6 +57,7 @@ o  e2fsprogs              1.29                    # tune2fs
 o  jfsutils               1.1.3                   # fsck.jfs -V
 o  reiserfsprogs          3.6.3                   # reiserfsck -V 2>&1|grep reiserfsprogs
 o  xfsprogs               2.6.0                   # xfs_db -V
+o  pcmciautils            001
 o  pcmcia-cs              3.1.21                  # cardmgr -V
 o  quota-tools            3.09                    # quota -V
 o  PPP                    2.4.0                   # pppd --version
@@ -186,13 +187,20 @@ architecture independent and any version from 2.0.0 onward should
 work correctly with this version of the XFS kernel code (2.6.0 or
 later is recommended, due to some significant improvements).
 
+PCMCIAutils
+-----------
+
+PCMCIAutils replaces pcmcia-cs (see below). It properly sets up
+PCMCIA sockets at system startup and loads the appropriate modules
+for 16-bit PCMCIA devices if the kernel is modularized and the hotplug
+subsystem is used.
 
 Pcmcia-cs
 ---------
 
 PCMCIA (PC Card) support is now partially implemented in the main
-kernel source.  Pay attention when you recompile your kernel ;-).
-Also, be sure to upgrade to the latest pcmcia-cs release.
+kernel source. The "pcmciautils" package (see above) replaces pcmcia-cs
+for newest kernels.
 
 Quota-tools
 -----------
@@ -349,9 +357,13 @@ Xfsprogs
 --------
 o  <ftp://oss.sgi.com/projects/xfs/download/>
 
+Pcmciautils
+-----------
+o  <ftp://ftp.kernel.org/pub/linux/utils/kernel/pcmcia/>
+
 Pcmcia-cs
 ---------
-o  <ftp://pcmcia-cs.sourceforge.net/pub/pcmcia-cs/pcmcia-cs-3.1.21.tar.gz>
+o  <http://pcmcia-cs.sourceforge.net/>
 
 Quota-tools
 ----------

+ 176 - 0
Documentation/block/ioprio.txt

@@ -0,0 +1,176 @@
+Block io priorities
+===================
+
+
+Intro
+-----
+
+With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
+priorities is supported for reads on files. This enables users to io nice
+processes or process groups, similar to what has been possible to cpu
+scheduling for ages. This document mainly details the current possibilites
+with cfq, other io schedulers do not support io priorities so far.
+
+Scheduling classes
+------------------
+
+CFQ implements three generic scheduling classes that determine how io is
+served for a process.
+
+IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
+higher priority than any other in the system, processes from this class are
+given first access to the disk every time. Thus it needs to be used with some
+care, one io RT process can starve the entire system. Within the RT class,
+there are 8 levels of class data that determine exactly how much time this
+process needs the disk for on each service. In the future this might change
+to be more directly mappable to performance, by passing in a wanted data
+rate instead.
+
+IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
+for any process that hasn't set a specific io priority. The class data
+determines how much io bandwidth the process will get, it's directly mappable
+to the cpu nice levels just more coarsely implemented. 0 is the highest
+BE prio level, 7 is the lowest. The mapping between cpu nice level and io
+nice level is determined as: io_nice = (cpu_nice + 20) / 5.
+
+IOPRIO_CLASS_IDLE: This is the idle scheduling class, processes running at this
+level only get io time when no one else needs the disk. The idle class has no
+class data, since it doesn't really apply here.
+
+Tools
+-----
+
+See below for a sample ionice tool. Usage:
+
+# ionice -c<class> -n<level> -p<pid>
+
+If pid isn't given, the current process is assumed. IO priority settings
+are inherited on fork, so you can use ionice to start the process at a given
+level:
+
+# ionice -c2 -n0 /bin/ls
+
+will run ls at the best-effort scheduling class at the highest priority.
+For a running process, you can give the pid instead:
+
+# ionice -c1 -n2 -p100
+
+will change pid 100 to run at the realtime scheduling class, at priority 2.
+
+---> snip ionice.c tool <---
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <asm/unistd.h>
+
+extern int sys_ioprio_set(int, int, int);
+extern int sys_ioprio_get(int, int);
+
+#if defined(__i386__)
+#define __NR_ioprio_set		289
+#define __NR_ioprio_get		290
+#elif defined(__ppc__)
+#define __NR_ioprio_set		273
+#define __NR_ioprio_get		274
+#elif defined(__x86_64__)
+#define __NR_ioprio_set		251
+#define __NR_ioprio_get		252
+#elif defined(__ia64__)
+#define __NR_ioprio_set		1274
+#define __NR_ioprio_get		1275
+#else
+#error "Unsupported arch"
+#endif
+
+_syscall3(int, ioprio_set, int, which, int, who, int, ioprio);
+_syscall2(int, ioprio_get, int, which, int, who);
+
+enum {
+	IOPRIO_CLASS_NONE,
+	IOPRIO_CLASS_RT,
+	IOPRIO_CLASS_BE,
+	IOPRIO_CLASS_IDLE,
+};
+
+enum {
+	IOPRIO_WHO_PROCESS = 1,
+	IOPRIO_WHO_PGRP,
+	IOPRIO_WHO_USER,
+};
+
+#define IOPRIO_CLASS_SHIFT	13
+
+const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
+
+int main(int argc, char *argv[])
+{
+	int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
+	int c, pid = 0;
+
+	while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
+		switch (c) {
+		case 'n':
+			ioprio = strtol(optarg, NULL, 10);
+			set = 1;
+			break;
+		case 'c':
+			ioprio_class = strtol(optarg, NULL, 10);
+			set = 1;
+			break;
+		case 'p':
+			pid = strtol(optarg, NULL, 10);
+			break;
+		}
+	}
+
+	switch (ioprio_class) {
+		case IOPRIO_CLASS_NONE:
+			ioprio_class = IOPRIO_CLASS_BE;
+			break;
+		case IOPRIO_CLASS_RT:
+		case IOPRIO_CLASS_BE:
+			break;
+		case IOPRIO_CLASS_IDLE:
+			ioprio = 7;
+			break;
+		default:
+			printf("bad prio class %d\n", ioprio_class);
+			return 1;
+	}
+
+	if (!set) {
+		if (!pid && argv[optind])
+			pid = strtol(argv[optind], NULL, 10);
+
+		ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
+
+		printf("pid=%d, %d\n", pid, ioprio);
+
+		if (ioprio == -1)
+			perror("ioprio_get");
+		else {
+			ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
+			ioprio = ioprio & 0xff;
+			printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
+		}
+	} else {
+		if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
+			perror("ioprio_set");
+			return 1;
+		}
+
+		if (argv[optind])
+			execvp(argv[optind], &argv[optind]);
+	}
+
+	return 0;
+}
+
+---> snip ionice.c tool <---
+
+
+March 11 2005, Jens Axboe <axboe@suse.de>

+ 1 - 0
Documentation/cciss.txt

@@ -17,6 +17,7 @@ This driver is known to work with the following cards:
 	* SA P600
 	* SA P800
 	* SA E400
+	* SA E300
 
 If nodes are not already created in the /dev/cciss directory, run as root:
 

+ 1 - 1
Documentation/kernel-parameters.txt

@@ -1119,7 +1119,7 @@ running once the system is up.
 			See Documentation/ramdisk.txt.
 
 	psmouse.proto=  [HW,MOUSE] Highest PS2 mouse protocol extension to
-			probe for (bare|imps|exps).
+			probe for (bare|imps|exps|lifebook|any).
 	psmouse.rate=	[HW,MOUSE] Set desired mouse report rate, in reports
 			per second.
 	psmouse.resetafter=

+ 64 - 0
Documentation/pcmcia/devicetable.txt

@@ -0,0 +1,64 @@
+Matching of PCMCIA devices to drivers is done using one or more of the
+following criteria:
+
+- manufactor ID
+- card ID
+- product ID strings _and_ hashes of these strings
+- function ID
+- device function (actual and pseudo)
+
+You should use the helpers in include/pcmcia/device_id.h for generating the
+struct pcmcia_device_id[] entries which match devices to drivers.
+
+If you want to match product ID strings, you also need to pass the crc32
+hashes of the string to the macro, e.g. if you want to match the product ID
+string 1, you need to use
+
+PCMCIA_DEVICE_PROD_ID1("some_string", 0x(hash_of_some_string)),
+
+If the hash is incorrect, the kernel will inform you about this in "dmesg"
+upon module initialization, and tell you of the correct hash.
+
+You can determine the hash of the product ID strings by running
+"pcmcia-modalias %n.%m" [%n being replaced with the socket number and %m being
+replaced with the device function] from pcmciautils. It generates a string
+in the following form:
+pcmcia:m0149cC1ABf06pfn00fn00pa725B842DpbF1EFEE84pc0877B627pd00000000
+
+The hex value after "pa" is the hash of product ID string 1, after "pb" for
+string 2 and so on.
+
+Alternatively, you can use this small tool to determine the crc32 hash.
+simply pass the string you want to evaluate as argument to this program,
+e.g.
+$ ./crc32hash "Dual Speed"
+
+-------------------------------------------------------------------------
+/* crc32hash.c - derived from linux/lib/crc32.c, GNU GPL v2 */
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+unsigned int crc32(unsigned char const *p, unsigned int len)
+{
+	int i;
+	unsigned int crc = 0;
+	while (len--) {
+		crc ^= *p++;
+		for (i = 0; i < 8; i++)
+			crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
+	}
+	return crc;
+}
+
+int main(int argc, char **argv) {
+	unsigned int result;
+	if (argc != 2) {
+		printf("no string passed as argument\n");
+		return -1;
+	}
+	result = crc32(argv[1], strlen(argv[1]));
+	printf("0x%x\n", result);
+	return 0;
+}

+ 51 - 0
Documentation/pcmcia/driver-changes.txt

@@ -0,0 +1,51 @@
+This file details changes in 2.6 which affect PCMCIA card driver authors:
+
+* in-kernel device<->driver matching
+   PCMCIA devices and their correct drivers can now be matched in
+   kernelspace. See 'devicetable.txt' for details.
+
+* Device model integration (as of 2.6.11)
+   A struct pcmcia_device is registered with the device model core,
+   and can be used (e.g. for SET_NETDEV_DEV) by using
+   handle_to_dev(client_handle_t * handle).
+
+* Convert internal I/O port addresses to unsigned long (as of 2.6.11)
+   ioaddr_t should be replaced by kio_addr_t in PCMCIA card drivers.
+
+* irq_mask and irq_list parameters (as of 2.6.11)
+   The irq_mask and irq_list parameters should no longer be used in
+   PCMCIA card drivers. Instead, it is the job of the PCMCIA core to
+   determine which IRQ should be used. Therefore, link->irq.IRQInfo2
+   is ignored.
+
+* client->PendingEvents is gone (as of 2.6.11)
+   client->PendingEvents is no longer available.
+
+* client->Attributes are gone (as of 2.6.11)
+   client->Attributes is unused, therefore it is removed from all
+   PCMCIA card drivers
+
+* core functions no longer available (as of 2.6.11)
+   The following functions have been removed from the kernel source
+   because they are unused by all in-kernel drivers, and no external
+   driver was reported to rely on them:
+	pcmcia_get_first_region()
+	pcmcia_get_next_region()
+	pcmcia_modify_window()
+	pcmcia_set_event_mask()
+	pcmcia_get_first_window()
+	pcmcia_get_next_window()
+
+* device list iteration upon module removal (as of 2.6.10)
+   It is no longer necessary to iterate on the driver's internal
+   client list and call the ->detach() function upon module removal.
+
+* Resource management. (as of 2.6.8)
+   Although the PCMCIA subsystem will allocate resources for cards,
+   it no longer marks these resources busy. This means that driver
+   authors are now responsible for claiming your resources as per
+   other drivers in Linux. You should use request_region() to mark
+   your IO regions in-use, and request_mem_region() to mark your
+   memory regions in-use. The name argument should be a pointer to
+   your driver name. Eg, for pcnet_cs, name should point to the
+   string "pcnet_cs".

+ 1 - 1
MAINTAINERS

@@ -1149,7 +1149,7 @@ S:	Maintained
 
 INFINIBAND SUBSYSTEM
 P:	Roland Dreier
-M:	roland@topspin.com
+M:	rolandd@cisco.com
 P:	Sean Hefty
 M:	mshefty@ichips.intel.com
 P:	Hal Rosenstock

+ 4 - 1
arch/arm/kernel/process.c

@@ -32,6 +32,7 @@
 #include <asm/leds.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
+#include <asm/mach/time.h>
 
 extern const char *processor_modes[];
 extern void setup_mm_for_reboot(char mode);
@@ -85,8 +86,10 @@ EXPORT_SYMBOL(pm_power_off);
 void default_idle(void)
 {
 	local_irq_disable();
-	if (!need_resched() && !hlt_counter)
+	if (!need_resched() && !hlt_counter) {
+		timer_dyn_reprogram();
 		arch_idle();
+	}
 	local_irq_enable();
 }
 

+ 7 - 3
arch/arm/kernel/time.c

@@ -424,15 +424,19 @@ static int timer_dyn_tick_disable(void)
 	return ret;
 }
 
+/*
+ * Reprogram the system timer for at least the calculated time interval.
+ * This function should be called from the idle thread with IRQs disabled,
+ * immediately before sleeping.
+ */
 void timer_dyn_reprogram(void)
 {
 	struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick;
-	unsigned long flags;
 
-	write_seqlock_irqsave(&xtime_lock, flags);
+	write_seqlock(&xtime_lock);
 	if (dyn_tick->state & DYN_TICK_ENABLED)
 		dyn_tick->reprogram(next_timer_interrupt() - jiffies);
-	write_sequnlock_irqrestore(&xtime_lock, flags);
+	write_sequnlock(&xtime_lock);
 }
 
 static ssize_t timer_show_dyn_tick(struct sys_device *dev, char *buf)

+ 1 - 0
arch/arm/mach-aaec2000/Makefile.boot

@@ -0,0 +1 @@
+	zreladdr-y := 0xf0008000

+ 4 - 4
arch/arm/mach-omap/usb.c

@@ -288,8 +288,8 @@ static void usb_release(struct device *dev)
 static struct resource udc_resources[] = {
 	/* order is significant! */
 	{		/* registers */
-		.start		= IO_ADDRESS(UDC_BASE),
-		.end		= IO_ADDRESS(UDC_BASE + 0xff),
+		.start		= UDC_BASE,
+		.end		= UDC_BASE + 0xff,
 		.flags		= IORESOURCE_MEM,
 	}, {		/* general IRQ */
 		.start		= IH2_BASE + 20,
@@ -355,8 +355,8 @@ static struct platform_device ohci_device = {
 static struct resource otg_resources[] = {
 	/* order is significant! */
 	{
-		.start		= IO_ADDRESS(OTG_BASE),
-		.end		= IO_ADDRESS(OTG_BASE + 0xff),
+		.start		= OTG_BASE,
+		.end		= OTG_BASE + 0xff,
 		.flags		= IORESOURCE_MEM,
 	}, {
 		.start		= IH2_BASE + 8,

+ 65 - 6
arch/arm/mm/init.c

@@ -522,6 +522,69 @@ static inline void free_area(unsigned long addr, unsigned long end, char *s)
 		printk(KERN_INFO "Freeing %s memory: %dK\n", s, size);
 }
 
+static inline void
+free_memmap(int node, unsigned long start_pfn, unsigned long end_pfn)
+{
+	struct page *start_pg, *end_pg;
+	unsigned long pg, pgend;
+
+	/*
+	 * Convert start_pfn/end_pfn to a struct page pointer.
+	 */
+	start_pg = pfn_to_page(start_pfn);
+	end_pg = pfn_to_page(end_pfn);
+
+	/*
+	 * Convert to physical addresses, and
+	 * round start upwards and end downwards.
+	 */
+	pg = PAGE_ALIGN(__pa(start_pg));
+	pgend = __pa(end_pg) & PAGE_MASK;
+
+	/*
+	 * If there are free pages between these,
+	 * free the section of the memmap array.
+	 */
+	if (pg < pgend)
+		free_bootmem_node(NODE_DATA(node), pg, pgend - pg);
+}
+
+/*
+ * The mem_map array can get very big.  Free the unused area of the memory map.
+ */
+static void __init free_unused_memmap_node(int node, struct meminfo *mi)
+{
+	unsigned long bank_start, prev_bank_end = 0;
+	unsigned int i;
+
+	/*
+	 * [FIXME] This relies on each bank being in address order.  This
+	 * may not be the case, especially if the user has provided the
+	 * information on the command line.
+	 */
+	for (i = 0; i < mi->nr_banks; i++) {
+		if (mi->bank[i].size == 0 || mi->bank[i].node != node)
+			continue;
+
+		bank_start = mi->bank[i].start >> PAGE_SHIFT;
+		if (bank_start < prev_bank_end) {
+			printk(KERN_ERR "MEM: unordered memory banks.  "
+				"Not freeing memmap.\n");
+			break;
+		}
+
+		/*
+		 * If we had a previous bank, and there is a space
+		 * between the current bank and the previous, free it.
+		 */
+		if (prev_bank_end && prev_bank_end != bank_start)
+			free_memmap(node, prev_bank_end, bank_start);
+
+		prev_bank_end = (mi->bank[i].start +
+				 mi->bank[i].size) >> PAGE_SHIFT;
+	}
+}
+
 /*
  * mem_init() marks the free areas in the mem_map and tells us how much
  * memory is free.  This is done after various parts of the system have
@@ -540,16 +603,12 @@ void __init mem_init(void)
 	max_mapnr   = virt_to_page(high_memory) - mem_map;
 #endif
 
-	/*
-	 * We may have non-contiguous memory.
-	 */
-	if (meminfo.nr_banks != 1)
-		create_memmap_holes(&meminfo);
-
 	/* this will put all unused low memory onto the freelists */
 	for_each_online_node(node) {
 		pg_data_t *pgdat = NODE_DATA(node);
 
+		free_unused_memmap_node(node, &meminfo);
+
 		if (pgdat->node_spanned_pages != 0)
 			totalram_pages += free_all_bootmem_node(pgdat);
 	}

+ 7 - 80
arch/arm/mm/mm-armv.c

@@ -169,7 +169,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 
 	memzero(new_pgd, FIRST_KERNEL_PGD_NR * sizeof(pgd_t));
 
+	/*
+	 * Copy over the kernel and IO PGD entries
+	 */
 	init_pgd = pgd_offset_k(0);
+	memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR,
+		       (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));
+
+	clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
 
 	if (!vectors_high()) {
 		/*
@@ -198,14 +205,6 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 		spin_unlock(&mm->page_table_lock);
 	}
 
-	/*
-	 * Copy over the kernel and IO PGD entries
-	 */
-	memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR,
-		       (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));
-
-	clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
-
 	return new_pgd;
 
 no_pte:
@@ -698,75 +697,3 @@ void __init iotable_init(struct map_desc *io_desc, int nr)
 	for (i = 0; i < nr; i++)
 		create_mapping(io_desc + i);
 }
-
-static inline void
-free_memmap(int node, unsigned long start_pfn, unsigned long end_pfn)
-{
-	struct page *start_pg, *end_pg;
-	unsigned long pg, pgend;
-
-	/*
-	 * Convert start_pfn/end_pfn to a struct page pointer.
-	 */
-	start_pg = pfn_to_page(start_pfn);
-	end_pg = pfn_to_page(end_pfn);
-
-	/*
-	 * Convert to physical addresses, and
-	 * round start upwards and end downwards.
-	 */
-	pg = PAGE_ALIGN(__pa(start_pg));
-	pgend = __pa(end_pg) & PAGE_MASK;
-
-	/*
-	 * If there are free pages between these,
-	 * free the section of the memmap array.
-	 */
-	if (pg < pgend)
-		free_bootmem_node(NODE_DATA(node), pg, pgend - pg);
-}
-
-static inline void free_unused_memmap_node(int node, struct meminfo *mi)
-{
-	unsigned long bank_start, prev_bank_end = 0;
-	unsigned int i;
-
-	/*
-	 * [FIXME] This relies on each bank being in address order.  This
-	 * may not be the case, especially if the user has provided the
-	 * information on the command line.
-	 */
-	for (i = 0; i < mi->nr_banks; i++) {
-		if (mi->bank[i].size == 0 || mi->bank[i].node != node)
-			continue;
-
-		bank_start = mi->bank[i].start >> PAGE_SHIFT;
-		if (bank_start < prev_bank_end) {
-			printk(KERN_ERR "MEM: unordered memory banks.  "
-				"Not freeing memmap.\n");
-			break;
-		}
-
-		/*
-		 * If we had a previous bank, and there is a space
-		 * between the current bank and the previous, free it.
-		 */
-		if (prev_bank_end && prev_bank_end != bank_start)
-			free_memmap(node, prev_bank_end, bank_start);
-
-		prev_bank_end = PAGE_ALIGN(mi->bank[i].start +
-					   mi->bank[i].size) >> PAGE_SHIFT;
-	}
-}
-
-/*
- * The mem_map array can get very big.  Free
- * the unused area of the memory map.
- */
-void __init create_memmap_holes(struct meminfo *mi)
-{
-	int node;
-
-	for_each_online_node(node)
-		free_unused_memmap_node(node, mi);
-}

+ 66 - 3
arch/arm/tools/mach-types

@@ -6,7 +6,7 @@
 # To add an entry into this database, please see Documentation/arm/README,
 # or contact rmk@arm.linux.org.uk
 #
-# Last update: Thu Mar 24 14:34:50 2005
+# Last update: Thu Jun 23 20:19:33 2005
 #
 # machine_is_xxx	CONFIG_xxxx		MACH_TYPE_xxx		number
 #
@@ -243,7 +243,7 @@ yoho			ARCH_YOHO		YOHO			231
 jasper			ARCH_JASPER		JASPER			232
 dsc25			ARCH_DSC25		DSC25			233
 omap_innovator		MACH_OMAP_INNOVATOR	OMAP_INNOVATOR		234
-ramses			ARCH_RAMSES		RAMSES			235
+mnci			ARCH_RAMSES		RAMSES			235
 s28x			ARCH_S28X		S28X			236
 mport3			ARCH_MPORT3		MPORT3			237
 pxa_eagle250		ARCH_PXA_EAGLE250	PXA_EAGLE250		238
@@ -323,7 +323,7 @@ nimbra29x		ARCH_NIMBRA29X		NIMBRA29X		311
 nimbra210		ARCH_NIMBRA210		NIMBRA210		312
 hhp_d95xx		ARCH_HHP_D95XX		HHP_D95XX		313
 labarm			ARCH_LABARM		LABARM			314
-m825xx			ARCH_M825XX		M825XX			315
+comcerto		ARCH_M825XX		M825XX			315
 m7100			SA1100_M7100		M7100			316
 nipc2			ARCH_NIPC2		NIPC2			317
 fu7202			ARCH_FU7202		FU7202			318
@@ -724,3 +724,66 @@ lpc22xx			MACH_LPC22XX		LPC22XX			715
 omap_comet3		MACH_COMET3		COMET3			716
 omap_comet4		MACH_COMET4		COMET4			717
 csb625			MACH_CSB625		CSB625			718
+fortunet2		MACH_FORTUNET2		FORTUNET2		719
+s5h2200			MACH_S5H2200		S5H2200			720
+optorm920		MACH_OPTORM920		OPTORM920		721
+adsbitsyxb		MACH_ADSBITSYXB		ADSBITSYXB		722
+adssphere		MACH_ADSSPHERE		ADSSPHERE		723
+adsportal		MACH_ADSPORTAL		ADSPORTAL		724
+ln2410sbc		MACH_LN2410SBC		LN2410SBC		725
+cb3rufc			MACH_CB3RUFC		CB3RUFC			726
+mp2usb			MACH_MP2USB		MP2USB			727
+ntnp425c		MACH_NTNP425C		NTNP425C		728
+colibri			MACH_COLIBRI		COLIBRI			729
+pcm7220			MACH_PCM7220		PCM7220			730
+gateway7001		MACH_GATEWAY7001	GATEWAY7001		731
+pcm027			MACH_PCM027		PCM027			732
+cmpxa			MACH_CMPXA		CMPXA			733
+anubis			MACH_ANUBIS		ANUBIS			734
+ite8152			MACH_ITE8152		ITE8152			735
+lpc3xxx			MACH_LPC3XXX		LPC3XXX			736
+puppeteer		MACH_PUPPETEER		PUPPETEER		737
+vt001			MACH_MACH_VADATECH	MACH_VADATECH		738
+e570			MACH_E570		E570			739
+x50			MACH_X50		X50			740
+recon			MACH_RECON		RECON			741
+xboardgp8		MACH_XBOARDGP8		XBOARDGP8		742
+fpic2			MACH_FPIC2		FPIC2			743
+akita			MACH_AKITA		AKITA			744
+a81			MACH_A81		A81			745
+svm_sc25x		MACH_SVM_SC25X		SVM_SC25X		746
+vt020			MACH_VADATECH020	VADATECH020		747
+tli			MACH_TLI		TLI			748
+edb9315lc		MACH_EDB9315LC		EDB9315LC		749
+passec			MACH_PASSEC		PASSEC			750
+ds_tiger		MACH_DS_TIGER		DS_TIGER		751
+e310			MACH_E310		E310			752
+e330			MACH_E330		E330			753
+rt3000			MACH_RT3000		RT3000			754
+nokia770		MACH_NOKIA770		NOKIA770		755
+pnx0106			MACH_PNX0106		PNX0106			756
+hx21xx			MACH_HX21XX		HX21XX			757
+faraday			MACH_FARADAY		FARADAY			758
+sbc9312			MACH_SBC9312		SBC9312			759
+batman			MACH_BATMAN		BATMAN			760
+jpd201			MACH_JPD201		JPD201			761
+mipsa			MACH_MIPSA		MIPSA			762
+kacom			MACH_KACOM		KACOM			763
+swarcocpu		MACH_SWARCOCPU		SWARCOCPU		764
+swarcodsl		MACH_SWARCODSL		SWARCODSL		765
+blueangel		MACH_BLUEANGEL		BLUEANGEL		766
+hairygrama		MACH_HAIRYGRAMA		HAIRYGRAMA		767
+banff			MACH_BANFF		BANFF			768
+carmeva			MACH_CARMEVA		CARMEVA			769
+sam255			MACH_SAM255		SAM255			770
+ppm10			MACH_PPM10		PPM10			771
+edb9315a		MACH_EDB9315A		EDB9315A		772
+sunset			MACH_SUNSET		SUNSET			773
+stargate2		MACH_STARGATE2		STARGATE2		774
+intelmote2		MACH_INTELMOTE2		INTELMOTE2		775
+trizeps4		MACH_TRIZEPS4		TRIZEPS4		776
+mainstone2		MACH_MAINSTONE2		MAINSTONE2		777
+ez_ixp42x		MACH_EZ_IXP42X		EZ_IXP42X		778
+tapwave_zodiac		MACH_TAPWAVE_ZODIAC	TAPWAVE_ZODIAC		779
+universalmeter		MACH_UNIVERSALMETER	UNIVERSALMETER		780
+hicoarm9		MACH_HICOARM9		HICOARM9		781

+ 70 - 63
arch/i386/kernel/kprobes.c

@@ -127,48 +127,23 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 		regs->eip = (unsigned long)&p->ainsn.insn;
 }
 
-struct task_struct  *arch_get_kprobe_task(void *ptr)
-{
-	return ((struct thread_info *) (((unsigned long) ptr) &
-					(~(THREAD_SIZE -1))))->task;
-}
-
 void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
 {
 	unsigned long *sara = (unsigned long *)&regs->esp;
-	struct kretprobe_instance *ri;
-	static void *orig_ret_addr;
+        struct kretprobe_instance *ri;
+
+        if ((ri = get_free_rp_inst(rp)) != NULL) {
+                ri->rp = rp;
+                ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *) *sara;
 
-	/*
-	 * Save the return address when the return probe hits
-	 * the first time, and use it to populate the (krprobe
-	 * instance)->ret_addr for subsequent return probes at
-	 * the same addrress since stack address would have
-	 * the kretprobe_trampoline by then.
-	 */
-	if (((void*) *sara) != kretprobe_trampoline)
-		orig_ret_addr = (void*) *sara;
-
-	if ((ri = get_free_rp_inst(rp)) != NULL) {
-		ri->rp = rp;
-		ri->stack_addr = sara;
-		ri->ret_addr = orig_ret_addr;
-		add_rp_inst(ri);
 		/* Replace the return addr with trampoline addr */
 		*sara = (unsigned long) &kretprobe_trampoline;
-	} else {
-		rp->nmissed++;
-	}
-}
 
-void arch_kprobe_flush_task(struct task_struct *tk)
-{
-	struct kretprobe_instance *ri;
-	while ((ri = get_rp_inst_tsk(tk)) != NULL) {
-		*((unsigned long *)(ri->stack_addr)) =
-					(unsigned long) ri->ret_addr;
-		recycle_rp_inst(ri);
-	}
+                add_rp_inst(ri);
+        } else {
+                rp->nmissed++;
+        }
 }
 
 /*
@@ -286,36 +261,59 @@ no_kprobe:
  */
 int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
-	struct task_struct *tsk;
-	struct kretprobe_instance *ri;
-	struct hlist_head *head;
-	struct hlist_node *node;
-	unsigned long *sara = ((unsigned long *) &regs->esp) - 1;
-
-	tsk = arch_get_kprobe_task(sara);
-	head = kretprobe_inst_table_head(tsk);
-
-	hlist_for_each_entry(ri, node, head, hlist) {
-		if (ri->stack_addr == sara && ri->rp) {
-			if (ri->rp->handler)
-				ri->rp->handler(ri, regs);
-		}
-	}
-	return 0;
-}
+        struct kretprobe_instance *ri = NULL;
+        struct hlist_head *head;
+        struct hlist_node *node, *tmp;
+	unsigned long orig_ret_address = 0;
+	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
-void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs,
-						unsigned long flags)
-{
-	struct kretprobe_instance *ri;
-	/* RA already popped */
-	unsigned long *sara = ((unsigned long *)&regs->esp) - 1;
+        head = kretprobe_inst_table_head(current);
 
-	while ((ri = get_rp_inst(sara))) {
-		regs->eip = (unsigned long)ri->ret_addr;
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more then one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+         *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+                if (ri->task != current)
+			/* another task is sharing our hash bucket */
+                        continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
 		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
 	}
-	regs->eflags &= ~TF_MASK;
+
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->eip = orig_ret_address;
+
+	unlock_kprobes();
+	preempt_enable_no_resched();
+
+        /*
+         * By returning a non-zero value, we are telling
+         * kprobe_handler() that we have handled unlocking
+         * and re-enabling preemption.
+         */
+        return 1;
 }
 
 /*
@@ -403,8 +401,7 @@ static inline int post_kprobe_handler(struct pt_regs *regs)
 		current_kprobe->post_handler(current_kprobe, regs, 0);
 	}
 
-	if (current_kprobe->post_handler != trampoline_post_handler)
-		resume_execution(current_kprobe, regs);
+	resume_execution(current_kprobe, regs);
 	regs->eflags |= kprobe_saved_eflags;
 
 	/*Restore back the original saved kprobes variables and continue. */
@@ -534,3 +531,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	}
 	return 0;
 }
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init(void)
+{
+	return register_kprobe(&trampoline_p);
+}

+ 29 - 0
arch/i386/kernel/process.c

@@ -616,6 +616,33 @@ handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
 	tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
 }
 
+/*
+ * This function selects if the context switch from prev to next
+ * has to tweak the TSC disable bit in the cr4.
+ */
+static inline void disable_tsc(struct task_struct *prev_p,
+			       struct task_struct *next_p)
+{
+	struct thread_info *prev, *next;
+
+	/*
+	 * gcc should eliminate the ->thread_info dereference if
+	 * has_secure_computing returns 0 at compile time (SECCOMP=n).
+	 */
+	prev = prev_p->thread_info;
+	next = next_p->thread_info;
+
+	if (has_secure_computing(prev) || has_secure_computing(next)) {
+		/* slow path here */
+		if (has_secure_computing(prev) &&
+		    !has_secure_computing(next)) {
+			write_cr4(read_cr4() & ~X86_CR4_TSD);
+		} else if (!has_secure_computing(prev) &&
+			   has_secure_computing(next))
+			write_cr4(read_cr4() | X86_CR4_TSD);
+	}
+}
+
 /*
  *	switch_to(x,yn) should switch tasks from x to y.
  *
@@ -695,6 +722,8 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
 		handle_io_bitmap(next, tss);
 
+	disable_tsc(prev_p, next_p);
+
 	return prev_p;
 }
 

+ 2 - 0
arch/i386/kernel/syscall_table.S

@@ -289,3 +289,5 @@ ENTRY(sys_call_table)
 	.long sys_add_key
 	.long sys_request_key
 	.long sys_keyctl
+	.long sys_ioprio_set
+	.long sys_ioprio_get		/* 290 */

+ 2 - 2
arch/ia64/kernel/entry.S

@@ -1577,8 +1577,8 @@ sys_call_table:
 	data8 sys_add_key
 	data8 sys_request_key
 	data8 sys_keyctl
-	data8 sys_ni_syscall
-	data8 sys_ni_syscall			// 1275
+	data8 sys_ioprio_set
+	data8 sys_ioprio_get			// 1275
 	data8 sys_set_zone_reclaim
 	data8 sys_ni_syscall
 	data8 sys_ni_syscall

+ 124 - 4
arch/ia64/kernel/kprobes.c

@@ -34,6 +34,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/kdebug.h>
+#include <asm/sections.h>
 
 extern void jprobe_inst_return(void);
 
@@ -263,13 +264,33 @@ static inline void get_kprobe_inst(bundle_t *bundle, uint slot,
 	}
 }
 
+/* Returns non-zero if the addr is in the Interrupt Vector Table */
+static inline int in_ivt_functions(unsigned long addr)
+{
+	return (addr >= (unsigned long)__start_ivt_text
+		&& addr < (unsigned long)__end_ivt_text);
+}
+
 static int valid_kprobe_addr(int template, int slot, unsigned long addr)
 {
 	if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) {
-		printk(KERN_WARNING "Attempting to insert unaligned kprobe at 0x%lx\n",
-				addr);
+		printk(KERN_WARNING "Attempting to insert unaligned kprobe "
+				"at 0x%lx\n", addr);
 		return -EINVAL;
 	}
+
+ 	if (in_ivt_functions(addr)) {
+ 		printk(KERN_WARNING "Kprobes can't be inserted inside "
+				"IVT functions at 0x%lx\n", addr);
+ 		return -EINVAL;
+ 	}
+
+	if (slot == 1 && bundle_encoding[template][1] != L) {
+		printk(KERN_WARNING "Inserting kprobes on slot #1 "
+		       "is not supported\n");
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -290,6 +311,94 @@ static inline void set_current_kprobe(struct kprobe *p)
 	current_kprobe = p;
 }
 
+static void kretprobe_trampoline(void)
+{
+}
+
+/*
+ * At this point the target function has been tricked into
+ * returning into our trampoline.  Lookup the associated instance
+ * and then:
+ *    - call the handler function
+ *    - cleanup by marking the instance as unused
+ *    - long jump back to the original return address
+ */
+int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri = NULL;
+	struct hlist_head *head;
+	struct hlist_node *node, *tmp;
+	unsigned long orig_ret_address = 0;
+	unsigned long trampoline_address =
+		((struct fnptr *)kretprobe_trampoline)->ip;
+
+        head = kretprobe_inst_table_head(current);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more then one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+	 *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+                if (ri->task != current)
+			/* another task is sharing our hash bucket */
+                        continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->cr_iip = orig_ret_address;
+
+	unlock_kprobes();
+	preempt_enable_no_resched();
+
+        /*
+         * By returning a non-zero value, we are telling
+         * kprobe_handler() that we have handled unlocking
+         * and re-enabling preemption.
+         */
+        return 1;
+}
+
+void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri;
+
+	if ((ri = get_free_rp_inst(rp)) != NULL) {
+		ri->rp = rp;
+		ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *)regs->b0;
+
+		/* Replace the return addr with trampoline addr */
+		regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
+
+		add_rp_inst(ri);
+	} else {
+		rp->nmissed++;
+	}
+}
+
 int arch_prepare_kprobe(struct kprobe *p)
 {
 	unsigned long addr = (unsigned long) p->addr;
@@ -492,8 +601,8 @@ static int pre_kprobes_handler(struct die_args *args)
 	if (p->pre_handler && p->pre_handler(p, regs))
 		/*
 		 * Our pre-handler is specifically requesting that we just
-		 * do a return.  This is handling the case where the
-		 * pre-handler is really our special jprobe pre-handler.
+		 * do a return.  This is used for both the jprobe pre-handler
+		 * and the kretprobe trampoline
 		 */
 		return 1;
 
@@ -599,3 +708,14 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	*regs = jprobe_saved_regs;
 	return 1;
 }
+
+static struct kprobe trampoline_p = {
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init(void)
+{
+	trampoline_p.addr =
+		(kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip;
+	return register_kprobe(&trampoline_p);
+}

+ 16 - 0
arch/ia64/kernel/process.c

@@ -27,6 +27,7 @@
 #include <linux/efi.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
+#include <linux/kprobes.h>
 
 #include <asm/cpu.h>
 #include <asm/delay.h>
@@ -707,6 +708,13 @@ kernel_thread_helper (int (*fn)(void *), void *arg)
 void
 flush_thread (void)
 {
+	/*
+	 * Remove function-return probe instances associated with this task
+	 * and put them back on the free list. Do not insert an exit probe for
+	 * this function, it will be disabled by kprobe_flush_task if you do.
+	 */
+	kprobe_flush_task(current);
+
 	/* drop floating-point and debug-register state if it exists: */
 	current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
 	ia64_drop_fpu(current);
@@ -721,6 +729,14 @@ flush_thread (void)
 void
 exit_thread (void)
 {
+
+	/*
+	 * Remove function-return probe instances associated with this task
+	 * and put them back on the free list. Do not insert an exit probe for
+	 * this function, it will be disabled by kprobe_flush_task if you do.
+	 */
+	kprobe_flush_task(current);
+
 	ia64_drop_fpu(current);
 #ifdef CONFIG_PERFMON
        /* if needed, stop monitoring and flush state to perfmon context */

+ 6 - 1
arch/ia64/kernel/vmlinux.lds.S

@@ -8,6 +8,11 @@
 #define LOAD_OFFSET	(KERNEL_START - KERNEL_TR_PAGE_SIZE)
 #include <asm-generic/vmlinux.lds.h>
 
+#define IVT_TEXT							\
+		VMLINUX_SYMBOL(__start_ivt_text) = .;			\
+		*(.text.ivt)						\
+		VMLINUX_SYMBOL(__end_ivt_text) = .;
+
 OUTPUT_FORMAT("elf64-ia64-little")
 OUTPUT_ARCH(ia64)
 ENTRY(phys_start)
@@ -39,7 +44,7 @@ SECTIONS
 
   .text : AT(ADDR(.text) - LOAD_OFFSET)
     {
-	*(.text.ivt)
+	IVT_TEXT
 	*(.text)
 	SCHED_TEXT
 	LOCK_TEXT

+ 1 - 1
arch/mips/kernel/signal.c

@@ -457,7 +457,7 @@ static int do_signal(sigset_t *oldset, struct pt_regs *regs)
 	if (!user_mode(regs))
 		return 1;
 
-	if (try_to_freeze(0))
+	if (try_to_freeze())
 		goto no_signal;
 
 	if (!oldset)

+ 2 - 0
arch/ppc/kernel/misc.S

@@ -1449,3 +1449,5 @@ _GLOBAL(sys_call_table)
 	.long sys_request_key		/* 270 */
 	.long sys_keyctl
 	.long sys_waitid
+	.long sys_ioprio_set
+	.long sys_ioprio_get

+ 12 - 2
arch/ppc/mm/init.c

@@ -606,9 +606,19 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 		struct page *page = pfn_to_page(pfn);
 		if (!PageReserved(page)
 		    && !test_bit(PG_arch_1, &page->flags)) {
-			if (vma->vm_mm == current->active_mm)
+			if (vma->vm_mm == current->active_mm) {
+#ifdef CONFIG_8xx
+			/* On 8xx, cache control instructions (particularly 
+		 	 * "dcbst" from flush_dcache_icache) fault as write 
+			 * operation if there is an unpopulated TLB entry 
+			 * for the address in question. To workaround that, 
+			 * we invalidate the TLB here, thus avoiding dcbst 
+			 * misbehaviour.
+			 */
+				_tlbie(address);
+#endif
 				__flush_dcache_icache((void *) address);
-			else
+			} else
 				flush_dcache_icache_page(page);
 			set_bit(PG_arch_1, &page->flags);
 		}

+ 2 - 2
arch/ppc/platforms/pmac_sleep.S

@@ -46,7 +46,7 @@
 	.section .text
 	.align	5
 
-#if defined(CONFIG_PMAC_PBOOK) || defined(CONFIG_CPU_FREQ_PMAC)
+#if defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ_PMAC)
 
 /* This gets called by via-pmu.c late during the sleep process.
  * The PMU was already send the sleep command and will shut us down
@@ -382,7 +382,7 @@ turn_on_mmu:
 	isync
 	rfi
 
-#endif /* defined(CONFIG_PMAC_PBOOK) || defined(CONFIG_CPU_FREQ) */
+#endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */
 
 	.section .data
 	.balign	L1_CACHE_LINE_SIZE

+ 4 - 4
arch/ppc/platforms/pmac_time.c

@@ -206,7 +206,7 @@ via_calibrate_decr(void)
 	return 1;
 }
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 /*
  * Reset the time after a sleep.
  */
@@ -238,7 +238,7 @@ time_sleep_notify(struct pmu_sleep_notifier *self, int when)
 static struct pmu_sleep_notifier time_sleep_notifier __pmacdata = {
 	time_sleep_notify, SLEEP_LEVEL_MISC,
 };
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 
 /*
  * Query the OF and get the decr frequency.
@@ -251,9 +251,9 @@ pmac_calibrate_decr(void)
 	struct device_node *cpu;
 	unsigned int freq, *fp;
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 	pmu_register_sleep_notifier(&time_sleep_notifier);
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 
 	/* We assume MacRISC2 machines have correct device-tree
 	 * calibration. That's better since the VIA itself seems

+ 1 - 0
arch/ppc/platforms/sandpoint.c

@@ -324,6 +324,7 @@ sandpoint_setup_arch(void)
 			pdata[1].irq = 0;
 			pdata[1].mapbase = 0;
 		}
+	}
 
 	printk(KERN_INFO "Motorola SPS Sandpoint Test Platform\n");
 	printk(KERN_INFO "Port by MontaVista Software, Inc. (source@mvista.com)\n");

+ 19 - 7
arch/ppc/syslib/open_pic.c

@@ -370,8 +370,9 @@ void __init openpic_init(int offset)
 	/* Initialize IPI interrupts */
 	if ( ppc_md.progress ) ppc_md.progress("openpic: ipi",0x3bb);
 	for (i = 0; i < OPENPIC_NUM_IPI; i++) {
-		/* Disabled, Priority 10..13 */
-		openpic_initipi(i, 10+i, OPENPIC_VEC_IPI+i+offset);
+		/* Disabled, increased priorities 10..13 */
+		openpic_initipi(i, OPENPIC_PRIORITY_IPI_BASE+i,
+				OPENPIC_VEC_IPI+i+offset);
 		/* IPIs are per-CPU */
 		irq_desc[OPENPIC_VEC_IPI+i+offset].status |= IRQ_PER_CPU;
 		irq_desc[OPENPIC_VEC_IPI+i+offset].handler = &open_pic_ipi;
@@ -399,8 +400,9 @@ void __init openpic_init(int offset)
 		if (sense & IRQ_SENSE_MASK)
 			irq_desc[i+offset].status = IRQ_LEVEL;
 
-		/* Enabled, Priority 8 */
-		openpic_initirq(i, 8, i+offset, (sense & IRQ_POLARITY_MASK),
+		/* Enabled, Default priority */
+		openpic_initirq(i, OPENPIC_PRIORITY_DEFAULT, i+offset,
+				(sense & IRQ_POLARITY_MASK),
 				(sense & IRQ_SENSE_MASK));
 		/* Processor 0 */
 		openpic_mapirq(i, CPU_MASK_CPU0, CPU_MASK_NONE);
@@ -655,6 +657,18 @@ static void __init openpic_maptimer(u_int timer, cpumask_t cpumask)
 		      cpus_addr(phys)[0]);
 }
 
+/*
+ * Change the priority of an interrupt
+ */
+void __init
+openpic_set_irq_priority(u_int irq, u_int pri)
+{
+	check_arg_irq(irq);
+	openpic_safe_writefield(&ISR[irq - open_pic_irq_offset]->Vector_Priority,
+				OPENPIC_PRIORITY_MASK,
+				pri << OPENPIC_PRIORITY_SHIFT);
+}
+
 /*
  * Initalize the interrupt source which will generate an NMI.
  * This raises the interrupt's priority from 8 to 9.
@@ -665,9 +679,7 @@ void __init
 openpic_init_nmi_irq(u_int irq)
 {
 	check_arg_irq(irq);
-	openpic_safe_writefield(&ISR[irq - open_pic_irq_offset]->Vector_Priority,
-				OPENPIC_PRIORITY_MASK,
-				9 << OPENPIC_PRIORITY_SHIFT);
+	openpic_set_irq_priority(irq, OPENPIC_PRIORITY_NMI);
 }
 
 /*

+ 121 - 4
arch/ppc64/kernel/kprobes.c

@@ -36,6 +36,8 @@
 #include <asm/kdebug.h>
 #include <asm/sstep.h>
 
+static DECLARE_MUTEX(kprobe_mutex);
+
 static struct kprobe *current_kprobe;
 static unsigned long kprobe_status, kprobe_saved_msr;
 static struct kprobe *kprobe_prev;
@@ -54,6 +56,15 @@ int arch_prepare_kprobe(struct kprobe *p)
 		printk("Cannot register a kprobe on rfid or mtmsrd\n");
 		ret = -EINVAL;
 	}
+
+	/* insn must be on a special executable page on ppc64 */
+	if (!ret) {
+		up(&kprobe_mutex);
+		p->ainsn.insn = get_insn_slot();
+		down(&kprobe_mutex);
+		if (!p->ainsn.insn)
+			ret = -ENOMEM;
+	}
 	return ret;
 }
 
@@ -79,16 +90,22 @@ void arch_disarm_kprobe(struct kprobe *p)
 
 void arch_remove_kprobe(struct kprobe *p)
 {
+	up(&kprobe_mutex);
+	free_insn_slot(p->ainsn.insn);
+	down(&kprobe_mutex);
 }
 
 static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
+	kprobe_opcode_t insn = *p->ainsn.insn;
+
 	regs->msr |= MSR_SE;
-	/*single step inline if it a breakpoint instruction*/
-	if (p->opcode == BREAKPOINT_INSTRUCTION)
+
+	/* single step inline if it is a trap variant */
+	if (IS_TW(insn) || IS_TD(insn) || IS_TWI(insn) || IS_TDI(insn))
 		regs->nip = (unsigned long)p->addr;
 	else
-		regs->nip = (unsigned long)&p->ainsn.insn;
+		regs->nip = (unsigned long)p->ainsn.insn;
 }
 
 static inline void save_previous_kprobe(void)
@@ -105,6 +122,23 @@ static inline void restore_previous_kprobe(void)
 	kprobe_saved_msr = kprobe_saved_msr_prev;
 }
 
+void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri;
+
+	if ((ri = get_free_rp_inst(rp)) != NULL) {
+		ri->rp = rp;
+		ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *)regs->link;
+
+		/* Replace the return addr with trampoline addr */
+		regs->link = (unsigned long)kretprobe_trampoline;
+		add_rp_inst(ri);
+	} else {
+		rp->nmissed++;
+	}
+}
+
 static inline int kprobe_handler(struct pt_regs *regs)
 {
 	struct kprobe *p;
@@ -194,6 +228,78 @@ no_kprobe:
 	return ret;
 }
 
+/*
+ * Function return probe trampoline:
+ * 	- init_kprobes() establishes a probepoint here
+ * 	- When the probed function returns, this probe
+ * 		causes the handlers to fire
+ */
+void kretprobe_trampoline_holder(void)
+{
+	asm volatile(".global kretprobe_trampoline\n"
+			"kretprobe_trampoline:\n"
+			"nop\n");
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+        struct kretprobe_instance *ri = NULL;
+        struct hlist_head *head;
+        struct hlist_node *node, *tmp;
+	unsigned long orig_ret_address = 0;
+	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+
+        head = kretprobe_inst_table_head(current);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more then one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+         *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+                if (ri->task != current)
+			/* another task is sharing our hash bucket */
+                        continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->nip = orig_ret_address;
+
+	unlock_kprobes();
+
+        /*
+         * By returning a non-zero value, we are telling
+         * kprobe_handler() that we have handled unlocking
+         * and re-enabling preemption.
+         */
+        return 1;
+}
+
 /*
  * Called after single-stepping.  p->addr is the address of the
  * instruction whose first byte has been replaced by the "breakpoint"
@@ -205,9 +311,10 @@ no_kprobe:
 static void resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
 	int ret;
+	unsigned int insn = *p->ainsn.insn;
 
 	regs->nip = (unsigned long)p->addr;
-	ret = emulate_step(regs, p->ainsn.insn[0]);
+	ret = emulate_step(regs, insn);
 	if (ret == 0)
 		regs->nip = (unsigned long)p->addr + 4;
 }
@@ -331,3 +438,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs));
 	return 1;
 }
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init(void)
+{
+	return register_kprobe(&trampoline_p);
+}

+ 1 - 0
arch/ppc64/kernel/ppc_ksyms.c

@@ -75,6 +75,7 @@ EXPORT_SYMBOL(giveup_fpu);
 EXPORT_SYMBOL(giveup_altivec);
 #endif
 EXPORT_SYMBOL(__flush_icache_range);
+EXPORT_SYMBOL(flush_dcache_range);
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_PPC_ISERIES

+ 4 - 0
arch/ppc64/kernel/process.c

@@ -36,6 +36,7 @@
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
 #include <linux/utsname.h>
+#include <linux/kprobes.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -307,6 +308,8 @@ void show_regs(struct pt_regs * regs)
 
 void exit_thread(void)
 {
+	kprobe_flush_task(current);
+
 #ifndef CONFIG_SMP
 	if (last_task_used_math == current)
 		last_task_used_math = NULL;
@@ -321,6 +324,7 @@ void flush_thread(void)
 {
 	struct thread_info *t = current_thread_info();
 
+	kprobe_flush_task(current);
 	if (t->flags & _TIF_ABI_PENDING)
 		t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT);
 

+ 1 - 0
arch/ppc64/kernel/time.c

@@ -91,6 +91,7 @@ unsigned long tb_to_xs;
 unsigned      tb_to_us;
 unsigned long processor_freq;
 DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL_GPL(rtc_lock);
 
 unsigned long tb_to_ns_scale;
 unsigned long tb_to_ns_shift;

+ 1 - 1
arch/sparc64/kernel/auxio.c

@@ -16,7 +16,7 @@
 #include <asm/ebus.h>
 #include <asm/auxio.h>
 
-/* This cannot be static, as it is referenced in entry.S */
+/* This cannot be static, as it is referenced in irq.c */
 void __iomem *auxio_register = NULL;
 
 enum auxio_type {

+ 4 - 112
arch/sparc64/kernel/entry.S

@@ -271,8 +271,9 @@ cplus_fptrap_insn_1:
 	fmuld		%f0, %f2, %f26
 	faddd		%f0, %f2, %f28
 	fmuld		%f0, %f2, %f30
+	membar		#Sync
 	b,pt		%xcc, fpdis_exit
-	 membar		#Sync
+	 nop
 2:	andcc		%g5, FPRS_DU, %g0
 	bne,pt		%icc, 3f
 	 fzero		%f32
@@ -301,8 +302,9 @@ cplus_fptrap_insn_2:
 	fmuld		%f32, %f34, %f58
 	faddd		%f32, %f34, %f60
 	fmuld		%f32, %f34, %f62
+	membar		#Sync
 	ba,pt		%xcc, fpdis_exit
-	 membar		#Sync
+	 nop
 3:	mov		SECONDARY_CONTEXT, %g3
 	add		%g6, TI_FPREGS, %g1
 	ldxa		[%g3] ASI_DMMU, %g5
@@ -699,116 +701,6 @@ utrap_ill:
 	ba,pt		%xcc, rtrap
 	 clr		%l6
 
-#ifdef CONFIG_BLK_DEV_FD
-	.globl		floppy_hardint
-floppy_hardint:
-	wr		%g0, (1 << 11), %clear_softint
-	sethi		%hi(doing_pdma), %g1
-	ld		[%g1 + %lo(doing_pdma)], %g2
-	brz,pn		%g2, floppy_dosoftint
-	 sethi		%hi(fdc_status), %g3
-	ldx		[%g3 + %lo(fdc_status)], %g3
-	sethi		%hi(pdma_vaddr), %g5
-	ldx		[%g5 + %lo(pdma_vaddr)], %g4
-	sethi		%hi(pdma_size), %g5
-	ldx		[%g5 + %lo(pdma_size)], %g5
-
-next_byte:
-	lduba		[%g3] ASI_PHYS_BYPASS_EC_E, %g7
-	andcc		%g7, 0x80, %g0
-	be,pn		%icc, floppy_fifo_emptied
-	 andcc		%g7, 0x20, %g0
-	be,pn		%icc, floppy_overrun
-	 andcc		%g7, 0x40, %g0
-	be,pn		%icc, floppy_write
-	 sub		%g5, 1, %g5
-
-	inc		%g3
-	lduba		[%g3] ASI_PHYS_BYPASS_EC_E, %g7
-	dec		%g3
-	orcc		%g0, %g5, %g0
-	stb		%g7, [%g4]
-	bne,pn		%xcc, next_byte
-	 add		%g4, 1, %g4
-
-	b,pt		%xcc, floppy_tdone
-	 nop
-
-floppy_write:
-	ldub		[%g4], %g7
-	orcc		%g0, %g5, %g0
-	inc		%g3
-	stba		%g7, [%g3] ASI_PHYS_BYPASS_EC_E
-	dec		%g3
-	bne,pn		%xcc, next_byte
-	 add		%g4, 1, %g4
-
-floppy_tdone:
-	sethi		%hi(pdma_vaddr), %g1
-	stx		%g4, [%g1 + %lo(pdma_vaddr)]
-	sethi		%hi(pdma_size), %g1
-	stx		%g5, [%g1 + %lo(pdma_size)]
-	sethi		%hi(auxio_register), %g1
-	ldx		[%g1 + %lo(auxio_register)], %g7
-	lduba		[%g7] ASI_PHYS_BYPASS_EC_E, %g5
-	or		%g5, AUXIO_AUX1_FTCNT, %g5
-/*	andn		%g5, AUXIO_AUX1_MASK, %g5 */
-	stba		%g5, [%g7] ASI_PHYS_BYPASS_EC_E
-	andn		%g5, AUXIO_AUX1_FTCNT, %g5
-/*	andn		%g5, AUXIO_AUX1_MASK, %g5 */
-
-	nop; nop;  nop; nop;  nop; nop;
-	nop; nop;  nop; nop;  nop; nop;
-
-	stba		%g5, [%g7] ASI_PHYS_BYPASS_EC_E
-	sethi		%hi(doing_pdma), %g1
-	b,pt		%xcc, floppy_dosoftint
-	 st		%g0, [%g1 + %lo(doing_pdma)]
-
-floppy_fifo_emptied:
-	sethi		%hi(pdma_vaddr), %g1
-	stx		%g4, [%g1 + %lo(pdma_vaddr)]
-	sethi		%hi(pdma_size), %g1
-	stx		%g5, [%g1 + %lo(pdma_size)]
-	sethi		%hi(irq_action), %g1
-	or		%g1, %lo(irq_action), %g1
-	ldx		[%g1 + (11 << 3)], %g3		! irqaction[floppy_irq]
-	ldx		[%g3 + 0x08], %g4		! action->flags>>48==ino
-	sethi		%hi(ivector_table), %g3
-	srlx		%g4, 48, %g4
-	or		%g3, %lo(ivector_table), %g3
-	sllx		%g4, 5, %g4
-	ldx		[%g3 + %g4], %g4		! &ivector_table[ino]
-	ldx		[%g4 + 0x10], %g4		! bucket->iclr
-	stwa		%g0, [%g4] ASI_PHYS_BYPASS_EC_E	! ICLR_IDLE
-	membar		#Sync				! probably not needed...
-	retry
-
-floppy_overrun:
-	sethi		%hi(pdma_vaddr), %g1
-	stx		%g4, [%g1 + %lo(pdma_vaddr)]
-	sethi		%hi(pdma_size), %g1
-	stx		%g5, [%g1 + %lo(pdma_size)]
-	sethi		%hi(doing_pdma), %g1
-	st		%g0, [%g1 + %lo(doing_pdma)]
-
-floppy_dosoftint:
-	rdpr		%pil, %g2
-	wrpr		%g0, 15, %pil
-	sethi		%hi(109f), %g7
-	b,pt		%xcc, etrap_irq
-109:	 or		%g7, %lo(109b), %g7
-
-	mov		11, %o0
-	mov		0, %o1
-	call		sparc_floppy_irq
-	 add		%sp, PTREGS_OFF, %o2
-
-	b,pt		%xcc, rtrap_irq
-	 nop
-
-#endif /* CONFIG_BLK_DEV_FD */
-
 	/* XXX Here is stuff we still need to write... -DaveM XXX */
 	.globl		netbsd_syscall
 netbsd_syscall:

+ 50 - 121
arch/sparc64/kernel/irq.c

@@ -37,6 +37,7 @@
 #include <asm/uaccess.h>
 #include <asm/cache.h>
 #include <asm/cpudata.h>
+#include <asm/auxio.h>
 
 #ifdef CONFIG_SMP
 static void distribute_irqs(void);
@@ -834,137 +835,65 @@ void handler_irq(int irq, struct pt_regs *regs)
 }
 
 #ifdef CONFIG_BLK_DEV_FD
-extern void floppy_interrupt(int irq, void *dev_cookie, struct pt_regs *regs);
+extern irqreturn_t floppy_interrupt(int, void *, struct pt_regs *);;
 
-void sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs)
-{
-	struct irqaction *action = *(irq + irq_action);
-	struct ino_bucket *bucket;
-	int cpu = smp_processor_id();
-
-	irq_enter();
-	kstat_this_cpu.irqs[irq]++;
-
-	*(irq_work(cpu, irq)) = 0;
-	bucket = get_ino_in_irqaction(action) + ivector_table;
-
-	bucket->flags |= IBF_INPROGRESS;
-
-	floppy_interrupt(irq, dev_cookie, regs);
-	upa_writel(ICLR_IDLE, bucket->iclr);
-
-	bucket->flags &= ~IBF_INPROGRESS;
-
-	irq_exit();
-}
-#endif
-
-/* The following assumes that the branch lies before the place we
- * are branching to.  This is the case for a trap vector...
- * You have been warned.
- */
-#define SPARC_BRANCH(dest_addr, inst_addr) \
-          (0x10800000 | ((((dest_addr)-(inst_addr))>>2)&0x3fffff))
-
-#define SPARC_NOP (0x01000000)
+/* XXX No easy way to include asm/floppy.h XXX */
+extern unsigned char *pdma_vaddr;
+extern unsigned long pdma_size;
+extern volatile int doing_pdma;
+extern unsigned long fdc_status;
 
-static void install_fast_irq(unsigned int cpu_irq,
-			     irqreturn_t (*handler)(int, void *, struct pt_regs *))
+irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs)
 {
-	extern unsigned long sparc64_ttable_tl0;
-	unsigned long ttent = (unsigned long) &sparc64_ttable_tl0;
-	unsigned int *insns;
-
-	ttent += 0x820;
-	ttent += (cpu_irq - 1) << 5;
-	insns = (unsigned int *) ttent;
-	insns[0] = SPARC_BRANCH(((unsigned long) handler),
-				((unsigned long)&insns[0]));
-	insns[1] = SPARC_NOP;
-	__asm__ __volatile__("membar #StoreStore; flush %0" : : "r" (ttent));
-}
-
-int request_fast_irq(unsigned int irq,
-		     irqreturn_t (*handler)(int, void *, struct pt_regs *),
-		     unsigned long irqflags, const char *name, void *dev_id)
-{
-	struct irqaction *action;
-	struct ino_bucket *bucket = __bucket(irq);
-	unsigned long flags;
-
-	/* No pil0 dummy buckets allowed here. */
-	if (bucket < &ivector_table[0] ||
-	    bucket >= &ivector_table[NUM_IVECS]) {
-		unsigned int *caller;
-
-		__asm__ __volatile__("mov %%i7, %0" : "=r" (caller));
-		printk(KERN_CRIT "request_fast_irq: Old style IRQ registry attempt "
-		       "from %p, irq %08x.\n", caller, irq);
-		return -EINVAL;
-	}	
-	
-	if (!handler)
-		return -EINVAL;
+	if (likely(doing_pdma)) {
+		void __iomem *stat = (void __iomem *) fdc_status;
+		unsigned char *vaddr = pdma_vaddr;
+		unsigned long size = pdma_size;
+		u8 val;
+
+		while (size) {
+			val = readb(stat);
+			if (unlikely(!(val & 0x80))) {
+				pdma_vaddr = vaddr;
+				pdma_size = size;
+				return IRQ_HANDLED;
+			}
+			if (unlikely(!(val & 0x20))) {
+				pdma_vaddr = vaddr;
+				pdma_size = size;
+				doing_pdma = 0;
+				goto main_interrupt;
+			}
+			if (val & 0x40) {
+				/* read */
+				*vaddr++ = readb(stat + 1);
+			} else {
+				unsigned char data = *vaddr++;
 
-	if ((bucket->pil == 0) || (bucket->pil == 14)) {
-		printk("request_fast_irq: Trying to register shared IRQ 0 or 14.\n");
-		return -EBUSY;
-	}
+				/* write */
+				writeb(data, stat + 1);
+			}
+			size--;
+		}
 
-	spin_lock_irqsave(&irq_action_lock, flags);
+		pdma_vaddr = vaddr;
+		pdma_size = size;
 
-	action = *(bucket->pil + irq_action);
-	if (action) {
-		if (action->flags & SA_SHIRQ)
-			panic("Trying to register fast irq when already shared.\n");
-		if (irqflags & SA_SHIRQ)
-			panic("Trying to register fast irq as shared.\n");
-		printk("request_fast_irq: Trying to register yet already owned.\n");
-		spin_unlock_irqrestore(&irq_action_lock, flags);
-		return -EBUSY;
-	}
+		/* Send Terminal Count pulse to floppy controller. */
+		val = readb(auxio_register);
+		val |= AUXIO_AUX1_FTCNT;
+		writeb(val, auxio_register);
+		val &= AUXIO_AUX1_FTCNT;
+		writeb(val, auxio_register);
 
-	/*
-	 * We do not check for SA_SAMPLE_RANDOM in this path. Neither do we
-	 * support smp intr affinity in this path.
-	 */
-	if (irqflags & SA_STATIC_ALLOC) {
-		if (static_irq_count < MAX_STATIC_ALLOC)
-			action = &static_irqaction[static_irq_count++];
-		else
-			printk("Request for IRQ%d (%s) SA_STATIC_ALLOC failed "
-			       "using kmalloc\n", bucket->pil, name);
-	}
-	if (action == NULL)
-		action = (struct irqaction *)kmalloc(sizeof(struct irqaction),
-						     GFP_ATOMIC);
-	if (!action) {
-		spin_unlock_irqrestore(&irq_action_lock, flags);
-		return -ENOMEM;
+		doing_pdma = 0;
 	}
-	install_fast_irq(bucket->pil, handler);
 
-	bucket->irq_info = action;
-	bucket->flags |= IBF_ACTIVE;
-
-	action->handler = handler;
-	action->flags = irqflags;
-	action->dev_id = NULL;
-	action->name = name;
-	action->next = NULL;
-	put_ino_in_irqaction(action, irq);
-	put_smpaff_in_irqaction(action, CPU_MASK_NONE);
-
-	*(bucket->pil + irq_action) = action;
-	enable_irq(irq);
-
-	spin_unlock_irqrestore(&irq_action_lock, flags);
-
-#ifdef CONFIG_SMP
-	distribute_irqs();
-#endif
-	return 0;
+main_interrupt:
+	return floppy_interrupt(irq, dev_cookie, regs);
 }
+EXPORT_SYMBOL(sparc_floppy_irq);
+#endif
 
 /* We really don't need these at all on the Sparc.  We only have
  * stubs here because they are exported to modules.

+ 8 - 4
arch/sparc64/kernel/semaphore.c

@@ -32,8 +32,9 @@ static __inline__ int __sem_update_count(struct semaphore *sem, int incr)
 "	add	%1, %4, %1\n"
 "	cas	[%3], %0, %1\n"
 "	cmp	%0, %1\n"
+"	membar	#StoreLoad | #StoreStore\n"
 "	bne,pn	%%icc, 1b\n"
-"	 membar #StoreLoad | #StoreStore\n"
+"	 nop\n"
 	: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
 	: "r" (&sem->count), "r" (incr), "m" (sem->count)
 	: "cc");
@@ -71,8 +72,9 @@ void up(struct semaphore *sem)
 "	cmp	%%g1, %%g7\n"
 "	bne,pn	%%icc, 1b\n"
 "	 addcc	%%g7, 1, %%g0\n"
+"	membar	#StoreLoad | #StoreStore\n"
 "	ble,pn	%%icc, 3f\n"
-"	 membar	#StoreLoad | #StoreStore\n"
+"	 nop\n"
 "2:\n"
 "	.subsection 2\n"
 "3:	mov	%0, %%g1\n"
@@ -128,8 +130,9 @@ void __sched down(struct semaphore *sem)
 "	cmp	%%g1, %%g7\n"
 "	bne,pn	%%icc, 1b\n"
 "	 cmp	%%g7, 1\n"
+"	membar	#StoreLoad | #StoreStore\n"
 "	bl,pn	%%icc, 3f\n"
-"	 membar	#StoreLoad | #StoreStore\n"
+"	 nop\n"
 "2:\n"
 "	.subsection 2\n"
 "3:	mov	%0, %%g1\n"
@@ -233,8 +236,9 @@ int __sched down_interruptible(struct semaphore *sem)
 "	cmp	%%g1, %%g7\n"
 "	bne,pn	%%icc, 1b\n"
 "	 cmp	%%g7, 1\n"
+"	membar	#StoreLoad | #StoreStore\n"
 "	bl,pn	%%icc, 3f\n"
-"	 membar	#StoreLoad | #StoreStore\n"
+"	 nop\n"
 "2:\n"
 "	.subsection 2\n"
 "3:	mov	%2, %%g1\n"

+ 0 - 1
arch/sparc64/kernel/sparc64_ksyms.c

@@ -227,7 +227,6 @@ EXPORT_SYMBOL(__flush_dcache_range);
 
 EXPORT_SYMBOL(mostek_lock);
 EXPORT_SYMBOL(mstk48t02_regs);
-EXPORT_SYMBOL(request_fast_irq);
 #ifdef CONFIG_SUN_AUXIO
 EXPORT_SYMBOL(auxio_set_led);
 EXPORT_SYMBOL(auxio_set_lte);

+ 2 - 1
arch/sparc64/kernel/trampoline.S

@@ -98,8 +98,9 @@ startup_continue:
 
 	sethi		%hi(prom_entry_lock), %g2
 1:	ldstub		[%g2 + %lo(prom_entry_lock)], %g1
+	membar		#StoreLoad | #StoreStore
 	brnz,pn		%g1, 1b
-	 membar		#StoreLoad | #StoreStore
+	 nop
 
 	sethi		%hi(p1275buf), %g2
 	or		%g2, %lo(p1275buf), %g2

+ 53 - 50
arch/sparc64/lib/U1memcpy.S

@@ -87,14 +87,17 @@
 #define LOOP_CHUNK3(src, dest, len, branch_dest)		\
 	MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
 
+#define DO_SYNC			membar	#Sync;
 #define STORE_SYNC(dest, fsrc)				\
 	EX_ST(STORE_BLK(%fsrc, %dest));			\
-	add			%dest, 0x40, %dest;
+	add			%dest, 0x40, %dest;	\
+	DO_SYNC
 
 #define STORE_JUMP(dest, fsrc, target)			\
 	EX_ST(STORE_BLK(%fsrc, %dest));			\
 	add			%dest, 0x40, %dest;	\
-	ba,pt			%xcc, target;
+	ba,pt			%xcc, target;		\
+	 nop;
 
 #define FINISH_VISCHUNK(dest, f0, f1, left)	\
 	subcc			%left, 8, %left;\
@@ -239,17 +242,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f0, %f2, %f48
 1:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
-	STORE_JUMP(o0, f48, 40f) membar #Sync
+	STORE_JUMP(o0, f48, 40f)
 2:	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
-	STORE_JUMP(o0, f48, 48f) membar #Sync
+	STORE_JUMP(o0, f48, 48f)
 3:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
-	STORE_JUMP(o0, f48, 56f) membar #Sync
+	STORE_JUMP(o0, f48, 56f)
 
 1:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -260,17 +263,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f2, %f4, %f48
 1:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
-	STORE_JUMP(o0, f48, 41f) membar #Sync
+	STORE_JUMP(o0, f48, 41f)
 2:	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
-	STORE_JUMP(o0, f48, 49f) membar #Sync
+	STORE_JUMP(o0, f48, 49f)
 3:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
-	STORE_JUMP(o0, f48, 57f) membar #Sync
+	STORE_JUMP(o0, f48, 57f)
 
 1:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -281,17 +284,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f4, %f6, %f48
 1:	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
-	STORE_JUMP(o0, f48, 42f) membar #Sync
+	STORE_JUMP(o0, f48, 42f)
 2:	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
-	STORE_JUMP(o0, f48, 50f) membar #Sync
+	STORE_JUMP(o0, f48, 50f)
 3:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
-	STORE_JUMP(o0, f48, 58f) membar #Sync
+	STORE_JUMP(o0, f48, 58f)
 
 1:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -302,17 +305,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f6, %f8, %f48
 1:	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
-	STORE_JUMP(o0, f48, 43f) membar #Sync
+	STORE_JUMP(o0, f48, 43f)
 2:	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
-	STORE_JUMP(o0, f48, 51f) membar #Sync
+	STORE_JUMP(o0, f48, 51f)
 3:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
-	STORE_JUMP(o0, f48, 59f) membar #Sync
+	STORE_JUMP(o0, f48, 59f)
 
 1:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -323,17 +326,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f8, %f10, %f48
 1:	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
-	STORE_JUMP(o0, f48, 44f) membar #Sync
+	STORE_JUMP(o0, f48, 44f)
 2:	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
-	STORE_JUMP(o0, f48, 52f) membar #Sync
+	STORE_JUMP(o0, f48, 52f)
 3:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
-	STORE_JUMP(o0, f48, 60f) membar #Sync
+	STORE_JUMP(o0, f48, 60f)
 
 1:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -344,17 +347,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f10, %f12, %f48
 1:	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
-	STORE_JUMP(o0, f48, 45f) membar #Sync
+	STORE_JUMP(o0, f48, 45f)
 2:	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
-	STORE_JUMP(o0, f48, 53f) membar #Sync
+	STORE_JUMP(o0, f48, 53f)
 3:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
-	STORE_JUMP(o0, f48, 61f) membar #Sync
+	STORE_JUMP(o0, f48, 61f)
 
 1:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -365,17 +368,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f12, %f14, %f48
 1:	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
-	STORE_JUMP(o0, f48, 46f) membar #Sync
+	STORE_JUMP(o0, f48, 46f)
 2:	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
-	STORE_JUMP(o0, f48, 54f) membar #Sync
+	STORE_JUMP(o0, f48, 54f)
 3:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
-	STORE_JUMP(o0, f48, 62f) membar #Sync
+	STORE_JUMP(o0, f48, 62f)
 
 1:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -386,17 +389,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f14, %f16, %f48
 1:	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
-	STORE_JUMP(o0, f48, 47f) membar #Sync
+	STORE_JUMP(o0, f48, 47f)
 2:	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
-	STORE_JUMP(o0, f48, 55f) membar #Sync
+	STORE_JUMP(o0, f48, 55f)
 3:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
-	STORE_JUMP(o0, f48, 63f) membar #Sync
+	STORE_JUMP(o0, f48, 63f)
 
 40:	FINISH_VISCHUNK(o0, f0,  f2,  g3)
 41:	FINISH_VISCHUNK(o0, f2,  f4,  g3)

+ 13 - 2
arch/sparc64/lib/VISsave.S

@@ -72,7 +72,11 @@ vis1:	ldub		[%g6 + TI_FPSAVED], %g3
 
 	stda		%f48, [%g3 + %g1] ASI_BLK_P
 5:	membar		#Sync
-	jmpl		%g7 + %g0, %g0
+	ba,pt		%xcc, 80f
+	 nop
+
+	.align		32
+80:	jmpl		%g7 + %g0, %g0
 	 nop
 
 6:	ldub		[%g3 + TI_FPSAVED], %o5
@@ -87,8 +91,11 @@ vis1:	ldub		[%g6 + TI_FPSAVED], %g3
 	stda		%f32, [%g2 + %g1] ASI_BLK_P
 	stda		%f48, [%g3 + %g1] ASI_BLK_P
 	membar		#Sync
-	jmpl		%g7 + %g0, %g0
+	ba,pt		%xcc, 80f
+	 nop
 
+	.align		32
+80:	jmpl		%g7 + %g0, %g0
 	 nop
 
 	.align		32
@@ -126,6 +133,10 @@ VISenterhalf:
 	stda		%f0, [%g2 + %g1] ASI_BLK_P
 	stda		%f16, [%g3 + %g1] ASI_BLK_P
 	membar		#Sync
+	ba,pt		%xcc, 4f
+	 nop
+
+	.align		32
 4:	and		%o5, FPRS_DU, %o5
 	jmpl		%g7 + %g0, %g0
 	 wr		%o5, FPRS_FEF, %fprs

+ 26 - 16
arch/sparc64/lib/atomic.S

@@ -7,18 +7,6 @@
 #include <linux/config.h>
 #include <asm/asi.h>
 
-	/* On SMP we need to use memory barriers to ensure
-	 * correct memory operation ordering, nop these out
-	 * for uniprocessor.
-	 */
-#ifdef CONFIG_SMP
-#define ATOMIC_PRE_BARRIER	membar #StoreLoad | #LoadLoad
-#define ATOMIC_POST_BARRIER	membar #StoreLoad | #StoreStore
-#else
-#define ATOMIC_PRE_BARRIER	nop
-#define ATOMIC_POST_BARRIER	nop
-#endif
-
 	.text
 
 	/* Two versions of the atomic routines, one that
@@ -52,6 +40,24 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */
 	 nop
 	.size	atomic_sub, .-atomic_sub
 
+	/* On SMP we need to use memory barriers to ensure
+	 * correct memory operation ordering, nop these out
+	 * for uniprocessor.
+	 */
+#ifdef CONFIG_SMP
+
+#define ATOMIC_PRE_BARRIER	membar #StoreLoad | #LoadLoad;
+#define ATOMIC_POST_BARRIER	\
+	ba,pt %xcc, 80b;	\
+	membar #StoreLoad | #StoreStore
+
+80:	retl
+	 nop
+#else
+#define ATOMIC_PRE_BARRIER
+#define ATOMIC_POST_BARRIER
+#endif
+
 	.globl	atomic_add_ret
 	.type	atomic_add_ret,#function
 atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
@@ -62,9 +68,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	cmp	%g1, %g7
 	bne,pn	%icc, 1b
 	 add	%g7, %o0, %g7
+	sra	%g7, 0, %o0
 	ATOMIC_POST_BARRIER
 	retl
-	 sra	%g7, 0, %o0
+	 nop
 	.size	atomic_add_ret, .-atomic_add_ret
 
 	.globl	atomic_sub_ret
@@ -77,9 +84,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
 	cmp	%g1, %g7
 	bne,pn	%icc, 1b
 	 sub	%g7, %o0, %g7
+	sra	%g7, 0, %o0
 	ATOMIC_POST_BARRIER
 	retl
-	 sra	%g7, 0, %o0
+	 nop
 	.size	atomic_sub_ret, .-atomic_sub_ret
 
 	.globl	atomic64_add
@@ -118,9 +126,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	cmp	%g1, %g7
 	bne,pn	%xcc, 1b
 	 add	%g7, %o0, %g7
+	mov	%g7, %o0
 	ATOMIC_POST_BARRIER
 	retl
-	 mov	%g7, %o0
+	 nop
 	.size	atomic64_add_ret, .-atomic64_add_ret
 
 	.globl	atomic64_sub_ret
@@ -133,7 +142,8 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
 	cmp	%g1, %g7
 	bne,pn	%xcc, 1b
 	 sub	%g7, %o0, %g7
+	mov	%g7, %o0
 	ATOMIC_POST_BARRIER
 	retl
-	 mov	%g7, %o0
+	 nop
 	.size	atomic64_sub_ret, .-atomic64_sub_ret

+ 20 - 11
arch/sparc64/lib/bitops.S

@@ -7,20 +7,26 @@
 #include <linux/config.h>
 #include <asm/asi.h>
 
+	.text
+
 	/* On SMP we need to use memory barriers to ensure
 	 * correct memory operation ordering, nop these out
 	 * for uniprocessor.
 	 */
+
 #ifdef CONFIG_SMP
 #define BITOP_PRE_BARRIER	membar #StoreLoad | #LoadLoad
-#define BITOP_POST_BARRIER	membar #StoreLoad | #StoreStore
+#define BITOP_POST_BARRIER	\
+	ba,pt	%xcc, 80b;	\
+	membar #StoreLoad | #StoreStore
+
+80:	retl
+	 nop
 #else
-#define BITOP_PRE_BARRIER	nop
-#define BITOP_POST_BARRIER	nop
+#define BITOP_PRE_BARRIER
+#define BITOP_POST_BARRIER
 #endif
 
-	.text
-
 	.globl	test_and_set_bit
 	.type	test_and_set_bit,#function
 test_and_set_bit:	/* %o0=nr, %o1=addr */
@@ -37,10 +43,11 @@ test_and_set_bit:	/* %o0=nr, %o1=addr */
 	cmp	%g7, %g1
 	bne,pn	%xcc, 1b
 	 and	%g7, %o2, %g2
-	BITOP_POST_BARRIER
 	clr	%o0
+	movrne	%g2, 1, %o0
+	BITOP_POST_BARRIER
 	retl
-	 movrne	%g2, 1, %o0
+	 nop
 	.size	test_and_set_bit, .-test_and_set_bit
 
 	.globl	test_and_clear_bit
@@ -59,10 +66,11 @@ test_and_clear_bit:	/* %o0=nr, %o1=addr */
 	cmp	%g7, %g1
 	bne,pn	%xcc, 1b
 	 and	%g7, %o2, %g2
-	BITOP_POST_BARRIER
 	clr	%o0
+	movrne	%g2, 1, %o0
+	BITOP_POST_BARRIER
 	retl
-	 movrne	%g2, 1, %o0
+	 nop
 	.size	test_and_clear_bit, .-test_and_clear_bit
 
 	.globl	test_and_change_bit
@@ -81,10 +89,11 @@ test_and_change_bit:	/* %o0=nr, %o1=addr */
 	cmp	%g7, %g1
 	bne,pn	%xcc, 1b
 	 and	%g7, %o2, %g2
-	BITOP_POST_BARRIER
 	clr	%o0
+	movrne	%g2, 1, %o0
+	BITOP_POST_BARRIER
 	retl
-	 movrne	%g2, 1, %o0
+	 nop
 	.size	test_and_change_bit, .-test_and_change_bit
 
 	.globl	set_bit

+ 4 - 2
arch/sparc64/lib/debuglocks.c

@@ -252,8 +252,9 @@ wlock_again:
 "		andn	%%g1, %%g3, %%g7\n"
 "		casx	[%0], %%g1, %%g7\n"
 "		cmp	%%g1, %%g7\n"
+"		membar	#StoreLoad | #StoreStore\n"
 "		bne,pn	%%xcc, 1b\n"
-"		 membar	#StoreLoad | #StoreStore"
+"		 nop"
 		: /* no outputs */
 		: "r" (&(rw->lock))
 		: "g3", "g1", "g7", "cc", "memory");
@@ -351,8 +352,9 @@ int _do_write_trylock (rwlock_t *rw, char *str)
 "		andn	%%g1, %%g3, %%g7\n"
 "		casx	[%0], %%g1, %%g7\n"
 "		cmp	%%g1, %%g7\n"
+"		membar	#StoreLoad | #StoreStore\n"
 "		bne,pn	%%xcc, 1b\n"
-"		 membar	#StoreLoad | #StoreStore"
+"		 nop"
 		: /* no outputs */
 		: "r" (&(rw->lock))
 		: "g3", "g1", "g7", "cc", "memory");

+ 4 - 2
arch/sparc64/lib/dec_and_lock.S

@@ -48,8 +48,9 @@ start_to_zero:
 #endif
 to_zero:
 	ldstub	[%o1], %g3
+	membar	#StoreLoad | #StoreStore
 	brnz,pn	%g3, spin_on_lock
-	 membar	#StoreLoad | #StoreStore
+	 nop
 loop2:	cas	[%o0], %g2, %g7		/* ASSERT(g7 == 0) */
 	cmp	%g2, %g7
 
@@ -71,8 +72,9 @@ loop2:	cas	[%o0], %g2, %g7		/* ASSERT(g7 == 0) */
 	 nop
 spin_on_lock:
 	ldub	[%o1], %g3
+	membar	#LoadLoad
 	brnz,pt	%g3, spin_on_lock
-	 membar	#LoadLoad
+	 nop
 	ba,pt	%xcc, to_zero
 	 nop
 	nop

+ 10 - 5
arch/sparc64/lib/rwsem.S

@@ -17,8 +17,9 @@ __down_read:
 	bne,pn		%icc, 1b
 	 add		%g7, 1, %g7
 	cmp		%g7, 0
+	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
-	 membar		#StoreLoad | #StoreStore
+	 nop
 2:
 	retl
 	 nop
@@ -57,8 +58,9 @@ __down_write:
 	cmp		%g3, %g7
 	bne,pn		%icc, 1b
 	 cmp		%g7, 0
+	membar		#StoreLoad | #StoreStore
 	bne,pn		%icc, 3f
-	 membar		#StoreLoad | #StoreStore
+	 nop
 2:	retl
 	 nop
 3:
@@ -97,8 +99,9 @@ __up_read:
 	cmp		%g1, %g7
 	bne,pn		%icc, 1b
 	 cmp		%g7, 0
+	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
-	 membar		#StoreLoad | #StoreStore
+	 nop
 2:	retl
 	 nop
 3:	sethi		%hi(RWSEM_ACTIVE_MASK), %g1
@@ -126,8 +129,9 @@ __up_write:
 	bne,pn		%icc, 1b
 	 sub		%g7, %g1, %g7
 	cmp		%g7, 0
+	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
-	 membar		#StoreLoad | #StoreStore
+	 nop
 2:
 	retl
 	 nop
@@ -151,8 +155,9 @@ __downgrade_write:
 	bne,pn		%icc, 1b
 	 sub		%g7, %g1, %g7
 	cmp		%g7, 0
+	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
-	 membar		#StoreLoad | #StoreStore
+	 nop
 2:
 	retl
 	 nop

+ 4 - 2
arch/sparc64/mm/init.c

@@ -136,8 +136,9 @@ static __inline__ void set_dcache_dirty(struct page *page, int this_cpu)
 			     "or	%%g1, %0, %%g1\n\t"
 			     "casx	[%2], %%g7, %%g1\n\t"
 			     "cmp	%%g7, %%g1\n\t"
+			     "membar	#StoreLoad | #StoreStore\n\t"
 			     "bne,pn	%%xcc, 1b\n\t"
-			     " membar	#StoreLoad | #StoreStore"
+			     " nop"
 			     : /* no outputs */
 			     : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags)
 			     : "g1", "g7");
@@ -157,8 +158,9 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
 			     " andn	%%g7, %1, %%g1\n\t"
 			     "casx	[%2], %%g7, %%g1\n\t"
 			     "cmp	%%g7, %%g1\n\t"
+			     "membar	#StoreLoad | #StoreStore\n\t"
 			     "bne,pn	%%xcc, 1b\n\t"
-			     " membar	#StoreLoad | #StoreStore\n"
+			     " nop\n"
 			     "2:"
 			     : /* no outputs */
 			     : "r" (cpu), "r" (mask), "r" (&page->flags),

+ 2 - 1
arch/sparc64/mm/ultra.S

@@ -266,8 +266,9 @@ __cheetah_flush_tlb_pending:	/* 22 insns */
 	 andn		%o3, 1, %o3
 	stxa		%g0, [%o3] ASI_IMMU_DEMAP
 2:	stxa		%g0, [%o3] ASI_DMMU_DEMAP	
+	membar		#Sync
 	brnz,pt		%o1, 1b
-	 membar		#Sync
+	 nop
 	stxa		%g2, [%o4] ASI_DMMU
 	flush		%g6
 	wrpr		%g0, 0, %tl

+ 66 - 170
arch/x86_64/kernel/kprobes.c

@@ -38,7 +38,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/preempt.h>
-#include <linux/moduleloader.h>
+
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/kdebug.h>
@@ -51,8 +51,6 @@ static struct kprobe *kprobe_prev;
 static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev;
 static struct pt_regs jprobe_saved_regs;
 static long *jprobe_saved_rsp;
-static kprobe_opcode_t *get_insn_slot(void);
-static void free_insn_slot(kprobe_opcode_t *slot);
 void jprobe_return_end(void);
 
 /* copy of the kernel stack at the probe fire time */
@@ -274,48 +272,23 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 		regs->rip = (unsigned long)p->ainsn.insn;
 }
 
-struct task_struct  *arch_get_kprobe_task(void *ptr)
-{
-	return ((struct thread_info *) (((unsigned long) ptr) &
-					(~(THREAD_SIZE -1))))->task;
-}
-
 void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
 {
 	unsigned long *sara = (unsigned long *)regs->rsp;
-	struct kretprobe_instance *ri;
-	static void *orig_ret_addr;
+        struct kretprobe_instance *ri;
+
+        if ((ri = get_free_rp_inst(rp)) != NULL) {
+                ri->rp = rp;
+                ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *) *sara;
 
-	/*
-	 * Save the return address when the return probe hits
-	 * the first time, and use it to populate the (krprobe
-	 * instance)->ret_addr for subsequent return probes at
-	 * the same addrress since stack address would have
-	 * the kretprobe_trampoline by then.
-	 */
-	if (((void*) *sara) != kretprobe_trampoline)
-		orig_ret_addr = (void*) *sara;
-
-	if ((ri = get_free_rp_inst(rp)) != NULL) {
-		ri->rp = rp;
-		ri->stack_addr = sara;
-		ri->ret_addr = orig_ret_addr;
-		add_rp_inst(ri);
 		/* Replace the return addr with trampoline addr */
 		*sara = (unsigned long) &kretprobe_trampoline;
-	} else {
-		rp->nmissed++;
-	}
-}
 
-void arch_kprobe_flush_task(struct task_struct *tk)
-{
-	struct kretprobe_instance *ri;
-	while ((ri = get_rp_inst_tsk(tk)) != NULL) {
-		*((unsigned long *)(ri->stack_addr)) =
-					(unsigned long) ri->ret_addr;
-		recycle_rp_inst(ri);
-	}
+                add_rp_inst(ri);
+        } else {
+                rp->nmissed++;
+        }
 }
 
 /*
@@ -428,36 +401,59 @@ no_kprobe:
  */
 int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
-	struct task_struct *tsk;
-	struct kretprobe_instance *ri;
-	struct hlist_head *head;
-	struct hlist_node *node;
-	unsigned long *sara = (unsigned long *)regs->rsp - 1;
-
-	tsk = arch_get_kprobe_task(sara);
-	head = kretprobe_inst_table_head(tsk);
-
-	hlist_for_each_entry(ri, node, head, hlist) {
-		if (ri->stack_addr == sara && ri->rp) {
-			if (ri->rp->handler)
-				ri->rp->handler(ri, regs);
-		}
-	}
-	return 0;
-}
+        struct kretprobe_instance *ri = NULL;
+        struct hlist_head *head;
+        struct hlist_node *node, *tmp;
+	unsigned long orig_ret_address = 0;
+	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
-void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs,
-						unsigned long flags)
-{
-	struct kretprobe_instance *ri;
-	/* RA already popped */
-	unsigned long *sara = ((unsigned long *)regs->rsp) - 1;
+        head = kretprobe_inst_table_head(current);
 
-	while ((ri = get_rp_inst(sara))) {
-		regs->rip = (unsigned long)ri->ret_addr;
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more then one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+         *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+                if (ri->task != current)
+			/* another task is sharing our hash bucket */
+                        continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
 		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
 	}
-	regs->eflags &= ~TF_MASK;
+
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->rip = orig_ret_address;
+
+	unlock_kprobes();
+	preempt_enable_no_resched();
+
+        /*
+         * By returning a non-zero value, we are telling
+         * kprobe_handler() that we have handled unlocking
+         * and re-enabling preemption.
+         */
+        return 1;
 }
 
 /*
@@ -550,8 +546,7 @@ int post_kprobe_handler(struct pt_regs *regs)
 		current_kprobe->post_handler(current_kprobe, regs, 0);
 	}
 
-	if (current_kprobe->post_handler != trampoline_post_handler)
-		resume_execution(current_kprobe, regs);
+	resume_execution(current_kprobe, regs);
 	regs->eflags |= kprobe_saved_rflags;
 
 	/* Restore the original saved kprobes variables and continue. */
@@ -682,111 +677,12 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
-/*
- * kprobe->ainsn.insn points to the copy of the instruction to be single-stepped.
- * By default on x86_64, pages we get from kmalloc or vmalloc are not
- * executable.  Single-stepping an instruction on such a page yields an
- * oops.  So instead of storing the instruction copies in their respective
- * kprobe objects, we allocate a page, map it executable, and store all the
- * instruction copies there.  (We can allocate additional pages if somebody
- * inserts a huge number of probes.)  Each page can hold up to INSNS_PER_PAGE
- * instruction slots, each of which is MAX_INSN_SIZE*sizeof(kprobe_opcode_t)
- * bytes.
- */
-#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE*sizeof(kprobe_opcode_t)))
-struct kprobe_insn_page {
-	struct hlist_node hlist;
-	kprobe_opcode_t *insns;		/* page of instruction slots */
-	char slot_used[INSNS_PER_PAGE];
-	int nused;
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
 };
 
-static struct hlist_head kprobe_insn_pages;
-
-/**
- * get_insn_slot() - Find a slot on an executable page for an instruction.
- * We allocate an executable page if there's no room on existing ones.
- */
-static kprobe_opcode_t *get_insn_slot(void)
-{
-	struct kprobe_insn_page *kip;
-	struct hlist_node *pos;
-
-	hlist_for_each(pos, &kprobe_insn_pages) {
-		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
-		if (kip->nused < INSNS_PER_PAGE) {
-			int i;
-			for (i = 0; i < INSNS_PER_PAGE; i++) {
-				if (!kip->slot_used[i]) {
-					kip->slot_used[i] = 1;
-					kip->nused++;
-					return kip->insns + (i*MAX_INSN_SIZE);
-				}
-			}
-			/* Surprise!  No unused slots.  Fix kip->nused. */
-			kip->nused = INSNS_PER_PAGE;
-		}
-	}
-
-	/* All out of space.  Need to allocate a new page. Use slot 0.*/
-	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
-	if (!kip) {
-		return NULL;
-	}
-
-	/*
-	 * For the %rip-relative displacement fixups to be doable, we
-	 * need our instruction copy to be within +/- 2GB of any data it
-	 * might access via %rip.  That is, within 2GB of where the
-	 * kernel image and loaded module images reside.  So we allocate
-	 * a page in the module loading area.
-	 */
-	kip->insns = module_alloc(PAGE_SIZE);
-	if (!kip->insns) {
-		kfree(kip);
-		return NULL;
-	}
-	INIT_HLIST_NODE(&kip->hlist);
-	hlist_add_head(&kip->hlist, &kprobe_insn_pages);
-	memset(kip->slot_used, 0, INSNS_PER_PAGE);
-	kip->slot_used[0] = 1;
-	kip->nused = 1;
-	return kip->insns;
-}
-
-/**
- * free_insn_slot() - Free instruction slot obtained from get_insn_slot().
- */
-static void free_insn_slot(kprobe_opcode_t *slot)
+int __init arch_init(void)
 {
-	struct kprobe_insn_page *kip;
-	struct hlist_node *pos;
-
-	hlist_for_each(pos, &kprobe_insn_pages) {
-		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
-		if (kip->insns <= slot
-		    && slot < kip->insns+(INSNS_PER_PAGE*MAX_INSN_SIZE)) {
-			int i = (slot - kip->insns) / MAX_INSN_SIZE;
-			kip->slot_used[i] = 0;
-			kip->nused--;
-			if (kip->nused == 0) {
-				/*
-				 * Page is no longer in use.  Free it unless
-				 * it's the last one.  We keep the last one
-				 * so as not to have to set it up again the
-				 * next time somebody inserts a probe.
-				 */
-				hlist_del(&kip->hlist);
-				if (hlist_empty(&kprobe_insn_pages)) {
-					INIT_HLIST_NODE(&kip->hlist);
-					hlist_add_head(&kip->hlist,
-						&kprobe_insn_pages);
-				} else {
-					module_free(NULL, kip->insns);
-					kfree(kip);
-				}
-			}
-			return;
-		}
-	}
+	return register_kprobe(&trampoline_p);
 }

+ 29 - 0
arch/x86_64/kernel/process.c

@@ -481,6 +481,33 @@ out:
 	return err;
 }
 
+/*
+ * This function selects if the context switch from prev to next
+ * has to tweak the TSC disable bit in the cr4.
+ */
+static inline void disable_tsc(struct task_struct *prev_p,
+			       struct task_struct *next_p)
+{
+	struct thread_info *prev, *next;
+
+	/*
+	 * gcc should eliminate the ->thread_info dereference if
+	 * has_secure_computing returns 0 at compile time (SECCOMP=n).
+	 */
+	prev = prev_p->thread_info;
+	next = next_p->thread_info;
+
+	if (has_secure_computing(prev) || has_secure_computing(next)) {
+		/* slow path here */
+		if (has_secure_computing(prev) &&
+		    !has_secure_computing(next)) {
+			write_cr4(read_cr4() & ~X86_CR4_TSD);
+		} else if (!has_secure_computing(prev) &&
+			   has_secure_computing(next))
+			write_cr4(read_cr4() | X86_CR4_TSD);
+	}
+}
+
 /*
  * This special macro can be used to load a debugging register
  */
@@ -599,6 +626,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *
 		}
 	}
 
+	disable_tsc(prev_p, next_p);
+
 	return prev_p;
 }
 

+ 3 - 2
drivers/block/as-iosched.c

@@ -1806,7 +1806,8 @@ static void as_put_request(request_queue_t *q, struct request *rq)
 	rq->elevator_private = NULL;
 }
 
-static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+static int as_set_request(request_queue_t *q, struct request *rq,
+			  struct bio *bio, int gfp_mask)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
@@ -1827,7 +1828,7 @@ static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
 	return 1;
 }
 
-static int as_may_queue(request_queue_t *q, int rw)
+static int as_may_queue(request_queue_t *q, int rw, struct bio *bio)
 {
 	int ret = ELV_MQUEUE_MAY;
 	struct as_data *ad = q->elevator->elevator_data;

+ 7 - 10
drivers/block/cciss.c

@@ -1,6 +1,6 @@
 /*
  *    Disk Array driver for HP SA 5xxx and 6xxx Controllers
- *    Copyright 2000, 2002 Hewlett-Packard Development Company, L.P.
+ *    Copyright 2000, 2005 Hewlett-Packard Development Company, L.P.
  *
  *    This program is free software; you can redistribute it and/or modify
  *    it under the terms of the GNU General Public License as published by
@@ -54,7 +54,7 @@
 MODULE_AUTHOR("Hewlett-Packard Company");
 MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 2.6.6");
 MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
-			" SA6i P600 P800 E400");
+			" SA6i P600 P800 E400 E300");
 MODULE_LICENSE("GPL");
 
 #include "cciss_cmd.h"
@@ -85,8 +85,10 @@ static const struct pci_device_id cciss_pci_device_id[] = {
 		0x103C, 0x3225, 0, 0, 0},
 	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSB,
 		0x103c, 0x3223, 0, 0, 0},
-	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSB,
+	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC,
 		0x103c, 0x3231, 0, 0, 0},
+	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC,
+		0x103c, 0x3233, 0, 0, 0},
 	{0,}
 };
 MODULE_DEVICE_TABLE(pci, cciss_pci_device_id);
@@ -110,6 +112,7 @@ static struct board_type products[] = {
 	{ 0x3225103C, "Smart Array P600", &SA5_access},
 	{ 0x3223103C, "Smart Array P800", &SA5_access},
 	{ 0x3231103C, "Smart Array E400", &SA5_access},
+	{ 0x3233103C, "Smart Array E300", &SA5_access},
 };
 
 /* How long to wait (in millesconds) for board to go into simple mode */
@@ -635,6 +638,7 @@ static int cciss_ioctl(struct inode *inode, struct file *filep,
 		cciss_pci_info_struct pciinfo;
 
 		if (!arg) return -EINVAL;
+		pciinfo.domain = pci_domain_nr(host->pdev->bus);
 		pciinfo.bus = host->pdev->bus->number;
 		pciinfo.dev_fn = host->pdev->devfn;
 		pciinfo.board_id = host->board_id;
@@ -787,13 +791,6 @@ static int cciss_ioctl(struct inode *inode, struct file *filep,
  		luninfo.LunID = drv->LunID;
  		luninfo.num_opens = drv->usage_count;
  		luninfo.num_parts = 0;
- 		/* count partitions 1 to 15 with sizes > 0 */
- 		for (i = 0; i < MAX_PART - 1; i++) {
-			if (!disk->part[i])
-				continue;
-			if (disk->part[i]->nr_sects != 0)
-				luninfo.num_parts++;
-		}
  		if (copy_to_user(argp, &luninfo,
  				sizeof(LogvolInfo_struct)))
  			return -EFAULT;

文件差異過大導致無法顯示
+ 691 - 369
drivers/block/cfq-iosched.c


+ 2 - 1
drivers/block/deadline-iosched.c

@@ -760,7 +760,8 @@ static void deadline_put_request(request_queue_t *q, struct request *rq)
 }
 
 static int
-deadline_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
+		     int gfp_mask)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq;

+ 5 - 4
drivers/block/elevator.c

@@ -486,12 +486,13 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
 	return NULL;
 }
 
-int elv_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
+		    int gfp_mask)
 {
 	elevator_t *e = q->elevator;
 
 	if (e->ops->elevator_set_req_fn)
-		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
+		return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
 
 	rq->elevator_private = NULL;
 	return 0;
@@ -505,12 +506,12 @@ void elv_put_request(request_queue_t *q, struct request *rq)
 		e->ops->elevator_put_req_fn(q, rq);
 }
 
-int elv_may_queue(request_queue_t *q, int rw)
+int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
 {
 	elevator_t *e = q->elevator;
 
 	if (e->ops->elevator_may_queue_fn)
-		return e->ops->elevator_may_queue_fn(q, rw);
+		return e->ops->elevator_may_queue_fn(q, rw, bio);
 
 	return ELV_MQUEUE_MAY;
 }

+ 36 - 23
drivers/block/ll_rw_blk.c

@@ -276,6 +276,7 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 	rq->errors = 0;
 	rq->rq_status = RQ_ACTIVE;
 	rq->bio = rq->biotail = NULL;
+	rq->ioprio = 0;
 	rq->buffer = NULL;
 	rq->ref_count = 1;
 	rq->q = q;
@@ -1442,11 +1443,7 @@ void __generic_unplug_device(request_queue_t *q)
 	if (!blk_remove_plug(q))
 		return;
 
-	/*
-	 * was plugged, fire request_fn if queue has stuff to do
-	 */
-	if (elv_next_request(q))
-		q->request_fn(q);
+	q->request_fn(q);
 }
 EXPORT_SYMBOL(__generic_unplug_device);
 
@@ -1776,8 +1773,8 @@ static inline void blk_free_request(request_queue_t *q, struct request *rq)
 	mempool_free(rq, q->rq.rq_pool);
 }
 
-static inline struct request *blk_alloc_request(request_queue_t *q, int rw,
-						int gfp_mask)
+static inline struct request *
+blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -1790,7 +1787,7 @@ static inline struct request *blk_alloc_request(request_queue_t *q, int rw,
 	 */
 	rq->flags = rw;
 
-	if (!elv_set_request(q, rq, gfp_mask))
+	if (!elv_set_request(q, rq, bio, gfp_mask))
 		return rq;
 
 	mempool_free(rq, q->rq.rq_pool);
@@ -1872,7 +1869,8 @@ static void freed_request(request_queue_t *q, int rw)
 /*
  * Get a free request, queue_lock must not be held
  */
-static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
+static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
+				   int gfp_mask)
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
@@ -1895,7 +1893,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 		}
 	}
 
-	switch (elv_may_queue(q, rw)) {
+	switch (elv_may_queue(q, rw, bio)) {
 		case ELV_MQUEUE_NO:
 			goto rq_starved;
 		case ELV_MQUEUE_MAY:
@@ -1920,7 +1918,7 @@ get_rq:
 		set_queue_congested(q, rw);
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw, gfp_mask);
+	rq = blk_alloc_request(q, rw, bio, gfp_mask);
 	if (!rq) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
@@ -1961,7 +1959,8 @@ out:
  * No available requests for this queue, unplug the device and wait for some
  * requests to become available.
  */
-static struct request *get_request_wait(request_queue_t *q, int rw)
+static struct request *get_request_wait(request_queue_t *q, int rw,
+					struct bio *bio)
 {
 	DEFINE_WAIT(wait);
 	struct request *rq;
@@ -1972,7 +1971,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw)
 		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
 				TASK_UNINTERRUPTIBLE);
 
-		rq = get_request(q, rw, GFP_NOIO);
+		rq = get_request(q, rw, bio, GFP_NOIO);
 
 		if (!rq) {
 			struct io_context *ioc;
@@ -2003,9 +2002,9 @@ struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask)
 	BUG_ON(rw != READ && rw != WRITE);
 
 	if (gfp_mask & __GFP_WAIT)
-		rq = get_request_wait(q, rw);
+		rq = get_request_wait(q, rw, NULL);
 	else
-		rq = get_request(q, rw, gfp_mask);
+		rq = get_request(q, rw, NULL, gfp_mask);
 
 	return rq;
 }
@@ -2333,7 +2332,6 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
 		return;
 
 	req->rq_status = RQ_INACTIVE;
-	req->q = NULL;
 	req->rl = NULL;
 
 	/*
@@ -2462,6 +2460,8 @@ static int attempt_merge(request_queue_t *q, struct request *req,
 		req->rq_disk->in_flight--;
 	}
 
+	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
+
 	__blk_put_request(q, next);
 	return 1;
 }
@@ -2514,11 +2514,13 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 {
 	struct request *req, *freereq = NULL;
 	int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
+	unsigned short prio;
 	sector_t sector;
 
 	sector = bio->bi_sector;
 	nr_sectors = bio_sectors(bio);
 	cur_nr_sectors = bio_cur_sectors(bio);
+	prio = bio_prio(bio);
 
 	rw = bio_data_dir(bio);
 	sync = bio_sync(bio);
@@ -2559,6 +2561,7 @@ again:
 			req->biotail->bi_next = bio;
 			req->biotail = bio;
 			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
+			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_back_merge(q, req))
 				elv_merged_request(q, req);
@@ -2583,6 +2586,7 @@ again:
 			req->hard_cur_sectors = cur_nr_sectors;
 			req->sector = req->hard_sector = sector;
 			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
+			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_front_merge(q, req))
 				elv_merged_request(q, req);
@@ -2610,7 +2614,7 @@ get_rq:
 		freereq = NULL;
 	} else {
 		spin_unlock_irq(q->queue_lock);
-		if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) {
+		if ((freereq = get_request(q, rw, bio, GFP_ATOMIC)) == NULL) {
 			/*
 			 * READA bit set
 			 */
@@ -2618,7 +2622,7 @@ get_rq:
 			if (bio_rw_ahead(bio))
 				goto end_io;
 	
-			freereq = get_request_wait(q, rw);
+			freereq = get_request_wait(q, rw, bio);
 		}
 		goto again;
 	}
@@ -2646,6 +2650,7 @@ get_rq:
 	req->buffer = bio_data(bio);	/* see ->buffer comment above */
 	req->waiting = NULL;
 	req->bio = req->biotail = bio;
+	req->ioprio = prio;
 	req->rq_disk = bio->bi_bdev->bd_disk;
 	req->start_time = jiffies;
 
@@ -2674,7 +2679,7 @@ static inline void blk_partition_remap(struct bio *bio)
 	if (bdev != bdev->bd_contains) {
 		struct hd_struct *p = bdev->bd_part;
 
-		switch (bio->bi_rw) {
+		switch (bio_data_dir(bio)) {
 		case READ:
 			p->read_sectors += bio_sectors(bio);
 			p->reads++;
@@ -2693,6 +2698,7 @@ void blk_finish_queue_drain(request_queue_t *q)
 {
 	struct request_list *rl = &q->rq;
 	struct request *rq;
+	int requeued = 0;
 
 	spin_lock_irq(q->queue_lock);
 	clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
@@ -2701,9 +2707,13 @@ void blk_finish_queue_drain(request_queue_t *q)
 		rq = list_entry_rq(q->drain_list.next);
 
 		list_del_init(&rq->queuelist);
-		__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
+		elv_requeue_request(q, rq);
+		requeued++;
 	}
 
+	if (requeued)
+		q->request_fn(q);
+
 	spin_unlock_irq(q->queue_lock);
 
 	wake_up(&rl->wait[0]);
@@ -2900,7 +2910,7 @@ void submit_bio(int rw, struct bio *bio)
 
 	BIO_BUG_ON(!bio->bi_size);
 	BIO_BUG_ON(!bio->bi_io_vec);
-	bio->bi_rw = rw;
+	bio->bi_rw |= rw;
 	if (rw & WRITE)
 		mod_page_state(pgpgout, count);
 	else
@@ -3257,8 +3267,11 @@ void exit_io_context(void)
 	struct io_context *ioc;
 
 	local_irq_save(flags);
+	task_lock(current);
 	ioc = current->io_context;
 	current->io_context = NULL;
+	ioc->task = NULL;
+	task_unlock(current);
 	local_irq_restore(flags);
 
 	if (ioc->aic && ioc->aic->exit)
@@ -3293,12 +3306,12 @@ struct io_context *get_io_context(int gfp_flags)
 	ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
 	if (ret) {
 		atomic_set(&ret->refcount, 1);
-		ret->pid = tsk->pid;
+		ret->task = current;
+		ret->set_ioprio = NULL;
 		ret->last_waited = jiffies; /* doesn't matter... */
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->aic = NULL;
 		ret->cic = NULL;
-		spin_lock_init(&ret->lock);
 
 		local_irq_save(flags);
 

+ 9 - 1
drivers/block/swim3.c

@@ -253,7 +253,7 @@ static int floppy_revalidate(struct gendisk *disk);
 static int swim3_add_device(struct device_node *swims);
 int swim3_init(void);
 
-#ifndef CONFIG_PMAC_PBOOK
+#ifndef CONFIG_PMAC_MEDIABAY
 #define check_media_bay(which, what)	1
 #endif
 
@@ -297,9 +297,11 @@ static void do_fd_request(request_queue_t * q)
 	int i;
 	for(i=0;i<floppy_count;i++)
 	{
+#ifdef CONFIG_PMAC_MEDIABAY
 		if (floppy_states[i].media_bay &&
 			check_media_bay(floppy_states[i].media_bay, MB_FD))
 			continue;
+#endif /* CONFIG_PMAC_MEDIABAY */
 		start_request(&floppy_states[i]);
 	}
 	sti();
@@ -856,8 +858,10 @@ static int floppy_ioctl(struct inode *inode, struct file *filp,
 	if ((cmd & 0x80) && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+#ifdef CONFIG_PMAC_MEDIABAY
 	if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
 		return -ENXIO;
+#endif
 
 	switch (cmd) {
 	case FDEJECT:
@@ -881,8 +885,10 @@ static int floppy_open(struct inode *inode, struct file *filp)
 	int n, err = 0;
 
 	if (fs->ref_count == 0) {
+#ifdef CONFIG_PMAC_MEDIABAY
 		if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
 			return -ENXIO;
+#endif
 		out_8(&sw->setup, S_IBM_DRIVE | S_FCLK_DIV2);
 		out_8(&sw->control_bic, 0xff);
 		out_8(&sw->mode, 0x95);
@@ -967,8 +973,10 @@ static int floppy_revalidate(struct gendisk *disk)
 	struct swim3 __iomem *sw;
 	int ret, n;
 
+#ifdef CONFIG_PMAC_MEDIABAY
 	if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
 		return -ENXIO;
+#endif
 
 	sw = fs->swim3;
 	grab_drive(fs, revalidating, 0);

+ 4 - 3
drivers/block/sx8.c

@@ -26,6 +26,7 @@
 #include <linux/delay.h>
 #include <linux/time.h>
 #include <linux/hdreg.h>
+#include <linux/dma-mapping.h>
 #include <asm/io.h>
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
@@ -1582,9 +1583,9 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_out;
 
 #if IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */
-	rc = pci_set_dma_mask(pdev, 0xffffffffffffffffULL);
+	rc = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
 	if (!rc) {
-		rc = pci_set_consistent_dma_mask(pdev, 0xffffffffffffffffULL);
+		rc = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
 		if (rc) {
 			printk(KERN_ERR DRV_NAME "(%s): consistent DMA mask failure\n",
 				pci_name(pdev));
@@ -1593,7 +1594,7 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 		pci_dac = 1;
 	} else {
 #endif
-		rc = pci_set_dma_mask(pdev, 0xffffffffULL);
+		rc = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
 		if (rc) {
 			printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n",
 				pci_name(pdev));

+ 9 - 0
drivers/bluetooth/bluecard_cs.c

@@ -1089,6 +1089,14 @@ static int bluecard_event(event_t event, int priority, event_callback_args_t *ar
 	return 0;
 }
 
+static struct pcmcia_device_id bluecard_ids[] = {
+	PCMCIA_DEVICE_PROD_ID12("BlueCard", "LSE041", 0xbaf16fbf, 0x657cc15e),
+	PCMCIA_DEVICE_PROD_ID12("BTCFCARD", "LSE139", 0xe3987764, 0x2524b59c),
+	PCMCIA_DEVICE_PROD_ID12("WSS", "LSE039", 0x0a0736ec, 0x24e6dfab),
+	PCMCIA_DEVICE_NULL
+};
+MODULE_DEVICE_TABLE(pcmcia, bluecard_ids);
+
 static struct pcmcia_driver bluecard_driver = {
 	.owner		= THIS_MODULE,
 	.drv		= {
@@ -1096,6 +1104,7 @@ static struct pcmcia_driver bluecard_driver = {
 	},
 	.attach		= bluecard_attach,
 	.detach		= bluecard_detach,
+	.id_table	= bluecard_ids,
 };
 
 static int __init init_bluecard_cs(void)

+ 7 - 0
drivers/bluetooth/bt3c_cs.c

@@ -935,6 +935,12 @@ static int bt3c_event(event_t event, int priority, event_callback_args_t *args)
 	return 0;
 }
 
+static struct pcmcia_device_id bt3c_ids[] = {
+	PCMCIA_DEVICE_PROD_ID13("3COM", "Bluetooth PC Card", 0xefce0a31, 0xd4ce9b02),
+	PCMCIA_DEVICE_NULL
+};
+MODULE_DEVICE_TABLE(pcmcia, bt3c_ids);
+
 static struct pcmcia_driver bt3c_driver = {
 	.owner		= THIS_MODULE,
 	.drv		= {
@@ -942,6 +948,7 @@ static struct pcmcia_driver bt3c_driver = {
 	},
 	.attach		= bt3c_attach,
 	.detach		= bt3c_detach,
+	.id_table	= bt3c_ids,
 };
 
 static int __init init_bt3c_cs(void)

+ 7 - 0
drivers/bluetooth/btuart_cs.c

@@ -855,6 +855,12 @@ static int btuart_event(event_t event, int priority, event_callback_args_t *args
 	return 0;
 }
 
+static struct pcmcia_device_id btuart_ids[] = {
+	/* don't use this driver. Use serial_cs + hci_uart instead */
+	PCMCIA_DEVICE_NULL
+};
+MODULE_DEVICE_TABLE(pcmcia, btuart_ids);
+
 static struct pcmcia_driver btuart_driver = {
 	.owner		= THIS_MODULE,
 	.drv		= {
@@ -862,6 +868,7 @@ static struct pcmcia_driver btuart_driver = {
 	},
 	.attach		= btuart_attach,
 	.detach		= btuart_detach,
+	.id_table	= btuart_ids,
 };
 
 static int __init init_btuart_cs(void)

+ 8 - 0
drivers/bluetooth/dtl1_cs.c

@@ -807,6 +807,13 @@ static int dtl1_event(event_t event, int priority, event_callback_args_t *args)
 	return 0;
 }
 
+static struct pcmcia_device_id dtl1_ids[] = {
+	PCMCIA_DEVICE_PROD_ID12("Nokia Mobile Phones", "DTL-1", 0xe1bfdd64, 0xe168480d),
+	PCMCIA_DEVICE_PROD_ID12("Socket", "CF", 0xb38bcc2e, 0x44ebf863),
+	PCMCIA_DEVICE_NULL
+};
+MODULE_DEVICE_TABLE(pcmcia, dtl1_ids);
+
 static struct pcmcia_driver dtl1_driver = {
 	.owner		= THIS_MODULE,
 	.drv		= {
@@ -814,6 +821,7 @@ static struct pcmcia_driver dtl1_driver = {
 	},
 	.attach		= dtl1_attach,
 	.detach		= dtl1_detach,
+	.id_table	= dtl1_ids,
 };
 
 static int __init init_dtl1_cs(void)

+ 0 - 3
drivers/char/misc.c

@@ -308,9 +308,6 @@ static int __init misc_init(void)
 #endif
 #ifdef CONFIG_BVME6000
 	rtc_DP8570A_init();
-#endif
-#ifdef CONFIG_PMAC_PBOOK
-	pmu_device_init();
 #endif
 	if (register_chrdev(MISC_MAJOR,"misc",&misc_fops)) {
 		printk("unable to get major %d for misc devices\n",

+ 8 - 1
drivers/char/pcmcia/synclink_cs.c

@@ -581,7 +581,7 @@ static dev_link_t *mgslpc_attach(void)
 
     /* Interrupt setup */
     link->irq.Attributes = IRQ_TYPE_EXCLUSIVE;
-    link->irq.IRQInfo1   = IRQ_INFO2_VALID | IRQ_LEVEL_ID;
+    link->irq.IRQInfo1   = IRQ_LEVEL_ID;
     link->irq.Handler = NULL;
     
     link->conf.Attributes = 0;
@@ -3081,6 +3081,12 @@ void mgslpc_remove_device(MGSLPC_INFO *remove_info)
 	}
 }
 
+static struct pcmcia_device_id mgslpc_ids[] = {
+	PCMCIA_DEVICE_MANF_CARD(0x02c5, 0x0050),
+	PCMCIA_DEVICE_NULL
+};
+MODULE_DEVICE_TABLE(pcmcia, mgslpc_ids);
+
 static struct pcmcia_driver mgslpc_driver = {
 	.owner		= THIS_MODULE,
 	.drv		= {
@@ -3088,6 +3094,7 @@ static struct pcmcia_driver mgslpc_driver = {
 	},
 	.attach		= mgslpc_attach,
 	.detach		= mgslpc_detach,
+	.id_table	= mgslpc_ids,
 };
 
 static struct tty_operations mgslpc_ops = {

+ 6 - 0
drivers/ide/Kconfig

@@ -606,6 +606,12 @@ config BLK_DEV_IT8172
 	  <http://www.ite.com.tw/ia/brief_it8172bsp.htm>; picture of the
 	  board at <http://www.mvista.com/partners/semiconductor/ite.html>.
 
+config BLK_DEV_IT821X
+	tristate "IT821X IDE support"
+	help
+	  This driver adds support for the ITE 8211 IDE controller and the
+	  IT 8212 IDE RAID controller in both RAID and pass-through mode.
+
 config BLK_DEV_NS87415
 	tristate "NS87415 chipset support"
 	help

+ 4 - 0
drivers/ide/ide-disk.c

@@ -119,6 +119,10 @@ static int lba_capacity_is_ok (struct hd_driveid *id)
 {
 	unsigned long lba_sects, chs_sects, head, tail;
 
+	/* No non-LBA info .. so valid! */
+	if (id->cyls == 0)
+		return 1;
+
 	/*
 	 * The ATA spec tells large drives to return
 	 * C/H/S = 16383/16/63 independent of their size.

+ 0 - 1
drivers/ide/ide-dma.c

@@ -132,7 +132,6 @@ static const struct drive_list_entry drive_blacklist [] = {
 	{ "SAMSUNG CD-ROM SC-148C",	"ALL"		},
 	{ "SAMSUNG CD-ROM SC",	"ALL"		},
 	{ "SanDisk SDP3B-64"	,	"ALL"		},
-	{ "SAMSUNG CD-ROM SN-124",	"ALL"		},
 	{ "ATAPI CD-ROM DRIVE 40X MAXIMUM",	"ALL"		},
 	{ "_NEC DV5800A",               "ALL"           },  
 	{ NULL			,	NULL		}

+ 2 - 1
drivers/ide/ide-iops.c

@@ -1181,7 +1181,8 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 		pre_reset(drive);
 		SELECT_DRIVE(drive);
 		udelay (20);
-		hwif->OUTB(WIN_SRST, IDE_COMMAND_REG);
+		hwif->OUTBSYNC(drive, WIN_SRST, IDE_COMMAND_REG);
+		ndelay(400);
 		hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
 		hwgroup->polling = 1;
 		__ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20, NULL);

+ 35 - 0
drivers/ide/legacy/ide-cs.c

@@ -457,6 +457,40 @@ int ide_event(event_t event, int priority,
     return 0;
 } /* ide_event */
 
+static struct pcmcia_device_id ide_ids[] = {
+	PCMCIA_DEVICE_FUNC_ID(4),
+	PCMCIA_DEVICE_MANF_CARD(0x0032, 0x0704),
+	PCMCIA_DEVICE_MANF_CARD(0x00a4, 0x002d),
+	PCMCIA_DEVICE_MANF_CARD(0x2080, 0x0001),
+	PCMCIA_DEVICE_MANF_CARD(0x0045, 0x0401),
+	PCMCIA_DEVICE_PROD_ID123("Caravelle", "PSC-IDE ", "PSC000", 0x8c36137c, 0xd0693ab8, 0x2768a9f0),
+	PCMCIA_DEVICE_PROD_ID123("CDROM", "IDE", "MCD-601p", 0x1b9179ca, 0xede88951, 0x0d902f74),
+	PCMCIA_DEVICE_PROD_ID123("PCMCIA", "IDE CARD", "F1", 0x281f1c5d, 0x1907960c, 0xf7fde8b9),
+	PCMCIA_DEVICE_PROD_ID12("ARGOSY", "CD-ROM", 0x78f308dc, 0x66536591),
+	PCMCIA_DEVICE_PROD_ID12("ARGOSY", "PnPIDE", 0x78f308dc, 0x0c694728),
+	PCMCIA_DEVICE_PROD_ID12("CNF CD-M", "CD-ROM", 0x7d93b852, 0x66536591),
+	PCMCIA_DEVICE_PROD_ID12("Creative Technology Ltd.", "PCMCIA CD-ROM Interface Card", 0xff8c8a45, 0xfe8020c4),
+	PCMCIA_DEVICE_PROD_ID12("Digital Equipment Corporation.", "Digital Mobile Media CD-ROM", 0x17692a66, 0xef1dcbde),
+	PCMCIA_DEVICE_PROD_ID12("EXP", "CD", 0x6f58c983, 0xaae5994f),
+	PCMCIA_DEVICE_PROD_ID12("EXP   ", "CD-ROM", 0x0a5c52fd, 0x66536591),
+	PCMCIA_DEVICE_PROD_ID12("EXP   ", "PnPIDE", 0x0a5c52fd, 0x0c694728),
+	PCMCIA_DEVICE_PROD_ID12("FREECOM", "PCCARD-IDE", 0x5714cbf7, 0x48e0ab8e),
+	PCMCIA_DEVICE_PROD_ID12("IBM", "IBM17JSSFP20", 0xb569a6e5, 0xf2508753),
+	PCMCIA_DEVICE_PROD_ID12("IO DATA", "CBIDE2      ", 0x547e66dc, 0x8671043b),
+	PCMCIA_DEVICE_PROD_ID12("IO DATA", "PCIDE", 0x547e66dc, 0x5c5ab149),
+	PCMCIA_DEVICE_PROD_ID12("IO DATA", "PCIDEII", 0x547e66dc, 0xb3662674),
+	PCMCIA_DEVICE_PROD_ID12("LOOKMEET", "CBIDE2      ", 0xe37be2b5, 0x8671043b),
+	PCMCIA_DEVICE_PROD_ID12(" ", "NinjaATA-", 0x3b6e20c8, 0xebe0bd79),
+	PCMCIA_DEVICE_PROD_ID12("PCMCIA", "CD-ROM", 0x281f1c5d, 0x66536591),
+	PCMCIA_DEVICE_PROD_ID12("PCMCIA", "PnPIDE", 0x281f1c5d, 0x0c694728),
+	PCMCIA_DEVICE_PROD_ID12("SHUTTLE TECHNOLOGY LTD.", "PCCARD-IDE/ATAPI Adapter", 0x4a3f0ba0, 0x322560e1),
+	PCMCIA_DEVICE_PROD_ID12("TOSHIBA", "MK2001MPL", 0xb4585a1a, 0x3489e003),
+	PCMCIA_DEVICE_PROD_ID12("WIT", "IDE16", 0x244e5994, 0x3e232852),
+	PCMCIA_DEVICE_PROD_ID1("STI Flash", 0xe4a13209),
+	PCMCIA_DEVICE_NULL,
+};
+MODULE_DEVICE_TABLE(pcmcia, ide_ids);
+
 static struct pcmcia_driver ide_cs_driver = {
 	.owner		= THIS_MODULE,
 	.drv		= {
@@ -464,6 +498,7 @@ static struct pcmcia_driver ide_cs_driver = {
 	},
 	.attach		= ide_attach,
 	.detach		= ide_detach,
+	.id_table       = ide_ids,
 };
 
 static int __init init_ide_cs(void)

+ 1 - 0
drivers/ide/pci/Makefile

@@ -12,6 +12,7 @@ obj-$(CONFIG_BLK_DEV_HPT34X)		+= hpt34x.o
 obj-$(CONFIG_BLK_DEV_HPT366)		+= hpt366.o
 #obj-$(CONFIG_BLK_DEV_HPT37X)		+= hpt37x.o
 obj-$(CONFIG_BLK_DEV_IT8172)		+= it8172.o
+obj-$(CONFIG_BLK_DEV_IT821X)		+= it821x.o
 obj-$(CONFIG_BLK_DEV_NS87415)		+= ns87415.o
 obj-$(CONFIG_BLK_DEV_OPTI621)		+= opti621.o
 obj-$(CONFIG_BLK_DEV_PDC202XX_OLD)	+= pdc202xx_old.o

+ 48 - 25
drivers/ide/pci/generic.c

@@ -39,6 +39,17 @@
 
 #include <asm/io.h>
 
+static int ide_generic_all;		/* Set to claim all devices */
+
+static int __init ide_generic_all_on(char *unused)
+{
+	ide_generic_all = 1;
+	printk(KERN_INFO "IDE generic will claim all unknown PCI IDE storage controllers.\n");
+	return 1;
+}
+
+__setup("all-generic-ide", ide_generic_all_on);
+
 static void __devinit init_hwif_generic (ide_hwif_t *hwif)
 {
 	switch(hwif->pci_dev->device) {
@@ -78,79 +89,85 @@ static void __devinit init_hwif_generic (ide_hwif_t *hwif)
 
 static ide_pci_device_t generic_chipsets[] __devinitdata = {
 	{	/* 0 */
+		.name		= "Unknown",
+		.init_hwif	= init_hwif_generic,
+		.channels	= 2,
+		.autodma	= AUTODMA,
+		.bootable	= ON_BOARD,
+	},{	/* 1 */
 		.name		= "NS87410",
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.autodma	= AUTODMA,
 		.enablebits	= {{0x43,0x08,0x08}, {0x47,0x08,0x08}},
 		.bootable	= ON_BOARD,
-        },{	/* 1 */
+        },{	/* 2 */
 		.name		= "SAMURAI",
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.autodma	= AUTODMA,
 		.bootable	= ON_BOARD,
-	},{	/* 2 */
+	},{	/* 3 */
 		.name		= "HT6565",
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.autodma	= AUTODMA,
 		.bootable	= ON_BOARD,
-	},{	/* 3 */
+	},{	/* 4 */
 		.name		= "UM8673F",
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.autodma	= NODMA,
 		.bootable	= ON_BOARD,
-	},{	/* 4 */
+	},{	/* 5 */
 		.name		= "UM8886A",
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.autodma	= NODMA,
 		.bootable	= ON_BOARD,
-	},{	/* 5 */
+	},{	/* 6 */
 		.name		= "UM8886BF",
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.autodma	= NODMA,
 		.bootable	= ON_BOARD,
-	},{	/* 6 */
+	},{	/* 7 */
 		.name		= "HINT_IDE",
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.autodma	= AUTODMA,
 		.bootable	= ON_BOARD,
-	},{	/* 7 */
+	},{	/* 8 */
 		.name		= "VIA_IDE",
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.autodma	= NOAUTODMA,
 		.bootable	= ON_BOARD,
-	},{	/* 8 */
+	},{	/* 9 */
 		.name		= "OPTI621V",
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.autodma	= NOAUTODMA,
 		.bootable	= ON_BOARD,
-	},{	/* 9 */
+	},{	/* 10 */
 		.name		= "VIA8237SATA",
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.autodma	= AUTODMA,
 		.bootable	= OFF_BOARD,
-	},{	/* 10 */
+	},{	/* 11 */
 		.name 		= "Piccolo0102",
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.autodma	= NOAUTODMA,
 		.bootable	= ON_BOARD,
-	},{	/* 11 */
+	},{	/* 12 */
 		.name 		= "Piccolo0103",
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.autodma	= NOAUTODMA,
 		.bootable	= ON_BOARD,
-	},{	/* 12 */
+	},{	/* 13 */
 		.name 		= "Piccolo0105",
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
@@ -174,6 +191,10 @@ static int __devinit generic_init_one(struct pci_dev *dev, const struct pci_devi
 	u16 command;
 	int ret = -ENODEV;
 
+	/* Don't use the generic entry unless instructed to do so */
+	if (id->driver_data == 0 && ide_generic_all == 0)
+			goto out;
+
 	if (dev->vendor == PCI_VENDOR_ID_UMC &&
 	    dev->device == PCI_DEVICE_ID_UMC_UM8886A &&
 	    (!(PCI_FUNC(dev->devfn) & 1)))
@@ -195,21 +216,23 @@ out:
 }
 
 static struct pci_device_id generic_pci_tbl[] = {
-	{ PCI_VENDOR_ID_NS,     PCI_DEVICE_ID_NS_87410,            PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
-	{ PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_SAMURAI_IDE,  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1},
-	{ PCI_VENDOR_ID_HOLTEK, PCI_DEVICE_ID_HOLTEK_6565,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2},
-	{ PCI_VENDOR_ID_UMC,    PCI_DEVICE_ID_UMC_UM8673F,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 3},
-	{ PCI_VENDOR_ID_UMC,    PCI_DEVICE_ID_UMC_UM8886A,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4},
-	{ PCI_VENDOR_ID_UMC,    PCI_DEVICE_ID_UMC_UM8886BF,        PCI_ANY_ID, PCI_ANY_ID, 0, 0, 5},
-	{ PCI_VENDOR_ID_HINT,   PCI_DEVICE_ID_HINT_VXPROII_IDE,    PCI_ANY_ID, PCI_ANY_ID, 0, 0, 6},
-	{ PCI_VENDOR_ID_VIA,    PCI_DEVICE_ID_VIA_82C561,          PCI_ANY_ID, PCI_ANY_ID, 0, 0, 7},
-	{ PCI_VENDOR_ID_OPTI,   PCI_DEVICE_ID_OPTI_82C558,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8},
+	{ PCI_VENDOR_ID_NS,     PCI_DEVICE_ID_NS_87410,            PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1},
+	{ PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_SAMURAI_IDE,  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2},
+	{ PCI_VENDOR_ID_HOLTEK, PCI_DEVICE_ID_HOLTEK_6565,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 3},
+	{ PCI_VENDOR_ID_UMC,    PCI_DEVICE_ID_UMC_UM8673F,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4},
+	{ PCI_VENDOR_ID_UMC,    PCI_DEVICE_ID_UMC_UM8886A,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 5},
+	{ PCI_VENDOR_ID_UMC,    PCI_DEVICE_ID_UMC_UM8886BF,        PCI_ANY_ID, PCI_ANY_ID, 0, 0, 6},
+	{ PCI_VENDOR_ID_HINT,   PCI_DEVICE_ID_HINT_VXPROII_IDE,    PCI_ANY_ID, PCI_ANY_ID, 0, 0, 7},
+	{ PCI_VENDOR_ID_VIA,    PCI_DEVICE_ID_VIA_82C561,          PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8},
+	{ PCI_VENDOR_ID_OPTI,   PCI_DEVICE_ID_OPTI_82C558,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 9},
 #ifdef CONFIG_BLK_DEV_IDE_SATA
-	{ PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8237_SATA,	   PCI_ANY_ID, PCI_ANY_ID, 0, 0, 9},
+	{ PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8237_SATA,	   PCI_ANY_ID, PCI_ANY_ID, 0, 0, 10},
 #endif
-	{ PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO,     PCI_ANY_ID, PCI_ANY_ID, 0, 0, 10},
-	{ PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1,   PCI_ANY_ID, PCI_ANY_ID, 0, 0, 11},
-	{ PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2,   PCI_ANY_ID, PCI_ANY_ID, 0, 0, 12},
+	{ PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO,     PCI_ANY_ID, PCI_ANY_ID, 0, 0, 11},
+	{ PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1,   PCI_ANY_ID, PCI_ANY_ID, 0, 0, 12},
+	{ PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2,   PCI_ANY_ID, PCI_ANY_ID, 0, 0, 13},
+	/* Must come last. If you add entries adjust this table appropriately and the init_one code */
+	{ PCI_ANY_ID,		PCI_ANY_ID,			   PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL, 0},
 	{ 0, },
 };
 MODULE_DEVICE_TABLE(pci, generic_pci_tbl);

+ 228 - 242
drivers/ide/pci/hpt366.c

@@ -10,6 +10,11 @@
  * donation of an ABit BP6 mainboard, processor, and memory acellerated
  * development and support.
  *
+ *
+ * Highpoint have their own driver (source except for the raid part)
+ * available from http://www.highpoint-tech.com/hpt3xx-opensource-v131.tgz
+ * This may be useful to anyone wanting to work on the mainstream hpt IDE.
+ *
  * Note that final HPT370 support was done by force extraction of GPL.
  *
  * - add function for getting/setting power status of drive
@@ -446,44 +451,29 @@ static struct chipset_bus_clock_list_entry sixty_six_base_hpt374[] = {
 #define F_LOW_PCI_50	0x2d
 #define F_LOW_PCI_66	0x42
 
-/* FIXME: compare with driver's code before removing */
-#if 0
-		if (hpt_minimum_revision(dev, 3)) {
-			u8 cbl;
-			cbl = inb(iobase + 0x7b);
-			outb(cbl | 1, iobase + 0x7b);
-			outb(cbl & ~1, iobase + 0x7b);
-			cbl = inb(iobase + 0x7a);
-			p += sprintf(p, "Cable:          ATA-%d"
-					"                          ATA-%d\n",
-				(cbl & 0x02) ? 33 : 66,
-				(cbl & 0x01) ? 33 : 66);
-			p += sprintf(p, "\n");
-		}
-		{
-			u8 c2, c3;
-			/* older revs don't have these registers mapped 
-			 * into io space */
-			pci_read_config_byte(dev, 0x43, &c0);
-			pci_read_config_byte(dev, 0x47, &c1);
-			pci_read_config_byte(dev, 0x4b, &c2);
-			pci_read_config_byte(dev, 0x4f, &c3);
-
-			p += sprintf(p, "Mode:           %s             %s"
-					"           %s              %s\n",
-				(c0 & 0x10) ? "UDMA" : (c0 & 0x20) ? "DMA " : 
-					(c0 & 0x80) ? "PIO " : "off ",
-				(c1 & 0x10) ? "UDMA" : (c1 & 0x20) ? "DMA " :
-					(c1 & 0x80) ? "PIO " : "off ",
-				(c2 & 0x10) ? "UDMA" : (c2 & 0x20) ? "DMA " :
-					(c2 & 0x80) ? "PIO " : "off ",
-				(c3 & 0x10) ? "UDMA" : (c3 & 0x20) ? "DMA " :
-					(c3 & 0x80) ? "PIO " : "off ");
-		}
-	}
-#endif
+/*
+ *	Hold all the highpoint quirks and revision information in one
+ *	place.
+ */
 
-static u32 hpt_revision (struct pci_dev *dev)
+struct hpt_info
+{
+	u8 max_mode;		/* Speeds allowed */
+	int revision;		/* Chipset revision */
+	int flags;		/* Chipset properties */
+#define PLL_MODE	1
+#define IS_372N		2
+				/* Speed table */
+	struct chipset_bus_clock_list_entry *speed;
+};
+
+/*
+ *	This wants fixing so that we do everything not by classrev
+ *	(which breaks on the newest chips) but by creating an
+ *	enumeration of chip variants and using that
+ */
+
+static __devinit u32 hpt_revision (struct pci_dev *dev)
 {
 	u32 class_rev;
 	pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
@@ -507,37 +497,33 @@ static u32 hpt_revision (struct pci_dev *dev)
 	return class_rev;
 }
 
-static u32 hpt_minimum_revision (struct pci_dev *dev, int revision)
-{
-	unsigned int class_rev = hpt_revision(dev);
-	revision--;
-	return ((int) (class_rev > revision) ? 1 : 0);
-}
-
 static int check_in_drive_lists(ide_drive_t *drive, const char **list);
 
 static u8 hpt3xx_ratemask (ide_drive_t *drive)
 {
-	struct pci_dev *dev	= HWIF(drive)->pci_dev;
+	ide_hwif_t *hwif	= drive->hwif;
+	struct hpt_info *info	= ide_get_hwifdata(hwif);
 	u8 mode			= 0;
 
-	if (hpt_minimum_revision(dev, 8)) {		/* HPT374 */
+	/* FIXME: TODO - move this to set info->mode once at boot */
+
+	if (info->revision >= 8) {		/* HPT374 */
 		mode = (HPT374_ALLOW_ATA133_6) ? 4 : 3;
-	} else if (hpt_minimum_revision(dev, 7)) {	/* HPT371 */
+	} else if (info->revision >= 7) {	/* HPT371 */
 		mode = (HPT371_ALLOW_ATA133_6) ? 4 : 3;
-	} else if (hpt_minimum_revision(dev, 6)) {	/* HPT302 */
+	} else if (info->revision >= 6) {	/* HPT302 */
 		mode = (HPT302_ALLOW_ATA133_6) ? 4 : 3;
-	} else if (hpt_minimum_revision(dev, 5)) {	/* HPT372 */
+	} else if (info->revision >= 5) {	/* HPT372 */
 		mode = (HPT372_ALLOW_ATA133_6) ? 4 : 3;
-	} else if (hpt_minimum_revision(dev, 4)) {	/* HPT370A */
+	} else if (info->revision >= 4) {	/* HPT370A */
 		mode = (HPT370_ALLOW_ATA100_5) ? 3 : 2;
-	} else if (hpt_minimum_revision(dev, 3)) {	/* HPT370 */
+	} else if (info->revision >= 3) {	/* HPT370 */
 		mode = (HPT370_ALLOW_ATA100_5) ? 3 : 2;
 		mode = (check_in_drive_lists(drive, bad_ata33)) ? 0 : mode;
 	} else {				/* HPT366 and HPT368 */
 		mode = (check_in_drive_lists(drive, bad_ata33)) ? 0 : 2;
 	}
-	if (!eighty_ninty_three(drive) && (mode))
+	if (!eighty_ninty_three(drive) && mode)
 		mode = min(mode, (u8)1);
 	return mode;
 }
@@ -549,7 +535,8 @@ static u8 hpt3xx_ratemask (ide_drive_t *drive)
  
 static u8 hpt3xx_ratefilter (ide_drive_t *drive, u8 speed)
 {
-	struct pci_dev *dev	= HWIF(drive)->pci_dev;
+	ide_hwif_t *hwif	= drive->hwif;
+	struct hpt_info *info	= ide_get_hwifdata(hwif);
 	u8 mode			= hpt3xx_ratemask(drive);
 
 	if (drive->media != ide_disk)
@@ -561,7 +548,7 @@ static u8 hpt3xx_ratefilter (ide_drive_t *drive, u8 speed)
 			break;
 		case 0x03:
 			speed = min(speed, (u8)XFER_UDMA_5);
-			if (hpt_minimum_revision(dev, 5))
+			if (info->revision >= 5)
 				break;
 			if (check_in_drive_lists(drive, bad_ata100_5))
 				speed = min(speed, (u8)XFER_UDMA_4);
@@ -571,7 +558,7 @@ static u8 hpt3xx_ratefilter (ide_drive_t *drive, u8 speed)
 	/*
 	 * CHECK ME, Does this need to be set to 5 ??
 	 */
-			if (hpt_minimum_revision(dev, 3))
+			if (info->revision >= 3)
 				break;
 			if ((check_in_drive_lists(drive, bad_ata66_4)) ||
 			    (!(HPT366_ALLOW_ATA66_4)))
@@ -585,7 +572,7 @@ static u8 hpt3xx_ratefilter (ide_drive_t *drive, u8 speed)
 	/*
 	 * CHECK ME, Does this need to be set to 5 ??
 	 */
-			if (hpt_minimum_revision(dev, 3))
+			if (info->revision >= 3)
 				break;
 			if (check_in_drive_lists(drive, bad_ata33))
 				speed = min(speed, (u8)XFER_MW_DMA_2);
@@ -624,11 +611,12 @@ static unsigned int pci_bus_clock_list (u8 speed, struct chipset_bus_clock_list_
 
 static int hpt36x_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 {
-	struct pci_dev *dev	= HWIF(drive)->pci_dev;
+	ide_hwif_t *hwif	= drive->hwif;
+	struct pci_dev *dev	= hwif->pci_dev;
+	struct hpt_info	*info	= ide_get_hwifdata(hwif);
 	u8 speed		= hpt3xx_ratefilter(drive, xferspeed);
-//	u8 speed		= ide_rate_filter(hpt3xx_ratemask(drive), xferspeed);
 	u8 regtime		= (drive->select.b.unit & 0x01) ? 0x44 : 0x40;
-	u8 regfast		= (HWIF(drive)->channel) ? 0x55 : 0x51;
+	u8 regfast		= (hwif->channel) ? 0x55 : 0x51;
 	u8 drive_fast		= 0;
 	u32 reg1 = 0, reg2	= 0;
 
@@ -636,16 +624,11 @@ static int hpt36x_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 	 * Disable the "fast interrupt" prediction.
 	 */
 	pci_read_config_byte(dev, regfast, &drive_fast);
-#if 0
-	if (drive_fast & 0x02)
-		pci_write_config_byte(dev, regfast, drive_fast & ~0x20);
-#else
 	if (drive_fast & 0x80)
 		pci_write_config_byte(dev, regfast, drive_fast & ~0x80);
-#endif
 
-	reg2 = pci_bus_clock_list(speed,
-		(struct chipset_bus_clock_list_entry *) pci_get_drvdata(dev));
+	reg2 = pci_bus_clock_list(speed, info->speed);
+
 	/*
 	 * Disable on-chip PIO FIFO/buffer
 	 *  (to avoid problems handling I/O errors later)
@@ -665,10 +648,11 @@ static int hpt36x_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 
 static int hpt370_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 {
-	struct pci_dev *dev = HWIF(drive)->pci_dev;
+	ide_hwif_t *hwif	= drive->hwif;
+	struct pci_dev *dev = hwif->pci_dev;
+	struct hpt_info	*info	= ide_get_hwifdata(hwif);
 	u8 speed	= hpt3xx_ratefilter(drive, xferspeed);
-//	u8 speed	= ide_rate_filter(hpt3xx_ratemask(drive), xferspeed);
-	u8 regfast	= (HWIF(drive)->channel) ? 0x55 : 0x51;
+	u8 regfast	= (drive->hwif->channel) ? 0x55 : 0x51;
 	u8 drive_pci	= 0x40 + (drive->dn * 4);
 	u8 new_fast	= 0, drive_fast = 0;
 	u32 list_conf	= 0, drive_conf = 0;
@@ -693,17 +677,13 @@ static int hpt370_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 	if (new_fast != drive_fast)
 		pci_write_config_byte(dev, regfast, new_fast);
 
-	list_conf = pci_bus_clock_list(speed, 
-				       (struct chipset_bus_clock_list_entry *)
-				       pci_get_drvdata(dev));
+	list_conf = pci_bus_clock_list(speed, info->speed);
 
 	pci_read_config_dword(dev, drive_pci, &drive_conf);
 	list_conf = (list_conf & ~conf_mask) | (drive_conf & conf_mask);
 	
-	if (speed < XFER_MW_DMA_0) {
+	if (speed < XFER_MW_DMA_0)
 		list_conf &= ~0x80000000; /* Disable on-chip PIO FIFO/buffer */
-	}
-
 	pci_write_config_dword(dev, drive_pci, list_conf);
 
 	return ide_config_drive_speed(drive, speed);
@@ -711,10 +691,11 @@ static int hpt370_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 
 static int hpt372_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 {
-	struct pci_dev *dev	= HWIF(drive)->pci_dev;
+	ide_hwif_t *hwif	= drive->hwif;
+	struct pci_dev *dev	= hwif->pci_dev;
+	struct hpt_info	*info	= ide_get_hwifdata(hwif);
 	u8 speed	= hpt3xx_ratefilter(drive, xferspeed);
-//	u8 speed	= ide_rate_filter(hpt3xx_ratemask(drive), xferspeed);
-	u8 regfast	= (HWIF(drive)->channel) ? 0x55 : 0x51;
+	u8 regfast	= (drive->hwif->channel) ? 0x55 : 0x51;
 	u8 drive_fast	= 0, drive_pci = 0x40 + (drive->dn * 4);
 	u32 list_conf	= 0, drive_conf = 0;
 	u32 conf_mask	= (speed >= XFER_MW_DMA_0) ? 0xc0000000 : 0x30070000;
@@ -726,10 +707,8 @@ static int hpt372_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 	pci_read_config_byte(dev, regfast, &drive_fast);
 	drive_fast &= ~0x07;
 	pci_write_config_byte(dev, regfast, drive_fast);
-					
-	list_conf = pci_bus_clock_list(speed,
-			(struct chipset_bus_clock_list_entry *)
-					pci_get_drvdata(dev));
+
+	list_conf = pci_bus_clock_list(speed, info->speed);
 	pci_read_config_dword(dev, drive_pci, &drive_conf);
 	list_conf = (list_conf & ~conf_mask) | (drive_conf & conf_mask);
 	if (speed < XFER_MW_DMA_0)
@@ -741,19 +720,14 @@ static int hpt372_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 
 static int hpt3xx_tune_chipset (ide_drive_t *drive, u8 speed)
 {
-	struct pci_dev *dev	= HWIF(drive)->pci_dev;
+	ide_hwif_t *hwif	= drive->hwif;
+	struct hpt_info	*info	= ide_get_hwifdata(hwif);
 
-	if (hpt_minimum_revision(dev, 8))
+	if (info->revision >= 8)
 		return hpt372_tune_chipset(drive, speed); /* not a typo */
-#if 0
-	else if (hpt_minimum_revision(dev, 7))
-		hpt371_tune_chipset(drive, speed);
-	else if (hpt_minimum_revision(dev, 6))
-		hpt302_tune_chipset(drive, speed);
-#endif
-	else if (hpt_minimum_revision(dev, 5))
+	else if (info->revision >= 5)
 		return hpt372_tune_chipset(drive, speed);
-	else if (hpt_minimum_revision(dev, 3))
+	else if (info->revision >= 3)
 		return hpt370_tune_chipset(drive, speed);
 	else	/* hpt368: hpt_minimum_revision(dev, 2) */
 		return hpt36x_tune_chipset(drive, speed);
@@ -779,8 +753,14 @@ static void hpt3xx_tune_drive (ide_drive_t *drive, u8 pio)
 static int config_chipset_for_dma (ide_drive_t *drive)
 {
 	u8 speed = ide_dma_speed(drive, hpt3xx_ratemask(drive));
+	ide_hwif_t *hwif = drive->hwif;
+	struct hpt_info	*info	= ide_get_hwifdata(hwif);
 
-	if (!(speed))
+	if (!speed)
+		return 0;
+
+	/* If we don't have any timings we can't do a lot */
+	if (info->speed == NULL)
 		return 0;
 
 	(void) hpt3xx_tune_chipset(drive, speed);
@@ -794,7 +774,7 @@ static int hpt3xx_quirkproc (ide_drive_t *drive)
 
 static void hpt3xx_intrproc (ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 
 	if (drive->quirk_list)
 		return;
@@ -804,24 +784,26 @@ static void hpt3xx_intrproc (ide_drive_t *drive)
 
 static void hpt3xx_maskproc (ide_drive_t *drive, int mask)
 {
-	struct pci_dev *dev = HWIF(drive)->pci_dev;
+	ide_hwif_t *hwif = drive->hwif;
+	struct hpt_info *info = ide_get_hwifdata(hwif);
+	struct pci_dev *dev = hwif->pci_dev;
 
 	if (drive->quirk_list) {
-		if (hpt_minimum_revision(dev,3)) {
+		if (info->revision >= 3) {
 			u8 reg5a = 0;
 			pci_read_config_byte(dev, 0x5a, &reg5a);
 			if (((reg5a & 0x10) >> 4) != mask)
 				pci_write_config_byte(dev, 0x5a, mask ? (reg5a | 0x10) : (reg5a & ~0x10));
 		} else {
 			if (mask) {
-				disable_irq(HWIF(drive)->irq);
+				disable_irq(hwif->irq);
 			} else {
-				enable_irq(HWIF(drive)->irq);
+				enable_irq(hwif->irq);
 			}
 		}
 	} else {
 		if (IDE_CONTROL_REG)
-			HWIF(drive)->OUTB(mask ? (drive->ctl | 2) :
+			hwif->OUTB(mask ? (drive->ctl | 2) :
 						 (drive->ctl & ~2),
 						 IDE_CONTROL_REG);
 	}
@@ -829,12 +811,12 @@ static void hpt3xx_maskproc (ide_drive_t *drive, int mask)
 
 static int hpt366_config_drive_xfer_rate (ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct hd_driveid *id	= drive->id;
 
 	drive->init_speed = 0;
 
-	if (id && (id->capability & 1) && drive->autodma) {
+	if ((id->capability & 1) && drive->autodma) {
 
 		if (ide_use_dma(drive)) {
 			if (config_chipset_for_dma(drive))
@@ -868,15 +850,6 @@ static int hpt366_ide_dma_lostirq (ide_drive_t *drive)
 		drive->name, __FUNCTION__, reg50h, reg52h, reg5ah);
 	if (reg5ah & 0x10)
 		pci_write_config_byte(dev, 0x5a, reg5ah & ~0x10);
-#if 0
-	/* how about we flush and reset, mmmkay? */
-	pci_write_config_byte(dev, 0x51, 0x1F);
-	/* fall through to a reset */
-	case dma_start:
-	case ide_dma_end:
-	/* reset the chips state over and over.. */
-	pci_write_config_byte(dev, 0x51, 0x13);
-#endif
 	return __ide_dma_lostirq(drive);
 }
 
@@ -919,7 +892,7 @@ static void hpt370_lostirq_timeout (ide_drive_t *drive)
 	u8 dma_stat = 0, dma_cmd = 0;
 
 	pci_read_config_byte(HWIF(drive)->pci_dev, reginfo, &bfifo);
-	printk("%s: %d bytes in FIFO\n", drive->name, bfifo);
+	printk(KERN_DEBUG "%s: %d bytes in FIFO\n", drive->name, bfifo);
 	hpt370_clear_engine(drive);
 	/* get dma command mode */
 	dma_cmd = hwif->INB(hwif->dma_command);
@@ -1047,15 +1020,6 @@ static void hpt372n_rw_disk(ide_drive_t *drive, struct request *rq)
 
 static void hpt3xx_reset (ide_drive_t *drive)
 {
-#if 0
-	unsigned long high_16	= pci_resource_start(HWIF(drive)->pci_dev, 4);
-	u8 reset	= (HWIF(drive)->channel) ? 0x80 : 0x40;
-	u8 reg59h	= 0;
-
-	pci_read_config_byte(HWIF(drive)->pci_dev, 0x59, &reg59h);
-	pci_write_config_byte(HWIF(drive)->pci_dev, 0x59, reg59h|reset);
-	pci_write_config_byte(HWIF(drive)->pci_dev, 0x59, reg59h);
-#endif
 }
 
 static int hpt3xx_tristate (ide_drive_t * drive, int state)
@@ -1065,8 +1029,6 @@ static int hpt3xx_tristate (ide_drive_t * drive, int state)
 	u8 reg59h = 0, reset	= (hwif->channel) ? 0x80 : 0x40;
 	u8 regXXh = 0, state_reg= (hwif->channel) ? 0x57 : 0x53;
 
-//	hwif->bus_state = state;
-
 	pci_read_config_byte(dev, 0x59, &reg59h);
 	pci_read_config_byte(dev, state_reg, &regXXh);
 
@@ -1093,7 +1055,7 @@ static int hpt3xx_tristate (ide_drive_t * drive, int state)
 #define TRISTATE_BIT  0x8000
 static int hpt370_busproc(ide_drive_t * drive, int state)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= hwif->pci_dev;
 	u8 tristate = 0, resetmask = 0, bus_reg = 0;
 	u16 tri_reg;
@@ -1148,33 +1110,44 @@ static int hpt370_busproc(ide_drive_t * drive, int state)
 	return 0;
 }
 
-static int __devinit init_hpt37x(struct pci_dev *dev)
+static void __devinit hpt366_clocking(ide_hwif_t *hwif)
 {
+	u32 reg1	= 0;
+	struct hpt_info *info = ide_get_hwifdata(hwif);
+
+	pci_read_config_dword(hwif->pci_dev, 0x40, &reg1);
+
+	/* detect bus speed by looking at control reg timing: */
+	switch((reg1 >> 8) & 7) {
+		case 5:
+			info->speed = forty_base_hpt366;
+			break;
+		case 9:
+			info->speed = twenty_five_base_hpt366;
+			break;
+		case 7:
+		default:
+			info->speed = thirty_three_base_hpt366;
+			break;
+	}
+}
+
+static void __devinit hpt37x_clocking(ide_hwif_t *hwif)
+{
+	struct hpt_info *info = ide_get_hwifdata(hwif);
+	struct pci_dev *dev = hwif->pci_dev;
 	int adjust, i;
 	u16 freq;
 	u32 pll;
 	u8 reg5bh;
-	u8 reg5ah = 0;
-	unsigned long dmabase = pci_resource_start(dev, 4);
-	u8 did, rid;	
-	int is_372n = 0;
 	
-	pci_read_config_byte(dev, 0x5a, &reg5ah);
-	/* interrupt force enable */
-	pci_write_config_byte(dev, 0x5a, (reg5ah & ~0x10));
-
-	if(dmabase)
-	{
-		did = inb(dmabase + 0x22);
-		rid = inb(dmabase + 0x28);
-	
-		if((did == 4 && rid == 6) || (did == 5 && rid > 1))
-			is_372n = 1;
-	}
-
 	/*
 	 * default to pci clock. make sure MA15/16 are set to output
-	 * to prevent drives having problems with 40-pin cables.
+	 * to prevent drives having problems with 40-pin cables. Needed
+	 * for some drives such as IBM-DTLA which will not enter ready
+	 * state on reset when PDIAG is a input.
+	 *
+	 * ToDo: should we set 0x21 when using PLL mode ?
 	 */
 	pci_write_config_byte(dev, 0x5b, 0x23);
 
@@ -1197,9 +1170,7 @@ static int __devinit init_hpt37x(struct pci_dev *dev)
 	 * Currently we always set up the PLL for the 372N
 	 */
 	 
-	pci_set_drvdata(dev, NULL);
-	
-	if(is_372n)
+	if(info->flags & IS_372N)
 	{
 		printk(KERN_INFO "hpt: HPT372N detected, using 372N timing.\n");
 		if(freq < 0x55)
@@ -1227,39 +1198,38 @@ static int __devinit init_hpt37x(struct pci_dev *dev)
 			pll = F_LOW_PCI_66;
 	
 		if (pll == F_LOW_PCI_33) {
-			if (hpt_minimum_revision(dev,8))
-				pci_set_drvdata(dev, (void *) thirty_three_base_hpt374);
-			else if (hpt_minimum_revision(dev,5))
-				pci_set_drvdata(dev, (void *) thirty_three_base_hpt372);
-			else if (hpt_minimum_revision(dev,4))
-				pci_set_drvdata(dev, (void *) thirty_three_base_hpt370a);
+			if (info->revision >= 8)
+				info->speed = thirty_three_base_hpt374;
+			else if (info->revision >= 5)
+				info->speed = thirty_three_base_hpt372;
+			else if (info->revision >= 4)
+				info->speed = thirty_three_base_hpt370a;
 			else
-				pci_set_drvdata(dev, (void *) thirty_three_base_hpt370);
-			printk("HPT37X: using 33MHz PCI clock\n");
+				info->speed = thirty_three_base_hpt370;
+			printk(KERN_DEBUG "HPT37X: using 33MHz PCI clock\n");
 		} else if (pll == F_LOW_PCI_40) {
 			/* Unsupported */
 		} else if (pll == F_LOW_PCI_50) {
-			if (hpt_minimum_revision(dev,8))
-				pci_set_drvdata(dev, (void *) fifty_base_hpt370a);
-			else if (hpt_minimum_revision(dev,5))
-				pci_set_drvdata(dev, (void *) fifty_base_hpt372);
-			else if (hpt_minimum_revision(dev,4))
-				pci_set_drvdata(dev, (void *) fifty_base_hpt370a);
+			if (info->revision >= 8)
+				info->speed = fifty_base_hpt370a;
+			else if (info->revision >= 5)
+				info->speed = fifty_base_hpt372;
+			else if (info->revision >= 4)
+				info->speed = fifty_base_hpt370a;
 			else
-				pci_set_drvdata(dev, (void *) fifty_base_hpt370a);
-			printk("HPT37X: using 50MHz PCI clock\n");
+				info->speed = fifty_base_hpt370a;
+			printk(KERN_DEBUG "HPT37X: using 50MHz PCI clock\n");
 		} else {
-			if (hpt_minimum_revision(dev,8))
-			{
+			if (info->revision >= 8) {
 				printk(KERN_ERR "HPT37x: 66MHz timings are not supported.\n");
 			}
-			else if (hpt_minimum_revision(dev,5))
-				pci_set_drvdata(dev, (void *) sixty_six_base_hpt372);
-			else if (hpt_minimum_revision(dev,4))
-				pci_set_drvdata(dev, (void *) sixty_six_base_hpt370a);
+			else if (info->revision >= 5)
+				info->speed = sixty_six_base_hpt372;
+			else if (info->revision >= 4)
+				info->speed = sixty_six_base_hpt370a;
 			else
-				pci_set_drvdata(dev, (void *) sixty_six_base_hpt370);
-			printk("HPT37X: using 66MHz PCI clock\n");
+				info->speed = sixty_six_base_hpt370;
+			printk(KERN_DEBUG "HPT37X: using 66MHz PCI clock\n");
 		}
 	}
 	
@@ -1269,11 +1239,19 @@ static int __devinit init_hpt37x(struct pci_dev *dev)
 	 * result in slow reads when using a 33MHz PCI clock. we also
 	 * don't like to use the PLL because it will cause glitches
 	 * on PRST/SRST when the HPT state engine gets reset.
+	 *
+	 * ToDo: Use 66MHz PLL when ATA133 devices are present on a
+	 * 372 device so we can get ATA133 support
 	 */
-	if (pci_get_drvdata(dev)) 
+	if (info->speed)
 		goto init_hpt37X_done;
+
+	info->flags |= PLL_MODE;
 	
 	/*
+	 * FIXME: make this work correctly, esp with 372N as per
+	 * reference driver code.
+	 *
 	 * adjust PLL based upon PCI clock, enable it, and wait for
 	 * stabilization.
 	 */
@@ -1298,14 +1276,14 @@ static int __devinit init_hpt37x(struct pci_dev *dev)
 				pci_write_config_dword(dev, 0x5c, 
 						       pll & ~0x100);
 				pci_write_config_byte(dev, 0x5b, 0x21);
-				if (hpt_minimum_revision(dev,8))
-					pci_set_drvdata(dev, (void *) fifty_base_hpt370a);
-				else if (hpt_minimum_revision(dev,5))
-					pci_set_drvdata(dev, (void *) fifty_base_hpt372);
-				else if (hpt_minimum_revision(dev,4))
-					pci_set_drvdata(dev, (void *) fifty_base_hpt370a);
+				if (info->revision >= 8)
+					info->speed = fifty_base_hpt370a;
+				else if (info->revision >= 5)
+					info->speed = fifty_base_hpt372;
+				else if (info->revision >= 4)
+					info->speed = fifty_base_hpt370a;
 				else
-					pci_set_drvdata(dev, (void *) fifty_base_hpt370a);
+					info->speed = fifty_base_hpt370a;
 				printk("HPT37X: using 50MHz internal PLL\n");
 				goto init_hpt37X_done;
 			}
@@ -1318,10 +1296,22 @@ pll_recal:
 	} 
 
 init_hpt37X_done:
+	if (!info->speed)
+		printk(KERN_ERR "HPT37X%s: unknown bus timing [%d %d].\n",
+			(info->flags & IS_372N)?"N":"", pll, freq);
 	/* reset state engine */
 	pci_write_config_byte(dev, 0x50, 0x37); 
 	pci_write_config_byte(dev, 0x54, 0x37); 
 	udelay(100);
+}
+
+static int __devinit init_hpt37x(struct pci_dev *dev)
+{
+	u8 reg5ah;
+
+	pci_read_config_byte(dev, 0x5a, &reg5ah);
+	/* interrupt force enable */
+	pci_write_config_byte(dev, 0x5a, (reg5ah & ~0x10));
 	return 0;
 }
 
@@ -1338,59 +1328,27 @@ static int __devinit init_hpt366(struct pci_dev *dev)
 		pci_write_config_byte(dev, 0x51, drive_fast & ~0x80);
 	pci_read_config_dword(dev, 0x40, &reg1);
 									
-	/* detect bus speed by looking at control reg timing: */
-	switch((reg1 >> 8) & 7) {
-		case 5:
-			pci_set_drvdata(dev, (void *) forty_base_hpt366);
-			break;
-		case 9:
-			pci_set_drvdata(dev, (void *) twenty_five_base_hpt366);
-			break;
-		case 7:
-		default:
-			pci_set_drvdata(dev, (void *) thirty_three_base_hpt366);
-			break;
-	}
-
-	if (!pci_get_drvdata(dev))
-	{
-		printk(KERN_ERR "hpt366: unknown bus timing.\n");
-		pci_set_drvdata(dev, NULL);
-	}
 	return 0;
 }
 
 static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const char *name)
 {
 	int ret = 0;
-	u8 test = 0;
-
+	/* FIXME: Not portable */
 	if (dev->resource[PCI_ROM_RESOURCE].start)
 		pci_write_config_byte(dev, PCI_ROM_ADDRESS,
 			dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE);
 
-	pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &test);
-	if (test != (L1_CACHE_BYTES / 4))
-		pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE,
-			(L1_CACHE_BYTES / 4));
-
-	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &test);
-	if (test != 0x78)
-		pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x78);
+	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, (L1_CACHE_BYTES / 4));
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x78);
+	pci_write_config_byte(dev, PCI_MIN_GNT, 0x08);
+	pci_write_config_byte(dev, PCI_MAX_LAT, 0x08);
 
-	pci_read_config_byte(dev, PCI_MIN_GNT, &test);
-	if (test != 0x08)
-		pci_write_config_byte(dev, PCI_MIN_GNT, 0x08);
-
-	pci_read_config_byte(dev, PCI_MAX_LAT, &test);
-	if (test != 0x08)
-		pci_write_config_byte(dev, PCI_MAX_LAT, 0x08);
-
-	if (hpt_minimum_revision(dev, 3)) {
+	if (hpt_revision(dev) >= 3)
 		ret = init_hpt37x(dev);
-	} else {
-		ret =init_hpt366(dev);
-	}
+	else
+		ret = init_hpt366(dev);
+
 	if (ret)
 		return ret;
 
@@ -1400,27 +1358,16 @@ static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const cha
 static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 {
 	struct pci_dev *dev		= hwif->pci_dev;
+	struct hpt_info *info		= ide_get_hwifdata(hwif);
 	u8 ata66 = 0, regmask		= (hwif->channel) ? 0x01 : 0x02;
-	u8 did, rid;
-	unsigned long dmabase		= hwif->dma_base;
-	int is_372n = 0;
 	
-	if(dmabase)
-	{
-		did = inb(dmabase + 0x22);
-		rid = inb(dmabase + 0x28);
-	
-		if((did == 4 && rid == 6) || (did == 5 && rid > 1))
-			is_372n = 1;
-	}
-		
 	hwif->tuneproc			= &hpt3xx_tune_drive;
 	hwif->speedproc			= &hpt3xx_tune_chipset;
 	hwif->quirkproc			= &hpt3xx_quirkproc;
 	hwif->intrproc			= &hpt3xx_intrproc;
 	hwif->maskproc			= &hpt3xx_maskproc;
 	
-	if(is_372n)
+	if(info->flags & IS_372N)
 		hwif->rw_disk = &hpt372n_rw_disk;
 
 	/*
@@ -1428,7 +1375,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 	 * address lines to access an external eeprom.  To read valid
 	 * cable detect state the pins must be enabled as inputs.
 	 */
-	if (hpt_minimum_revision(dev, 8) && PCI_FUNC(dev->devfn) & 1) {
+	if (info->revision >= 8 && (PCI_FUNC(dev->devfn) & 1)) {
 		/*
 		 * HPT374 PCI function 1
 		 * - set bit 15 of reg 0x52 to enable TCBLID as input
@@ -1443,7 +1390,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 		pci_read_config_byte(dev, 0x5a, &ata66);
 		pci_write_config_word(dev, 0x52, mcr3);
 		pci_write_config_word(dev, 0x56, mcr6);
-	} else if (hpt_minimum_revision(dev, 3)) {
+	} else if (info->revision >= 3) {
 		/*
 		 * HPT370/372 and 374 pcifn 0
 		 * - clear bit 0 of 0x5b to enable P/SCBLID as inputs
@@ -1470,7 +1417,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 		hwif->serialized = hwif->mate->serialized = 1;
 #endif
 
-	if (hpt_minimum_revision(dev,3)) {
+	if (info->revision >= 3) {
 		u8 reg5ah = 0;
 			pci_write_config_byte(dev, 0x5a, reg5ah & ~0x10);
 		/*
@@ -1480,8 +1427,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 		 */
 		hwif->resetproc	= &hpt3xx_reset;
 		hwif->busproc	= &hpt370_busproc;
-//		hwif->drives[0].autotune = hwif->drives[1].autotune = 1;
-	} else if (hpt_minimum_revision(dev,2)) {
+	} else if (info->revision >= 2) {
 		hwif->resetproc	= &hpt3xx_reset;
 		hwif->busproc	= &hpt3xx_tristate;
 	} else {
@@ -1502,18 +1448,18 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 		hwif->udma_four = ((ata66 & regmask) ? 0 : 1);
 	hwif->ide_dma_check = &hpt366_config_drive_xfer_rate;
 
-	if (hpt_minimum_revision(dev,8)) {
+	if (info->revision >= 8) {
 		hwif->ide_dma_test_irq = &hpt374_ide_dma_test_irq;
 		hwif->ide_dma_end = &hpt374_ide_dma_end;
-	} else if (hpt_minimum_revision(dev,5)) {
+	} else if (info->revision >= 5) {
 		hwif->ide_dma_test_irq = &hpt374_ide_dma_test_irq;
 		hwif->ide_dma_end = &hpt374_ide_dma_end;
-	} else if (hpt_minimum_revision(dev,3)) {
+	} else if (info->revision >= 3) {
 		hwif->dma_start = &hpt370_ide_dma_start;
 		hwif->ide_dma_end = &hpt370_ide_dma_end;
 		hwif->ide_dma_timeout = &hpt370_ide_dma_timeout;
 		hwif->ide_dma_lostirq = &hpt370_ide_dma_lostirq;
-	} else if (hpt_minimum_revision(dev,2))
+	} else if (info->revision >= 2)
 		hwif->ide_dma_lostirq = &hpt366_ide_dma_lostirq;
 	else
 		hwif->ide_dma_lostirq = &hpt366_ide_dma_lostirq;
@@ -1526,6 +1472,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 
 static void __devinit init_dma_hpt366(ide_hwif_t *hwif, unsigned long dmabase)
 {
+	struct hpt_info	*info	= ide_get_hwifdata(hwif);
 	u8 masterdma	= 0, slavedma = 0;
 	u8 dma_new	= 0, dma_old = 0;
 	u8 primary	= hwif->channel ? 0x4b : 0x43;
@@ -1535,8 +1482,7 @@ static void __devinit init_dma_hpt366(ide_hwif_t *hwif, unsigned long dmabase)
 	if (!dmabase)
 		return;
 		
-	if(pci_get_drvdata(hwif->pci_dev) == NULL)
-	{
+	if(info->speed == NULL) {
 		printk(KERN_WARNING "hpt: no known IDE timings, disabling DMA.\n");
 		return;
 	}
@@ -1559,6 +1505,40 @@ static void __devinit init_dma_hpt366(ide_hwif_t *hwif, unsigned long dmabase)
 	ide_setup_dma(hwif, dmabase, 8);
 }
 
+/*
+ *	We "borrow" this hook in order to set the data structures
+ *	up early enough before dma or init_hwif calls are made.
+ */
+
+static void __devinit init_iops_hpt366(ide_hwif_t *hwif)
+{
+	struct hpt_info *info = kmalloc(sizeof(struct hpt_info), GFP_KERNEL);
+	unsigned long dmabase = pci_resource_start(hwif->pci_dev, 4);
+	u8 did, rid;
+
+	if(info == NULL) {
+		printk(KERN_WARNING "hpt366: out of memory.\n");
+		return;
+	}
+	memset(info, 0, sizeof(struct hpt_info));
+	ide_set_hwifdata(hwif, info);
+
+	if(dmabase) {
+		did = inb(dmabase + 0x22);
+		rid = inb(dmabase + 0x28);
+
+		if((did == 4 && rid == 6) || (did == 5 && rid > 1))
+			info->flags |= IS_372N;
+	}
+
+	info->revision = hpt_revision(hwif->pci_dev);
+
+	if (info->revision >= 3)
+		hpt37x_clocking(hwif);
+	else
+		hpt366_clocking(hwif);
+}
+
 static int __devinit init_setup_hpt374(struct pci_dev *dev, ide_pci_device_t *d)
 {
 	struct pci_dev *findev = NULL;
@@ -1646,6 +1626,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
 		.name		= "HPT366",
 		.init_setup	= init_setup_hpt366,
 		.init_chipset	= init_chipset_hpt366,
+		.init_iops	= init_iops_hpt366,
 		.init_hwif	= init_hwif_hpt366,
 		.init_dma	= init_dma_hpt366,
 		.channels	= 2,
@@ -1656,6 +1637,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
 		.name		= "HPT372A",
 		.init_setup	= init_setup_hpt37x,
 		.init_chipset	= init_chipset_hpt366,
+		.init_iops	= init_iops_hpt366,
 		.init_hwif	= init_hwif_hpt366,
 		.init_dma	= init_dma_hpt366,
 		.channels	= 2,
@@ -1665,6 +1647,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
 		.name		= "HPT302",
 		.init_setup	= init_setup_hpt37x,
 		.init_chipset	= init_chipset_hpt366,
+		.init_iops	= init_iops_hpt366,
 		.init_hwif	= init_hwif_hpt366,
 		.init_dma	= init_dma_hpt366,
 		.channels	= 2,
@@ -1674,6 +1657,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
 		.name		= "HPT371",
 		.init_setup	= init_setup_hpt37x,
 		.init_chipset	= init_chipset_hpt366,
+		.init_iops	= init_iops_hpt366,
 		.init_hwif	= init_hwif_hpt366,
 		.init_dma	= init_dma_hpt366,
 		.channels	= 2,
@@ -1683,6 +1667,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
 		.name		= "HPT374",
 		.init_setup	= init_setup_hpt374,
 		.init_chipset	= init_chipset_hpt366,
+		.init_iops	= init_iops_hpt366,
 		.init_hwif	= init_hwif_hpt366,
 		.init_dma	= init_dma_hpt366,
 		.channels	= 2,	/* 4 */
@@ -1692,6 +1677,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
 		.name		= "HPT372N",
 		.init_setup	= init_setup_hpt37x,
 		.init_chipset	= init_chipset_hpt366,
+		.init_iops	= init_iops_hpt366,
 		.init_hwif	= init_hwif_hpt366,
 		.init_dma	= init_dma_hpt366,
 		.channels	= 2,	/* 4 */

+ 812 - 0
drivers/ide/pci/it821x.c

@@ -0,0 +1,812 @@
+
+/*
+ * linux/drivers/ide/pci/it821x.c		Version 0.09	December 2004
+ *
+ * Copyright (C) 2004		Red Hat <alan@redhat.com>
+ *
+ *  May be copied or modified under the terms of the GNU General Public License
+ *  Based in part on the ITE vendor provided SCSI driver.
+ *
+ *  Documentation available from
+ * 	http://www.ite.com.tw/pc/IT8212F_V04.pdf
+ *  Some other documents are NDA.
+ *
+ *  The ITE8212 isn't exactly a standard IDE controller. It has two
+ *  modes. In pass through mode then it is an IDE controller. In its smart
+ *  mode its actually quite a capable hardware raid controller disguised
+ *  as an IDE controller. Smart mode only understands DMA read/write and
+ *  identify, none of the fancier commands apply. The IT8211 is identical
+ *  in other respects but lacks the raid mode.
+ *
+ *  Errata:
+ *  o	Rev 0x10 also requires master/slave hold the same DMA timings and
+ *	cannot do ATAPI MWDMA.
+ *  o	The identify data for raid volumes lacks CHS info (technically ok)
+ *	but also fails to set the LBA28 and other bits. We fix these in
+ *	the IDE probe quirk code.
+ *  o	If you write LBA48 sized I/O's (ie > 256 sector) in smart mode
+ *	raid then the controller firmware dies
+ *  o	Smart mode without RAID doesn't clear all the necessary identify
+ *	bits to reduce the command set to the one used
+ *
+ *  This has a few impacts on the driver
+ *  - In pass through mode we do all the work you would expect
+ *  - In smart mode the clocking set up is done by the controller generally
+ *    but we must watch the other limits and filter.
+ *  - There are a few extra vendor commands that actually talk to the
+ *    controller but only work PIO with no IRQ.
+ *
+ *  Vendor areas of the identify block in smart mode are used for the
+ *  timing and policy set up. Each HDD in raid mode also has a serial
+ *  block on the disk. The hardware extra commands are get/set chip status,
+ *  rebuild, get rebuild status.
+ *
+ *  In Linux the driver supports pass through mode as if the device was
+ *  just another IDE controller. If the smart mode is running then
+ *  volumes are managed by the controller firmware and each IDE "disk"
+ *  is a raid volume. Even more cute - the controller can do automated
+ *  hotplug and rebuild.
+ *
+ *  The pass through controller itself is a little demented. It has a
+ *  flaw that it has a single set of PIO/MWDMA timings per channel so
+ *  non UDMA devices restrict each others performance. It also has a
+ *  single clock source per channel so mixed UDMA100/133 performance
+ *  isn't perfect and we have to pick a clock. Thankfully none of this
+ *  matters in smart mode. ATAPI DMA is not currently supported.
+ *
+ *  It seems the smart mode is a win for RAID1/RAID10 but otherwise not.
+ *
+ *  TODO
+ *	-	ATAPI UDMA is ok but not MWDMA it seems
+ *	-	RAID configuration ioctls
+ *	-	Move to libata once it grows up
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/hdreg.h>
+#include <linux/ide.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+
+struct it821x_dev
+{
+	unsigned int smart:1,		/* Are we in smart raid mode */
+		timing10:1;		/* Rev 0x10 */
+	u8	clock_mode;		/* 0, ATA_50 or ATA_66 */
+	u8	want[2][2];		/* Mode/Pri log for master slave */
+	/* We need these for switching the clock when DMA goes on/off
+	   The high byte is the 66Mhz timing */
+	u16	pio[2];			/* Cached PIO values */
+	u16	mwdma[2];		/* Cached MWDMA values */
+	u16	udma[2];		/* Cached UDMA values (per drive) */
+};
+
+#define ATA_66		0
+#define ATA_50		1
+#define ATA_ANY		2
+
+#define UDMA_OFF	0
+#define MWDMA_OFF	0
+
+/*
+ *	We allow users to force the card into non raid mode without
+ *	flashing the alternative BIOS. This is also neccessary right now
+ *	for embedded platforms that cannot run a PC BIOS but are using this
+ *	device.
+ */
+
+static int it8212_noraid;
+
+/**
+ *	it821x_program	-	program the PIO/MWDMA registers
+ *	@drive: drive to tune
+ *
+ *	Program the PIO/MWDMA timing for this channel according to the
+ *	current clock.
+ */
+
+static void it821x_program(ide_drive_t *drive, u16 timing)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int channel = hwif->channel;
+	u8 conf;
+
+	/* Program PIO/MWDMA timing bits */
+	if(itdev->clock_mode == ATA_66)
+		conf = timing >> 8;
+	else
+		conf = timing & 0xFF;
+	pci_write_config_byte(hwif->pci_dev, 0x54 + 4 * channel, conf);
+}
+
+/**
+ *	it821x_program_udma	-	program the UDMA registers
+ *	@drive: drive to tune
+ *
+ *	Program the UDMA timing for this drive according to the
+ *	current clock.
+ */
+
+static void it821x_program_udma(ide_drive_t *drive, u16 timing)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int channel = hwif->channel;
+	int unit = drive->select.b.unit;
+	u8 conf;
+
+	/* Program UDMA timing bits */
+	if(itdev->clock_mode == ATA_66)
+		conf = timing >> 8;
+	else
+		conf = timing & 0xFF;
+	if(itdev->timing10 == 0)
+		pci_write_config_byte(hwif->pci_dev, 0x56 + 4 * channel + unit, conf);
+	else {
+		pci_write_config_byte(hwif->pci_dev, 0x56 + 4 * channel, conf);
+		pci_write_config_byte(hwif->pci_dev, 0x56 + 4 * channel + 1, conf);
+	}
+}
+
+
+/**
+ *	it821x_clock_strategy
+ *	@hwif: hardware interface
+ *
+ *	Select between the 50 and 66Mhz base clocks to get the best
+ *	results for this interface.
+ */
+
+static void it821x_clock_strategy(ide_drive_t *drive)
+{
+	ide_hwif_t *hwif = drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+
+	u8 unit = drive->select.b.unit;
+	ide_drive_t *pair = &hwif->drives[1-unit];
+
+	int clock, altclock;
+	u8 v;
+	int sel = 0;
+
+	if(itdev->want[0][0] > itdev->want[1][0]) {
+		clock = itdev->want[0][1];
+		altclock = itdev->want[1][1];
+	} else {
+		clock = itdev->want[1][1];
+		altclock = itdev->want[0][1];
+	}
+
+	/* Master doesn't care does the slave ? */
+	if(clock == ATA_ANY)
+		clock = altclock;
+
+	/* Nobody cares - keep the same clock */
+	if(clock == ATA_ANY)
+		return;
+	/* No change */
+	if(clock == itdev->clock_mode)
+		return;
+
+	/* Load this into the controller ? */
+	if(clock == ATA_66)
+		itdev->clock_mode = ATA_66;
+	else {
+		itdev->clock_mode = ATA_50;
+		sel = 1;
+	}
+	pci_read_config_byte(hwif->pci_dev, 0x50, &v);
+	v &= ~(1 << (1 + hwif->channel));
+	v |= sel << (1 + hwif->channel);
+	pci_write_config_byte(hwif->pci_dev, 0x50, v);
+
+	/*
+	 *	Reprogram the UDMA/PIO of the pair drive for the switch
+	 *	MWDMA will be dealt with by the dma switcher
+	 */
+	if(pair && itdev->udma[1-unit] != UDMA_OFF) {
+		it821x_program_udma(pair, itdev->udma[1-unit]);
+		it821x_program(pair, itdev->pio[1-unit]);
+	}
+	/*
+	 *	Reprogram the UDMA/PIO of our drive for the switch.
+	 *	MWDMA will be dealt with by the dma switcher
+	 */
+	if(itdev->udma[unit] != UDMA_OFF) {
+		it821x_program_udma(drive, itdev->udma[unit]);
+		it821x_program(drive, itdev->pio[unit]);
+	}
+}
+
+/**
+ *	it821x_ratemask	-	Compute available modes
+ *	@drive: IDE drive
+ *
+ *	Compute the available speeds for the devices on the interface. This
+ *	is all modes to ATA133 clipped by drive cable setup.
+ */
+
+static u8 it821x_ratemask (ide_drive_t *drive)
+{
+	u8 mode	= 4;
+	if (!eighty_ninty_three(drive))
+		mode = min(mode, (u8)1);
+	return mode;
+}
+
+/**
+ *	it821x_tuneproc	-	tune a drive
+ *	@drive: drive to tune
+ *	@mode_wanted: the target operating mode
+ *
+ *	Load the timing settings for this device mode into the
+ *	controller. By the time we are called the mode has been
+ *	modified as neccessary to handle the absence of seperate
+ *	master/slave timers for MWDMA/PIO.
+ *
+ *	This code is only used in pass through mode.
+ */
+
+static void it821x_tuneproc (ide_drive_t *drive, byte mode_wanted)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int unit = drive->select.b.unit;
+
+	/* Spec says 89 ref driver uses 88 */
+	static u16 pio[]	= { 0xAA88, 0xA382, 0xA181, 0x3332, 0x3121 };
+	static u8 pio_want[]    = { ATA_66, ATA_66, ATA_66, ATA_66, ATA_ANY };
+
+	if(itdev->smart)
+		return;
+
+	/* We prefer 66Mhz clock for PIO 0-3, don't care for PIO4 */
+	itdev->want[unit][1] = pio_want[mode_wanted];
+	itdev->want[unit][0] = 1;	/* PIO is lowest priority */
+	itdev->pio[unit] = pio[mode_wanted];
+	it821x_clock_strategy(drive);
+	it821x_program(drive, itdev->pio[unit]);
+}
+
+/**
+ *	it821x_tune_mwdma	-	tune a channel for MWDMA
+ *	@drive: drive to set up
+ *	@mode_wanted: the target operating mode
+ *
+ *	Load the timing settings for this device mode into the
+ *	controller when doing MWDMA in pass through mode. The caller
+ *	must manage the whole lack of per device MWDMA/PIO timings and
+ *	the shared MWDMA/PIO timing register.
+ */
+
+static void it821x_tune_mwdma (ide_drive_t *drive, byte mode_wanted)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = (void *)ide_get_hwifdata(hwif);
+	int unit = drive->select.b.unit;
+	int channel = hwif->channel;
+	u8 conf;
+
+	static u16 dma[]	= { 0x8866, 0x3222, 0x3121 };
+	static u8 mwdma_want[]	= { ATA_ANY, ATA_66, ATA_ANY };
+
+	itdev->want[unit][1] = mwdma_want[mode_wanted];
+	itdev->want[unit][0] = 2;	/* MWDMA is low priority */
+	itdev->mwdma[unit] = dma[mode_wanted];
+	itdev->udma[unit] = UDMA_OFF;
+
+	/* UDMA bits off - Revision 0x10 do them in pairs */
+	pci_read_config_byte(hwif->pci_dev, 0x50, &conf);
+	if(itdev->timing10)
+		conf |= channel ? 0x60: 0x18;
+	else
+		conf |= 1 << (3 + 2 * channel + unit);
+	pci_write_config_byte(hwif->pci_dev, 0x50, conf);
+
+	it821x_clock_strategy(drive);
+	/* FIXME: do we need to program this ? */
+	/* it821x_program(drive, itdev->mwdma[unit]); */
+}
+
+/**
+ *	it821x_tune_udma	-	tune a channel for UDMA
+ *	@drive: drive to set up
+ *	@mode_wanted: the target operating mode
+ *
+ *	Load the timing settings for this device mode into the
+ *	controller when doing UDMA modes in pass through.
+ */
+
+static void it821x_tune_udma (ide_drive_t *drive, byte mode_wanted)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int unit = drive->select.b.unit;
+	int channel = hwif->channel;
+	u8 conf;
+
+	static u16 udma[]	= { 0x4433, 0x4231, 0x3121, 0x2121, 0x1111, 0x2211, 0x1111 };
+	static u8 udma_want[]	= { ATA_ANY, ATA_50, ATA_ANY, ATA_66, ATA_66, ATA_50, ATA_66 };
+
+	itdev->want[unit][1] = udma_want[mode_wanted];
+	itdev->want[unit][0] = 3;	/* UDMA is high priority */
+	itdev->mwdma[unit] = MWDMA_OFF;
+	itdev->udma[unit] = udma[mode_wanted];
+	if(mode_wanted >= 5)
+		itdev->udma[unit] |= 0x8080;	/* UDMA 5/6 select on */
+
+	/* UDMA on. Again revision 0x10 must do the pair */
+	pci_read_config_byte(hwif->pci_dev, 0x50, &conf);
+	if(itdev->timing10)
+		conf &= channel ? 0x9F: 0xE7;
+	else
+		conf &= ~ (1 << (3 + 2 * channel + unit));
+	pci_write_config_byte(hwif->pci_dev, 0x50, conf);
+
+	it821x_clock_strategy(drive);
+	it821x_program_udma(drive, itdev->udma[unit]);
+
+}
+
+/**
+ *	config_it821x_chipset_for_pio	-	set drive timings
+ *	@drive: drive to tune
+ *	@speed we want
+ *
+ *	Compute the best pio mode we can for a given device. We must
+ *	pick a speed that does not cause problems with the other device
+ *	on the cable.
+ */
+
+static void config_it821x_chipset_for_pio (ide_drive_t *drive, byte set_speed)
+{
+	u8 unit = drive->select.b.unit;
+	ide_hwif_t *hwif = drive->hwif;
+	ide_drive_t *pair = &hwif->drives[1-unit];
+	u8 speed = 0, set_pio	= ide_get_best_pio_mode(drive, 255, 5, NULL);
+	u8 pair_pio;
+
+	/* We have to deal with this mess in pairs */
+	if(pair != NULL) {
+		pair_pio = ide_get_best_pio_mode(pair, 255, 5, NULL);
+		/* Trim PIO to the slowest of the master/slave */
+		if(pair_pio < set_pio)
+			set_pio = pair_pio;
+	}
+	it821x_tuneproc(drive, set_pio);
+	speed = XFER_PIO_0 + set_pio;
+	/* XXX - We trim to the lowest of the pair so the other drive
+	   will always be fine at this point until we do hotplug passthru */
+
+	if (set_speed)
+		(void) ide_config_drive_speed(drive, speed);
+}
+
+/**
+ *	it821x_dma_read	-	DMA hook
+ *	@drive: drive for DMA
+ *
+ *	The IT821x has a single timing register for MWDMA and for PIO
+ *	operations. As we flip back and forth we have to reload the
+ *	clock. In addition the rev 0x10 device only works if the same
+ *	timing value is loaded into the master and slave UDMA clock
+ * 	so we must also reload that.
+ *
+ *	FIXME: we could figure out in advance if we need to do reloads
+ */
+
+static void it821x_dma_start(ide_drive_t *drive)
+{
+	ide_hwif_t *hwif = drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int unit = drive->select.b.unit;
+	if(itdev->mwdma[unit] != MWDMA_OFF)
+		it821x_program(drive, itdev->mwdma[unit]);
+	else if(itdev->udma[unit] != UDMA_OFF && itdev->timing10)
+		it821x_program_udma(drive, itdev->udma[unit]);
+	ide_dma_start(drive);
+}
+
+/**
+ *	it821x_dma_write	-	DMA hook
+ *	@drive: drive for DMA stop
+ *
+ *	The IT821x has a single timing register for MWDMA and for PIO
+ *	operations. As we flip back and forth we have to reload the
+ *	clock.
+ */
+
+static int it821x_dma_end(ide_drive_t *drive)
+{
+	ide_hwif_t *hwif = drive->hwif;
+	int unit = drive->select.b.unit;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int ret = __ide_dma_end(drive);
+	if(itdev->mwdma[unit] != MWDMA_OFF)
+		it821x_program(drive, itdev->pio[unit]);
+	return ret;
+}
+
+
+/**
+ *	it821x_tune_chipset	-	set controller timings
+ *	@drive: Drive to set up
+ *	@xferspeed: speed we want to achieve
+ *
+ *	Tune the ITE chipset for the desired mode. If we can't achieve
+ *	the desired mode then tune for a lower one, but ultimately
+ *	make the thing work.
+ */
+
+static int it821x_tune_chipset (ide_drive_t *drive, byte xferspeed)
+{
+
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	u8 speed		= ide_rate_filter(it821x_ratemask(drive), xferspeed);
+
+	if(!itdev->smart) {
+		switch(speed) {
+			case XFER_PIO_4:
+			case XFER_PIO_3:
+			case XFER_PIO_2:
+			case XFER_PIO_1:
+			case XFER_PIO_0:
+				it821x_tuneproc(drive, (speed - XFER_PIO_0));
+				break;
+			/* MWDMA tuning is really hard because our MWDMA and PIO
+			   timings are kept in the same place. We can switch in the
+			   host dma on/off callbacks */
+			case XFER_MW_DMA_2:
+			case XFER_MW_DMA_1:
+			case XFER_MW_DMA_0:
+				it821x_tune_mwdma(drive, (speed - XFER_MW_DMA_0));
+				break;
+			case XFER_UDMA_6:
+			case XFER_UDMA_5:
+			case XFER_UDMA_4:
+			case XFER_UDMA_3:
+			case XFER_UDMA_2:
+			case XFER_UDMA_1:
+			case XFER_UDMA_0:
+				it821x_tune_udma(drive, (speed - XFER_UDMA_0));
+				break;
+			default:
+				return 1;
+		}
+	}
+	/*
+	 *	In smart mode the clocking is done by the host controller
+	 * 	snooping the mode we picked. The rest of it is not our problem
+	 */
+	return ide_config_drive_speed(drive, speed);
+}
+
+/**
+ *	config_chipset_for_dma	-	configure for DMA
+ *	@drive: drive to configure
+ *
+ *	Called by the IDE layer when it wants the timings set up.
+ */
+
+static int config_chipset_for_dma (ide_drive_t *drive)
+{
+	u8 speed	= ide_dma_speed(drive, it821x_ratemask(drive));
+
+	config_it821x_chipset_for_pio(drive, !speed);
+	it821x_tune_chipset(drive, speed);
+	return ide_dma_enable(drive);
+}
+
+/**
+ *	it821x_configure_drive_for_dma	-	set up for DMA transfers
+ *	@drive: drive we are going to set up
+ *
+ *	Set up the drive for DMA, tune the controller and drive as
+ *	required. If the drive isn't suitable for DMA or we hit
+ *	other problems then we will drop down to PIO and set up
+ *	PIO appropriately
+ */
+
+static int it821x_config_drive_for_dma (ide_drive_t *drive)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+
+	if (ide_use_dma(drive)) {
+		if (config_chipset_for_dma(drive))
+			return hwif->ide_dma_on(drive);
+	}
+	config_it821x_chipset_for_pio(drive, 1);
+	return hwif->ide_dma_off_quietly(drive);
+}
+
+/**
+ *	ata66_it821x	-	check for 80 pin cable
+ *	@hwif: interface to check
+ *
+ *	Check for the presence of an ATA66 capable cable on the
+ *	interface. Problematic as it seems some cards don't have
+ *	the needed logic onboard.
+ */
+
+static unsigned int __devinit ata66_it821x(ide_hwif_t *hwif)
+{
+	/* The reference driver also only does disk side */
+	return 1;
+}
+
+/**
+ *	it821x_fixup	-	post init callback
+ *	@hwif: interface
+ *
+ *	This callback is run after the drives have been probed but
+ *	before anything gets attached. It allows drivers to do any
+ *	final tuning that is needed, or fixups to work around bugs.
+ */
+
+static void __devinit it821x_fixups(ide_hwif_t *hwif)
+{
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int i;
+
+	if(!itdev->smart) {
+		/*
+		 *	If we are in pass through mode then not much
+		 *	needs to be done, but we do bother to clear the
+		 *	IRQ mask as we may well be in PIO (eg rev 0x10)
+		 *	for now and we know unmasking is safe on this chipset.
+		 */
+		for (i = 0; i < 2; i++) {
+			ide_drive_t *drive = &hwif->drives[i];
+			if(drive->present)
+				drive->unmask = 1;
+		}
+		return;
+	}
+	/*
+	 *	Perform fixups on smart mode. We need to "lose" some
+	 *	capabilities the firmware lacks but does not filter, and
+	 *	also patch up some capability bits that it forgets to set
+	 *	in RAID mode.
+	 */
+
+	for(i = 0; i < 2; i++) {
+		ide_drive_t *drive = &hwif->drives[i];
+		struct hd_driveid *id;
+		u16 *idbits;
+
+		if(!drive->present)
+			continue;
+		id = drive->id;
+		idbits = (u16 *)drive->id;
+
+		/* Check for RAID v native */
+		if(strstr(id->model, "Integrated Technology Express")) {
+			/* In raid mode the ident block is slightly buggy
+			   We need to set the bits so that the IDE layer knows
+			   LBA28. LBA48 and DMA ar valid */
+			id->capability |= 3;		/* LBA28, DMA */
+			id->command_set_2 |= 0x0400;	/* LBA48 valid */
+			id->cfs_enable_2 |= 0x0400;	/* LBA48 on */
+			/* Reporting logic */
+			printk(KERN_INFO "%s: IT8212 %sRAID %d volume",
+				drive->name,
+				idbits[147] ? "Bootable ":"",
+				idbits[129]);
+				if(idbits[129] != 1)
+					printk("(%dK stripe)", idbits[146]);
+				printk(".\n");
+			/* Now the core code will have wrongly decided no DMA
+			   so we need to fix this */
+			hwif->ide_dma_off_quietly(drive);
+#ifdef CONFIG_IDEDMA_ONLYDISK
+			if (drive->media == ide_disk)
+#endif
+				hwif->ide_dma_check(drive);
+		} else {
+			/* Non RAID volume. Fixups to stop the core code
+			   doing unsupported things */
+			id->field_valid &= 1;
+			id->queue_depth = 0;
+			id->command_set_1 = 0;
+			id->command_set_2 &= 0xC400;
+			id->cfsse &= 0xC000;
+			id->cfs_enable_1 = 0;
+			id->cfs_enable_2 &= 0xC400;
+			id->csf_default &= 0xC000;
+			id->word127 = 0;
+			id->dlf = 0;
+			id->csfo = 0;
+			id->cfa_power = 0;
+			printk(KERN_INFO "%s: Performing identify fixups.\n",
+				drive->name);
+		}
+	}
+
+}
+
+/**
+ *	init_hwif_it821x	-	set up hwif structs
+ *	@hwif: interface to set up
+ *
+ *	We do the basic set up of the interface structure. The IT8212
+ *	requires several custom handlers so we override the default
+ *	ide DMA handlers appropriately
+ */
+
+static void __devinit init_hwif_it821x(ide_hwif_t *hwif)
+{
+	struct it821x_dev *idev = kmalloc(sizeof(struct it821x_dev), GFP_KERNEL);
+	u8 conf;
+
+	if(idev == NULL) {
+		printk(KERN_ERR "it821x: out of memory, falling back to legacy behaviour.\n");
+		goto fallback;
+	}
+	memset(idev, 0, sizeof(struct it821x_dev));
+	ide_set_hwifdata(hwif, idev);
+
+	pci_read_config_byte(hwif->pci_dev, 0x50, &conf);
+	if(conf & 1) {
+		idev->smart = 1;
+		hwif->atapi_dma = 0;
+		/* Long I/O's although allowed in LBA48 space cause the
+		   onboard firmware to enter the twighlight zone */
+		hwif->rqsize = 256;
+	}
+
+	/* Pull the current clocks from 0x50 also */
+	if (conf & (1 << (1 + hwif->channel)))
+		idev->clock_mode = ATA_50;
+	else
+		idev->clock_mode = ATA_66;
+
+	idev->want[0][1] = ATA_ANY;
+	idev->want[1][1] = ATA_ANY;
+
+	/*
+	 *	Not in the docs but according to the reference driver
+	 *	this is neccessary.
+	 */
+
+	pci_read_config_byte(hwif->pci_dev, 0x08, &conf);
+	if(conf == 0x10) {
+		idev->timing10 = 1;
+		hwif->atapi_dma = 0;
+		if(!idev->smart)
+			printk(KERN_WARNING "it821x: Revision 0x10, workarounds activated.\n");
+	}
+
+	hwif->speedproc = &it821x_tune_chipset;
+	hwif->tuneproc	= &it821x_tuneproc;
+
+	/* MWDMA/PIO clock switching for pass through mode */
+	if(!idev->smart) {
+		hwif->dma_start = &it821x_dma_start;
+		hwif->ide_dma_end = &it821x_dma_end;
+	}
+
+	hwif->drives[0].autotune = 1;
+	hwif->drives[1].autotune = 1;
+
+	if (!hwif->dma_base)
+		goto fallback;
+
+	hwif->ultra_mask = 0x7f;
+	hwif->mwdma_mask = 0x07;
+	hwif->swdma_mask = 0x07;
+
+	hwif->ide_dma_check = &it821x_config_drive_for_dma;
+	if (!(hwif->udma_four))
+		hwif->udma_four = ata66_it821x(hwif);
+
+	/*
+	 *	The BIOS often doesn't set up DMA on this controller
+	 *	so we always do it.
+	 */
+
+	hwif->autodma = 1;
+	hwif->drives[0].autodma = hwif->autodma;
+	hwif->drives[1].autodma = hwif->autodma;
+	return;
+fallback:
+	hwif->autodma = 0;
+	return;
+}
+
+static void __devinit it8212_disable_raid(struct pci_dev *dev)
+{
+	/* Reset local CPU, and set BIOS not ready */
+	pci_write_config_byte(dev, 0x5E, 0x01);
+
+	/* Set to bypass mode, and reset PCI bus */
+	pci_write_config_byte(dev, 0x50, 0x00);
+	pci_write_config_word(dev, PCI_COMMAND,
+			      PCI_COMMAND_PARITY | PCI_COMMAND_IO |
+			      PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+	pci_write_config_word(dev, 0x40, 0xA0F3);
+
+	pci_write_config_dword(dev,0x4C, 0x02040204);
+	pci_write_config_byte(dev, 0x42, 0x36);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0);
+}
+
+static unsigned int __devinit init_chipset_it821x(struct pci_dev *dev, const char *name)
+{
+	u8 conf;
+	static char *mode[2] = { "pass through", "smart" };
+
+	/* Force the card into bypass mode if so requested */
+	if (it8212_noraid) {
+		printk(KERN_INFO "it8212: forcing bypass mode.\n");
+		it8212_disable_raid(dev);
+	}
+	pci_read_config_byte(dev, 0x50, &conf);
+	printk(KERN_INFO "it821x: controller in %s mode.\n", mode[conf & 1]);
+	return 0;
+}
+
+
+#define DECLARE_ITE_DEV(name_str)			\
+	{						\
+		.name		= name_str,		\
+		.init_chipset	= init_chipset_it821x,	\
+		.init_hwif	= init_hwif_it821x,	\
+		.channels	= 2,			\
+		.autodma	= AUTODMA,		\
+		.bootable	= ON_BOARD,		\
+		.fixup	 	= it821x_fixups		\
+	}
+
+static ide_pci_device_t it821x_chipsets[] __devinitdata = {
+	/* 0 */ DECLARE_ITE_DEV("IT8212"),
+};
+
+/**
+ *	it821x_init_one	-	pci layer discovery entry
+ *	@dev: PCI device
+ *	@id: ident table entry
+ *
+ *	Called by the PCI code when it finds an ITE821x controller.
+ *	We then use the IDE PCI generic helper to do most of the work.
+ */
+
+static int __devinit it821x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	ide_setup_pci_device(dev, &it821x_chipsets[id->driver_data]);
+	return 0;
+}
+
+static struct pci_device_id it821x_pci_tbl[] = {
+	{ PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8211,  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8212,  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ 0, },
+};
+
+MODULE_DEVICE_TABLE(pci, it821x_pci_tbl);
+
+static struct pci_driver driver = {
+	.name		= "ITE821x IDE",
+	.id_table	= it821x_pci_tbl,
+	.probe		= it821x_init_one,
+};
+
+static int __init it821x_ide_init(void)
+{
+	return ide_pci_register_driver(&driver);
+}
+
+module_init(it821x_ide_init);
+
+module_param_named(noraid, it8212_noraid, int, S_IRUGO);
+MODULE_PARM_DESC(it8212_noraid, "Force card into bypass mode");
+
+MODULE_AUTHOR("Alan Cox");
+MODULE_DESCRIPTION("PCI driver module for the ITE 821x");
+MODULE_LICENSE("GPL");

+ 5 - 5
drivers/ide/pci/serverworks.c

@@ -442,7 +442,7 @@ static unsigned int __devinit init_chipset_svwks (struct pci_dev *dev, const cha
 	return (dev->irq) ? dev->irq : 0;
 }
 
-static unsigned int __init ata66_svwks_svwks (ide_hwif_t *hwif)
+static unsigned int __devinit ata66_svwks_svwks (ide_hwif_t *hwif)
 {
 	return 1;
 }
@@ -454,7 +454,7 @@ static unsigned int __init ata66_svwks_svwks (ide_hwif_t *hwif)
  * Bit 14 clear = primary IDE channel does not have 80-pin cable.
  * Bit 14 set   = primary IDE channel has 80-pin cable.
  */
-static unsigned int __init ata66_svwks_dell (ide_hwif_t *hwif)
+static unsigned int __devinit ata66_svwks_dell (ide_hwif_t *hwif)
 {
 	struct pci_dev *dev = hwif->pci_dev;
 	if (dev->subsystem_vendor == PCI_VENDOR_ID_DELL &&
@@ -472,7 +472,7 @@ static unsigned int __init ata66_svwks_dell (ide_hwif_t *hwif)
  *
  * WARNING: this only works on Alpine hardware!
  */
-static unsigned int __init ata66_svwks_cobalt (ide_hwif_t *hwif)
+static unsigned int __devinit ata66_svwks_cobalt (ide_hwif_t *hwif)
 {
 	struct pci_dev *dev = hwif->pci_dev;
 	if (dev->subsystem_vendor == PCI_VENDOR_ID_SUN &&
@@ -483,7 +483,7 @@ static unsigned int __init ata66_svwks_cobalt (ide_hwif_t *hwif)
 	return 0;
 }
 
-static unsigned int __init ata66_svwks (ide_hwif_t *hwif)
+static unsigned int __devinit ata66_svwks (ide_hwif_t *hwif)
 {
 	struct pci_dev *dev = hwif->pci_dev;
 
@@ -573,7 +573,7 @@ static int __devinit init_setup_svwks (struct pci_dev *dev, ide_pci_device_t *d)
 	return ide_setup_pci_device(dev, d);
 }
 
-static int __init init_setup_csb6 (struct pci_dev *dev, ide_pci_device_t *d)
+static int __devinit init_setup_csb6 (struct pci_dev *dev, ide_pci_device_t *d)
 {
 	if (!(PCI_FUNC(dev->devfn) & 1)) {
 		d->bootable = NEVER_BOARD;

+ 4 - 4
drivers/ide/ppc/pmac.c

@@ -1324,9 +1324,9 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif)
 	/* XXX FIXME: Media bay stuff need re-organizing */
 	if (np->parent && np->parent->name
 	    && strcasecmp(np->parent->name, "media-bay") == 0) {
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PMAC_MEDIABAY
 		media_bay_set_ide_infos(np->parent, pmif->regbase, pmif->irq, hwif->index);
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PMAC_MEDIABAY */
 		pmif->mediabay = 1;
 		if (!bidp)
 			pmif->aapl_bus_id = 1;
@@ -1382,10 +1382,10 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif)
 	       hwif->index, model_name[pmif->kind], pmif->aapl_bus_id,
 	       pmif->mediabay ? " (mediabay)" : "", hwif->irq);
 			
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PMAC_MEDIABAY
 	if (pmif->mediabay && check_media_bay_by_base(pmif->regbase, MB_CD) == 0)
 		hwif->noprobe = 0;
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PMAC_MEDIABAY */
 
 	hwif->sg_max_nents = MAX_DCMDS;
 

+ 5 - 5
drivers/ieee1394/ohci1394.c

@@ -3538,8 +3538,8 @@ static void ohci1394_pci_remove(struct pci_dev *pdev)
 
 static int ohci1394_pci_resume (struct pci_dev *pdev)
 {
-#ifdef CONFIG_PMAC_PBOOK
-	{
+#ifdef CONFIG_PPC_PMAC
+	if (_machine == _MACH_Pmac) {
 		struct device_node *of_node;
 
 		/* Re-enable 1394 */
@@ -3547,7 +3547,7 @@ static int ohci1394_pci_resume (struct pci_dev *pdev)
 		if (of_node)
 			pmac_call_feature (PMAC_FTR_1394_ENABLE, of_node, 0, 1);
 	}
-#endif
+#endif /* CONFIG_PPC_PMAC */
 
 	pci_enable_device(pdev);
 
@@ -3557,8 +3557,8 @@ static int ohci1394_pci_resume (struct pci_dev *pdev)
 
 static int ohci1394_pci_suspend (struct pci_dev *pdev, pm_message_t state)
 {
-#ifdef CONFIG_PMAC_PBOOK
-	{
+#ifdef CONFIG_PPC_PMAC
+	if (_machine == _MACH_Pmac) {
 		struct device_node *of_node;
 
 		/* Disable 1394 */

+ 2 - 2
drivers/infiniband/core/packer.c

@@ -96,7 +96,7 @@ void ib_pack(const struct ib_field        *desc,
 			else
 				val = 0;
 
-			mask = cpu_to_be64(((1ull << desc[i].size_bits) - 1) << shift);
+			mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift);
 			addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words);
 			*addr = (*addr & ~mask) | (cpu_to_be64(val) & mask);
 		} else {
@@ -176,7 +176,7 @@ void ib_unpack(const struct ib_field        *desc,
 			__be64 *addr;
 
 			shift = 64 - desc[i].offset_bits - desc[i].size_bits;
-			mask = ((1ull << desc[i].size_bits) - 1) << shift;
+			mask = (~0ull >> (64 - desc[i].size_bits)) << shift;
 			addr = (__be64 *) buf + desc[i].offset_words;
 			val = (be64_to_cpup(addr) & mask) >> shift;
 			value_write(desc[i].struct_offset_bytes,

+ 13 - 5
drivers/infiniband/core/sa_query.c

@@ -507,7 +507,13 @@ retry:
 		spin_unlock_irqrestore(&idr_lock, flags);
 	}
 
-	return ret;
+	/*
+	 * It's not safe to dereference query any more, because the
+	 * send may already have completed and freed the query in
+	 * another context.  So use wr.wr_id, which has a copy of the
+	 * query's id.
+	 */
+	return ret ? ret : wr.wr_id;
 }
 
 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
@@ -598,14 +604,15 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
 		rec, query->sa_query.mad->data);
 
 	*sa_query = &query->sa_query;
+
 	ret = send_mad(&query->sa_query, timeout_ms);
-	if (ret) {
+	if (ret < 0) {
 		*sa_query = NULL;
 		kfree(query->sa_query.mad);
 		kfree(query);
 	}
 
-	return ret ? ret : query->sa_query.id;
+	return ret;
 }
 EXPORT_SYMBOL(ib_sa_path_rec_get);
 
@@ -674,14 +681,15 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
 		rec, query->sa_query.mad->data);
 
 	*sa_query = &query->sa_query;
+
 	ret = send_mad(&query->sa_query, timeout_ms);
-	if (ret) {
+	if (ret < 0) {
 		*sa_query = NULL;
 		kfree(query->sa_query.mad);
 		kfree(query);
 	}
 
-	return ret ? ret : query->sa_query.id;
+	return ret;
 }
 EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
 

+ 1 - 0
drivers/infiniband/hw/mthca/mthca_av.c

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU

+ 227 - 304
drivers/infiniband/hw/mthca/mthca_cmd.c

@@ -431,6 +431,36 @@ static int mthca_cmd_imm(struct mthca_dev *dev,
 				      timeout, status);
 }
 
+int mthca_cmd_init(struct mthca_dev *dev)
+{
+	sema_init(&dev->cmd.hcr_sem, 1);
+	sema_init(&dev->cmd.poll_sem, 1);
+	dev->cmd.use_events = 0;
+
+	dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE,
+			   MTHCA_HCR_SIZE);
+	if (!dev->hcr) {
+		mthca_err(dev, "Couldn't map command register.");
+		return -ENOMEM;
+	}
+
+	dev->cmd.pool = pci_pool_create("mthca_cmd", dev->pdev,
+					MTHCA_MAILBOX_SIZE,
+					MTHCA_MAILBOX_SIZE, 0);
+	if (!dev->cmd.pool) {
+		iounmap(dev->hcr);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void mthca_cmd_cleanup(struct mthca_dev *dev)
+{
+	pci_pool_destroy(dev->cmd.pool);
+	iounmap(dev->hcr);
+}
+
 /*
  * Switch to using events to issue FW commands (should be called after
  * event queue to command events has been initialized).
@@ -489,6 +519,33 @@ void mthca_cmd_use_polling(struct mthca_dev *dev)
 	up(&dev->cmd.poll_sem);
 }
 
+struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
+					  unsigned int gfp_mask)
+{
+	struct mthca_mailbox *mailbox;
+
+	mailbox = kmalloc(sizeof *mailbox, gfp_mask);
+	if (!mailbox)
+		return ERR_PTR(-ENOMEM);
+
+	mailbox->buf = pci_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma);
+	if (!mailbox->buf) {
+		kfree(mailbox);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return mailbox;
+}
+
+void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox)
+{
+	if (!mailbox)
+		return;
+
+	pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma);
+	kfree(mailbox);
+}
+
 int mthca_SYS_EN(struct mthca_dev *dev, u8 *status)
 {
 	u64 out;
@@ -513,20 +570,20 @@ int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status)
 static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
 			 u64 virt, u8 *status)
 {
-	u32 *inbox;
-	dma_addr_t indma;
+	struct mthca_mailbox *mailbox;
 	struct mthca_icm_iter iter;
+	__be64 *pages;
 	int lg;
 	int nent = 0;
 	int i;
 	int err = 0;
 	int ts = 0, tc = 0;
 
-	inbox = pci_alloc_consistent(dev->pdev, PAGE_SIZE, &indma);
-	if (!inbox)
-		return -ENOMEM;
-
-	memset(inbox, 0, PAGE_SIZE);
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	memset(mailbox->buf, 0, MTHCA_MAILBOX_SIZE);
+	pages = mailbox->buf;
 
 	for (mthca_icm_first(icm, &iter);
 	     !mthca_icm_last(&iter);
@@ -546,19 +603,17 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
 		}
 		for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i, ++nent) {
 			if (virt != -1) {
-				*((__be64 *) (inbox + nent * 4)) =
-					cpu_to_be64(virt);
+				pages[nent * 2] = cpu_to_be64(virt);
 				virt += 1 << lg;
 			}
 
-			*((__be64 *) (inbox + nent * 4 + 2)) =
-				cpu_to_be64((mthca_icm_addr(&iter) +
-					     (i << lg)) | (lg - 12));
+			pages[nent * 2 + 1] = cpu_to_be64((mthca_icm_addr(&iter) +
+							   (i << lg)) | (lg - 12));
 			ts += 1 << (lg - 10);
 			++tc;
 
-			if (nent == PAGE_SIZE / 16) {
-				err = mthca_cmd(dev, indma, nent, 0, op,
+			if (nent == MTHCA_MAILBOX_SIZE / 16) {
+				err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
 						CMD_TIME_CLASS_B, status);
 				if (err || *status)
 					goto out;
@@ -568,7 +623,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
 	}
 
 	if (nent)
-		err = mthca_cmd(dev, indma, nent, 0, op,
+		err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
 				CMD_TIME_CLASS_B, status);
 
 	switch (op) {
@@ -585,7 +640,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
 	}
 
 out:
-	pci_free_consistent(dev->pdev, PAGE_SIZE, inbox, indma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
 
@@ -606,8 +661,8 @@ int mthca_RUN_FW(struct mthca_dev *dev, u8 *status)
 
 int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u32 *outbox;
-	dma_addr_t outdma;
 	int err = 0;
 	u8 lg;
 
@@ -625,12 +680,12 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
 #define QUERY_FW_EQ_ARM_BASE_OFFSET    0x40
 #define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48
 
-	outbox = pci_alloc_consistent(dev->pdev, QUERY_FW_OUT_SIZE, &outdma);
-	if (!outbox) {
-		return -ENOMEM;
-	}
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	outbox = mailbox->buf;
 
-	err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_FW,
+	err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_FW,
 			    CMD_TIME_CLASS_A, status);
 
 	if (err)
@@ -681,15 +736,15 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
 	}
 
 out:
-	pci_free_consistent(dev->pdev, QUERY_FW_OUT_SIZE, outbox, outdma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
 
 int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u8 info;
 	u32 *outbox;
-	dma_addr_t outdma;
 	int err = 0;
 
 #define ENABLE_LAM_OUT_SIZE         0x100
@@ -700,11 +755,12 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
 #define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4)
 #define ENABLE_LAM_INFO_ECC_MASK    0x3
 
-	outbox = pci_alloc_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, &outdma);
-	if (!outbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	outbox = mailbox->buf;
 
-	err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_ENABLE_LAM,
+	err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_ENABLE_LAM,
 			    CMD_TIME_CLASS_C, status);
 
 	if (err)
@@ -733,7 +789,7 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
 		  (unsigned long long) dev->ddr_end);
 
 out:
-	pci_free_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, outbox, outdma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
 
@@ -744,9 +800,9 @@ int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status)
 
 int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u8 info;
 	u32 *outbox;
-	dma_addr_t outdma;
 	int err = 0;
 
 #define QUERY_DDR_OUT_SIZE         0x100
@@ -757,11 +813,12 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
 #define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4)
 #define QUERY_DDR_INFO_ECC_MASK    0x3
 
-	outbox = pci_alloc_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, &outdma);
-	if (!outbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	outbox = mailbox->buf;
 
-	err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DDR,
+	err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DDR,
 			    CMD_TIME_CLASS_A, status);
 
 	if (err)
@@ -787,15 +844,15 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
 		  (unsigned long long) dev->ddr_end);
 
 out:
-	pci_free_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, outbox, outdma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
 
 int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 			struct mthca_dev_lim *dev_lim, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u32 *outbox;
-	dma_addr_t outdma;
 	u8 field;
 	u16 size;
 	int err;
@@ -860,11 +917,12 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 #define QUERY_DEV_LIM_LAMR_OFFSET           0x9f
 #define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET     0xa0
 
-	outbox = pci_alloc_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, &outdma);
-	if (!outbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	outbox = mailbox->buf;
 
-	err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DEV_LIM,
+	err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DEV_LIM,
 			    CMD_TIME_CLASS_A, status);
 
 	if (err)
@@ -1020,15 +1078,15 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 	}
 
 out:
-	pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
 
 int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
 			struct mthca_adapter *adapter, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u32 *outbox;
-	dma_addr_t outdma;
 	int err;
 
 #define QUERY_ADAPTER_OUT_SIZE             0x100
@@ -1037,23 +1095,24 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
 #define QUERY_ADAPTER_REVISION_ID_OFFSET   0x08
 #define QUERY_ADAPTER_INTA_PIN_OFFSET      0x10
 
-	outbox = pci_alloc_consistent(dev->pdev, QUERY_ADAPTER_OUT_SIZE, &outdma);
-	if (!outbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	outbox = mailbox->buf;
 
-	err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_ADAPTER,
+	err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_ADAPTER,
 			    CMD_TIME_CLASS_A, status);
 
 	if (err)
 		goto out;
 
-	MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET);
-	MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET);
+	MTHCA_GET(adapter->vendor_id, outbox,   QUERY_ADAPTER_VENDOR_ID_OFFSET);
+	MTHCA_GET(adapter->device_id, outbox,   QUERY_ADAPTER_DEVICE_ID_OFFSET);
 	MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET);
-	MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
+	MTHCA_GET(adapter->inta_pin, outbox,    QUERY_ADAPTER_INTA_PIN_OFFSET);
 
 out:
-	pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
 
@@ -1061,8 +1120,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
 		   struct mthca_init_hca_param *param,
 		   u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u32 *inbox;
-	dma_addr_t indma;
 	int err;
 
 #define INIT_HCA_IN_SIZE             	 0x200
@@ -1102,9 +1161,10 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
 #define  INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10)
 #define  INIT_HCA_UAR_CTX_BASE_OFFSET    (INIT_HCA_UAR_OFFSET + 0x18)
 
-	inbox = pci_alloc_consistent(dev->pdev, INIT_HCA_IN_SIZE, &indma);
-	if (!inbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	inbox = mailbox->buf;
 
 	memset(inbox, 0, INIT_HCA_IN_SIZE);
 
@@ -1167,10 +1227,9 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
 		MTHCA_PUT(inbox, param->uarc_base,   INIT_HCA_UAR_CTX_BASE_OFFSET);
 	}
 
-	err = mthca_cmd(dev, indma, 0, 0, CMD_INIT_HCA,
-			HZ, status);
+	err = mthca_cmd(dev, mailbox->dma, 0, 0, CMD_INIT_HCA, HZ, status);
 
-	pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
 
@@ -1178,8 +1237,8 @@ int mthca_INIT_IB(struct mthca_dev *dev,
 		  struct mthca_init_ib_param *param,
 		  int port, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u32 *inbox;
-	dma_addr_t indma;
 	int err;
 	u32 flags;
 
@@ -1199,9 +1258,10 @@ int mthca_INIT_IB(struct mthca_dev *dev,
 #define INIT_IB_NODE_GUID_OFFSET 0x18
 #define INIT_IB_SI_GUID_OFFSET   0x20
 
-	inbox = pci_alloc_consistent(dev->pdev, INIT_IB_IN_SIZE, &indma);
-	if (!inbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	inbox = mailbox->buf;
 
 	memset(inbox, 0, INIT_IB_IN_SIZE);
 
@@ -1221,10 +1281,10 @@ int mthca_INIT_IB(struct mthca_dev *dev,
 	MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET);
 	MTHCA_PUT(inbox, param->si_guid,   INIT_IB_SI_GUID_OFFSET);
 
-	err = mthca_cmd(dev, indma, port, 0, CMD_INIT_IB,
+	err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_INIT_IB,
 			CMD_TIME_CLASS_A, status);
 
-	pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
 
@@ -1241,8 +1301,8 @@ int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status)
 int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
 		 int port, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u32 *inbox;
-	dma_addr_t indma;
 	int err;
 	u32 flags = 0;
 
@@ -1253,9 +1313,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
 #define SET_IB_CAP_MASK_OFFSET 0x04
 #define SET_IB_SI_GUID_OFFSET  0x08
 
-	inbox = pci_alloc_consistent(dev->pdev, SET_IB_IN_SIZE, &indma);
-	if (!inbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	inbox = mailbox->buf;
 
 	memset(inbox, 0, SET_IB_IN_SIZE);
 
@@ -1266,10 +1327,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
 	MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET);
 	MTHCA_PUT(inbox, param->si_guid,  SET_IB_SI_GUID_OFFSET);
 
-	err = mthca_cmd(dev, indma, port, 0, CMD_SET_IB,
+	err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_SET_IB,
 			CMD_TIME_CLASS_B, status);
 
-	pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
 
@@ -1280,20 +1341,22 @@ int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *st
 
 int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u64 *inbox;
-	dma_addr_t indma;
 	int err;
 
-	inbox = pci_alloc_consistent(dev->pdev, 16, &indma);
-	if (!inbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	inbox = mailbox->buf;
 
 	inbox[0] = cpu_to_be64(virt);
 	inbox[1] = cpu_to_be64(dma_addr);
 
-	err = mthca_cmd(dev, indma, 1, 0, CMD_MAP_ICM, CMD_TIME_CLASS_B, status);
+	err = mthca_cmd(dev, mailbox->dma, 1, 0, CMD_MAP_ICM,
+			CMD_TIME_CLASS_B, status);
 
-	pci_free_consistent(dev->pdev, 16, inbox, indma);
+	mthca_free_mailbox(dev, mailbox);
 
 	if (!err)
 		mthca_dbg(dev, "Mapped page at %llx to %llx for ICM.\n",
@@ -1338,69 +1401,26 @@ int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
 	return 0;
 }
 
-int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
+int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int mpt_index, u8 *status)
 {
-	dma_addr_t indma;
-	int err;
-
-	indma = pci_map_single(dev->pdev, mpt_entry,
-			       MTHCA_MPT_ENTRY_SIZE,
-			       PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(indma))
-		return -ENOMEM;
-
-	err = mthca_cmd(dev, indma, mpt_index, 0, CMD_SW2HW_MPT,
-			CMD_TIME_CLASS_B, status);
-
-	pci_unmap_single(dev->pdev, indma,
-			 MTHCA_MPT_ENTRY_SIZE, PCI_DMA_TODEVICE);
-	return err;
+	return mthca_cmd(dev, mailbox->dma, mpt_index, 0, CMD_SW2HW_MPT,
+			 CMD_TIME_CLASS_B, status);
 }
 
-int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry,
+int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int mpt_index, u8 *status)
 {
-	dma_addr_t outdma = 0;
-	int err;
-
-	if (mpt_entry) {
-		outdma = pci_map_single(dev->pdev, mpt_entry,
-					MTHCA_MPT_ENTRY_SIZE,
-					PCI_DMA_FROMDEVICE);
-		if (pci_dma_mapping_error(outdma))
-			return -ENOMEM;
-	}
-
-	err = mthca_cmd_box(dev, 0, outdma, mpt_index, !mpt_entry,
-			    CMD_HW2SW_MPT,
-			    CMD_TIME_CLASS_B, status);
-
-	if (mpt_entry)
-		pci_unmap_single(dev->pdev, outdma,
-				 MTHCA_MPT_ENTRY_SIZE,
-				 PCI_DMA_FROMDEVICE);
-	return err;
+	return mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
+			     !mailbox, CMD_HW2SW_MPT,
+			     CMD_TIME_CLASS_B, status);
 }
 
-int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
+int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int num_mtt, u8 *status)
 {
-	dma_addr_t indma;
-	int err;
-
-	indma = pci_map_single(dev->pdev, mtt_entry,
-			       (num_mtt + 2) * 8,
-			       PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(indma))
-		return -ENOMEM;
-
-	err = mthca_cmd(dev, indma, num_mtt, 0, CMD_WRITE_MTT,
-			CMD_TIME_CLASS_B, status);
-
-	pci_unmap_single(dev->pdev, indma,
-			 (num_mtt + 2) * 8, PCI_DMA_TODEVICE);
-	return err;
+	return mthca_cmd(dev, mailbox->dma, num_mtt, 0, CMD_WRITE_MTT,
+			 CMD_TIME_CLASS_B, status);
 }
 
 int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status)
@@ -1418,92 +1438,38 @@ int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
 			 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status);
 }
 
-int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,
+int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int eq_num, u8 *status)
 {
-	dma_addr_t indma;
-	int err;
-
-	indma = pci_map_single(dev->pdev, eq_context,
-			       MTHCA_EQ_CONTEXT_SIZE,
-			       PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(indma))
-		return -ENOMEM;
-
-	err = mthca_cmd(dev, indma, eq_num, 0, CMD_SW2HW_EQ,
-			CMD_TIME_CLASS_A, status);
-
-	pci_unmap_single(dev->pdev, indma,
-			 MTHCA_EQ_CONTEXT_SIZE, PCI_DMA_TODEVICE);
-	return err;
+	return mthca_cmd(dev, mailbox->dma, eq_num, 0, CMD_SW2HW_EQ,
+			 CMD_TIME_CLASS_A, status);
 }
 
-int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context,
+int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int eq_num, u8 *status)
 {
-	dma_addr_t outdma = 0;
-	int err;
-
-	outdma = pci_map_single(dev->pdev, eq_context,
-				MTHCA_EQ_CONTEXT_SIZE,
-				PCI_DMA_FROMDEVICE);
-	if (pci_dma_mapping_error(outdma))
-		return -ENOMEM;
-
-	err = mthca_cmd_box(dev, 0, outdma, eq_num, 0,
-			    CMD_HW2SW_EQ,
-			    CMD_TIME_CLASS_A, status);
-
-	pci_unmap_single(dev->pdev, outdma,
-			 MTHCA_EQ_CONTEXT_SIZE,
-			 PCI_DMA_FROMDEVICE);
-	return err;
+	return mthca_cmd_box(dev, 0, mailbox->dma, eq_num, 0,
+			     CMD_HW2SW_EQ,
+			     CMD_TIME_CLASS_A, status);
 }
 
-int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context,
+int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int cq_num, u8 *status)
 {
-	dma_addr_t indma;
-	int err;
-
-	indma = pci_map_single(dev->pdev, cq_context,
-			       MTHCA_CQ_CONTEXT_SIZE,
-			       PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(indma))
-		return -ENOMEM;
-
-	err = mthca_cmd(dev, indma, cq_num, 0, CMD_SW2HW_CQ,
+	return mthca_cmd(dev, mailbox->dma, cq_num, 0, CMD_SW2HW_CQ,
 			CMD_TIME_CLASS_A, status);
-
-	pci_unmap_single(dev->pdev, indma,
-			 MTHCA_CQ_CONTEXT_SIZE, PCI_DMA_TODEVICE);
-	return err;
 }
 
-int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context,
+int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int cq_num, u8 *status)
 {
-	dma_addr_t outdma = 0;
-	int err;
-
-	outdma = pci_map_single(dev->pdev, cq_context,
-				MTHCA_CQ_CONTEXT_SIZE,
-				PCI_DMA_FROMDEVICE);
-	if (pci_dma_mapping_error(outdma))
-		return -ENOMEM;
-
-	err = mthca_cmd_box(dev, 0, outdma, cq_num, 0,
-			    CMD_HW2SW_CQ,
-			    CMD_TIME_CLASS_A, status);
-
-	pci_unmap_single(dev->pdev, outdma,
-			 MTHCA_CQ_CONTEXT_SIZE,
-			 PCI_DMA_FROMDEVICE);
-	return err;
+	return mthca_cmd_box(dev, 0, mailbox->dma, cq_num, 0,
+			     CMD_HW2SW_CQ,
+			     CMD_TIME_CLASS_A, status);
 }
 
 int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
-		    int is_ee, void *qp_context, u32 optmask,
+		    int is_ee, struct mthca_mailbox *mailbox, u32 optmask,
 		    u8 *status)
 {
 	static const u16 op[] = {
@@ -1520,36 +1486,34 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
 		[MTHCA_TRANS_ANY2RST]   = CMD_ERR2RST_QPEE
 	};
 	u8 op_mod = 0;
-
-	dma_addr_t indma;
+	int my_mailbox = 0;
 	int err;
 
 	if (trans < 0 || trans >= ARRAY_SIZE(op))
 		return -EINVAL;
 
 	if (trans == MTHCA_TRANS_ANY2RST) {
-		indma  = 0;
 		op_mod = 3;	/* don't write outbox, any->reset */
 
 		/* For debugging */
-		qp_context = pci_alloc_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE,
-						  &indma);
-		op_mod = 2;	/* write outbox, any->reset */
+		if (!mailbox) {
+			mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+			if (!IS_ERR(mailbox)) {
+				my_mailbox = 1;
+				op_mod     = 2;	/* write outbox, any->reset */
+			} else
+				mailbox = NULL;
+		}
 	} else {
-		indma = pci_map_single(dev->pdev, qp_context,
-				       MTHCA_QP_CONTEXT_SIZE,
-				       PCI_DMA_TODEVICE);
-		if (pci_dma_mapping_error(indma))
-			return -ENOMEM;
-
 		if (0) {
 			int i;
 			mthca_dbg(dev, "Dumping QP context:\n");
-			printk("  opt param mask: %08x\n", be32_to_cpup(qp_context));
+			printk("  opt param mask: %08x\n", be32_to_cpup(mailbox->buf));
 			for (i = 0; i < 0x100 / 4; ++i) {
 				if (i % 8 == 0)
 					printk("  [%02x] ", i * 4);
-				printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2]));
+				printk(" %08x",
+				       be32_to_cpu(((u32 *) mailbox->buf)[i + 2]));
 				if ((i + 1) % 8 == 0)
 					printk("\n");
 			}
@@ -1557,55 +1521,39 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
 	}
 
 	if (trans == MTHCA_TRANS_ANY2RST) {
-		err = mthca_cmd_box(dev, 0, indma, (!!is_ee << 24) | num,
-				    op_mod, op[trans], CMD_TIME_CLASS_C, status);
+		err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
+				    (!!is_ee << 24) | num, op_mod,
+				    op[trans], CMD_TIME_CLASS_C, status);
 
-		if (0) {
+		if (0 && mailbox) {
 			int i;
 			mthca_dbg(dev, "Dumping QP context:\n");
-			printk(" %08x\n", be32_to_cpup(qp_context));
+			printk(" %08x\n", be32_to_cpup(mailbox->buf));
 			for (i = 0; i < 0x100 / 4; ++i) {
 				if (i % 8 == 0)
 					printk("[%02x] ", i * 4);
-				printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2]));
+				printk(" %08x",
+				       be32_to_cpu(((u32 *) mailbox->buf)[i + 2]));
 				if ((i + 1) % 8 == 0)
 					printk("\n");
 			}
 		}
 
 	} else
-		err = mthca_cmd(dev, indma, (!!is_ee << 24) | num,
+		err = mthca_cmd(dev, mailbox->dma, (!!is_ee << 24) | num,
 				op_mod, op[trans], CMD_TIME_CLASS_C, status);
 
-	if (trans != MTHCA_TRANS_ANY2RST)
-		pci_unmap_single(dev->pdev, indma,
-				 MTHCA_QP_CONTEXT_SIZE, PCI_DMA_TODEVICE);
-	else
-		pci_free_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE,
-				    qp_context, indma);
+	if (my_mailbox)
+		mthca_free_mailbox(dev, mailbox);
+
 	return err;
 }
 
 int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
-		   void *qp_context, u8 *status)
+		   struct mthca_mailbox *mailbox, u8 *status)
 {
-	dma_addr_t outdma = 0;
-	int err;
-
-	outdma = pci_map_single(dev->pdev, qp_context,
-				MTHCA_QP_CONTEXT_SIZE,
-				PCI_DMA_FROMDEVICE);
-	if (pci_dma_mapping_error(outdma))
-		return -ENOMEM;
-
-	err = mthca_cmd_box(dev, 0, outdma, (!!is_ee << 24) | num, 0,
-			    CMD_QUERY_QPEE,
-			    CMD_TIME_CLASS_A, status);
-
-	pci_unmap_single(dev->pdev, outdma,
-			 MTHCA_QP_CONTEXT_SIZE,
-			 PCI_DMA_FROMDEVICE);
-	return err;
+	return mthca_cmd_box(dev, 0, mailbox->dma, (!!is_ee << 24) | num, 0,
+			     CMD_QUERY_QPEE, CMD_TIME_CLASS_A, status);
 }
 
 int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
@@ -1635,11 +1583,11 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
 }
 
 int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
-		  int port, struct ib_wc* in_wc, struct ib_grh* in_grh,
+		  int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
 		  void *in_mad, void *response_mad, u8 *status)
 {
-	void *box;
-	dma_addr_t dma;
+	struct mthca_mailbox *inmailbox, *outmailbox;
+	void *inbox;
 	int err;
 	u32 in_modifier = port;
 	u8 op_modifier = 0;
@@ -1653,11 +1601,18 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
 #define MAD_IFC_PKEY_OFFSET   0x10e
 #define MAD_IFC_GRH_OFFSET    0x140
 
-	box = pci_alloc_consistent(dev->pdev, MAD_IFC_BOX_SIZE, &dma);
-	if (!box)
-		return -ENOMEM;
+	inmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(inmailbox))
+		return PTR_ERR(inmailbox);
+	inbox = inmailbox->buf;
 
-	memcpy(box, in_mad, 256);
+	outmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(outmailbox)) {
+		mthca_free_mailbox(dev, inmailbox);
+		return PTR_ERR(outmailbox);
+	}
+
+	memcpy(inbox, in_mad, 256);
 
 	/*
 	 * Key check traps can't be generated unless we have in_wc to
@@ -1671,97 +1626,65 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
 	if (in_wc) {
 		u8 val;
 
-		memset(box + 256, 0, 256);
+		memset(inbox + 256, 0, 256);
 
-		MTHCA_PUT(box, in_wc->qp_num, 	  MAD_IFC_MY_QPN_OFFSET);
-		MTHCA_PUT(box, in_wc->src_qp, 	  MAD_IFC_RQPN_OFFSET);
+		MTHCA_PUT(inbox, in_wc->qp_num,     MAD_IFC_MY_QPN_OFFSET);
+		MTHCA_PUT(inbox, in_wc->src_qp,     MAD_IFC_RQPN_OFFSET);
 
 		val = in_wc->sl << 4;
-		MTHCA_PUT(box, val,               MAD_IFC_SL_OFFSET);
+		MTHCA_PUT(inbox, val,               MAD_IFC_SL_OFFSET);
 
 		val = in_wc->dlid_path_bits |
 			(in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
-		MTHCA_PUT(box, val,               MAD_IFC_GRH_OFFSET);
+		MTHCA_PUT(inbox, val,               MAD_IFC_GRH_OFFSET);
 
-		MTHCA_PUT(box, in_wc->slid,       MAD_IFC_RLID_OFFSET);
-		MTHCA_PUT(box, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
+		MTHCA_PUT(inbox, in_wc->slid,       MAD_IFC_RLID_OFFSET);
+		MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
 
 		if (in_grh)
-			memcpy((u8 *) box + MAD_IFC_GRH_OFFSET, in_grh, 40);
+			memcpy(inbox + MAD_IFC_GRH_OFFSET, in_grh, 40);
 
 		op_modifier |= 0x10;
 
 		in_modifier |= in_wc->slid << 16;
 	}
 
-	err = mthca_cmd_box(dev, dma, dma + 512, in_modifier, op_modifier,
+	err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma,
+			    in_modifier, op_modifier,
 			    CMD_MAD_IFC, CMD_TIME_CLASS_C, status);
 
 	if (!err && !*status)
-		memcpy(response_mad, box + 512, 256);
+		memcpy(response_mad, outmailbox->buf, 256);
 
-	pci_free_consistent(dev->pdev, MAD_IFC_BOX_SIZE, box, dma);
+	mthca_free_mailbox(dev, inmailbox);
+	mthca_free_mailbox(dev, outmailbox);
 	return err;
 }
 
-int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm,
-		   u8 *status)
+int mthca_READ_MGM(struct mthca_dev *dev, int index,
+		   struct mthca_mailbox *mailbox, u8 *status)
 {
-	dma_addr_t outdma = 0;
-	int err;
-
-	outdma = pci_map_single(dev->pdev, mgm,
-				MTHCA_MGM_ENTRY_SIZE,
-				PCI_DMA_FROMDEVICE);
-	if (pci_dma_mapping_error(outdma))
-		return -ENOMEM;
-
-	err = mthca_cmd_box(dev, 0, outdma, index, 0,
-			    CMD_READ_MGM,
-			    CMD_TIME_CLASS_A, status);
-
-	pci_unmap_single(dev->pdev, outdma,
-			 MTHCA_MGM_ENTRY_SIZE,
-			 PCI_DMA_FROMDEVICE);
-	return err;
+	return mthca_cmd_box(dev, 0, mailbox->dma, index, 0,
+			     CMD_READ_MGM, CMD_TIME_CLASS_A, status);
 }
 
-int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm,
-		    u8 *status)
+int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
+		    struct mthca_mailbox *mailbox, u8 *status)
 {
-	dma_addr_t indma;
-	int err;
-
-	indma = pci_map_single(dev->pdev, mgm,
-			       MTHCA_MGM_ENTRY_SIZE,
-			       PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(indma))
-		return -ENOMEM;
-
-	err = mthca_cmd(dev, indma, index, 0, CMD_WRITE_MGM,
-			CMD_TIME_CLASS_A, status);
-
-	pci_unmap_single(dev->pdev, indma,
-			 MTHCA_MGM_ENTRY_SIZE, PCI_DMA_TODEVICE);
-	return err;
+	return mthca_cmd(dev, mailbox->dma, index, 0, CMD_WRITE_MGM,
+			 CMD_TIME_CLASS_A, status);
 }
 
-int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash,
-		    u8 *status)
+int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+		    u16 *hash, u8 *status)
 {
-	dma_addr_t indma;
 	u64 imm;
 	int err;
 
-	indma = pci_map_single(dev->pdev, gid, 16, PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(indma))
-		return -ENOMEM;
-
-	err = mthca_cmd_imm(dev, indma, &imm, 0, 0, CMD_MGID_HASH,
+	err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH,
 			    CMD_TIME_CLASS_A, status);
-	*hash = imm;
 
-	pci_unmap_single(dev->pdev, indma, 16, PCI_DMA_TODEVICE);
+	*hash = imm;
 	return err;
 }
 

+ 28 - 20
drivers/infiniband/hw/mthca/mthca_cmd.h

@@ -37,8 +37,7 @@
 
 #include <ib_verbs.h>
 
-#define MTHCA_CMD_MAILBOX_ALIGN 16UL
-#define MTHCA_CMD_MAILBOX_EXTRA (MTHCA_CMD_MAILBOX_ALIGN - 1)
+#define MTHCA_MAILBOX_SIZE 4096
 
 enum {
 	/* command completed successfully: */
@@ -112,6 +111,11 @@ enum {
 	DEV_LIM_FLAG_UD_MULTI           = 1 << 21,
 };
 
+struct mthca_mailbox {
+	dma_addr_t dma;
+	void      *buf;
+};
+
 struct mthca_dev_lim {
 	int max_srq_sz;
 	int max_qp_sz;
@@ -235,11 +239,17 @@ struct mthca_set_ib_param {
 	u32 cap_mask;
 };
 
+int mthca_cmd_init(struct mthca_dev *dev);
+void mthca_cmd_cleanup(struct mthca_dev *dev);
 int mthca_cmd_use_events(struct mthca_dev *dev);
 void mthca_cmd_use_polling(struct mthca_dev *dev);
 void mthca_cmd_event(struct mthca_dev *dev, u16 token,
 		     u8  status, u64 out_param);
 
+struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
+					  unsigned int gfp_mask);
+void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox);
+
 int mthca_SYS_EN(struct mthca_dev *dev, u8 *status);
 int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status);
 int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
@@ -270,41 +280,39 @@ int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
 int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status);
 int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
 		       u8 *status);
-int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
+int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int mpt_index, u8 *status);
-int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry,
+int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int mpt_index, u8 *status);
-int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
+int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int num_mtt, u8 *status);
 int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status);
 int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
 		 int eq_num, u8 *status);
-int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,
+int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int eq_num, u8 *status);
-int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context,
+int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int eq_num, u8 *status);
-int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context,
+int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int cq_num, u8 *status);
-int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context,
+int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int cq_num, u8 *status);
 int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
-		    int is_ee, void *qp_context, u32 optmask,
+		    int is_ee, struct mthca_mailbox *mailbox, u32 optmask,
 		    u8 *status);
 int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
-		   void *qp_context, u8 *status);
+		   struct mthca_mailbox *mailbox, u8 *status);
 int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
 			  u8 *status);
 int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
-		  int port, struct ib_wc* in_wc, struct ib_grh* in_grh,
+		  int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
 		  void *in_mad, void *response_mad, u8 *status);
-int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm,
-		   u8 *status);
-int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm,
-		    u8 *status);
-int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash,
-		    u8 *status);
+int mthca_READ_MGM(struct mthca_dev *dev, int index,
+		   struct mthca_mailbox *mailbox, u8 *status);
+int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
+		    struct mthca_mailbox *mailbox, u8 *status);
+int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+		    u16 *hash, u8 *status);
 int mthca_NOP(struct mthca_dev *dev, u8 *status);
 
-#define MAILBOX_ALIGN(x) ((void *) ALIGN((unsigned long) (x), MTHCA_CMD_MAILBOX_ALIGN))
-
 #endif /* MTHCA_CMD_H */

+ 49 - 52
drivers/infiniband/hw/mthca/mthca_cq.c

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -171,6 +172,17 @@ static inline void set_cqe_hw(struct mthca_cqe *cqe)
 	cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW;
 }
 
+static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr)
+{
+	__be32 *cqe = cqe_ptr;
+
+	(void) cqe;	/* avoid warning if mthca_dbg compiled away... */
+	mthca_dbg(dev, "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
+		  be32_to_cpu(cqe[0]), be32_to_cpu(cqe[1]), be32_to_cpu(cqe[2]),
+		  be32_to_cpu(cqe[3]), be32_to_cpu(cqe[4]), be32_to_cpu(cqe[5]),
+		  be32_to_cpu(cqe[6]), be32_to_cpu(cqe[7]));
+}
+
 /*
  * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index
  * should be correct before calling update_cons_index().
@@ -280,16 +292,12 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
 	int dbd;
 	u32 new_wqe;
 
-	if (1 && cqe->syndrome != SYNDROME_WR_FLUSH_ERR) {
-		int j;
-
-		mthca_dbg(dev, "%x/%d: error CQE -> QPN %06x, WQE @ %08x\n",
-			  cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
-			  be32_to_cpu(cqe->wqe));
-
-		for (j = 0; j < 8; ++j)
-			printk(KERN_DEBUG "  [%2x] %08x\n",
-			       j * 4, be32_to_cpu(((u32 *) cqe)[j]));
+	if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) {
+		mthca_dbg(dev, "local QP operation err "
+			  "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n",
+			  be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe),
+			  cq->cqn, cq->cons_index);
+		dump_cqe(dev, cqe);
 	}
 
 	/*
@@ -377,15 +385,6 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
 	return 0;
 }
 
-static void dump_cqe(struct mthca_cqe *cqe)
-{
-	int j;
-
-	for (j = 0; j < 8; ++j)
-		printk(KERN_DEBUG "  [%2x] %08x\n",
-		       j * 4, be32_to_cpu(((u32 *) cqe)[j]));
-}
-
 static inline int mthca_poll_one(struct mthca_dev *dev,
 				 struct mthca_cq *cq,
 				 struct mthca_qp **cur_qp,
@@ -414,8 +413,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
 		mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n",
 			  cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
 			  be32_to_cpu(cqe->wqe));
-
-		dump_cqe(cqe);
+		dump_cqe(dev, cqe);
 	}
 
 	is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
@@ -638,19 +636,19 @@ static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq)
 	int size;
 
 	if (cq->is_direct)
-		pci_free_consistent(dev->pdev,
-				    (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
-				    cq->queue.direct.buf,
-				    pci_unmap_addr(&cq->queue.direct,
-						   mapping));
+		dma_free_coherent(&dev->pdev->dev,
+				  (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
+				  cq->queue.direct.buf,
+				  pci_unmap_addr(&cq->queue.direct,
+						 mapping));
 	else {
 		size = (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE;
 		for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
 			if (cq->queue.page_list[i].buf)
-				pci_free_consistent(dev->pdev, PAGE_SIZE,
-						    cq->queue.page_list[i].buf,
-						    pci_unmap_addr(&cq->queue.page_list[i],
-								   mapping));
+				dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+						  cq->queue.page_list[i].buf,
+						  pci_unmap_addr(&cq->queue.page_list[i],
+								 mapping));
 
 		kfree(cq->queue.page_list);
 	}
@@ -670,8 +668,8 @@ static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size,
 		npages        = 1;
 		shift         = get_order(size) + PAGE_SHIFT;
 
-		cq->queue.direct.buf = pci_alloc_consistent(dev->pdev,
-							    size, &t);
+		cq->queue.direct.buf = dma_alloc_coherent(&dev->pdev->dev,
+							  size, &t, GFP_KERNEL);
 		if (!cq->queue.direct.buf)
 			return -ENOMEM;
 
@@ -709,7 +707,8 @@ static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size,
 
 		for (i = 0; i < npages; ++i) {
 			cq->queue.page_list[i].buf =
-				pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t);
+				dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+						   &t, GFP_KERNEL);
 			if (!cq->queue.page_list[i].buf)
 				goto err_free;
 
@@ -746,7 +745,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
 		  struct mthca_cq *cq)
 {
 	int size = nent * MTHCA_CQ_ENTRY_SIZE;
-	void *mailbox = NULL;
+	struct mthca_mailbox *mailbox;
 	struct mthca_cq_context *cq_context;
 	int err = -ENOMEM;
 	u8 status;
@@ -780,12 +779,11 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
 			goto err_out_ci;
 	}
 
-	mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
-			  GFP_KERNEL);
-	if (!mailbox)
-		goto err_out_mailbox;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		goto err_out_arm;
 
-	cq_context = MAILBOX_ALIGN(mailbox);
+	cq_context = mailbox->buf;
 
 	err = mthca_alloc_cq_buf(dev, size, cq);
 	if (err)
@@ -816,7 +814,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
 		cq_context->state_db = cpu_to_be32(cq->arm_db_index);
 	}
 
-	err = mthca_SW2HW_CQ(dev, cq_context, cq->cqn, &status);
+	err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn, &status);
 	if (err) {
 		mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err);
 		goto err_out_free_mr;
@@ -840,7 +838,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
 
 	cq->cons_index = 0;
 
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 
 	return 0;
 
@@ -849,8 +847,9 @@ err_out_free_mr:
 	mthca_free_cq_buf(dev, cq);
 
 err_out_mailbox:
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 
+err_out_arm:
 	if (mthca_is_memfree(dev))
 		mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
 
@@ -870,28 +869,26 @@ err_out:
 void mthca_free_cq(struct mthca_dev *dev,
 		   struct mthca_cq *cq)
 {
-	void *mailbox;
+	struct mthca_mailbox *mailbox;
 	int err;
 	u8 status;
 
 	might_sleep();
 
-	mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
-			  GFP_KERNEL);
-	if (!mailbox) {
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox)) {
 		mthca_warn(dev, "No memory for mailbox to free CQ.\n");
 		return;
 	}
 
-	err = mthca_HW2SW_CQ(dev, MAILBOX_ALIGN(mailbox), cq->cqn, &status);
+	err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn, &status);
 	if (err)
 		mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err);
 	else if (status)
-		mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n",
-			   status);
+		mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status);
 
 	if (0) {
-		u32 *ctx = MAILBOX_ALIGN(mailbox);
+		u32 *ctx = mailbox->buf;
 		int j;
 
 		printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n",
@@ -919,11 +916,11 @@ void mthca_free_cq(struct mthca_dev *dev,
 	if (mthca_is_memfree(dev)) {
 		mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM,    cq->arm_db_index);
 		mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
-		mthca_table_put(dev, dev->cq_table.table, cq->cqn);
 	}
 
+	mthca_table_put(dev, dev->cq_table.table, cq->cqn);
 	mthca_free(&dev->cq_table.alloc, cq->cqn);
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 }
 
 int __devinit mthca_init_cq_table(struct mthca_dev *dev)

+ 10 - 2
drivers/infiniband/hw/mthca/mthca_dev.h

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -46,8 +47,8 @@
 
 #define DRV_NAME	"ib_mthca"
 #define PFX		DRV_NAME ": "
-#define DRV_VERSION	"0.06-pre"
-#define DRV_RELDATE	"November 8, 2004"
+#define DRV_VERSION	"0.06"
+#define DRV_RELDATE	"June 23, 2005"
 
 enum {
 	MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
@@ -98,6 +99,7 @@ enum {
 };
 
 struct mthca_cmd {
+	struct pci_pool          *pool;
 	int                       use_events;
 	struct semaphore          hcr_sem;
 	struct semaphore 	  poll_sem;
@@ -379,6 +381,12 @@ void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
 int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd);
 void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
 
+struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
+void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt);
+int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+		    int start_index, u64 *buffer_list, int list_len);
+int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
+		   u64 iova, u64 total_size, u32 access, struct mthca_mr *mr);
 int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
 			   u32 access, struct mthca_mr *mr);
 int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,

+ 1 - 0
drivers/infiniband/hw/mthca/mthca_doorbell.h

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU

+ 27 - 31
drivers/infiniband/hw/mthca/mthca_eq.c

@@ -469,7 +469,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 		PAGE_SIZE;
 	u64 *dma_list = NULL;
 	dma_addr_t t;
-	void *mailbox = NULL;
+	struct mthca_mailbox *mailbox;
 	struct mthca_eq_context *eq_context;
 	int err = -ENOMEM;
 	int i;
@@ -494,17 +494,16 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 	if (!dma_list)
 		goto err_out_free;
 
-	mailbox = kmalloc(sizeof *eq_context + MTHCA_CMD_MAILBOX_EXTRA,
-			  GFP_KERNEL);
-	if (!mailbox)
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
 		goto err_out_free;
-	eq_context = MAILBOX_ALIGN(mailbox);
+	eq_context = mailbox->buf;
 
 	for (i = 0; i < npages; ++i) {
-		eq->page_list[i].buf = pci_alloc_consistent(dev->pdev,
-							    PAGE_SIZE, &t);
+		eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
+							  PAGE_SIZE, &t, GFP_KERNEL);
 		if (!eq->page_list[i].buf)
-			goto err_out_free;
+			goto err_out_free_pages;
 
 		dma_list[i] = t;
 		pci_unmap_addr_set(&eq->page_list[i], mapping, t);
@@ -517,7 +516,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 
 	eq->eqn = mthca_alloc(&dev->eq_table.alloc);
 	if (eq->eqn == -1)
-		goto err_out_free;
+		goto err_out_free_pages;
 
 	err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
 				  dma_list, PAGE_SHIFT, npages,
@@ -548,7 +547,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 	eq_context->intr            = intr;
 	eq_context->lkey            = cpu_to_be32(eq->mr.ibmr.lkey);
 
-	err = mthca_SW2HW_EQ(dev, eq_context, eq->eqn, &status);
+	err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status);
 	if (err) {
 		mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err);
 		goto err_out_free_mr;
@@ -561,7 +560,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 	}
 
 	kfree(dma_list);
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 
 	eq->eqn_mask   = swab32(1 << eq->eqn);
 	eq->cons_index = 0;
@@ -579,17 +578,19 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
  err_out_free_eq:
 	mthca_free(&dev->eq_table.alloc, eq->eqn);
 
- err_out_free:
+ err_out_free_pages:
 	for (i = 0; i < npages; ++i)
 		if (eq->page_list[i].buf)
-			pci_free_consistent(dev->pdev, PAGE_SIZE,
-					    eq->page_list[i].buf,
-					    pci_unmap_addr(&eq->page_list[i],
-							   mapping));
+			dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+					  eq->page_list[i].buf,
+					  pci_unmap_addr(&eq->page_list[i],
+							 mapping));
+
+	mthca_free_mailbox(dev, mailbox);
 
+ err_out_free:
 	kfree(eq->page_list);
 	kfree(dma_list);
-	kfree(mailbox);
 
  err_out:
 	return err;
@@ -598,25 +599,22 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 static void mthca_free_eq(struct mthca_dev *dev,
 			  struct mthca_eq *eq)
 {
-	void *mailbox = NULL;
+	struct mthca_mailbox *mailbox;
 	int err;
 	u8 status;
 	int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
 		PAGE_SIZE;
 	int i;
 
-	mailbox = kmalloc(sizeof (struct mthca_eq_context) + MTHCA_CMD_MAILBOX_EXTRA,
-			  GFP_KERNEL);
-	if (!mailbox)
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
 		return;
 
-	err = mthca_HW2SW_EQ(dev, MAILBOX_ALIGN(mailbox),
-			     eq->eqn, &status);
+	err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status);
 	if (err)
 		mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err);
 	if (status)
-		mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n",
-			   status);
+		mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status);
 
 	dev->eq_table.arm_mask &= ~eq->eqn_mask;
 
@@ -625,7 +623,7 @@ static void mthca_free_eq(struct mthca_dev *dev,
 		for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) {
 			if (i % 4 == 0)
 				printk("[%02x] ", i * 4);
-			printk(" %08x", be32_to_cpup(MAILBOX_ALIGN(mailbox) + i * 4));
+			printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
 			if ((i + 1) % 4 == 0)
 				printk("\n");
 		}
@@ -638,7 +636,7 @@ static void mthca_free_eq(struct mthca_dev *dev,
 				    pci_unmap_addr(&eq->page_list[i], mapping));
 
 	kfree(eq->page_list);
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 }
 
 static void mthca_free_irqs(struct mthca_dev *dev)
@@ -709,8 +707,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
 		if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
 					dev->fw.arbel.eq_arm_base) + 4, 4,
 				  &dev->eq_regs.arbel.eq_arm)) {
-			mthca_err(dev, "Couldn't map interrupt clear register, "
-				  "aborting.\n");
+			mthca_err(dev, "Couldn't map EQ arm register, aborting.\n");
 			mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
 					dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
 					dev->clr_base);
@@ -721,8 +718,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
 				  dev->fw.arbel.eq_set_ci_base,
 				  MTHCA_EQ_SET_CI_SIZE,
 				  &dev->eq_regs.arbel.eq_set_ci_base)) {
-			mthca_err(dev, "Couldn't map interrupt clear register, "
-				  "aborting.\n");
+			mthca_err(dev, "Couldn't map EQ CI register, aborting.\n");
 			mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
 					      dev->fw.arbel.eq_arm_base) + 4, 4,
 					dev->eq_regs.arbel.eq_arm);

+ 12 - 20
drivers/infiniband/hw/mthca/mthca_main.c

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -69,7 +70,7 @@ MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero");
 #endif /* CONFIG_PCI_MSI */
 
 static const char mthca_version[] __devinitdata =
-	"ib_mthca: Mellanox InfiniBand HCA driver v"
+	DRV_NAME ": Mellanox InfiniBand HCA driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
 
 static struct mthca_profile default_profile = {
@@ -927,13 +928,13 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
 	 */
 	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
 	    pci_resource_len(pdev, 0) != 1 << 20) {
-		dev_err(&pdev->dev, "Missing DCS, aborting.");
+		dev_err(&pdev->dev, "Missing DCS, aborting.\n");
 		err = -ENODEV;
 		goto err_disable_pdev;
 	}
 	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) ||
 	    pci_resource_len(pdev, 2) != 1 << 23) {
-		dev_err(&pdev->dev, "Missing UAR, aborting.");
+		dev_err(&pdev->dev, "Missing UAR, aborting.\n");
 		err = -ENODEV;
 		goto err_disable_pdev;
 	}
@@ -1004,25 +1005,18 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
 	    !pci_enable_msi(pdev))
 		mdev->mthca_flags |= MTHCA_FLAG_MSI;
 
-	sema_init(&mdev->cmd.hcr_sem, 1);
-	sema_init(&mdev->cmd.poll_sem, 1);
-	mdev->cmd.use_events = 0;
-
-	mdev->hcr = ioremap(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, MTHCA_HCR_SIZE);
-	if (!mdev->hcr) {
-		mthca_err(mdev, "Couldn't map command register, "
-			  "aborting.\n");
-		err = -ENOMEM;
+	if (mthca_cmd_init(mdev)) {
+		mthca_err(mdev, "Failed to init command interface, aborting.\n");
 		goto err_free_dev;
 	}
 
 	err = mthca_tune_pci(mdev);
 	if (err)
-		goto err_iounmap;
+		goto err_cmd;
 
 	err = mthca_init_hca(mdev);
 	if (err)
-		goto err_iounmap;
+		goto err_cmd;
 
 	if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) {
 		mthca_warn(mdev, "HCA FW version %x.%x.%x is old (%x.%x.%x is current).\n",
@@ -1070,8 +1064,8 @@ err_cleanup:
 err_close:
 	mthca_close_hca(mdev);
 
-err_iounmap:
-	iounmap(mdev->hcr);
+err_cmd:
+	mthca_cmd_cleanup(mdev);
 
 err_free_dev:
 	if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
@@ -1118,10 +1112,8 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev)
 		iounmap(mdev->kar);
 		mthca_uar_free(mdev, &mdev->driver_uar);
 		mthca_cleanup_uar_table(mdev);
-
 		mthca_close_hca(mdev);
-
-		iounmap(mdev->hcr);
+		mthca_cmd_cleanup(mdev);
 
 		if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
 			pci_disable_msix(pdev);
@@ -1163,7 +1155,7 @@ static struct pci_device_id mthca_pci_table[] = {
 MODULE_DEVICE_TABLE(pci, mthca_pci_table);
 
 static struct pci_driver mthca_driver = {
-	.name		= "ib_mthca",
+	.name		= DRV_NAME,
 	.id_table	= mthca_pci_table,
 	.probe		= mthca_init_one,
 	.remove		= __devexit_p(mthca_remove_one)

+ 32 - 31
drivers/infiniband/hw/mthca/mthca_mcg.c

@@ -66,22 +66,23 @@ static const u8 zero_gid[16];	/* automatically initialized to 0 */
  * entry in hash chain and *mgm holds end of hash chain.
  */
 static int find_mgm(struct mthca_dev *dev,
-		    u8 *gid, struct mthca_mgm *mgm,
+		    u8 *gid, struct mthca_mailbox *mgm_mailbox,
 		    u16 *hash, int *prev, int *index)
 {
-	void *mailbox;
+	struct mthca_mailbox *mailbox;
+	struct mthca_mgm *mgm = mgm_mailbox->buf;
 	u8 *mgid;
 	int err;
 	u8 status;
 
-	mailbox = kmalloc(16 + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
-	if (!mailbox)
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
 		return -ENOMEM;
-	mgid = MAILBOX_ALIGN(mailbox);
+	mgid = mailbox->buf;
 
 	memcpy(mgid, gid, 16);
 
-	err = mthca_MGID_HASH(dev, mgid, hash, &status);
+	err = mthca_MGID_HASH(dev, mailbox, hash, &status);
 	if (err)
 		goto out;
 	if (status) {
@@ -103,7 +104,7 @@ static int find_mgm(struct mthca_dev *dev,
 	*prev  = -1;
 
 	do {
-		err = mthca_READ_MGM(dev, *index, mgm, &status);
+		err = mthca_READ_MGM(dev, *index, mgm_mailbox, &status);
 		if (err)
 			goto out;
 		if (status) {
@@ -129,14 +130,14 @@ static int find_mgm(struct mthca_dev *dev,
 	*index = -1;
 
  out:
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
 
 int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
-	void *mailbox;
+	struct mthca_mailbox *mailbox;
 	struct mthca_mgm *mgm;
 	u16 hash;
 	int index, prev;
@@ -145,15 +146,15 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	int err;
 	u8 status;
 
-	mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
-	if (!mailbox)
-		return -ENOMEM;
-	mgm = MAILBOX_ALIGN(mailbox);
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	mgm = mailbox->buf;
 
 	if (down_interruptible(&dev->mcg_table.sem))
 		return -EINTR;
 
-	err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index);
+	err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
 	if (err)
 		goto out;
 
@@ -170,7 +171,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 			goto out;
 		}
 
-		err = mthca_READ_MGM(dev, index, mgm, &status);
+		err = mthca_READ_MGM(dev, index, mailbox, &status);
 		if (err)
 			goto out;
 		if (status) {
@@ -195,7 +196,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		goto out;
 	}
 
-	err = mthca_WRITE_MGM(dev, index, mgm, &status);
+	err = mthca_WRITE_MGM(dev, index, mailbox, &status);
 	if (err)
 		goto out;
 	if (status) {
@@ -206,7 +207,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	if (!link)
 		goto out;
 
-	err = mthca_READ_MGM(dev, prev, mgm, &status);
+	err = mthca_READ_MGM(dev, prev, mailbox, &status);
 	if (err)
 		goto out;
 	if (status) {
@@ -217,7 +218,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 
 	mgm->next_gid_index = cpu_to_be32(index << 5);
 
-	err = mthca_WRITE_MGM(dev, prev, mgm, &status);
+	err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
 	if (err)
 		goto out;
 	if (status) {
@@ -227,14 +228,14 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 
  out:
 	up(&dev->mcg_table.sem);
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
 
 int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
-	void *mailbox;
+	struct mthca_mailbox *mailbox;
 	struct mthca_mgm *mgm;
 	u16 hash;
 	int prev, index;
@@ -242,15 +243,15 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	int err;
 	u8 status;
 
-	mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
-	if (!mailbox)
-		return -ENOMEM;
-	mgm = MAILBOX_ALIGN(mailbox);
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	mgm = mailbox->buf;
 
 	if (down_interruptible(&dev->mcg_table.sem))
 		return -EINTR;
 
-	err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index);
+	err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
 	if (err)
 		goto out;
 
@@ -285,7 +286,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	mgm->qp[loc]   = mgm->qp[i - 1];
 	mgm->qp[i - 1] = 0;
 
-	err = mthca_WRITE_MGM(dev, index, mgm, &status);
+	err = mthca_WRITE_MGM(dev, index, mailbox, &status);
 	if (err)
 		goto out;
 	if (status) {
@@ -304,7 +305,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		if (be32_to_cpu(mgm->next_gid_index) >> 5) {
 			err = mthca_READ_MGM(dev,
 					     be32_to_cpu(mgm->next_gid_index) >> 5,
-					     mgm, &status);
+					     mailbox, &status);
 			if (err)
 				goto out;
 			if (status) {
@@ -316,7 +317,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		} else
 			memset(mgm->gid, 0, 16);
 
-		err = mthca_WRITE_MGM(dev, index, mgm, &status);
+		err = mthca_WRITE_MGM(dev, index, mailbox, &status);
 		if (err)
 			goto out;
 		if (status) {
@@ -327,7 +328,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	} else {
 		/* Remove entry from AMGM */
 		index = be32_to_cpu(mgm->next_gid_index) >> 5;
-		err = mthca_READ_MGM(dev, prev, mgm, &status);
+		err = mthca_READ_MGM(dev, prev, mailbox, &status);
 		if (err)
 			goto out;
 		if (status) {
@@ -338,7 +339,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 
 		mgm->next_gid_index = cpu_to_be32(index << 5);
 
-		err = mthca_WRITE_MGM(dev, prev, mgm, &status);
+		err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
 		if (err)
 			goto out;
 		if (status) {
@@ -350,7 +351,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 
  out:
 	up(&dev->mcg_table.sem);
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
 

+ 9 - 1
drivers/infiniband/hw/mthca/mthca_memfree.c

@@ -179,9 +179,14 @@ out:
 
 void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
 {
-	int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
+	int i;
 	u8 status;
 
+	if (!mthca_is_memfree(dev))
+		return;
+
+	i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
+
 	down(&table->mutex);
 
 	if (--table->icm[i]->refcount == 0) {
@@ -256,6 +261,9 @@ void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
 {
 	int i;
 
+	if (!mthca_is_memfree(dev))
+		return;
+
 	for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size)
 		mthca_table_put(dev, table, i);
 }

+ 182 - 185
drivers/infiniband/hw/mthca/mthca_mr.c

@@ -40,6 +40,12 @@
 #include "mthca_cmd.h"
 #include "mthca_memfree.h"
 
+struct mthca_mtt {
+	struct mthca_buddy *buddy;
+	int                 order;
+	u32                 first_seg;
+};
+
 /*
  * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
  */
@@ -173,8 +179,8 @@ static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy)
 	kfree(buddy->bits);
 }
 
-static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order,
-			   struct mthca_buddy *buddy)
+static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
+				 struct mthca_buddy *buddy)
 {
 	u32 seg = mthca_buddy_alloc(buddy, order);
 
@@ -191,14 +197,102 @@ static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order,
 	return seg;
 }
 
-static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order,
-			   struct mthca_buddy* buddy)
+static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
+					   struct mthca_buddy *buddy)
 {
-	mthca_buddy_free(buddy, seg, order);
+	struct mthca_mtt *mtt;
+	int i;
 
-	if (mthca_is_memfree(dev))
-		mthca_table_put_range(dev, dev->mr_table.mtt_table, seg,
-				      seg + (1 << order) - 1);
+	if (size <= 0)
+		return ERR_PTR(-EINVAL);
+
+	mtt = kmalloc(sizeof *mtt, GFP_KERNEL);
+	if (!mtt)
+		return ERR_PTR(-ENOMEM);
+
+	mtt->buddy = buddy;
+	mtt->order = 0;
+	for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1)
+		++mtt->order;
+
+	mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
+	if (mtt->first_seg == -1) {
+		kfree(mtt);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return mtt;
+}
+
+struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size)
+{
+	return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy);
+}
+
+void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
+{
+	if (!mtt)
+		return;
+
+	mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order);
+
+	mthca_table_put_range(dev, dev->mr_table.mtt_table,
+			      mtt->first_seg,
+			      mtt->first_seg + (1 << mtt->order) - 1);
+
+	kfree(mtt);
+}
+
+int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+		    int start_index, u64 *buffer_list, int list_len)
+{
+	struct mthca_mailbox *mailbox;
+	u64 *mtt_entry;
+	int err = 0;
+	u8 status;
+	int i;
+
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	mtt_entry = mailbox->buf;
+
+	while (list_len > 0) {
+		mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
+					   mtt->first_seg * MTHCA_MTT_SEG_SIZE +
+					   start_index * 8);
+		mtt_entry[1] = 0;
+		for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
+			mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
+						       MTHCA_MTT_FLAG_PRESENT);
+
+		/*
+		 * If we have an odd number of entries to write, add
+		 * one more dummy entry for firmware efficiency.
+		 */
+		if (i & 1)
+			mtt_entry[i + 2] = 0;
+
+		err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status);
+		if (err) {
+			mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
+			goto out;
+		}
+		if (status) {
+			mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
+				   status);
+			err = -EINVAL;
+			goto out;
+		}
+
+		list_len    -= i;
+		start_index += i;
+		buffer_list += i;
+	}
+
+out:
+	mthca_free_mailbox(dev, mailbox);
+	return err;
 }
 
 static inline u32 tavor_hw_index_to_key(u32 ind)
@@ -237,91 +331,18 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
 		return tavor_key_to_hw_index(key);
 }
 
-int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
-			   u32 access, struct mthca_mr *mr)
+int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
+		   u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
 {
-	void *mailbox = NULL;
+	struct mthca_mailbox *mailbox;
 	struct mthca_mpt_entry *mpt_entry;
 	u32 key;
+	int i;
 	int err;
 	u8 status;
 
 	might_sleep();
 
-	mr->order = -1;
-	key = mthca_alloc(&dev->mr_table.mpt_alloc);
-	if (key == -1)
-		return -ENOMEM;
-	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
-
-	if (mthca_is_memfree(dev)) {
-		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
-		if (err)
-			goto err_out_mpt_free;
-	}
-
-	mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
-			  GFP_KERNEL);
-	if (!mailbox) {
-		err = -ENOMEM;
-		goto err_out_table;
-	}
-	mpt_entry = MAILBOX_ALIGN(mailbox);
-
-	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
-				       MTHCA_MPT_FLAG_MIO         |
-				       MTHCA_MPT_FLAG_PHYSICAL    |
-				       MTHCA_MPT_FLAG_REGION      |
-				       access);
-	mpt_entry->page_size = 0;
-	mpt_entry->key       = cpu_to_be32(key);
-	mpt_entry->pd        = cpu_to_be32(pd);
-	mpt_entry->start     = 0;
-	mpt_entry->length    = ~0ULL;
-
-	memset(&mpt_entry->lkey, 0,
-	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
-
-	err = mthca_SW2HW_MPT(dev, mpt_entry,
-			      key & (dev->limits.num_mpts - 1),
-			      &status);
-	if (err) {
-		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
-		goto err_out_table;
-	} else if (status) {
-		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
-			   status);
-		err = -EINVAL;
-		goto err_out_table;
-	}
-
-	kfree(mailbox);
-	return err;
-
-err_out_table:
-	if (mthca_is_memfree(dev))
-		mthca_table_put(dev, dev->mr_table.mpt_table, key);
-
-err_out_mpt_free:
-	mthca_free(&dev->mr_table.mpt_alloc, key);
-	kfree(mailbox);
-	return err;
-}
-
-int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
-			u64 *buffer_list, int buffer_size_shift,
-			int list_len, u64 iova, u64 total_size,
-			u32 access, struct mthca_mr *mr)
-{
-	void *mailbox;
-	u64 *mtt_entry;
-	struct mthca_mpt_entry *mpt_entry;
-	u32 key;
-	int err = -ENOMEM;
-	u8 status;
-	int i;
-
-	might_sleep();
 	WARN_ON(buffer_size_shift >= 32);
 
 	key = mthca_alloc(&dev->mr_table.mpt_alloc);
@@ -335,75 +356,33 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
 			goto err_out_mpt_free;
 	}
 
-	for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
-	     i < list_len;
-	     i <<= 1, ++mr->order)
-		; /* nothing */
-
-	mr->first_seg = mthca_alloc_mtt(dev, mr->order,
-				       	&dev->mr_table.mtt_buddy);
-	if (mr->first_seg == -1)
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox)) {
+		err = PTR_ERR(mailbox);
 		goto err_out_table;
-
-	/*
-	 * If list_len is odd, we add one more dummy entry for
-	 * firmware efficiency.
-	 */
-	mailbox = kmalloc(max(sizeof *mpt_entry,
-			      (size_t) 8 * (list_len + (list_len & 1) + 2)) +
-			  MTHCA_CMD_MAILBOX_EXTRA,
-			  GFP_KERNEL);
-	if (!mailbox)
-		goto err_out_free_mtt;
-
-	mtt_entry = MAILBOX_ALIGN(mailbox);
-
-	mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
-				   mr->first_seg * MTHCA_MTT_SEG_SIZE);
-	mtt_entry[1] = 0;
-	for (i = 0; i < list_len; ++i)
-		mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
-					       MTHCA_MTT_FLAG_PRESENT);
-	if (list_len & 1) {
-		mtt_entry[i + 2] = 0;
-		++list_len;
-	}
-
-	if (0) {
-		mthca_dbg(dev, "Dumping MPT entry\n");
-		for (i = 0; i < list_len + 2; ++i)
-			printk(KERN_ERR "[%2d] %016llx\n",
-			       i, (unsigned long long) be64_to_cpu(mtt_entry[i]));
-	}
-
-	err = mthca_WRITE_MTT(dev, mtt_entry, list_len, &status);
-	if (err) {
-		mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
-		goto err_out_mailbox_free;
-	}
-	if (status) {
-		mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
-			   status);
-		err = -EINVAL;
-		goto err_out_mailbox_free;
 	}
-
-	mpt_entry = MAILBOX_ALIGN(mailbox);
+	mpt_entry = mailbox->buf;
 
 	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
 				       MTHCA_MPT_FLAG_MIO         |
 				       MTHCA_MPT_FLAG_REGION      |
 				       access);
+	if (!mr->mtt)
+		mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL);
 
 	mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
 	mpt_entry->key       = cpu_to_be32(key);
 	mpt_entry->pd        = cpu_to_be32(pd);
 	mpt_entry->start     = cpu_to_be64(iova);
 	mpt_entry->length    = cpu_to_be64(total_size);
+
 	memset(&mpt_entry->lkey, 0,
 	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
-	mpt_entry->mtt_seg   = cpu_to_be64(dev->mr_table.mtt_base +
-					   mr->first_seg * MTHCA_MTT_SEG_SIZE);
+
+	if (mr->mtt)
+		mpt_entry->mtt_seg =
+			cpu_to_be64(dev->mr_table.mtt_base +
+				    mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE);
 
 	if (0) {
 		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
@@ -416,45 +395,70 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
 		}
 	}
 
-	err = mthca_SW2HW_MPT(dev, mpt_entry,
+	err = mthca_SW2HW_MPT(dev, mailbox,
 			      key & (dev->limits.num_mpts - 1),
 			      &status);
-	if (err)
+	if (err) {
 		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
-	else if (status) {
+		goto err_out_mailbox;
+	} else if (status) {
 		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
 			   status);
 		err = -EINVAL;
+		goto err_out_mailbox;
 	}
 
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 
-err_out_mailbox_free:
-	kfree(mailbox);
-
-err_out_free_mtt:
-	mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy);
+err_out_mailbox:
+	mthca_free_mailbox(dev, mailbox);
 
 err_out_table:
-	if (mthca_is_memfree(dev))
-		mthca_table_put(dev, dev->mr_table.mpt_table, key);
+	mthca_table_put(dev, dev->mr_table.mpt_table, key);
 
 err_out_mpt_free:
 	mthca_free(&dev->mr_table.mpt_alloc, key);
 	return err;
 }
 
-/* Free mr or fmr */
-static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order,
-			      u32 first_seg, struct mthca_buddy *buddy)
+int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
+			   u32 access, struct mthca_mr *mr)
 {
-	if (order >= 0)
-		mthca_free_mtt(dev, first_seg, order, buddy);
+	mr->mtt = NULL;
+	return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr);
+}
 
-	if (mthca_is_memfree(dev))
-		mthca_table_put(dev, dev->mr_table.mpt_table,
-				arbel_key_to_hw_index(lkey));
+int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
+			u64 *buffer_list, int buffer_size_shift,
+			int list_len, u64 iova, u64 total_size,
+			u32 access, struct mthca_mr *mr)
+{
+	int err;
+
+	mr->mtt = mthca_alloc_mtt(dev, list_len);
+	if (IS_ERR(mr->mtt))
+		return PTR_ERR(mr->mtt);
+
+	err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len);
+	if (err) {
+		mthca_free_mtt(dev, mr->mtt);
+		return err;
+	}
+
+	err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova,
+			     total_size, access, mr);
+	if (err)
+		mthca_free_mtt(dev, mr->mtt);
+
+	return err;
+}
+
+/* Free mr or fmr */
+static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
+{
+	mthca_table_put(dev, dev->mr_table.mpt_table,
+			arbel_key_to_hw_index(lkey));
 
 	mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
 }
@@ -476,15 +480,15 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
 		mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
 			   status);
 
-	mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg,
-			  &dev->mr_table.mtt_buddy);
+	mthca_free_region(dev, mr->ibmr.lkey);
+	mthca_free_mtt(dev, mr->mtt);
 }
 
 int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 		    u32 access, struct mthca_fmr *mr)
 {
 	struct mthca_mpt_entry *mpt_entry;
-	void *mailbox;
+	struct mthca_mailbox *mailbox;
 	u64 mtt_seg;
 	u32 key, idx;
 	u8 status;
@@ -522,31 +526,24 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 		mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
 		       	sizeof *(mr->mem.tavor.mpt) * idx;
 
-	for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
-	     i < list_len;
-	     i <<= 1, ++mr->order)
-		; /* nothing */
-
-	mr->first_seg = mthca_alloc_mtt(dev, mr->order,
-				       	dev->mr_table.fmr_mtt_buddy);
-	if (mr->first_seg == -1)
+	mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
+	if (IS_ERR(mr->mtt))
 		goto err_out_table;
 
-	mtt_seg = mr->first_seg * MTHCA_MTT_SEG_SIZE;
+	mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE;
 
 	if (mthca_is_memfree(dev)) {
 		mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
-						      mr->first_seg);
+						      mr->mtt->first_seg);
 		BUG_ON(!mr->mem.arbel.mtts);
 	} else
 		mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
 
-	mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
-			  GFP_KERNEL);
-	if (!mailbox)
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
 		goto err_out_free_mtt;
 
-	mpt_entry = MAILBOX_ALIGN(mailbox);
+	mpt_entry = mailbox->buf;
 
 	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
 				       MTHCA_MPT_FLAG_MIO         |
@@ -571,7 +568,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 		}
 	}
 
-	err = mthca_SW2HW_MPT(dev, mpt_entry,
+	err = mthca_SW2HW_MPT(dev, mailbox,
 			      key & (dev->limits.num_mpts - 1),
 			      &status);
 	if (err) {
@@ -585,19 +582,17 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 		goto err_out_mailbox_free;
 	}
 
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 	return 0;
 
 err_out_mailbox_free:
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 
 err_out_free_mtt:
-	mthca_free_mtt(dev, mr->first_seg, mr->order,
-		       dev->mr_table.fmr_mtt_buddy);
+	mthca_free_mtt(dev, mr->mtt);
 
 err_out_table:
-	if (mthca_is_memfree(dev))
-		mthca_table_put(dev, dev->mr_table.mpt_table, key);
+	mthca_table_put(dev, dev->mr_table.mpt_table, key);
 
 err_out_mpt_free:
 	mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
@@ -609,8 +604,9 @@ int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
 	if (fmr->maps)
 		return -EBUSY;
 
-	mthca_free_region(dev, fmr->ibmr.lkey, fmr->order, fmr->first_seg,
-			  dev->mr_table.fmr_mtt_buddy);
+	mthca_free_region(dev, fmr->ibmr.lkey);
+	mthca_free_mtt(dev, fmr->mtt);
+
 	return 0;
 }
 
@@ -826,7 +822,8 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
 	if (dev->limits.reserved_mtts) {
 		i = fls(dev->limits.reserved_mtts - 1);
 
-		if (mthca_alloc_mtt(dev, i, dev->mr_table.fmr_mtt_buddy) == -1) {
+		if (mthca_alloc_mtt_range(dev, i,
+					  dev->mr_table.fmr_mtt_buddy) == -1) {
 			mthca_warn(dev, "MTT table of order %d is too small.\n",
 				  dev->mr_table.fmr_mtt_buddy->max_order);
 			err = -ENOMEM;

+ 3 - 1
drivers/infiniband/hw/mthca/mthca_provider.c

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -52,7 +53,7 @@ static int mthca_query_device(struct ib_device *ibdev,
 	if (!in_mad || !out_mad)
 		goto out;
 
-	memset(props, 0, sizeof props);
+	memset(props, 0, sizeof *props);
 
 	props->fw_ver              = mdev->fw_ver;
 
@@ -558,6 +559,7 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd       *pd,
 				  convert_access(acc), mr);
 
 	if (err) {
+		kfree(page_list);
 		kfree(mr);
 		return ERR_PTR(err);
 	}

+ 7 - 7
drivers/infiniband/hw/mthca/mthca_provider.h

@@ -54,18 +54,18 @@ struct mthca_uar {
 	int           index;
 };
 
+struct mthca_mtt;
+
 struct mthca_mr {
-	struct ib_mr ibmr;
-	int order;
-	u32 first_seg;
+	struct ib_mr      ibmr;
+	struct mthca_mtt *mtt;
 };
 
 struct mthca_fmr {
-	struct ib_fmr ibmr;
+	struct ib_fmr      ibmr;
 	struct ib_fmr_attr attr;
-	int order;
-	u32 first_seg;
-	int maps;
+	struct mthca_mtt  *mtt;
+	int                maps;
 	union {
 		struct {
 			struct mthca_mpt_entry __iomem *mpt;

+ 109 - 30
drivers/infiniband/hw/mthca/mthca_qp.c

@@ -357,6 +357,9 @@ static const struct {
 				[UD]  = (IB_QP_PKEY_INDEX |
 					 IB_QP_PORT       |
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_PKEY_INDEX |
+					 IB_QP_PORT       |
+					 IB_QP_ACCESS_FLAGS),
 				[RC]  = (IB_QP_PKEY_INDEX |
 					 IB_QP_PORT       |
 					 IB_QP_ACCESS_FLAGS),
@@ -378,6 +381,9 @@ static const struct {
 				[UD]  = (IB_QP_PKEY_INDEX |
 					 IB_QP_PORT       |
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_PKEY_INDEX |
+					 IB_QP_PORT       |
+					 IB_QP_ACCESS_FLAGS),
 				[RC]  = (IB_QP_PKEY_INDEX |
 					 IB_QP_PORT       |
 					 IB_QP_ACCESS_FLAGS),
@@ -388,6 +394,11 @@ static const struct {
 		[IB_QPS_RTR]   = {
 			.trans = MTHCA_TRANS_INIT2RTR,
 			.req_param = {
+				[UC]  = (IB_QP_AV                  |
+					 IB_QP_PATH_MTU            |
+					 IB_QP_DEST_QPN            |
+					 IB_QP_RQ_PSN              |
+					 IB_QP_MAX_DEST_RD_ATOMIC),
 				[RC]  = (IB_QP_AV                  |
 					 IB_QP_PATH_MTU            |
 					 IB_QP_DEST_QPN            |
@@ -398,6 +409,9 @@ static const struct {
 			.opt_param = {
 				[UD]  = (IB_QP_PKEY_INDEX |
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_ALT_PATH     |
+					 IB_QP_ACCESS_FLAGS |
+					 IB_QP_PKEY_INDEX),
 				[RC]  = (IB_QP_ALT_PATH     |
 					 IB_QP_ACCESS_FLAGS |
 					 IB_QP_PKEY_INDEX),
@@ -413,6 +427,8 @@ static const struct {
 			.trans = MTHCA_TRANS_RTR2RTS,
 			.req_param = {
 				[UD]  = IB_QP_SQ_PSN,
+				[UC]  = (IB_QP_SQ_PSN            |
+					 IB_QP_MAX_QP_RD_ATOMIC),
 				[RC]  = (IB_QP_TIMEOUT           |
 					 IB_QP_RETRY_CNT         |
 					 IB_QP_RNR_RETRY         |
@@ -423,6 +439,11 @@ static const struct {
 			.opt_param = {
 				[UD]  = (IB_QP_CUR_STATE             |
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_CUR_STATE             |
+					 IB_QP_ALT_PATH              |
+					 IB_QP_ACCESS_FLAGS          |
+					 IB_QP_PKEY_INDEX            |
+					 IB_QP_PATH_MIG_STATE),
 				[RC]  = (IB_QP_CUR_STATE             |
 					 IB_QP_ALT_PATH              |
 					 IB_QP_ACCESS_FLAGS          |
@@ -442,6 +463,9 @@ static const struct {
 			.opt_param = {
 				[UD]  = (IB_QP_CUR_STATE             |
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_ACCESS_FLAGS          |
+					 IB_QP_ALT_PATH              |
+					 IB_QP_PATH_MIG_STATE),
 				[RC]  = (IB_QP_ACCESS_FLAGS          |
 					 IB_QP_ALT_PATH              |
 					 IB_QP_PATH_MIG_STATE        |
@@ -462,6 +486,10 @@ static const struct {
 			.opt_param = {
 				[UD]  = (IB_QP_CUR_STATE             |
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_CUR_STATE             |
+					 IB_QP_ALT_PATH              |
+					 IB_QP_ACCESS_FLAGS          |
+					 IB_QP_PATH_MIG_STATE),
 				[RC]  = (IB_QP_CUR_STATE             |
 					 IB_QP_ALT_PATH              |
 					 IB_QP_ACCESS_FLAGS          |
@@ -476,6 +504,14 @@ static const struct {
 			.opt_param = {
 				[UD]  = (IB_QP_PKEY_INDEX            |
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_AV                    |
+					 IB_QP_MAX_QP_RD_ATOMIC      |
+					 IB_QP_MAX_DEST_RD_ATOMIC    |
+					 IB_QP_CUR_STATE             |
+					 IB_QP_ALT_PATH              |
+					 IB_QP_ACCESS_FLAGS          |
+					 IB_QP_PKEY_INDEX            |
+					 IB_QP_PATH_MIG_STATE),
 				[RC]  = (IB_QP_AV                    |
 					 IB_QP_TIMEOUT               |
 					 IB_QP_RETRY_CNT             |
@@ -501,6 +537,7 @@ static const struct {
 			.opt_param = {
 				[UD]  = (IB_QP_CUR_STATE             |
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_CUR_STATE),
 				[RC]  = (IB_QP_CUR_STATE             |
 					 IB_QP_MIN_RNR_TIMER),
 				[MLX] = (IB_QP_CUR_STATE             |
@@ -552,7 +589,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
 	enum ib_qp_state cur_state, new_state;
-	void *mailbox = NULL;
+	struct mthca_mailbox *mailbox;
 	struct mthca_qp_param *qp_param;
 	struct mthca_qp_context *qp_context;
 	u32 req_param, opt_param;
@@ -609,10 +646,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		return -EINVAL;
 	}
 
-	mailbox = kmalloc(sizeof (*qp_param) + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
-	if (!mailbox)
-		return -ENOMEM;
-	qp_param = MAILBOX_ALIGN(mailbox);
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	qp_param = mailbox->buf;
 	qp_context = &qp_param->context;
 	memset(qp_param, 0, sizeof *qp_param);
 
@@ -683,7 +720,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	if (attr_mask & IB_QP_AV) {
 		qp_context->pri_path.g_mylmc     = attr->ah_attr.src_path_bits & 0x7f;
 		qp_context->pri_path.rlid        = cpu_to_be16(attr->ah_attr.dlid);
-		qp_context->pri_path.static_rate = (!!attr->ah_attr.static_rate) << 3;
+		qp_context->pri_path.static_rate = !!attr->ah_attr.static_rate;
 		if (attr->ah_attr.ah_flags & IB_AH_GRH) {
 			qp_context->pri_path.g_mylmc |= 1 << 7;
 			qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
@@ -724,9 +761,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);
 	}
 
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
-		qp_context->params1 |= cpu_to_be32(min(attr->max_dest_rd_atomic ?
-						       ffs(attr->max_dest_rd_atomic) - 1 : 0,
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+		qp_context->params1 |= cpu_to_be32(min(attr->max_rd_atomic ?
+						       ffs(attr->max_rd_atomic) - 1 : 0,
 						       7) << 21);
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
 	}
@@ -764,10 +801,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		qp->atomic_rd_en = attr->qp_access_flags;
 	}
 
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
 		u8 rra_max;
 
-		if (qp->resp_depth && !attr->max_rd_atomic) {
+		if (qp->resp_depth && !attr->max_dest_rd_atomic) {
 			/*
 			 * Lowering our responder resources to zero.
 			 * Turn off RDMA/atomics as responder.
@@ -778,7 +815,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 								MTHCA_QP_OPTPAR_RAE);
 		}
 
-		if (!qp->resp_depth && attr->max_rd_atomic) {
+		if (!qp->resp_depth && attr->max_dest_rd_atomic) {
 			/*
 			 * Increasing our responder resources from
 			 * zero.  Turn on RDMA/atomics as appropriate.
@@ -799,7 +836,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		}
 
 		for (rra_max = 0;
-		     1 << rra_max < attr->max_rd_atomic &&
+		     1 << rra_max < attr->max_dest_rd_atomic &&
 			     rra_max < dev->qp_table.rdb_shift;
 		     ++rra_max)
 			; /* nothing */
@@ -807,7 +844,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		qp_context->params2      |= cpu_to_be32(rra_max << 21);
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
 
-		qp->resp_depth = attr->max_rd_atomic;
+		qp->resp_depth = attr->max_dest_rd_atomic;
 	}
 
 	qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
@@ -835,7 +872,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	}
 
 	err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
-			      qp->qpn, 0, qp_param, 0, &status);
+			      qp->qpn, 0, mailbox, 0, &status);
 	if (status) {
 		mthca_warn(dev, "modify QP %d returned status %02x.\n",
 			   state_table[cur_state][new_state].trans, status);
@@ -845,7 +882,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	if (!err)
 		qp->state = new_state;
 
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 
 	if (is_sqp(dev, qp))
 		store_attrs(to_msqp(qp), attr, attr_mask);
@@ -934,7 +971,8 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
 			mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n",
 				  size, shift);
 
-		qp->queue.direct.buf = pci_alloc_consistent(dev->pdev, size, &t);
+		qp->queue.direct.buf = dma_alloc_coherent(&dev->pdev->dev, size,
+							  &t, GFP_KERNEL);
 		if (!qp->queue.direct.buf)
 			goto err_out;
 
@@ -973,7 +1011,8 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
 
 		for (i = 0; i < npages; ++i) {
 			qp->queue.page_list[i].buf =
-				pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t);
+				dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+						   &t, GFP_KERNEL);
 			if (!qp->queue.page_list[i].buf)
 				goto err_out_free;
 
@@ -996,16 +1035,15 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
 
  err_out_free:
 	if (qp->is_direct) {
-		pci_free_consistent(dev->pdev, size,
-				    qp->queue.direct.buf,
-				    pci_unmap_addr(&qp->queue.direct, mapping));
+		dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf,
+				  pci_unmap_addr(&qp->queue.direct, mapping));
 	} else
 		for (i = 0; i < npages; ++i) {
 			if (qp->queue.page_list[i].buf)
-				pci_free_consistent(dev->pdev, PAGE_SIZE,
-						    qp->queue.page_list[i].buf,
-						    pci_unmap_addr(&qp->queue.page_list[i],
-								   mapping));
+				dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+						  qp->queue.page_list[i].buf,
+						  pci_unmap_addr(&qp->queue.page_list[i],
+								 mapping));
 
 		}
 
@@ -1073,11 +1111,12 @@ static void mthca_free_memfree(struct mthca_dev *dev,
 	if (mthca_is_memfree(dev)) {
 		mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
 		mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
-		mthca_table_put(dev, dev->qp_table.rdb_table,
-				qp->qpn << dev->qp_table.rdb_shift);
-		mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
-		mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
 	}
+
+	mthca_table_put(dev, dev->qp_table.rdb_table,
+			qp->qpn << dev->qp_table.rdb_shift);
+	mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
+	mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
 }
 
 static void mthca_wq_init(struct mthca_wq* wq)
@@ -1529,6 +1568,26 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 			break;
 
+		case UC:
+			switch (wr->opcode) {
+			case IB_WR_RDMA_WRITE:
+			case IB_WR_RDMA_WRITE_WITH_IMM:
+				((struct mthca_raddr_seg *) wqe)->raddr =
+					cpu_to_be64(wr->wr.rdma.remote_addr);
+				((struct mthca_raddr_seg *) wqe)->rkey =
+					cpu_to_be32(wr->wr.rdma.rkey);
+				((struct mthca_raddr_seg *) wqe)->reserved = 0;
+				wqe += sizeof (struct mthca_raddr_seg);
+				size += sizeof (struct mthca_raddr_seg) / 16;
+				break;
+
+			default:
+				/* No extra segments required for sends */
+				break;
+			}
+
+			break;
+
 		case UD:
 			((struct mthca_tavor_ud_seg *) wqe)->lkey =
 				cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
@@ -1814,9 +1873,29 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 					sizeof (struct mthca_atomic_seg);
 				break;
 
+			case IB_WR_RDMA_READ:
+			case IB_WR_RDMA_WRITE:
+			case IB_WR_RDMA_WRITE_WITH_IMM:
+				((struct mthca_raddr_seg *) wqe)->raddr =
+					cpu_to_be64(wr->wr.rdma.remote_addr);
+				((struct mthca_raddr_seg *) wqe)->rkey =
+					cpu_to_be32(wr->wr.rdma.rkey);
+				((struct mthca_raddr_seg *) wqe)->reserved = 0;
+				wqe += sizeof (struct mthca_raddr_seg);
+				size += sizeof (struct mthca_raddr_seg) / 16;
+				break;
+
+			default:
+				/* No extra segments required for sends */
+				break;
+			}
+
+			break;
+
+		case UC:
+			switch (wr->opcode) {
 			case IB_WR_RDMA_WRITE:
 			case IB_WR_RDMA_WRITE_WITH_IMM:
-			case IB_WR_RDMA_READ:
 				((struct mthca_raddr_seg *) wqe)->raddr =
 					cpu_to_be64(wr->wr.rdma.remote_addr);
 				((struct mthca_raddr_seg *) wqe)->rkey =

+ 327 - 80
drivers/input/evdev.c

@@ -21,6 +21,7 @@
 #include <linux/smp_lock.h>
 #include <linux/device.h>
 #include <linux/devfs_fs_kernel.h>
+#include <linux/compat.h>
 
 struct evdev {
 	int exist;
@@ -145,6 +146,41 @@ static int evdev_open(struct inode * inode, struct file * file)
 	return 0;
 }
 
+#ifdef CONFIG_COMPAT
+struct input_event_compat {
+	struct compat_timeval time;
+	__u16 type;
+	__u16 code;
+	__s32 value;
+};
+
+#ifdef CONFIG_X86_64
+#  define COMPAT_TEST test_thread_flag(TIF_IA32)
+#elif defined(CONFIG_IA64)
+#  define COMPAT_TEST IS_IA32_PROCESS(ia64_task_regs(current))
+#elif defined(CONFIG_ARCH_S390)
+#  define COMPAT_TEST test_thread_flag(TIF_31BIT)
+#else
+#  define COMPAT_TEST test_thread_flag(TIF_32BIT)
+#endif
+
+static ssize_t evdev_write_compat(struct file * file, const char __user * buffer, size_t count, loff_t *ppos)
+{
+	struct evdev_list *list = file->private_data;
+	struct input_event_compat event;
+	int retval = 0;
+
+	while (retval < count) {
+		if (copy_from_user(&event, buffer + retval, sizeof(struct input_event_compat)))
+			return -EFAULT;
+		input_event(list->evdev->handle.dev, event.type, event.code, event.value);
+		retval += sizeof(struct input_event_compat);
+	}
+
+	return retval;
+}
+#endif
+
 static ssize_t evdev_write(struct file * file, const char __user * buffer, size_t count, loff_t *ppos)
 {
 	struct evdev_list *list = file->private_data;
@@ -153,6 +189,11 @@ static ssize_t evdev_write(struct file * file, const char __user * buffer, size_
 
 	if (!list->evdev->exist) return -ENODEV;
 
+#ifdef CONFIG_COMPAT
+	if (COMPAT_TEST)
+		return evdev_write_compat(file, buffer, count, ppos);
+#endif
+
 	while (retval < count) {
 
 		if (copy_from_user(&event, buffer + retval, sizeof(struct input_event)))
@@ -164,11 +205,56 @@ static ssize_t evdev_write(struct file * file, const char __user * buffer, size_
 	return retval;
 }
 
+#ifdef CONFIG_COMPAT
+static ssize_t evdev_read_compat(struct file * file, char __user * buffer, size_t count, loff_t *ppos)
+{
+	struct evdev_list *list = file->private_data;
+	int retval;
+
+	if (count < sizeof(struct input_event_compat))
+		return -EINVAL;
+
+	if (list->head == list->tail && list->evdev->exist && (file->f_flags & O_NONBLOCK))
+		return -EAGAIN;
+
+	retval = wait_event_interruptible(list->evdev->wait,
+		list->head != list->tail || (!list->evdev->exist));
+
+	if (retval)
+		return retval;
+
+	if (!list->evdev->exist)
+		return -ENODEV;
+
+	while (list->head != list->tail && retval + sizeof(struct input_event_compat) <= count) {
+		struct input_event *event = (struct input_event *) list->buffer + list->tail;
+		struct input_event_compat event_compat;
+		event_compat.time.tv_sec = event->time.tv_sec;
+		event_compat.time.tv_usec = event->time.tv_usec;
+		event_compat.type = event->type;
+		event_compat.code = event->code;
+		event_compat.value = event->value;
+
+		if (copy_to_user(buffer + retval, &event_compat,
+			sizeof(struct input_event_compat))) return -EFAULT;
+		list->tail = (list->tail + 1) & (EVDEV_BUFFER_SIZE - 1);
+		retval += sizeof(struct input_event_compat);
+	}
+
+	return retval;
+}
+#endif
+
 static ssize_t evdev_read(struct file * file, char __user * buffer, size_t count, loff_t *ppos)
 {
 	struct evdev_list *list = file->private_data;
 	int retval;
 
+#ifdef CONFIG_COMPAT
+	if (COMPAT_TEST)
+		return evdev_read_compat(file, buffer, count, ppos);
+#endif
+
 	if (count < sizeof(struct input_event))
 		return -EINVAL;
 
@@ -186,7 +272,7 @@ static ssize_t evdev_read(struct file * file, char __user * buffer, size_t count
 
 	while (list->head != list->tail && retval + sizeof(struct input_event) <= count) {
 		if (copy_to_user(buffer + retval, list->buffer + list->tail,
-			 sizeof(struct input_event))) return -EFAULT;
+			sizeof(struct input_event))) return -EFAULT;
 		list->tail = (list->tail + 1) & (EVDEV_BUFFER_SIZE - 1);
 		retval += sizeof(struct input_event);
 	}
@@ -203,7 +289,7 @@ static unsigned int evdev_poll(struct file *file, poll_table *wait)
 		(list->evdev->exist ? 0 : (POLLHUP | POLLERR));
 }
 
-static int evdev_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+static long evdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct evdev_list *list = file->private_data;
 	struct evdev *evdev = list->evdev;
@@ -285,109 +371,267 @@ static int evdev_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 
 		default:
 
-			if (_IOC_TYPE(cmd) != 'E' || _IOC_DIR(cmd) != _IOC_READ)
+			if (_IOC_TYPE(cmd) != 'E')
 				return -EINVAL;
 
-			if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0,0))) {
-
-				long *bits;
-				int len;
-
-				switch (_IOC_NR(cmd) & EV_MAX) {
-					case      0: bits = dev->evbit;  len = EV_MAX;  break;
-					case EV_KEY: bits = dev->keybit; len = KEY_MAX; break;
-					case EV_REL: bits = dev->relbit; len = REL_MAX; break;
-					case EV_ABS: bits = dev->absbit; len = ABS_MAX; break;
-					case EV_MSC: bits = dev->mscbit; len = MSC_MAX; break;
-					case EV_LED: bits = dev->ledbit; len = LED_MAX; break;
-					case EV_SND: bits = dev->sndbit; len = SND_MAX; break;
-					case EV_FF:  bits = dev->ffbit;  len = FF_MAX;  break;
-					default: return -EINVAL;
+			if (_IOC_DIR(cmd) == _IOC_READ) {
+
+				if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0,0))) {
+
+					long *bits;
+					int len;
+
+					switch (_IOC_NR(cmd) & EV_MAX) {
+						case      0: bits = dev->evbit;  len = EV_MAX;  break;
+						case EV_KEY: bits = dev->keybit; len = KEY_MAX; break;
+						case EV_REL: bits = dev->relbit; len = REL_MAX; break;
+						case EV_ABS: bits = dev->absbit; len = ABS_MAX; break;
+						case EV_MSC: bits = dev->mscbit; len = MSC_MAX; break;
+						case EV_LED: bits = dev->ledbit; len = LED_MAX; break;
+						case EV_SND: bits = dev->sndbit; len = SND_MAX; break;
+						case EV_FF:  bits = dev->ffbit;  len = FF_MAX;  break;
+						default: return -EINVAL;
+					}
+					len = NBITS(len) * sizeof(long);
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, bits, len) ? -EFAULT : len;
 				}
-				len = NBITS(len) * sizeof(long);
-				if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
-				return copy_to_user(p, bits, len) ? -EFAULT : len;
-			}
 
-			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGKEY(0))) {
-				int len;
-				len = NBITS(KEY_MAX) * sizeof(long);
-				if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
-				return copy_to_user(p, dev->key, len) ? -EFAULT : len;
-			}
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGKEY(0))) {
+					int len;
+					len = NBITS(KEY_MAX) * sizeof(long);
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->key, len) ? -EFAULT : len;
+				}
 
-			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGLED(0))) {
-				int len;
-				len = NBITS(LED_MAX) * sizeof(long);
-				if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
-				return copy_to_user(p, dev->led, len) ? -EFAULT : len;
-			}
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGLED(0))) {
+					int len;
+					len = NBITS(LED_MAX) * sizeof(long);
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->led, len) ? -EFAULT : len;
+				}
 
-			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0))) {
-				int len;
-				len = NBITS(SND_MAX) * sizeof(long);
-				if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
-				return copy_to_user(p, dev->snd, len) ? -EFAULT : len;
-			}
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0))) {
+					int len;
+					len = NBITS(SND_MAX) * sizeof(long);
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->snd, len) ? -EFAULT : len;
+				}
 
-			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0))) {
-				int len;
-				if (!dev->name) return -ENOENT;
-				len = strlen(dev->name) + 1;
-				if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
-				return copy_to_user(p, dev->name, len) ? -EFAULT : len;
-			}
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0))) {
+					int len;
+					if (!dev->name) return -ENOENT;
+					len = strlen(dev->name) + 1;
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->name, len) ? -EFAULT : len;
+				}
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGPHYS(0))) {
+					int len;
+					if (!dev->phys) return -ENOENT;
+					len = strlen(dev->phys) + 1;
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->phys, len) ? -EFAULT : len;
+				}
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGUNIQ(0))) {
+					int len;
+					if (!dev->uniq) return -ENOENT;
+					len = strlen(dev->uniq) + 1;
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->uniq, len) ? -EFAULT : len;
+				}
+
+				if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) {
+
+					int t = _IOC_NR(cmd) & ABS_MAX;
+
+					abs.value = dev->abs[t];
+					abs.minimum = dev->absmin[t];
+					abs.maximum = dev->absmax[t];
+					abs.fuzz = dev->absfuzz[t];
+					abs.flat = dev->absflat[t];
+
+					if (copy_to_user(p, &abs, sizeof(struct input_absinfo)))
+						return -EFAULT;
+
+					return 0;
+				}
 
-			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGPHYS(0))) {
-				int len;
-				if (!dev->phys) return -ENOENT;
-				len = strlen(dev->phys) + 1;
-				if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
-				return copy_to_user(p, dev->phys, len) ? -EFAULT : len;
 			}
 
-			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGUNIQ(0))) {
-				int len;
-				if (!dev->uniq) return -ENOENT;
-				len = strlen(dev->uniq) + 1;
-				if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
-				return copy_to_user(p, dev->uniq, len) ? -EFAULT : len;
+			if (_IOC_DIR(cmd) == _IOC_WRITE) {
+
+				if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) {
+
+					int t = _IOC_NR(cmd) & ABS_MAX;
+
+					if (copy_from_user(&abs, p, sizeof(struct input_absinfo)))
+						return -EFAULT;
+
+					dev->abs[t] = abs.value;
+					dev->absmin[t] = abs.minimum;
+					dev->absmax[t] = abs.maximum;
+					dev->absfuzz[t] = abs.fuzz;
+					dev->absflat[t] = abs.flat;
+
+					return 0;
+				}
 			}
+	}
+	return -EINVAL;
+}
 
-			if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) {
+#ifdef CONFIG_COMPAT
+
+#define BITS_PER_LONG_COMPAT (sizeof(compat_long_t) * 8)
+#define NBITS_COMPAT(x) ((((x)-1)/BITS_PER_LONG_COMPAT)+1)
+#define OFF_COMPAT(x)  ((x)%BITS_PER_LONG_COMPAT)
+#define BIT_COMPAT(x)  (1UL<<OFF_COMPAT(x))
+#define LONG_COMPAT(x) ((x)/BITS_PER_LONG_COMPAT)
+#define test_bit_compat(bit, array) ((array[LONG_COMPAT(bit)] >> OFF_COMPAT(bit)) & 1)
+
+#ifdef __BIG_ENDIAN
+#define bit_to_user(bit, max) \
+do { \
+	int i; \
+	int len = NBITS_COMPAT((max)) * sizeof(compat_long_t); \
+	if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); \
+	for (i = 0; i < len / sizeof(compat_long_t); i++) \
+		if (copy_to_user((compat_long_t*) p + i, \
+				 (compat_long_t*) (bit) + i + 1 - ((i % 2) << 1), \
+				 sizeof(compat_long_t))) \
+			return -EFAULT; \
+	return len; \
+} while (0)
+#else
+#define bit_to_user(bit, max) \
+do { \
+	int len = NBITS_COMPAT((max)) * sizeof(compat_long_t); \
+	if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); \
+	return copy_to_user(p, (bit), len) ? -EFAULT : len; \
+} while (0)
+#endif
+
+static long evdev_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct evdev_list *list = file->private_data;
+	struct evdev *evdev = list->evdev;
+	struct input_dev *dev = evdev->handle.dev;
+	struct input_absinfo abs;
+	void __user *p = compat_ptr(arg);
 
-				int t = _IOC_NR(cmd) & ABS_MAX;
+	if (!evdev->exist) return -ENODEV;
 
-				abs.value = dev->abs[t];
-				abs.minimum = dev->absmin[t];
-				abs.maximum = dev->absmax[t];
-				abs.fuzz = dev->absfuzz[t];
-				abs.flat = dev->absflat[t];
+	switch (cmd) {
 
-				if (copy_to_user(p, &abs, sizeof(struct input_absinfo)))
-					return -EFAULT;
+		case EVIOCGVERSION:
+		case EVIOCGID:
+		case EVIOCGKEYCODE:
+		case EVIOCSKEYCODE:
+		case EVIOCSFF:
+		case EVIOCRMFF:
+		case EVIOCGEFFECTS:
+		case EVIOCGRAB:
+			return evdev_ioctl(file, cmd, (unsigned long) p);
 
-				return 0;
+		default:
+
+			if (_IOC_TYPE(cmd) != 'E')
+				return -EINVAL;
+
+			if (_IOC_DIR(cmd) == _IOC_READ) {
+
+				if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0,0))) {
+					long *bits;
+					int max;
+
+					switch (_IOC_NR(cmd) & EV_MAX) {
+						case      0: bits = dev->evbit;  max = EV_MAX;  break;
+						case EV_KEY: bits = dev->keybit; max = KEY_MAX; break;
+						case EV_REL: bits = dev->relbit; max = REL_MAX; break;
+						case EV_ABS: bits = dev->absbit; max = ABS_MAX; break;
+						case EV_MSC: bits = dev->mscbit; max = MSC_MAX; break;
+						case EV_LED: bits = dev->ledbit; max = LED_MAX; break;
+						case EV_SND: bits = dev->sndbit; max = SND_MAX; break;
+						case EV_FF:  bits = dev->ffbit;  max = FF_MAX;  break;
+						default: return -EINVAL;
+					}
+					bit_to_user(bits, max);
+				}
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGKEY(0)))
+					bit_to_user(dev->key, KEY_MAX);
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGLED(0)))
+					bit_to_user(dev->led, LED_MAX);
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0)))
+					bit_to_user(dev->snd, SND_MAX);
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0))) {
+					int len;
+					if (!dev->name) return -ENOENT;
+					len = strlen(dev->name) + 1;
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->name, len) ? -EFAULT : len;
+				}
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGPHYS(0))) {
+					int len;
+					if (!dev->phys) return -ENOENT;
+					len = strlen(dev->phys) + 1;
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->phys, len) ? -EFAULT : len;
+				}
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGUNIQ(0))) {
+					int len;
+					if (!dev->uniq) return -ENOENT;
+					len = strlen(dev->uniq) + 1;
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->uniq, len) ? -EFAULT : len;
+				}
+
+				if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) {
+
+					int t = _IOC_NR(cmd) & ABS_MAX;
+
+					abs.value = dev->abs[t];
+					abs.minimum = dev->absmin[t];
+					abs.maximum = dev->absmax[t];
+					abs.fuzz = dev->absfuzz[t];
+					abs.flat = dev->absflat[t];
+
+					if (copy_to_user(p, &abs, sizeof(struct input_absinfo)))
+						return -EFAULT;
+
+					return 0;
+				}
 			}
 
-			if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) {
+			if (_IOC_DIR(cmd) == _IOC_WRITE) {
 
-				int t = _IOC_NR(cmd) & ABS_MAX;
+				if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) {
 
-				if (copy_from_user(&abs, p, sizeof(struct input_absinfo)))
-					return -EFAULT;
+					int t = _IOC_NR(cmd) & ABS_MAX;
 
-				dev->abs[t] = abs.value;
-				dev->absmin[t] = abs.minimum;
-				dev->absmax[t] = abs.maximum;
-				dev->absfuzz[t] = abs.fuzz;
-				dev->absflat[t] = abs.flat;
+					if (copy_from_user(&abs, p, sizeof(struct input_absinfo)))
+						return -EFAULT;
 
-				return 0;
+					dev->abs[t] = abs.value;
+					dev->absmin[t] = abs.minimum;
+					dev->absmax[t] = abs.maximum;
+					dev->absfuzz[t] = abs.fuzz;
+					dev->absflat[t] = abs.flat;
+
+					return 0;
+				}
 			}
 	}
 	return -EINVAL;
 }
+#endif
 
 static struct file_operations evdev_fops = {
 	.owner =	THIS_MODULE,
@@ -396,7 +640,10 @@ static struct file_operations evdev_fops = {
 	.poll =		evdev_poll,
 	.open =		evdev_open,
 	.release =	evdev_release,
-	.ioctl =	evdev_ioctl,
+	.unlocked_ioctl = evdev_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl =	evdev_ioctl_compat,
+#endif
 	.fasync =	evdev_fasync,
 	.flush =	evdev_flush
 };

+ 0 - 14
drivers/input/gameport/Kconfig

@@ -49,22 +49,8 @@ config GAMEPORT_EMU10K1
 	  To compile this driver as a module, choose M here: the
 	  module will be called emu10k1-gp.
 
-config GAMEPORT_VORTEX
-	tristate "Aureal Vortex, Vortex 2 gameport support"
-	depends on PCI
-	help
-	  Say Y here if you have an Aureal Vortex 1 or 2  card and want
-	  to use its gameport.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called vortex.
-
 config GAMEPORT_FM801
 	tristate "ForteMedia FM801 gameport support"
 	depends on PCI
 
-config GAMEPORT_CS461X
-	tristate "Crystal SoundFusion gameport support"
-	depends on PCI
-
 endif

+ 0 - 2
drivers/input/gameport/Makefile

@@ -5,9 +5,7 @@
 # Each configuration option enables a list of files.
 
 obj-$(CONFIG_GAMEPORT)		+= gameport.o
-obj-$(CONFIG_GAMEPORT_CS461X)	+= cs461x.o
 obj-$(CONFIG_GAMEPORT_EMU10K1)	+= emu10k1-gp.o
 obj-$(CONFIG_GAMEPORT_FM801)	+= fm801-gp.o
 obj-$(CONFIG_GAMEPORT_L4)	+= lightning.o
 obj-$(CONFIG_GAMEPORT_NS558)	+= ns558.o
-obj-$(CONFIG_GAMEPORT_VORTEX)	+= vortex.o

+ 0 - 322
drivers/input/gameport/cs461x.c

@@ -1,322 +0,0 @@
-/*
-	The all defines and part of code (such as cs461x_*) are
-	contributed from ALSA 0.5.8 sources.
-	See http://www.alsa-project.org/ for sources
-
-	Tested on Linux 686 2.4.0-test9, ALSA 0.5.8a and CS4610
-*/
-
-#include <asm/io.h>
-
-#include <linux/module.h>
-#include <linux/ioport.h>
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/gameport.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-
-MODULE_AUTHOR("Victor Krapivin");
-MODULE_LICENSE("GPL");
-
-/*
-	These options are experimental
-
-#define CS461X_FULL_MAP
-*/
-
-
-#ifndef PCI_VENDOR_ID_CIRRUS
-#define PCI_VENDOR_ID_CIRRUS            0x1013
-#endif
-#ifndef PCI_DEVICE_ID_CIRRUS_4610
-#define PCI_DEVICE_ID_CIRRUS_4610       0x6001
-#endif
-#ifndef PCI_DEVICE_ID_CIRRUS_4612
-#define PCI_DEVICE_ID_CIRRUS_4612       0x6003
-#endif
-#ifndef PCI_DEVICE_ID_CIRRUS_4615
-#define PCI_DEVICE_ID_CIRRUS_4615       0x6004
-#endif
-
-/* Registers */
-
-#define BA0_JSPT                                0x00000480
-#define BA0_JSCTL                               0x00000484
-#define BA0_JSC1                                0x00000488
-#define BA0_JSC2                                0x0000048C
-#define BA0_JSIO                                0x000004A0
-
-/* Bits for JSPT */
-
-#define JSPT_CAX                                0x00000001
-#define JSPT_CAY                                0x00000002
-#define JSPT_CBX                                0x00000004
-#define JSPT_CBY                                0x00000008
-#define JSPT_BA1                                0x00000010
-#define JSPT_BA2                                0x00000020
-#define JSPT_BB1                                0x00000040
-#define JSPT_BB2                                0x00000080
-
-/* Bits for JSCTL */
-
-#define JSCTL_SP_MASK                           0x00000003
-#define JSCTL_SP_SLOW                           0x00000000
-#define JSCTL_SP_MEDIUM_SLOW                    0x00000001
-#define JSCTL_SP_MEDIUM_FAST                    0x00000002
-#define JSCTL_SP_FAST                           0x00000003
-#define JSCTL_ARE                               0x00000004
-
-/* Data register pairs masks */
-
-#define JSC1_Y1V_MASK                           0x0000FFFF
-#define JSC1_X1V_MASK                           0xFFFF0000
-#define JSC1_Y1V_SHIFT                          0
-#define JSC1_X1V_SHIFT                          16
-#define JSC2_Y2V_MASK                           0x0000FFFF
-#define JSC2_X2V_MASK                           0xFFFF0000
-#define JSC2_Y2V_SHIFT                          0
-#define JSC2_X2V_SHIFT                          16
-
-/* JS GPIO */
-
-#define JSIO_DAX                                0x00000001
-#define JSIO_DAY                                0x00000002
-#define JSIO_DBX                                0x00000004
-#define JSIO_DBY                                0x00000008
-#define JSIO_AXOE                               0x00000010
-#define JSIO_AYOE                               0x00000020
-#define JSIO_BXOE                               0x00000040
-#define JSIO_BYOE                               0x00000080
-
-/*
-   The card initialization code is obfuscated; the module cs461x
-   need to be loaded after ALSA modules initialized and something
-   played on the CS 4610 chip (see sources for details of CS4610
-   initialization code from ALSA)
-*/
-
-/* Card specific definitions */
-
-#define CS461X_BA0_SIZE         0x2000
-#define CS461X_BA1_DATA0_SIZE   0x3000
-#define CS461X_BA1_DATA1_SIZE   0x3800
-#define CS461X_BA1_PRG_SIZE     0x7000
-#define CS461X_BA1_REG_SIZE     0x0100
-
-#define BA1_SP_DMEM0                            0x00000000
-#define BA1_SP_DMEM1                            0x00010000
-#define BA1_SP_PMEM                             0x00020000
-#define BA1_SP_REG                              0x00030000
-
-#define BA1_DWORD_SIZE          (13 * 1024 + 512)
-#define BA1_MEMORY_COUNT        3
-
-/*
-   Only one CS461x card is still suppoted; the code requires
-   redesign to avoid this limitatuion.
-*/
-
-static unsigned long ba0_addr;
-static unsigned int __iomem *ba0;
-
-#ifdef CS461X_FULL_MAP
-static unsigned long ba1_addr;
-static union ba1_t {
-        struct {
-                unsigned int __iomem *data0;
-                unsigned int __iomem *data1;
-                unsigned int __iomem *pmem;
-                unsigned int __iomem *reg;
-        } name;
-        unsigned int __iomem *idx[4];
-} ba1;
-
-static void cs461x_poke(unsigned long reg, unsigned int val)
-{
-        writel(val, &ba1.idx[(reg >> 16) & 3][(reg >> 2) & 0x3fff]);
-}
-
-static unsigned int cs461x_peek(unsigned long reg)
-{
-        return readl(&ba1.idx[(reg >> 16) & 3][(reg >> 2) & 0x3fff]);
-}
-
-#endif
-
-static void cs461x_pokeBA0(unsigned long reg, unsigned int val)
-{
-        writel(val, &ba0[reg >> 2]);
-}
-
-static unsigned int cs461x_peekBA0(unsigned long reg)
-{
-        return readl(&ba0[reg >> 2]);
-}
-
-static int cs461x_free(struct pci_dev *pdev)
-{
-	struct gameport *port = pci_get_drvdata(pdev);
-
-	if (port)
-	    gameport_unregister_port(port);
-
-	if (ba0) iounmap(ba0);
-#ifdef CS461X_FULL_MAP
-	if (ba1.name.data0) iounmap(ba1.name.data0);
-	if (ba1.name.data1) iounmap(ba1.name.data1);
-	if (ba1.name.pmem)  iounmap(ba1.name.pmem);
-	if (ba1.name.reg)   iounmap(ba1.name.reg);
-#endif
-	return 0;
-}
-
-static void cs461x_gameport_trigger(struct gameport *gameport)
-{
-	cs461x_pokeBA0(BA0_JSPT, 0xFF);  //outb(gameport->io, 0xFF);
-}
-
-static unsigned char cs461x_gameport_read(struct gameport *gameport)
-{
-	return cs461x_peekBA0(BA0_JSPT); //inb(gameport->io);
-}
-
-static int cs461x_gameport_cooked_read(struct gameport *gameport, int *axes, int *buttons)
-{
-	unsigned js1, js2, jst;
-
-	js1 = cs461x_peekBA0(BA0_JSC1);
-	js2 = cs461x_peekBA0(BA0_JSC2);
-	jst = cs461x_peekBA0(BA0_JSPT);
-
-	*buttons = (~jst >> 4) & 0x0F;
-
-	axes[0] = ((js1 & JSC1_Y1V_MASK) >> JSC1_Y1V_SHIFT) & 0xFFFF;
-	axes[1] = ((js1 & JSC1_X1V_MASK) >> JSC1_X1V_SHIFT) & 0xFFFF;
-	axes[2] = ((js2 & JSC2_Y2V_MASK) >> JSC2_Y2V_SHIFT) & 0xFFFF;
-	axes[3] = ((js2 & JSC2_X2V_MASK) >> JSC2_X2V_SHIFT) & 0xFFFF;
-
-	for(jst=0;jst<4;++jst)
-		if(axes[jst]==0xFFFF) axes[jst] = -1;
-	return 0;
-}
-
-static int cs461x_gameport_open(struct gameport *gameport, int mode)
-{
-	switch (mode) {
-		case GAMEPORT_MODE_COOKED:
-		case GAMEPORT_MODE_RAW:
-			return 0;
-		default:
-			return -1;
-	}
-	return 0;
-}
-
-static struct pci_device_id cs461x_pci_tbl[] = {
-	{ PCI_VENDOR_ID_CIRRUS, 0x6001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Cirrus CS4610 */
-	{ PCI_VENDOR_ID_CIRRUS, 0x6003, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Cirrus CS4612 */
-	{ PCI_VENDOR_ID_CIRRUS, 0x6005, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Cirrus CS4615 */
-	{ 0, }
-};
-MODULE_DEVICE_TABLE(pci, cs461x_pci_tbl);
-
-static int __devinit cs461x_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-	int rc;
-	struct gameport* port;
-
-	rc = pci_enable_device(pdev);
-	if (rc) {
-		printk(KERN_ERR "cs461x: Cannot enable PCI gameport (bus %d, devfn %d) error=%d\n",
-			pdev->bus->number, pdev->devfn, rc);
-		return rc;
-	}
-
-	ba0_addr = pci_resource_start(pdev, 0);
-#ifdef CS461X_FULL_MAP
-	ba1_addr = pci_resource_start(pdev, 1);
-#endif
-	if (ba0_addr == 0 || ba0_addr == ~0
-#ifdef CS461X_FULL_MAP
-            || ba1_addr == 0 || ba1_addr == ~0
-#endif
-	    ) {
-                printk(KERN_ERR "cs461x: wrong address - ba0 = 0x%lx\n", ba0_addr);
-#ifdef CS461X_FULL_MAP
-                printk(KERN_ERR "cs461x: wrong address - ba1 = 0x%lx\n", ba1_addr);
-#endif
-		cs461x_free(pdev);
-                return -ENOMEM;
-        }
-
-	ba0 = ioremap(ba0_addr, CS461X_BA0_SIZE);
-#ifdef CS461X_FULL_MAP
-	ba1.name.data0 = ioremap(ba1_addr + BA1_SP_DMEM0, CS461X_BA1_DATA0_SIZE);
-	ba1.name.data1 = ioremap(ba1_addr + BA1_SP_DMEM1, CS461X_BA1_DATA1_SIZE);
-	ba1.name.pmem  = ioremap(ba1_addr + BA1_SP_PMEM, CS461X_BA1_PRG_SIZE);
-	ba1.name.reg   = ioremap(ba1_addr + BA1_SP_REG, CS461X_BA1_REG_SIZE);
-
-	if (ba0 == NULL || ba1.name.data0 == NULL ||
-            ba1.name.data1 == NULL || ba1.name.pmem == NULL ||
-            ba1.name.reg == NULL) {
-		cs461x_free(pdev);
-                return -ENOMEM;
-        }
-#else
-	if (ba0 == NULL) {
-		cs461x_free(pdev);
-		return -ENOMEM;
-	}
-#endif
-
-	if (!(port = gameport_allocate_port())) {
-		printk(KERN_ERR "cs461x: Memory allocation failed\n");
-		cs461x_free(pdev);
-		return -ENOMEM;
-	}
-
-	pci_set_drvdata(pdev, port);
-
-	port->open = cs461x_gameport_open;
-	port->trigger = cs461x_gameport_trigger;
-	port->read = cs461x_gameport_read;
-	port->cooked_read = cs461x_gameport_cooked_read;
-
-	gameport_set_name(port, "CS416x");
-	gameport_set_phys(port, "pci%s/gameport0", pci_name(pdev));
-	port->dev.parent = &pdev->dev;
-
-	cs461x_pokeBA0(BA0_JSIO, 0xFF); // ?
-	cs461x_pokeBA0(BA0_JSCTL, JSCTL_SP_MEDIUM_SLOW);
-
-	gameport_register_port(port);
-
-	return 0;
-}
-
-static void __devexit cs461x_pci_remove(struct pci_dev *pdev)
-{
-	cs461x_free(pdev);
-}
-
-static struct pci_driver cs461x_pci_driver = {
-        .name =         "CS461x_gameport",
-        .id_table =     cs461x_pci_tbl,
-        .probe =        cs461x_pci_probe,
-        .remove =       __devexit_p(cs461x_pci_remove),
-};
-
-static int __init cs461x_init(void)
-{
-        return pci_register_driver(&cs461x_pci_driver);
-}
-
-static void __exit cs461x_exit(void)
-{
-        pci_unregister_driver(&cs461x_pci_driver);
-}
-
-module_init(cs461x_init);
-module_exit(cs461x_exit);
-

+ 12 - 19
drivers/input/gameport/gameport.c

@@ -17,11 +17,10 @@
 #include <linux/init.h>
 #include <linux/gameport.h>
 #include <linux/wait.h>
-#include <linux/completion.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
+#include <linux/kthread.h>
 
 /*#include <asm/io.h>*/
 
@@ -238,8 +237,7 @@ struct gameport_event {
 static DEFINE_SPINLOCK(gameport_event_lock);	/* protects gameport_event_list */
 static LIST_HEAD(gameport_event_list);
 static DECLARE_WAIT_QUEUE_HEAD(gameport_wait);
-static DECLARE_COMPLETION(gameport_exited);
-static int gameport_pid;
+static struct task_struct *gameport_task;
 
 static void gameport_queue_event(void *object, struct module *owner,
 			      enum gameport_event_type event_type)
@@ -250,12 +248,12 @@ static void gameport_queue_event(void *object, struct module *owner,
 	spin_lock_irqsave(&gameport_event_lock, flags);
 
 	/*
- 	 * Scan event list for the other events for the same gameport port,
+	 * Scan event list for the other events for the same gameport port,
 	 * starting with the most recent one. If event is the same we
 	 * do not need add new one. If event is of different type we
 	 * need to add this event and should not look further because
 	 * we need to preseve sequence of distinct events.
- 	 */
+	 */
 	list_for_each_entry_reverse(event, &gameport_event_list, node) {
 		if (event->object == object) {
 			if (event->type == event_type)
@@ -432,20 +430,15 @@ static struct gameport *gameport_get_pending_child(struct gameport *parent)
 
 static int gameport_thread(void *nothing)
 {
-	lock_kernel();
-	daemonize("kgameportd");
-	allow_signal(SIGTERM);
-
 	do {
 		gameport_handle_events();
-		wait_event_interruptible(gameport_wait, !list_empty(&gameport_event_list));
+		wait_event_interruptible(gameport_wait,
+			kthread_should_stop() || !list_empty(&gameport_event_list));
 		try_to_freeze();
-	} while (!signal_pending(current));
+	} while (!kthread_should_stop());
 
 	printk(KERN_DEBUG "gameport: kgameportd exiting\n");
-
-	unlock_kernel();
-	complete_and_exit(&gameport_exited, 0);
+	return 0;
 }
 
 
@@ -773,9 +766,10 @@ void gameport_close(struct gameport *gameport)
 
 static int __init gameport_init(void)
 {
-	if (!(gameport_pid = kernel_thread(gameport_thread, NULL, CLONE_KERNEL))) {
+	gameport_task = kthread_run(gameport_thread, NULL, "kgameportd");
+	if (IS_ERR(gameport_task)) {
 		printk(KERN_ERR "gameport: Failed to start kgameportd\n");
-		return -1;
+		return PTR_ERR(gameport_task);
 	}
 
 	gameport_bus.dev_attrs = gameport_device_attrs;
@@ -789,8 +783,7 @@ static int __init gameport_init(void)
 static void __exit gameport_exit(void)
 {
 	bus_unregister(&gameport_bus);
-	kill_proc(gameport_pid, SIGTERM, 1);
-	wait_for_completion(&gameport_exited);
+	kthread_stop(gameport_task);
 }
 
 module_init(gameport_init);

+ 6 - 6
drivers/input/gameport/ns558.c

@@ -258,18 +258,18 @@ static int __init ns558_init(void)
 {
 	int i = 0;
 
+	if (pnp_register_driver(&ns558_pnp_driver) >= 0)
+		pnp_registered = 1;
+
 /*
- * Probe ISA ports first so that PnP gets to choose free port addresses
- * not occupied by the ISA ports.
+ * Probe ISA ports after PnP, so that PnP ports that are already
+ * enabled get detected as PnP. This may be suboptimal in multi-device
+ * configurations, but saves hassle with simple setups.
  */
 
 	while (ns558_isa_portlist[i])
 		ns558_isa_probe(ns558_isa_portlist[i++]);
 
-	if (pnp_register_driver(&ns558_pnp_driver) >= 0)
-		pnp_registered = 1;
-
-
 	return (list_empty(&ns558_list) && !pnp_registered) ? -ENODEV : 0;
 }
 

+ 0 - 186
drivers/input/gameport/vortex.c

@@ -1,186 +0,0 @@
-/*
- * $Id: vortex.c,v 1.5 2002/07/01 15:39:30 vojtech Exp $
- *
- *  Copyright (c) 2000-2001 Vojtech Pavlik
- *
- *  Based on the work of:
- *	Raymond Ingles
- */
-
-/*
- * Trident 4DWave and Aureal Vortex gameport driver for Linux
- */
-
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
- */
-
-#include <asm/io.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/ioport.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/gameport.h>
-
-MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>");
-MODULE_DESCRIPTION("Aureal Vortex and Vortex2 gameport driver");
-MODULE_LICENSE("GPL");
-
-#define VORTEX_GCR		0x0c	/* Gameport control register */
-#define VORTEX_LEG		0x08	/* Legacy port location */
-#define VORTEX_AXD		0x10	/* Axes start */
-#define VORTEX_DATA_WAIT	20	/* 20 ms */
-
-struct vortex {
-	struct gameport *gameport;
-	struct pci_dev *dev;
-	unsigned char __iomem *base;
-	unsigned char __iomem *io;
-};
-
-static unsigned char vortex_read(struct gameport *gameport)
-{
-	struct vortex *vortex = gameport->port_data;
-	return readb(vortex->io + VORTEX_LEG);
-}
-
-static void vortex_trigger(struct gameport *gameport)
-{
-	struct vortex *vortex = gameport->port_data;
-	writeb(0xff, vortex->io + VORTEX_LEG);
-}
-
-static int vortex_cooked_read(struct gameport *gameport, int *axes, int *buttons)
-{
-	struct vortex *vortex = gameport->port_data;
-	int i;
-
-	*buttons = (~readb(vortex->base + VORTEX_LEG) >> 4) & 0xf;
-
-	for (i = 0; i < 4; i++) {
-		axes[i] = readw(vortex->io + VORTEX_AXD + i * sizeof(u32));
-		if (axes[i] == 0x1fff) axes[i] = -1;
-	}
-
-        return 0;
-}
-
-static int vortex_open(struct gameport *gameport, int mode)
-{
-	struct vortex *vortex = gameport->port_data;
-
-	switch (mode) {
-		case GAMEPORT_MODE_COOKED:
-			writeb(0x40, vortex->io + VORTEX_GCR);
-			msleep(VORTEX_DATA_WAIT);
-			return 0;
-		case GAMEPORT_MODE_RAW:
-			writeb(0x00, vortex->io + VORTEX_GCR);
-			return 0;
-		default:
-			return -1;
-	}
-
-	return 0;
-}
-
-static int __devinit vortex_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	struct vortex *vortex;
-	struct gameport *port;
-	int i;
-
-	vortex = kcalloc(1, sizeof(struct vortex), GFP_KERNEL);
-	port = gameport_allocate_port();
-	if (!vortex || !port) {
-		printk(KERN_ERR "vortex: Memory allocation failed.\n");
-		kfree(vortex);
-		gameport_free_port(port);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < 6; i++)
-		if (~pci_resource_flags(dev, i) & IORESOURCE_IO)
-			break;
-
-	pci_enable_device(dev);
-
-	vortex->dev = dev;
-	vortex->gameport = port;
-	vortex->base = ioremap(pci_resource_start(vortex->dev, i),
-				pci_resource_len(vortex->dev, i));
-	vortex->io = vortex->base + id->driver_data;
-
-	pci_set_drvdata(dev, vortex);
-
-	port->port_data = vortex;
-	port->fuzz = 64;
-
-	gameport_set_name(port, "AU88x0");
-	gameport_set_phys(port, "pci%s/gameport0", pci_name(dev));
-	port->dev.parent = &dev->dev;
-	port->read = vortex_read;
-	port->trigger = vortex_trigger;
-	port->cooked_read = vortex_cooked_read;
-	port->open = vortex_open;
-
-	gameport_register_port(port);
-
-	return 0;
-}
-
-static void __devexit vortex_remove(struct pci_dev *dev)
-{
-	struct vortex *vortex = pci_get_drvdata(dev);
-
-	gameport_unregister_port(vortex->gameport);
-	iounmap(vortex->base);
-	kfree(vortex);
-}
-
-static struct pci_device_id vortex_id_table[] = {
-	{ 0x12eb, 0x0001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0x11000 },
-	{ 0x12eb, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0x28800 },
-	{ 0 }
-};
-
-static struct pci_driver vortex_driver = {
-	.name =		"vortex_gameport",
-	.id_table =	vortex_id_table,
-	.probe =	vortex_probe,
-	.remove =	__devexit_p(vortex_remove),
-};
-
-static int __init vortex_init(void)
-{
-	return pci_register_driver(&vortex_driver);
-}
-
-static void __exit vortex_exit(void)
-{
-	pci_unregister_driver(&vortex_driver);
-}
-
-module_init(vortex_init);
-module_exit(vortex_exit);

+ 32 - 5
drivers/input/input.c

@@ -219,10 +219,24 @@ void input_release_device(struct input_handle *handle)
 
 int input_open_device(struct input_handle *handle)
 {
+	struct input_dev *dev = handle->dev;
+	int err;
+
+	err = down_interruptible(&dev->sem);
+	if (err)
+		return err;
+
 	handle->open++;
-	if (handle->dev->open)
-		return handle->dev->open(handle->dev);
-	return 0;
+
+	if (!dev->users++ && dev->open)
+		err = dev->open(dev);
+
+	if (err)
+		handle->open--;
+
+	up(&dev->sem);
+
+	return err;
 }
 
 int input_flush_device(struct input_handle* handle, struct file* file)
@@ -235,10 +249,17 @@ int input_flush_device(struct input_handle* handle, struct file* file)
 
 void input_close_device(struct input_handle *handle)
 {
+	struct input_dev *dev = handle->dev;
+
 	input_release_device(handle);
-	if (handle->dev->close)
-		handle->dev->close(handle->dev);
+
+	down(&dev->sem);
+
+	if (!--dev->users && dev->close)
+		dev->close(dev);
 	handle->open--;
+
+	up(&dev->sem);
 }
 
 static void input_link_handle(struct input_handle *handle)
@@ -415,6 +436,8 @@ void input_register_device(struct input_dev *dev)
 
 	set_bit(EV_SYN, dev->evbit);
 
+	init_MUTEX(&dev->sem);
+
 	/*
 	 * If delay and period are pre-set by the driver, then autorepeating
 	 * is handled by the driver itself and we don't do it in input.c.
@@ -674,6 +697,8 @@ static int input_handlers_read(char *buf, char **start, off_t pos, int count, in
 	return (count > cnt) ? cnt : count;
 }
 
+static struct file_operations input_fileops;
+
 static int __init input_proc_init(void)
 {
 	struct proc_dir_entry *entry;
@@ -688,6 +713,8 @@ static int __init input_proc_init(void)
 		return -ENOMEM;
 	}
 	entry->owner = THIS_MODULE;
+	input_fileops = *entry->proc_fops;
+	entry->proc_fops = &input_fileops;
 	entry->proc_fops->poll = input_devices_poll;
 	entry = create_proc_read_entry("handlers", 0, proc_bus_input_dir, input_handlers_read, NULL);
 	if (entry == NULL) {

+ 91 - 25
drivers/input/joydev.c

@@ -285,48 +285,33 @@ static unsigned int joydev_poll(struct file *file, poll_table *wait)
 		(POLLIN | POLLRDNORM) : 0) | (list->joydev->exist ? 0 : (POLLHUP | POLLERR));
 }
 
-static int joydev_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+static int joydev_ioctl_common(struct joydev *joydev, unsigned int cmd, void __user *argp)
 {
-	struct joydev_list *list = file->private_data;
-	struct joydev *joydev = list->joydev;
 	struct input_dev *dev = joydev->handle.dev;
-	void __user *argp = (void __user *)arg;
 	int i, j;
 
-	if (!joydev->exist) return -ENODEV;
-
 	switch (cmd) {
 
 		case JS_SET_CAL:
 			return copy_from_user(&joydev->glue.JS_CORR, argp,
-				sizeof(struct JS_DATA_TYPE)) ? -EFAULT : 0;
+				sizeof(joydev->glue.JS_CORR)) ? -EFAULT : 0;
 		case JS_GET_CAL:
 			return copy_to_user(argp, &joydev->glue.JS_CORR,
-				sizeof(struct JS_DATA_TYPE)) ? -EFAULT : 0;
+				sizeof(joydev->glue.JS_CORR)) ? -EFAULT : 0;
 		case JS_SET_TIMEOUT:
-			return get_user(joydev->glue.JS_TIMEOUT, (int __user *) arg);
+			return get_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp);
 		case JS_GET_TIMEOUT:
-			return put_user(joydev->glue.JS_TIMEOUT, (int __user *) arg);
-		case JS_SET_TIMELIMIT:
-			return get_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg);
-		case JS_GET_TIMELIMIT:
-			return put_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg);
-		case JS_SET_ALL:
-			return copy_from_user(&joydev->glue, argp,
-						sizeof(struct JS_DATA_SAVE_TYPE)) ? -EFAULT : 0;
-		case JS_GET_ALL:
-			return copy_to_user(argp, &joydev->glue,
-						sizeof(struct JS_DATA_SAVE_TYPE)) ? -EFAULT : 0;
+			return put_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp);
 
 		case JSIOCGVERSION:
-			return put_user(JS_VERSION, (__u32 __user *) arg);
+			return put_user(JS_VERSION, (__u32 __user *) argp);
 		case JSIOCGAXES:
-			return put_user(joydev->nabs, (__u8 __user *) arg);
+			return put_user(joydev->nabs, (__u8 __user *) argp);
 		case JSIOCGBUTTONS:
-			return put_user(joydev->nkey, (__u8 __user *) arg);
+			return put_user(joydev->nkey, (__u8 __user *) argp);
 		case JSIOCSCORR:
 			if (copy_from_user(joydev->corr, argp,
-				      sizeof(struct js_corr) * joydev->nabs))
+				      sizeof(joydev->corr[0]) * joydev->nabs))
 			    return -EFAULT;
 			for (i = 0; i < joydev->nabs; i++) {
 				j = joydev->abspam[i];
@@ -335,7 +320,7 @@ static int joydev_ioctl(struct inode *inode, struct file *file, unsigned int cmd
 			return 0;
 		case JSIOCGCORR:
 			return copy_to_user(argp, joydev->corr,
-						sizeof(struct js_corr) * joydev->nabs) ? -EFAULT : 0;
+						sizeof(joydev->corr[0]) * joydev->nabs) ? -EFAULT : 0;
 		case JSIOCSAXMAP:
 			if (copy_from_user(joydev->abspam, argp, sizeof(__u8) * (ABS_MAX + 1)))
 				return -EFAULT;
@@ -371,6 +356,84 @@ static int joydev_ioctl(struct inode *inode, struct file *file, unsigned int cmd
 	return -EINVAL;
 }
 
+#ifdef CONFIG_COMPAT
+static long joydev_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct joydev_list *list = file->private_data;
+	struct joydev *joydev = list->joydev;
+	void __user *argp = (void __user *)arg;
+	s32 tmp32;
+	struct JS_DATA_SAVE_TYPE_32 ds32;
+	int err;
+
+	if (!joydev->exist) return -ENODEV;
+	switch(cmd) {
+	case JS_SET_TIMELIMIT:
+		err = get_user(tmp32, (s32 __user *) arg);
+		if (err == 0)
+			joydev->glue.JS_TIMELIMIT = tmp32;
+		break;
+	case JS_GET_TIMELIMIT:
+		tmp32 = joydev->glue.JS_TIMELIMIT;
+		err = put_user(tmp32, (s32 __user *) arg);
+		break;
+
+	case JS_SET_ALL:
+		err = copy_from_user(&ds32, argp,
+				     sizeof(ds32)) ? -EFAULT : 0;
+		if (err == 0) {
+			joydev->glue.JS_TIMEOUT    = ds32.JS_TIMEOUT;
+			joydev->glue.BUSY          = ds32.BUSY;
+			joydev->glue.JS_EXPIRETIME = ds32.JS_EXPIRETIME;
+			joydev->glue.JS_TIMELIMIT  = ds32.JS_TIMELIMIT;
+			joydev->glue.JS_SAVE       = ds32.JS_SAVE;
+			joydev->glue.JS_CORR       = ds32.JS_CORR;
+		}
+		break;
+
+	case JS_GET_ALL:
+		ds32.JS_TIMEOUT    = joydev->glue.JS_TIMEOUT;
+		ds32.BUSY          = joydev->glue.BUSY;
+		ds32.JS_EXPIRETIME = joydev->glue.JS_EXPIRETIME;
+		ds32.JS_TIMELIMIT  = joydev->glue.JS_TIMELIMIT;
+		ds32.JS_SAVE       = joydev->glue.JS_SAVE;
+		ds32.JS_CORR       = joydev->glue.JS_CORR;
+
+		err = copy_to_user(argp, &ds32,
+					  sizeof(ds32)) ? -EFAULT : 0;
+		break;
+
+	default:
+		err = joydev_ioctl_common(joydev, cmd, argp);
+	}
+	return err;
+}
+#endif /* CONFIG_COMPAT */
+
+static int joydev_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct joydev_list *list = file->private_data;
+	struct joydev *joydev = list->joydev;
+	void __user *argp = (void __user *)arg;
+
+	if (!joydev->exist) return -ENODEV;
+
+	switch(cmd) {
+		case JS_SET_TIMELIMIT:
+			return get_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg);
+		case JS_GET_TIMELIMIT:
+			return put_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg);
+		case JS_SET_ALL:
+			return copy_from_user(&joydev->glue, argp,
+						sizeof(joydev->glue)) ? -EFAULT : 0;
+		case JS_GET_ALL:
+			return copy_to_user(argp, &joydev->glue,
+						sizeof(joydev->glue)) ? -EFAULT : 0;
+		default:
+			return joydev_ioctl_common(joydev, cmd, argp);
+	}
+}
+
 static struct file_operations joydev_fops = {
 	.owner =	THIS_MODULE,
 	.read =		joydev_read,
@@ -379,6 +442,9 @@ static struct file_operations joydev_fops = {
 	.open =		joydev_open,
 	.release =	joydev_release,
 	.ioctl =	joydev_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl =	joydev_compat_ioctl,
+#endif
 	.fasync =	joydev_fasync,
 };
 

部分文件因文件數量過多而無法顯示