
Merge rsync://rsync.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6

Greg KH 20 years ago
parent
commit
8644d2a42b
100 files changed with 4305 additions and 2746 deletions
  1. 18 6
      Documentation/Changes
  2. 176 0
      Documentation/block/ioprio.txt
  3. 1 0
      Documentation/cciss.txt
  4. 1 1
      Documentation/kernel-parameters.txt
  5. 64 0
      Documentation/pcmcia/devicetable.txt
  6. 51 0
      Documentation/pcmcia/driver-changes.txt
  7. 1 1
      MAINTAINERS
  8. 4 1
      arch/arm/kernel/process.c
  9. 7 3
      arch/arm/kernel/time.c
  10. 1 0
      arch/arm/mach-aaec2000/Makefile.boot
  11. 4 4
      arch/arm/mach-omap/usb.c
  12. 65 6
      arch/arm/mm/init.c
  13. 7 80
      arch/arm/mm/mm-armv.c
  14. 66 3
      arch/arm/tools/mach-types
  15. 70 63
      arch/i386/kernel/kprobes.c
  16. 29 0
      arch/i386/kernel/process.c
  17. 2 0
      arch/i386/kernel/syscall_table.S
  18. 2 2
      arch/ia64/kernel/entry.S
  19. 124 4
      arch/ia64/kernel/kprobes.c
  20. 16 0
      arch/ia64/kernel/process.c
  21. 6 1
      arch/ia64/kernel/vmlinux.lds.S
  22. 1 1
      arch/mips/kernel/signal.c
  23. 2 0
      arch/ppc/kernel/misc.S
  24. 12 2
      arch/ppc/mm/init.c
  25. 2 2
      arch/ppc/platforms/pmac_sleep.S
  26. 4 4
      arch/ppc/platforms/pmac_time.c
  27. 1 0
      arch/ppc/platforms/sandpoint.c
  28. 19 7
      arch/ppc/syslib/open_pic.c
  29. 121 4
      arch/ppc64/kernel/kprobes.c
  30. 1 0
      arch/ppc64/kernel/ppc_ksyms.c
  31. 4 0
      arch/ppc64/kernel/process.c
  32. 1 0
      arch/ppc64/kernel/time.c
  33. 1 1
      arch/sparc64/kernel/auxio.c
  34. 4 112
      arch/sparc64/kernel/entry.S
  35. 50 121
      arch/sparc64/kernel/irq.c
  36. 8 4
      arch/sparc64/kernel/semaphore.c
  37. 0 1
      arch/sparc64/kernel/sparc64_ksyms.c
  38. 2 1
      arch/sparc64/kernel/trampoline.S
  39. 53 50
      arch/sparc64/lib/U1memcpy.S
  40. 13 2
      arch/sparc64/lib/VISsave.S
  41. 26 16
      arch/sparc64/lib/atomic.S
  42. 20 11
      arch/sparc64/lib/bitops.S
  43. 4 2
      arch/sparc64/lib/debuglocks.c
  44. 4 2
      arch/sparc64/lib/dec_and_lock.S
  45. 10 5
      arch/sparc64/lib/rwsem.S
  46. 4 2
      arch/sparc64/mm/init.c
  47. 2 1
      arch/sparc64/mm/ultra.S
  48. 66 170
      arch/x86_64/kernel/kprobes.c
  49. 29 0
      arch/x86_64/kernel/process.c
  50. 3 2
      drivers/block/as-iosched.c
  51. 7 10
      drivers/block/cciss.c
  52. 691 369
      drivers/block/cfq-iosched.c
  53. 2 1
      drivers/block/deadline-iosched.c
  54. 5 4
      drivers/block/elevator.c
  55. 36 23
      drivers/block/ll_rw_blk.c
  56. 9 1
      drivers/block/swim3.c
  57. 4 3
      drivers/block/sx8.c
  58. 9 0
      drivers/bluetooth/bluecard_cs.c
  59. 7 0
      drivers/bluetooth/bt3c_cs.c
  60. 7 0
      drivers/bluetooth/btuart_cs.c
  61. 8 0
      drivers/bluetooth/dtl1_cs.c
  62. 0 3
      drivers/char/misc.c
  63. 8 1
      drivers/char/pcmcia/synclink_cs.c
  64. 6 0
      drivers/ide/Kconfig
  65. 4 0
      drivers/ide/ide-disk.c
  66. 0 1
      drivers/ide/ide-dma.c
  67. 2 1
      drivers/ide/ide-iops.c
  68. 35 0
      drivers/ide/legacy/ide-cs.c
  69. 1 0
      drivers/ide/pci/Makefile
  70. 48 25
      drivers/ide/pci/generic.c
  71. 228 242
      drivers/ide/pci/hpt366.c
  72. 812 0
      drivers/ide/pci/it821x.c
  73. 5 5
      drivers/ide/pci/serverworks.c
  74. 4 4
      drivers/ide/ppc/pmac.c
  75. 5 5
      drivers/ieee1394/ohci1394.c
  76. 2 2
      drivers/infiniband/core/packer.c
  77. 13 5
      drivers/infiniband/core/sa_query.c
  78. 1 0
      drivers/infiniband/hw/mthca/mthca_av.c
  79. 227 304
      drivers/infiniband/hw/mthca/mthca_cmd.c
  80. 28 20
      drivers/infiniband/hw/mthca/mthca_cmd.h
  81. 49 52
      drivers/infiniband/hw/mthca/mthca_cq.c
  82. 10 2
      drivers/infiniband/hw/mthca/mthca_dev.h
  83. 1 0
      drivers/infiniband/hw/mthca/mthca_doorbell.h
  84. 27 31
      drivers/infiniband/hw/mthca/mthca_eq.c
  85. 12 20
      drivers/infiniband/hw/mthca/mthca_main.c
  86. 32 31
      drivers/infiniband/hw/mthca/mthca_mcg.c
  87. 9 1
      drivers/infiniband/hw/mthca/mthca_memfree.c
  88. 182 185
      drivers/infiniband/hw/mthca/mthca_mr.c
  89. 3 1
      drivers/infiniband/hw/mthca/mthca_provider.c
  90. 7 7
      drivers/infiniband/hw/mthca/mthca_provider.h
  91. 109 30
      drivers/infiniband/hw/mthca/mthca_qp.c
  92. 327 80
      drivers/input/evdev.c
  93. 0 14
      drivers/input/gameport/Kconfig
  94. 0 2
      drivers/input/gameport/Makefile
  95. 0 322
      drivers/input/gameport/cs461x.c
  96. 12 19
      drivers/input/gameport/gameport.c
  97. 6 6
      drivers/input/gameport/ns558.c
  98. 0 186
      drivers/input/gameport/vortex.c
  99. 32 5
      drivers/input/input.c
  100. 91 25
      drivers/input/joydev.c

+ 18 - 6
Documentation/Changes

@@ -44,9 +44,9 @@ running, the suggested command should tell you.
 
 Again, keep in mind that this list assumes you are already
 functionally running a Linux 2.4 kernel.  Also, not all tools are
-necessary on all systems; obviously, if you don't have any PCMCIA (PC
-Card) hardware, for example, you probably needn't concern yourself
-with pcmcia-cs.
+necessary on all systems; obviously, if you don't have any ISDN
+hardware, for example, you probably needn't concern yourself with
+isdn4k-utils.
 
 o  Gnu C                  2.95.3                  # gcc --version
 o  Gnu make               3.79.1                  # make --version
@@ -57,6 +57,7 @@ o  e2fsprogs              1.29                    # tune2fs
 o  jfsutils               1.1.3                   # fsck.jfs -V
 o  reiserfsprogs          3.6.3                   # reiserfsck -V 2>&1|grep reiserfsprogs
 o  xfsprogs               2.6.0                   # xfs_db -V
+o  pcmciautils            001
 o  pcmcia-cs              3.1.21                  # cardmgr -V
 o  quota-tools            3.09                    # quota -V
 o  PPP                    2.4.0                   # pppd --version
@@ -186,13 +187,20 @@ architecture independent and any version from 2.0.0 onward should
 work correctly with this version of the XFS kernel code (2.6.0 or
 later is recommended, due to some significant improvements).
 
+PCMCIAutils
+-----------
+
+PCMCIAutils replaces pcmcia-cs (see below). It properly sets up
+PCMCIA sockets at system startup and loads the appropriate modules
+for 16-bit PCMCIA devices if the kernel is modularized and the hotplug
+subsystem is used.
 
 Pcmcia-cs
 ---------
 
 PCMCIA (PC Card) support is now partially implemented in the main
-kernel source.  Pay attention when you recompile your kernel ;-).
-Also, be sure to upgrade to the latest pcmcia-cs release.
+kernel source. The "pcmciautils" package (see above) replaces pcmcia-cs
+for newest kernels.
 
 Quota-tools
 -----------
@@ -349,9 +357,13 @@ Xfsprogs
 --------
 o  <ftp://oss.sgi.com/projects/xfs/download/>
 
+Pcmciautils
+-----------
+o  <ftp://ftp.kernel.org/pub/linux/utils/kernel/pcmcia/>
+
 Pcmcia-cs
 ---------
-o  <ftp://pcmcia-cs.sourceforge.net/pub/pcmcia-cs/pcmcia-cs-3.1.21.tar.gz>
+o  <http://pcmcia-cs.sourceforge.net/>
 
 Quota-tools
 ----------

+ 176 - 0
Documentation/block/ioprio.txt

@@ -0,0 +1,176 @@
+Block io priorities
+===================
+
+
+Intro
+-----
+
+With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
+priorities are supported for reads on files. This enables users to io nice
+processes or process groups, similar to what has been possible with cpu
+scheduling for ages. This document mainly details the current possibilities
+with cfq; other io schedulers do not support io priorities so far.
+
+Scheduling classes
+------------------
+
+CFQ implements three generic scheduling classes that determine how io is
+served for a process.
+
+IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
+higher priority than any other in the system, processes from this class are
+given first access to the disk every time. Thus it needs to be used with some
+care, one io RT process can starve the entire system. Within the RT class,
+there are 8 levels of class data that determine exactly how much time this
+process needs the disk for on each service. In the future this might change
+to be more directly mappable to performance, by passing in a wanted data
+rate instead.
+
+IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
+for any process that hasn't set a specific io priority. The class data
+determines how much io bandwidth the process will get, it's directly mappable
+to the cpu nice levels just more coarsely implemented. 0 is the highest
+BE prio level, 7 is the lowest. The mapping between cpu nice level and io
+nice level is determined as: io_nice = (cpu_nice + 20) / 5.
+
+IOPRIO_CLASS_IDLE: This is the idle scheduling class, processes running at this
+level only get io time when no one else needs the disk. The idle class has no
+class data, since it doesn't really apply here.
+
+Tools
+-----
+
+See below for a sample ionice tool. Usage:
+
+# ionice -c<class> -n<level> -p<pid>
+
+If pid isn't given, the current process is assumed. IO priority settings
+are inherited on fork, so you can use ionice to start the process at a given
+level:
+
+# ionice -c2 -n0 /bin/ls
+
+will run ls at the best-effort scheduling class at the highest priority.
+For a running process, you can give the pid instead:
+
+# ionice -c1 -n2 -p100
+
+will change pid 100 to run at the realtime scheduling class, at priority 2.
+
+---> snip ionice.c tool <---
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <asm/unistd.h>
+
+extern int sys_ioprio_set(int, int, int);
+extern int sys_ioprio_get(int, int);
+
+#if defined(__i386__)
+#define __NR_ioprio_set		289
+#define __NR_ioprio_get		290
+#elif defined(__ppc__)
+#define __NR_ioprio_set		273
+#define __NR_ioprio_get		274
+#elif defined(__x86_64__)
+#define __NR_ioprio_set		251
+#define __NR_ioprio_get		252
+#elif defined(__ia64__)
+#define __NR_ioprio_set		1274
+#define __NR_ioprio_get		1275
+#else
+#error "Unsupported arch"
+#endif
+
+_syscall3(int, ioprio_set, int, which, int, who, int, ioprio);
+_syscall2(int, ioprio_get, int, which, int, who);
+
+enum {
+	IOPRIO_CLASS_NONE,
+	IOPRIO_CLASS_RT,
+	IOPRIO_CLASS_BE,
+	IOPRIO_CLASS_IDLE,
+};
+
+enum {
+	IOPRIO_WHO_PROCESS = 1,
+	IOPRIO_WHO_PGRP,
+	IOPRIO_WHO_USER,
+};
+
+#define IOPRIO_CLASS_SHIFT	13
+
+const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
+
+int main(int argc, char *argv[])
+{
+	int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
+	int c, pid = 0;
+
+	while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
+		switch (c) {
+		case 'n':
+			ioprio = strtol(optarg, NULL, 10);
+			set = 1;
+			break;
+		case 'c':
+			ioprio_class = strtol(optarg, NULL, 10);
+			set = 1;
+			break;
+		case 'p':
+			pid = strtol(optarg, NULL, 10);
+			break;
+		}
+	}
+
+	switch (ioprio_class) {
+		case IOPRIO_CLASS_NONE:
+			ioprio_class = IOPRIO_CLASS_BE;
+			break;
+		case IOPRIO_CLASS_RT:
+		case IOPRIO_CLASS_BE:
+			break;
+		case IOPRIO_CLASS_IDLE:
+			ioprio = 7;
+			break;
+		default:
+			printf("bad prio class %d\n", ioprio_class);
+			return 1;
+	}
+
+	if (!set) {
+		if (!pid && argv[optind])
+			pid = strtol(argv[optind], NULL, 10);
+
+		ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
+
+		printf("pid=%d, %d\n", pid, ioprio);
+
+		if (ioprio == -1)
+			perror("ioprio_get");
+		else {
+			ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
+			ioprio = ioprio & 0xff;
+			printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
+		}
+	} else {
+		if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
+			perror("ioprio_set");
+			return 1;
+		}
+
+		if (argv[optind])
+			execvp(argv[optind], &argv[optind]);
+	}
+
+	return 0;
+}
+
+---> snip ionice.c tool <---
+
+
+March 11 2005, Jens Axboe <axboe@suse.de>
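
For reference, the best-effort mapping and the ioprio encoding described in the text above can be checked with a few lines of userspace C. This is an illustrative sketch only, not part of the patch; the two constants simply mirror the values used in the sample ionice.c.

/* sketch: compose an ioprio value and map a CPU nice level to an io nice level */
#include <stdio.h>

#define IOPRIO_CLASS_SHIFT	13
#define IOPRIO_CLASS_BE		2

static int ioprio_value(int class, int data)
{
	/* same encoding the sample ionice.c passes to ioprio_set() */
	return (class << IOPRIO_CLASS_SHIFT) | data;
}

static int cpu_nice_to_io_nice(int cpu_nice)
{
	/* formula from the text: io_nice = (cpu_nice + 20) / 5, giving 0..7 */
	return (cpu_nice + 20) / 5;
}

int main(void)
{
	int nice = -10;
	int io_nice = cpu_nice_to_io_nice(nice);	/* 2 */

	printf("nice %d -> BE io nice %d -> ioprio 0x%x\n",
	       nice, io_nice, ioprio_value(IOPRIO_CLASS_BE, io_nice));
	return 0;
}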

+ 1 - 0
Documentation/cciss.txt

@@ -17,6 +17,7 @@ This driver is known to work with the following cards:
 	* SA P600
 	* SA P800
 	* SA E400
+	* SA E300
 
 If nodes are not already created in the /dev/cciss directory, run as root:
 

+ 1 - 1
Documentation/kernel-parameters.txt

@@ -1119,7 +1119,7 @@ running once the system is up.
 			See Documentation/ramdisk.txt.
 
 	psmouse.proto=  [HW,MOUSE] Highest PS2 mouse protocol extension to
-			probe for (bare|imps|exps).
+			probe for (bare|imps|exps|lifebook|any).
 	psmouse.rate=	[HW,MOUSE] Set desired mouse report rate, in reports
 			per second.
 	psmouse.resetafter=

+ 64 - 0
Documentation/pcmcia/devicetable.txt

@@ -0,0 +1,64 @@
+Matching of PCMCIA devices to drivers is done using one or more of the
+following criteria:
+
+- manufacturer ID
+- card ID
+- product ID strings _and_ hashes of these strings
+- function ID
+- device function (actual and pseudo)
+
+You should use the helpers in include/pcmcia/device_id.h for generating the
+struct pcmcia_device_id[] entries which match devices to drivers.
+
+If you want to match product ID strings, you also need to pass the crc32
+hashes of the string to the macro, e.g. if you want to match the product ID
+string 1, you need to use
+
+PCMCIA_DEVICE_PROD_ID1("some_string", 0x(hash_of_some_string)),
+
+If the hash is incorrect, the kernel will inform you about this in "dmesg"
+upon module initialization, and tell you of the correct hash.
+
+You can determine the hash of the product ID strings by running
+"pcmcia-modalias %n.%m" [%n being replaced with the socket number and %m being
+replaced with the device function] from pcmciautils. It generates a string
+in the following form:
+pcmcia:m0149cC1ABf06pfn00fn00pa725B842DpbF1EFEE84pc0877B627pd00000000
+
+The hex value after "pa" is the hash of product ID string 1, after "pb" for
+string 2 and so on.
+
+Alternatively, you can use this small tool to determine the crc32 hash.
+simply pass the string you want to evaluate as argument to this program,
+e.g.
+$ ./crc32hash "Dual Speed"
+
+-------------------------------------------------------------------------
+/* crc32hash.c - derived from linux/lib/crc32.c, GNU GPL v2 */
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+unsigned int crc32(unsigned char const *p, unsigned int len)
+{
+	int i;
+	unsigned int crc = 0;
+	while (len--) {
+		crc ^= *p++;
+		for (i = 0; i < 8; i++)
+			crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
+	}
+	return crc;
+}
+
+int main(int argc, char **argv) {
+	unsigned int result;
+	if (argc != 2) {
+		printf("no string passed as argument\n");
+		return -1;
+	}
+	result = crc32(argv[1], strlen(argv[1]));
+	printf("0x%x\n", result);
+	return 0;
+}
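
To illustrate how the helpers described above are typically used, a hypothetical driver's match table might look like the sketch below. The manufacturer/card IDs are taken from the example modalias string above; the product-ID string and its hash are placeholders, not real values, and the table is not part of the patch.

/* illustrative sketch of a pcmcia_device_id table built with the helpers
 * from include/pcmcia/device_id.h; IDs and hash below are placeholders */
#include <pcmcia/device_id.h>
#include <linux/module.h>

static struct pcmcia_device_id example_ids[] = {
	/* match on manufacturer ID 0x0149, card ID 0xc1ab (cf. modalias above) */
	PCMCIA_DEVICE_MANF_CARD(0x0149, 0xc1ab),
	/* match on product ID string 1 plus its crc32 hash (see crc32hash.c) */
	PCMCIA_DEVICE_PROD_ID1("Some Product", 0x12345678),
	PCMCIA_DEVICE_NULL,
};
MODULE_DEVICE_TABLE(pcmcia, example_ids);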

+ 51 - 0
Documentation/pcmcia/driver-changes.txt

@@ -0,0 +1,51 @@
+This file details changes in 2.6 which affect PCMCIA card driver authors:
+
+* in-kernel device<->driver matching
+   PCMCIA devices and their correct drivers can now be matched in
+   kernelspace. See 'devicetable.txt' for details.
+
+* Device model integration (as of 2.6.11)
+   A struct pcmcia_device is registered with the device model core,
+   and can be used (e.g. for SET_NETDEV_DEV) by using
+   handle_to_dev(client_handle_t * handle).
+
+* Convert internal I/O port addresses to unsigned long (as of 2.6.11)
+   ioaddr_t should be replaced by kio_addr_t in PCMCIA card drivers.
+
+* irq_mask and irq_list parameters (as of 2.6.11)
+   The irq_mask and irq_list parameters should no longer be used in
+   PCMCIA card drivers. Instead, it is the job of the PCMCIA core to
+   determine which IRQ should be used. Therefore, link->irq.IRQInfo2
+   is ignored.
+
+* client->PendingEvents is gone (as of 2.6.11)
+   client->PendingEvents is no longer available.
+
+* client->Attributes are gone (as of 2.6.11)
+   client->Attributes is unused, therefore it is removed from all
+   PCMCIA card drivers
+
+* core functions no longer available (as of 2.6.11)
+   The following functions have been removed from the kernel source
+   because they are unused by all in-kernel drivers, and no external
+   driver was reported to rely on them:
+	pcmcia_get_first_region()
+	pcmcia_get_next_region()
+	pcmcia_modify_window()
+	pcmcia_set_event_mask()
+	pcmcia_get_first_window()
+	pcmcia_get_next_window()
+
+* device list iteration upon module removal (as of 2.6.10)
+   It is no longer necessary to iterate on the driver's internal
+   client list and call the ->detach() function upon module removal.
+
+* Resource management. (as of 2.6.8)
+   Although the PCMCIA subsystem will allocate resources for cards,
+   it no longer marks these resources busy. This means that driver
+   authors are now responsible for claiming your resources as per
+   other drivers in Linux. You should use request_region() to mark
+   your IO regions in-use, and request_mem_region() to mark your
+   memory regions in-use. The name argument should be a pointer to
+   your driver name. Eg, for pcnet_cs, name should point to the
+   string "pcnet_cs".
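
The resource-management rule in the last item above amounts to the driver claiming and releasing its own I/O window. A minimal sketch follows (not from the patch; io_base and io_len are hypothetical values a real driver would take from its card configuration).

#include <linux/ioport.h>
#include <linux/errno.h>

static int example_claim_io(unsigned long io_base, unsigned long io_len)
{
	/* mark the ports busy under this driver's name, as described above */
	if (!request_region(io_base, io_len, "pcnet_cs"))
		return -EBUSY;		/* someone else already owns the ports */
	return 0;
}

static void example_release_io(unsigned long io_base, unsigned long io_len)
{
	release_region(io_base, io_len);
}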

+ 1 - 1
MAINTAINERS

@@ -1149,7 +1149,7 @@ S:	Maintained
 
 INFINIBAND SUBSYSTEM
 P:	Roland Dreier
-M:	roland@topspin.com
+M:	rolandd@cisco.com
 P:	Sean Hefty
 M:	mshefty@ichips.intel.com
 P:	Hal Rosenstock

+ 4 - 1
arch/arm/kernel/process.c

@@ -32,6 +32,7 @@
 #include <asm/leds.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
+#include <asm/mach/time.h>
 
 extern const char *processor_modes[];
 extern void setup_mm_for_reboot(char mode);
@@ -85,8 +86,10 @@ EXPORT_SYMBOL(pm_power_off);
 void default_idle(void)
 {
 	local_irq_disable();
-	if (!need_resched() && !hlt_counter)
+	if (!need_resched() && !hlt_counter) {
+		timer_dyn_reprogram();
 		arch_idle();
+	}
 	local_irq_enable();
 }
 

+ 7 - 3
arch/arm/kernel/time.c

@@ -424,15 +424,19 @@ static int timer_dyn_tick_disable(void)
 	return ret;
 }
 
+/*
+ * Reprogram the system timer for at least the calculated time interval.
+ * This function should be called from the idle thread with IRQs disabled,
+ * immediately before sleeping.
+ */
 void timer_dyn_reprogram(void)
 {
 	struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick;
-	unsigned long flags;
 
-	write_seqlock_irqsave(&xtime_lock, flags);
+	write_seqlock(&xtime_lock);
 	if (dyn_tick->state & DYN_TICK_ENABLED)
 		dyn_tick->reprogram(next_timer_interrupt() - jiffies);
-	write_sequnlock_irqrestore(&xtime_lock, flags);
+	write_sequnlock(&xtime_lock);
 }
 
 static ssize_t timer_show_dyn_tick(struct sys_device *dev, char *buf)

+ 1 - 0
arch/arm/mach-aaec2000/Makefile.boot

@@ -0,0 +1 @@
+	zreladdr-y := 0xf0008000

+ 4 - 4
arch/arm/mach-omap/usb.c

@@ -288,8 +288,8 @@ static void usb_release(struct device *dev)
 static struct resource udc_resources[] = {
 	/* order is significant! */
 	{		/* registers */
-		.start		= IO_ADDRESS(UDC_BASE),
-		.end		= IO_ADDRESS(UDC_BASE + 0xff),
+		.start		= UDC_BASE,
+		.end		= UDC_BASE + 0xff,
 		.flags		= IORESOURCE_MEM,
 	}, {		/* general IRQ */
 		.start		= IH2_BASE + 20,
@@ -355,8 +355,8 @@ static struct platform_device ohci_device = {
 static struct resource otg_resources[] = {
 	/* order is significant! */
 	{
-		.start		= IO_ADDRESS(OTG_BASE),
-		.end		= IO_ADDRESS(OTG_BASE + 0xff),
+		.start		= OTG_BASE,
+		.end		= OTG_BASE + 0xff,
 		.flags		= IORESOURCE_MEM,
 	}, {
 		.start		= IH2_BASE + 8,

+ 65 - 6
arch/arm/mm/init.c

@@ -522,6 +522,69 @@ static inline void free_area(unsigned long addr, unsigned long end, char *s)
 		printk(KERN_INFO "Freeing %s memory: %dK\n", s, size);
 		printk(KERN_INFO "Freeing %s memory: %dK\n", s, size);
 }
 }
 
 
+static inline void
+free_memmap(int node, unsigned long start_pfn, unsigned long end_pfn)
+{
+	struct page *start_pg, *end_pg;
+	unsigned long pg, pgend;
+
+	/*
+	 * Convert start_pfn/end_pfn to a struct page pointer.
+	 */
+	start_pg = pfn_to_page(start_pfn);
+	end_pg = pfn_to_page(end_pfn);
+
+	/*
+	 * Convert to physical addresses, and
+	 * round start upwards and end downwards.
+	 */
+	pg = PAGE_ALIGN(__pa(start_pg));
+	pgend = __pa(end_pg) & PAGE_MASK;
+
+	/*
+	 * If there are free pages between these,
+	 * free the section of the memmap array.
+	 */
+	if (pg < pgend)
+		free_bootmem_node(NODE_DATA(node), pg, pgend - pg);
+}
+
+/*
+ * The mem_map array can get very big.  Free the unused area of the memory map.
+ */
+static void __init free_unused_memmap_node(int node, struct meminfo *mi)
+{
+	unsigned long bank_start, prev_bank_end = 0;
+	unsigned int i;
+
+	/*
+	 * [FIXME] This relies on each bank being in address order.  This
+	 * may not be the case, especially if the user has provided the
+	 * information on the command line.
+	 */
+	for (i = 0; i < mi->nr_banks; i++) {
+		if (mi->bank[i].size == 0 || mi->bank[i].node != node)
+			continue;
+
+		bank_start = mi->bank[i].start >> PAGE_SHIFT;
+		if (bank_start < prev_bank_end) {
+			printk(KERN_ERR "MEM: unordered memory banks.  "
+				"Not freeing memmap.\n");
+			break;
+		}
+
+		/*
+		 * If we had a previous bank, and there is a space
+		 * between the current bank and the previous, free it.
+		 */
+		if (prev_bank_end && prev_bank_end != bank_start)
+			free_memmap(node, prev_bank_end, bank_start);
+
+		prev_bank_end = (mi->bank[i].start +
+				 mi->bank[i].size) >> PAGE_SHIFT;
+	}
+}
+
 /*
 /*
  * mem_init() marks the free areas in the mem_map and tells us how much
  * mem_init() marks the free areas in the mem_map and tells us how much
  * memory is free.  This is done after various parts of the system have
  * memory is free.  This is done after various parts of the system have
@@ -540,16 +603,12 @@ void __init mem_init(void)
 	max_mapnr   = virt_to_page(high_memory) - mem_map;
 	max_mapnr   = virt_to_page(high_memory) - mem_map;
 #endif
 #endif
 
 
-	/*
-	 * We may have non-contiguous memory.
-	 */
-	if (meminfo.nr_banks != 1)
-		create_memmap_holes(&meminfo);
-
 	/* this will put all unused low memory onto the freelists */
 	/* this will put all unused low memory onto the freelists */
 	for_each_online_node(node) {
 	for_each_online_node(node) {
 		pg_data_t *pgdat = NODE_DATA(node);
 		pg_data_t *pgdat = NODE_DATA(node);
 
 
+		free_unused_memmap_node(node, &meminfo);
+
 		if (pgdat->node_spanned_pages != 0)
 		if (pgdat->node_spanned_pages != 0)
 			totalram_pages += free_all_bootmem_node(pgdat);
 			totalram_pages += free_all_bootmem_node(pgdat);
 	}
 	}

+ 7 - 80
arch/arm/mm/mm-armv.c

@@ -169,7 +169,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 
 
 	memzero(new_pgd, FIRST_KERNEL_PGD_NR * sizeof(pgd_t));
 	memzero(new_pgd, FIRST_KERNEL_PGD_NR * sizeof(pgd_t));
 
 
+	/*
+	 * Copy over the kernel and IO PGD entries
+	 */
 	init_pgd = pgd_offset_k(0);
 	init_pgd = pgd_offset_k(0);
+	memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR,
+		       (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));
+
+	clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
 
 
 	if (!vectors_high()) {
 	if (!vectors_high()) {
 		/*
 		/*
@@ -198,14 +205,6 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 		spin_unlock(&mm->page_table_lock);
 		spin_unlock(&mm->page_table_lock);
 	}
 	}
 
 
-	/*
-	 * Copy over the kernel and IO PGD entries
-	 */
-	memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR,
-		       (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));
-
-	clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
-
 	return new_pgd;
 	return new_pgd;
 
 
 no_pte:
 no_pte:
@@ -698,75 +697,3 @@ void __init iotable_init(struct map_desc *io_desc, int nr)
 	for (i = 0; i < nr; i++)
 	for (i = 0; i < nr; i++)
 		create_mapping(io_desc + i);
 		create_mapping(io_desc + i);
 }
 }
-
-static inline void
-free_memmap(int node, unsigned long start_pfn, unsigned long end_pfn)
-{
-	struct page *start_pg, *end_pg;
-	unsigned long pg, pgend;
-
-	/*
-	 * Convert start_pfn/end_pfn to a struct page pointer.
-	 */
-	start_pg = pfn_to_page(start_pfn);
-	end_pg = pfn_to_page(end_pfn);
-
-	/*
-	 * Convert to physical addresses, and
-	 * round start upwards and end downwards.
-	 */
-	pg = PAGE_ALIGN(__pa(start_pg));
-	pgend = __pa(end_pg) & PAGE_MASK;
-
-	/*
-	 * If there are free pages between these,
-	 * free the section of the memmap array.
-	 */
-	if (pg < pgend)
-		free_bootmem_node(NODE_DATA(node), pg, pgend - pg);
-}
-
-static inline void free_unused_memmap_node(int node, struct meminfo *mi)
-{
-	unsigned long bank_start, prev_bank_end = 0;
-	unsigned int i;
-
-	/*
-	 * [FIXME] This relies on each bank being in address order.  This
-	 * may not be the case, especially if the user has provided the
-	 * information on the command line.
-	 */
-	for (i = 0; i < mi->nr_banks; i++) {
-		if (mi->bank[i].size == 0 || mi->bank[i].node != node)
-			continue;
-
-		bank_start = mi->bank[i].start >> PAGE_SHIFT;
-		if (bank_start < prev_bank_end) {
-			printk(KERN_ERR "MEM: unordered memory banks.  "
-				"Not freeing memmap.\n");
-			break;
-		}
-
-		/*
-		 * If we had a previous bank, and there is a space
-		 * between the current bank and the previous, free it.
-		 */
-		if (prev_bank_end && prev_bank_end != bank_start)
-			free_memmap(node, prev_bank_end, bank_start);
-
-		prev_bank_end = PAGE_ALIGN(mi->bank[i].start +
-					   mi->bank[i].size) >> PAGE_SHIFT;
-	}
-}
-
-/*
- * The mem_map array can get very big.  Free
- * the unused area of the memory map.
- */
-void __init create_memmap_holes(struct meminfo *mi)
-{
-	int node;
-
-	for_each_online_node(node)
-		free_unused_memmap_node(node, mi);
-}

+ 66 - 3
arch/arm/tools/mach-types

@@ -6,7 +6,7 @@
 # To add an entry into this database, please see Documentation/arm/README,
 # or contact rmk@arm.linux.org.uk
 #
-# Last update: Thu Mar 24 14:34:50 2005
+# Last update: Thu Jun 23 20:19:33 2005
 #
 # machine_is_xxx	CONFIG_xxxx		MACH_TYPE_xxx		number
 #
@@ -243,7 +243,7 @@ yoho			ARCH_YOHO		YOHO			231
 jasper			ARCH_JASPER		JASPER			232
 dsc25			ARCH_DSC25		DSC25			233
 omap_innovator		MACH_OMAP_INNOVATOR	OMAP_INNOVATOR		234
-ramses			ARCH_RAMSES		RAMSES			235
+mnci			ARCH_RAMSES		RAMSES			235
 s28x			ARCH_S28X		S28X			236
 mport3			ARCH_MPORT3		MPORT3			237
 pxa_eagle250		ARCH_PXA_EAGLE250	PXA_EAGLE250		238
@@ -323,7 +323,7 @@ nimbra29x		ARCH_NIMBRA29X		NIMBRA29X		311
 nimbra210		ARCH_NIMBRA210		NIMBRA210		312
 hhp_d95xx		ARCH_HHP_D95XX		HHP_D95XX		313
 labarm			ARCH_LABARM		LABARM			314
-m825xx			ARCH_M825XX		M825XX			315
+comcerto		ARCH_M825XX		M825XX			315
 m7100			SA1100_M7100		M7100			316
 nipc2			ARCH_NIPC2		NIPC2			317
 fu7202			ARCH_FU7202		FU7202			318
@@ -724,3 +724,66 @@ lpc22xx			MACH_LPC22XX		LPC22XX			715
 omap_comet3		MACH_COMET3		COMET3			716
 omap_comet4		MACH_COMET4		COMET4			717
 csb625			MACH_CSB625		CSB625			718
+fortunet2		MACH_FORTUNET2		FORTUNET2		719
+s5h2200			MACH_S5H2200		S5H2200			720
+optorm920		MACH_OPTORM920		OPTORM920		721
+adsbitsyxb		MACH_ADSBITSYXB		ADSBITSYXB		722
+adssphere		MACH_ADSSPHERE		ADSSPHERE		723
+adsportal		MACH_ADSPORTAL		ADSPORTAL		724
+ln2410sbc		MACH_LN2410SBC		LN2410SBC		725
+cb3rufc			MACH_CB3RUFC		CB3RUFC			726
+mp2usb			MACH_MP2USB		MP2USB			727
+ntnp425c		MACH_NTNP425C		NTNP425C		728
+colibri			MACH_COLIBRI		COLIBRI			729
+pcm7220			MACH_PCM7220		PCM7220			730
+gateway7001		MACH_GATEWAY7001	GATEWAY7001		731
+pcm027			MACH_PCM027		PCM027			732
+cmpxa			MACH_CMPXA		CMPXA			733
+anubis			MACH_ANUBIS		ANUBIS			734
+ite8152			MACH_ITE8152		ITE8152			735
+lpc3xxx			MACH_LPC3XXX		LPC3XXX			736
+puppeteer		MACH_PUPPETEER		PUPPETEER		737
+vt001			MACH_MACH_VADATECH	MACH_VADATECH		738
+e570			MACH_E570		E570			739
+x50			MACH_X50		X50			740
+recon			MACH_RECON		RECON			741
+xboardgp8		MACH_XBOARDGP8		XBOARDGP8		742
+fpic2			MACH_FPIC2		FPIC2			743
+akita			MACH_AKITA		AKITA			744
+a81			MACH_A81		A81			745
+svm_sc25x		MACH_SVM_SC25X		SVM_SC25X		746
+vt020			MACH_VADATECH020	VADATECH020		747
+tli			MACH_TLI		TLI			748
+edb9315lc		MACH_EDB9315LC		EDB9315LC		749
+passec			MACH_PASSEC		PASSEC			750
+ds_tiger		MACH_DS_TIGER		DS_TIGER		751
+e310			MACH_E310		E310			752
+e330			MACH_E330		E330			753
+rt3000			MACH_RT3000		RT3000			754
+nokia770		MACH_NOKIA770		NOKIA770		755
+pnx0106			MACH_PNX0106		PNX0106			756
+hx21xx			MACH_HX21XX		HX21XX			757
+faraday			MACH_FARADAY		FARADAY			758
+sbc9312			MACH_SBC9312		SBC9312			759
+batman			MACH_BATMAN		BATMAN			760
+jpd201			MACH_JPD201		JPD201			761
+mipsa			MACH_MIPSA		MIPSA			762
+kacom			MACH_KACOM		KACOM			763
+swarcocpu		MACH_SWARCOCPU		SWARCOCPU		764
+swarcodsl		MACH_SWARCODSL		SWARCODSL		765
+blueangel		MACH_BLUEANGEL		BLUEANGEL		766
+hairygrama		MACH_HAIRYGRAMA		HAIRYGRAMA		767
+banff			MACH_BANFF		BANFF			768
+carmeva			MACH_CARMEVA		CARMEVA			769
+sam255			MACH_SAM255		SAM255			770
+ppm10			MACH_PPM10		PPM10			771
+edb9315a		MACH_EDB9315A		EDB9315A		772
+sunset			MACH_SUNSET		SUNSET			773
+stargate2		MACH_STARGATE2		STARGATE2		774
+intelmote2		MACH_INTELMOTE2		INTELMOTE2		775
+trizeps4		MACH_TRIZEPS4		TRIZEPS4		776
+mainstone2		MACH_MAINSTONE2		MAINSTONE2		777
+ez_ixp42x		MACH_EZ_IXP42X		EZ_IXP42X		778
+tapwave_zodiac		MACH_TAPWAVE_ZODIAC	TAPWAVE_ZODIAC		779
+universalmeter		MACH_UNIVERSALMETER	UNIVERSALMETER		780
+hicoarm9		MACH_HICOARM9		HICOARM9		781

+ 70 - 63
arch/i386/kernel/kprobes.c

@@ -127,48 +127,23 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 		regs->eip = (unsigned long)&p->ainsn.insn;
 		regs->eip = (unsigned long)&p->ainsn.insn;
 }
 }
 
 
-struct task_struct  *arch_get_kprobe_task(void *ptr)
-{
-	return ((struct thread_info *) (((unsigned long) ptr) &
-					(~(THREAD_SIZE -1))))->task;
-}
-
 void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
 void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
 {
 {
 	unsigned long *sara = (unsigned long *)&regs->esp;
 	unsigned long *sara = (unsigned long *)&regs->esp;
-	struct kretprobe_instance *ri;
-	static void *orig_ret_addr;
+        struct kretprobe_instance *ri;
+
+        if ((ri = get_free_rp_inst(rp)) != NULL) {
+                ri->rp = rp;
+                ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *) *sara;
 
 
-	/*
-	 * Save the return address when the return probe hits
-	 * the first time, and use it to populate the (krprobe
-	 * instance)->ret_addr for subsequent return probes at
-	 * the same addrress since stack address would have
-	 * the kretprobe_trampoline by then.
-	 */
-	if (((void*) *sara) != kretprobe_trampoline)
-		orig_ret_addr = (void*) *sara;
-
-	if ((ri = get_free_rp_inst(rp)) != NULL) {
-		ri->rp = rp;
-		ri->stack_addr = sara;
-		ri->ret_addr = orig_ret_addr;
-		add_rp_inst(ri);
 		/* Replace the return addr with trampoline addr */
 		/* Replace the return addr with trampoline addr */
 		*sara = (unsigned long) &kretprobe_trampoline;
 		*sara = (unsigned long) &kretprobe_trampoline;
-	} else {
-		rp->nmissed++;
-	}
-}
 
 
-void arch_kprobe_flush_task(struct task_struct *tk)
-{
-	struct kretprobe_instance *ri;
-	while ((ri = get_rp_inst_tsk(tk)) != NULL) {
-		*((unsigned long *)(ri->stack_addr)) =
-					(unsigned long) ri->ret_addr;
-		recycle_rp_inst(ri);
-	}
+                add_rp_inst(ri);
+        } else {
+                rp->nmissed++;
+        }
 }
 }
 
 
 /*
 /*
@@ -286,36 +261,59 @@ no_kprobe:
  */
  */
 int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
 {
-	struct task_struct *tsk;
-	struct kretprobe_instance *ri;
-	struct hlist_head *head;
-	struct hlist_node *node;
-	unsigned long *sara = ((unsigned long *) &regs->esp) - 1;
-
-	tsk = arch_get_kprobe_task(sara);
-	head = kretprobe_inst_table_head(tsk);
-
-	hlist_for_each_entry(ri, node, head, hlist) {
-		if (ri->stack_addr == sara && ri->rp) {
-			if (ri->rp->handler)
-				ri->rp->handler(ri, regs);
-		}
-	}
-	return 0;
-}
+        struct kretprobe_instance *ri = NULL;
+        struct hlist_head *head;
+        struct hlist_node *node, *tmp;
+	unsigned long orig_ret_address = 0;
+	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
 
-void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs,
-						unsigned long flags)
-{
-	struct kretprobe_instance *ri;
-	/* RA already popped */
-	unsigned long *sara = ((unsigned long *)&regs->esp) - 1;
+        head = kretprobe_inst_table_head(current);
 
 
-	while ((ri = get_rp_inst(sara))) {
-		regs->eip = (unsigned long)ri->ret_addr;
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more then one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+         *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+                if (ri->task != current)
+			/* another task is sharing our hash bucket */
+                        continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
 		recycle_rp_inst(ri);
 		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
 	}
 	}
-	regs->eflags &= ~TF_MASK;
+
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->eip = orig_ret_address;
+
+	unlock_kprobes();
+	preempt_enable_no_resched();
+
+        /*
+         * By returning a non-zero value, we are telling
+         * kprobe_handler() that we have handled unlocking
+         * and re-enabling preemption.
+         */
+        return 1;
 }
 }
 
 
 /*
 /*
@@ -403,8 +401,7 @@ static inline int post_kprobe_handler(struct pt_regs *regs)
 		current_kprobe->post_handler(current_kprobe, regs, 0);
 		current_kprobe->post_handler(current_kprobe, regs, 0);
 	}
 	}
 
 
-	if (current_kprobe->post_handler != trampoline_post_handler)
-		resume_execution(current_kprobe, regs);
+	resume_execution(current_kprobe, regs);
 	regs->eflags |= kprobe_saved_eflags;
 	regs->eflags |= kprobe_saved_eflags;
 
 
 	/*Restore back the original saved kprobes variables and continue. */
 	/*Restore back the original saved kprobes variables and continue. */
@@ -534,3 +531,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	}
 	}
 	return 0;
 	return 0;
 }
 }
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init(void)
+{
+	return register_kprobe(&trampoline_p);
+}
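
For context, the trampoline rework above services return probes registered through the generic kretprobe API. A usage sketch follows (not part of the patch; the probed function and maxactive value are hypothetical, and error handling is minimal).

#include <linux/module.h>
#include <linux/kprobes.h>

/* hypothetical probe target; any non-inlined kernel function would do */
extern long probed_function(void);

static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	printk(KERN_INFO "probed function returned to %p\n", ri->ret_addr);
	return 0;
}

static struct kretprobe my_rp = {
	.handler	= ret_handler,
	.maxactive	= 20,	/* instances that may be active concurrently */
};

static int __init rp_example_init(void)
{
	my_rp.kp.addr = (kprobe_opcode_t *) probed_function;
	return register_kretprobe(&my_rp);
}

static void __exit rp_example_exit(void)
{
	unregister_kretprobe(&my_rp);
}

module_init(rp_example_init);
module_exit(rp_example_exit);
MODULE_LICENSE("GPL");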

+ 29 - 0
arch/i386/kernel/process.c

@@ -616,6 +616,33 @@ handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
 	tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
 }
 
+/*
+ * This function selects if the context switch from prev to next
+ * has to tweak the TSC disable bit in the cr4.
+ */
+static inline void disable_tsc(struct task_struct *prev_p,
+			       struct task_struct *next_p)
+{
+	struct thread_info *prev, *next;
+
+	/*
+	 * gcc should eliminate the ->thread_info dereference if
+	 * has_secure_computing returns 0 at compile time (SECCOMP=n).
+	 */
+	prev = prev_p->thread_info;
+	next = next_p->thread_info;
+
+	if (has_secure_computing(prev) || has_secure_computing(next)) {
+		/* slow path here */
+		if (has_secure_computing(prev) &&
+		    !has_secure_computing(next)) {
+			write_cr4(read_cr4() & ~X86_CR4_TSD);
+		} else if (!has_secure_computing(prev) &&
+			   has_secure_computing(next))
+			write_cr4(read_cr4() | X86_CR4_TSD);
+	}
+}
+
 /*
  *	switch_to(x,yn) should switch tasks from x to y.
  *
@@ -695,6 +722,8 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
 		handle_io_bitmap(next, tss);
 
+	disable_tsc(prev_p, next_p);
+
 	return prev_p;
 }
 

+ 2 - 0
arch/i386/kernel/syscall_table.S

@@ -289,3 +289,5 @@ ENTRY(sys_call_table)
 	.long sys_add_key
 	.long sys_request_key
 	.long sys_keyctl
+	.long sys_ioprio_set
+	.long sys_ioprio_get		/* 290 */

+ 2 - 2
arch/ia64/kernel/entry.S

@@ -1577,8 +1577,8 @@ sys_call_table:
 	data8 sys_add_key
 	data8 sys_request_key
 	data8 sys_keyctl
-	data8 sys_ni_syscall
-	data8 sys_ni_syscall			// 1275
+	data8 sys_ioprio_set
+	data8 sys_ioprio_get			// 1275
 	data8 sys_set_zone_reclaim
 	data8 sys_ni_syscall
 	data8 sys_ni_syscall

+ 124 - 4
arch/ia64/kernel/kprobes.c

@@ -34,6 +34,7 @@
 
 
 #include <asm/pgtable.h>
 #include <asm/pgtable.h>
 #include <asm/kdebug.h>
 #include <asm/kdebug.h>
+#include <asm/sections.h>
 
 
 extern void jprobe_inst_return(void);
 extern void jprobe_inst_return(void);
 
 
@@ -263,13 +264,33 @@ static inline void get_kprobe_inst(bundle_t *bundle, uint slot,
 	}
 	}
 }
 }
 
 
+/* Returns non-zero if the addr is in the Interrupt Vector Table */
+static inline int in_ivt_functions(unsigned long addr)
+{
+	return (addr >= (unsigned long)__start_ivt_text
+		&& addr < (unsigned long)__end_ivt_text);
+}
+
 static int valid_kprobe_addr(int template, int slot, unsigned long addr)
 static int valid_kprobe_addr(int template, int slot, unsigned long addr)
 {
 {
 	if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) {
 	if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) {
-		printk(KERN_WARNING "Attempting to insert unaligned kprobe at 0x%lx\n",
-				addr);
+		printk(KERN_WARNING "Attempting to insert unaligned kprobe "
+				"at 0x%lx\n", addr);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
+
+ 	if (in_ivt_functions(addr)) {
+ 		printk(KERN_WARNING "Kprobes can't be inserted inside "
+				"IVT functions at 0x%lx\n", addr);
+ 		return -EINVAL;
+ 	}
+
+	if (slot == 1 && bundle_encoding[template][1] != L) {
+		printk(KERN_WARNING "Inserting kprobes on slot #1 "
+		       "is not supported\n");
+		return -EINVAL;
+	}
+
 	return 0;
 	return 0;
 }
 }
 
 
@@ -290,6 +311,94 @@ static inline void set_current_kprobe(struct kprobe *p)
 	current_kprobe = p;
 	current_kprobe = p;
 }
 }
 
 
+static void kretprobe_trampoline(void)
+{
+}
+
+/*
+ * At this point the target function has been tricked into
+ * returning into our trampoline.  Lookup the associated instance
+ * and then:
+ *    - call the handler function
+ *    - cleanup by marking the instance as unused
+ *    - long jump back to the original return address
+ */
+int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri = NULL;
+	struct hlist_head *head;
+	struct hlist_node *node, *tmp;
+	unsigned long orig_ret_address = 0;
+	unsigned long trampoline_address =
+		((struct fnptr *)kretprobe_trampoline)->ip;
+
+        head = kretprobe_inst_table_head(current);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more then one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+	 *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+                if (ri->task != current)
+			/* another task is sharing our hash bucket */
+                        continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->cr_iip = orig_ret_address;
+
+	unlock_kprobes();
+	preempt_enable_no_resched();
+
+        /*
+         * By returning a non-zero value, we are telling
+         * kprobe_handler() that we have handled unlocking
+         * and re-enabling preemption.
+         */
+        return 1;
+}
+
+void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri;
+
+	if ((ri = get_free_rp_inst(rp)) != NULL) {
+		ri->rp = rp;
+		ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *)regs->b0;
+
+		/* Replace the return addr with trampoline addr */
+		regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
+
+		add_rp_inst(ri);
+	} else {
+		rp->nmissed++;
+	}
+}
+
 int arch_prepare_kprobe(struct kprobe *p)
 int arch_prepare_kprobe(struct kprobe *p)
 {
 {
 	unsigned long addr = (unsigned long) p->addr;
 	unsigned long addr = (unsigned long) p->addr;
@@ -492,8 +601,8 @@ static int pre_kprobes_handler(struct die_args *args)
 	if (p->pre_handler && p->pre_handler(p, regs))
 	if (p->pre_handler && p->pre_handler(p, regs))
 		/*
 		/*
 		 * Our pre-handler is specifically requesting that we just
 		 * Our pre-handler is specifically requesting that we just
-		 * do a return.  This is handling the case where the
-		 * pre-handler is really our special jprobe pre-handler.
+		 * do a return.  This is used for both the jprobe pre-handler
+		 * and the kretprobe trampoline
 		 */
 		 */
 		return 1;
 		return 1;
 
 
@@ -599,3 +708,14 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	*regs = jprobe_saved_regs;
 	*regs = jprobe_saved_regs;
 	return 1;
 	return 1;
 }
 }
+
+static struct kprobe trampoline_p = {
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init(void)
+{
+	trampoline_p.addr =
+		(kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip;
+	return register_kprobe(&trampoline_p);
+}

+ 16 - 0
arch/ia64/kernel/process.c

@@ -27,6 +27,7 @@
 #include <linux/efi.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
+#include <linux/kprobes.h>
 
 #include <asm/cpu.h>
 #include <asm/delay.h>
@@ -707,6 +708,13 @@ kernel_thread_helper (int (*fn)(void *), void *arg)
 void
 flush_thread (void)
 {
+	/*
+	 * Remove function-return probe instances associated with this task
+	 * and put them back on the free list. Do not insert an exit probe for
+	 * this function, it will be disabled by kprobe_flush_task if you do.
+	 */
+	kprobe_flush_task(current);
+
 	/* drop floating-point and debug-register state if it exists: */
 	current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
 	ia64_drop_fpu(current);
@@ -721,6 +729,14 @@ flush_thread (void)
 void
 exit_thread (void)
 {
+
+	/*
+	 * Remove function-return probe instances associated with this task
+	 * and put them back on the free list. Do not insert an exit probe for
+	 * this function, it will be disabled by kprobe_flush_task if you do.
+	 */
+	kprobe_flush_task(current);
+
 	ia64_drop_fpu(current);
 #ifdef CONFIG_PERFMON
        /* if needed, stop monitoring and flush state to perfmon context */

+ 6 - 1
arch/ia64/kernel/vmlinux.lds.S

@@ -8,6 +8,11 @@
 #define LOAD_OFFSET	(KERNEL_START - KERNEL_TR_PAGE_SIZE)
 #include <asm-generic/vmlinux.lds.h>
 
+#define IVT_TEXT							\
+		VMLINUX_SYMBOL(__start_ivt_text) = .;			\
+		*(.text.ivt)						\
+		VMLINUX_SYMBOL(__end_ivt_text) = .;
+
 OUTPUT_FORMAT("elf64-ia64-little")
 OUTPUT_ARCH(ia64)
 ENTRY(phys_start)
@@ -39,7 +44,7 @@ SECTIONS
 
   .text : AT(ADDR(.text) - LOAD_OFFSET)
     {
-	*(.text.ivt)
+	IVT_TEXT
 	*(.text)
 	SCHED_TEXT
 	LOCK_TEXT

+ 1 - 1
arch/mips/kernel/signal.c

@@ -457,7 +457,7 @@ static int do_signal(sigset_t *oldset, struct pt_regs *regs)
 	if (!user_mode(regs))
 		return 1;
 
-	if (try_to_freeze(0))
+	if (try_to_freeze())
 		goto no_signal;
 
 	if (!oldset)

+ 2 - 0
arch/ppc/kernel/misc.S

@@ -1449,3 +1449,5 @@ _GLOBAL(sys_call_table)
 	.long sys_request_key		/* 270 */
 	.long sys_keyctl
 	.long sys_waitid
+	.long sys_ioprio_set
+	.long sys_ioprio_get

+ 12 - 2
arch/ppc/mm/init.c

@@ -606,9 +606,19 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 		struct page *page = pfn_to_page(pfn);
 		if (!PageReserved(page)
 		    && !test_bit(PG_arch_1, &page->flags)) {
-			if (vma->vm_mm == current->active_mm)
+			if (vma->vm_mm == current->active_mm) {
+#ifdef CONFIG_8xx
+			/* On 8xx, cache control instructions (particularly
+		 	 * "dcbst" from flush_dcache_icache) fault as write
+			 * operation if there is an unpopulated TLB entry
+			 * for the address in question. To workaround that,
+			 * we invalidate the TLB here, thus avoiding dcbst
+			 * misbehaviour.
+			 */
+				_tlbie(address);
+#endif
 				__flush_dcache_icache((void *) address);
-			else
+			} else
 				flush_dcache_icache_page(page);
 			set_bit(PG_arch_1, &page->flags);
 		}

+ 2 - 2
arch/ppc/platforms/pmac_sleep.S

@@ -46,7 +46,7 @@
 	.section .text
 	.align	5
 
-#if defined(CONFIG_PMAC_PBOOK) || defined(CONFIG_CPU_FREQ_PMAC)
+#if defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ_PMAC)
 
 /* This gets called by via-pmu.c late during the sleep process.
  * The PMU was already send the sleep command and will shut us down
@@ -382,7 +382,7 @@ turn_on_mmu:
 	isync
 	rfi
 
-#endif /* defined(CONFIG_PMAC_PBOOK) || defined(CONFIG_CPU_FREQ) */
+#endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */
 
 	.section .data
 	.balign	L1_CACHE_LINE_SIZE

+ 4 - 4
arch/ppc/platforms/pmac_time.c

@@ -206,7 +206,7 @@ via_calibrate_decr(void)
 	return 1;
 }
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 /*
  * Reset the time after a sleep.
  */
@@ -238,7 +238,7 @@ time_sleep_notify(struct pmu_sleep_notifier *self, int when)
 static struct pmu_sleep_notifier time_sleep_notifier __pmacdata = {
 	time_sleep_notify, SLEEP_LEVEL_MISC,
 };
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 
 /*
  * Query the OF and get the decr frequency.
@@ -251,9 +251,9 @@ pmac_calibrate_decr(void)
 	struct device_node *cpu;
 	unsigned int freq, *fp;
 
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PM
 	pmu_register_sleep_notifier(&time_sleep_notifier);
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PM */
 
 	/* We assume MacRISC2 machines have correct device-tree
 	 * calibration. That's better since the VIA itself seems

+ 1 - 0
arch/ppc/platforms/sandpoint.c

@@ -324,6 +324,7 @@ sandpoint_setup_arch(void)
 			pdata[1].irq = 0;
 			pdata[1].mapbase = 0;
 		}
+	}
 
 	printk(KERN_INFO "Motorola SPS Sandpoint Test Platform\n");
 	printk(KERN_INFO "Port by MontaVista Software, Inc. (source@mvista.com)\n");

+ 19 - 7
arch/ppc/syslib/open_pic.c

@@ -370,8 +370,9 @@ void __init openpic_init(int offset)
 	/* Initialize IPI interrupts */
 	/* Initialize IPI interrupts */
 	if ( ppc_md.progress ) ppc_md.progress("openpic: ipi",0x3bb);
 	if ( ppc_md.progress ) ppc_md.progress("openpic: ipi",0x3bb);
 	for (i = 0; i < OPENPIC_NUM_IPI; i++) {
 	for (i = 0; i < OPENPIC_NUM_IPI; i++) {
-		/* Disabled, Priority 10..13 */
-		openpic_initipi(i, 10+i, OPENPIC_VEC_IPI+i+offset);
+		/* Disabled, increased priorities 10..13 */
+		openpic_initipi(i, OPENPIC_PRIORITY_IPI_BASE+i,
+				OPENPIC_VEC_IPI+i+offset);
 		/* IPIs are per-CPU */
 		/* IPIs are per-CPU */
 		irq_desc[OPENPIC_VEC_IPI+i+offset].status |= IRQ_PER_CPU;
 		irq_desc[OPENPIC_VEC_IPI+i+offset].status |= IRQ_PER_CPU;
 		irq_desc[OPENPIC_VEC_IPI+i+offset].handler = &open_pic_ipi;
 		irq_desc[OPENPIC_VEC_IPI+i+offset].handler = &open_pic_ipi;
@@ -399,8 +400,9 @@ void __init openpic_init(int offset)
 		if (sense & IRQ_SENSE_MASK)
 		if (sense & IRQ_SENSE_MASK)
 			irq_desc[i+offset].status = IRQ_LEVEL;
 			irq_desc[i+offset].status = IRQ_LEVEL;
 
 
-		/* Enabled, Priority 8 */
-		openpic_initirq(i, 8, i+offset, (sense & IRQ_POLARITY_MASK),
+		/* Enabled, Default priority */
+		openpic_initirq(i, OPENPIC_PRIORITY_DEFAULT, i+offset,
+				(sense & IRQ_POLARITY_MASK),
 				(sense & IRQ_SENSE_MASK));
 				(sense & IRQ_SENSE_MASK));
 		/* Processor 0 */
 		/* Processor 0 */
 		openpic_mapirq(i, CPU_MASK_CPU0, CPU_MASK_NONE);
 		openpic_mapirq(i, CPU_MASK_CPU0, CPU_MASK_NONE);
@@ -655,6 +657,18 @@ static void __init openpic_maptimer(u_int timer, cpumask_t cpumask)
 		      cpus_addr(phys)[0]);
 		      cpus_addr(phys)[0]);
 }
 }
 
 
+/*
+ * Change the priority of an interrupt
+ */
+void __init
+openpic_set_irq_priority(u_int irq, u_int pri)
+{
+	check_arg_irq(irq);
+	openpic_safe_writefield(&ISR[irq - open_pic_irq_offset]->Vector_Priority,
+				OPENPIC_PRIORITY_MASK,
+				pri << OPENPIC_PRIORITY_SHIFT);
+}
+
 /*
 /*
  * Initalize the interrupt source which will generate an NMI.
  * Initalize the interrupt source which will generate an NMI.
  * This raises the interrupt's priority from 8 to 9.
  * This raises the interrupt's priority from 8 to 9.
@@ -665,9 +679,7 @@ void __init
 openpic_init_nmi_irq(u_int irq)
 openpic_init_nmi_irq(u_int irq)
 {
 {
 	check_arg_irq(irq);
 	check_arg_irq(irq);
-	openpic_safe_writefield(&ISR[irq - open_pic_irq_offset]->Vector_Priority,
-				OPENPIC_PRIORITY_MASK,
-				9 << OPENPIC_PRIORITY_SHIFT);
+	openpic_set_irq_priority(irq, OPENPIC_PRIORITY_NMI);
 }
 }
 
 
 /*
 /*

+ 121 - 4
arch/ppc64/kernel/kprobes.c

@@ -36,6 +36,8 @@
 #include <asm/kdebug.h>
 #include <asm/kdebug.h>
 #include <asm/sstep.h>
 #include <asm/sstep.h>
 
 
+static DECLARE_MUTEX(kprobe_mutex);
+
 static struct kprobe *current_kprobe;
 static struct kprobe *current_kprobe;
 static unsigned long kprobe_status, kprobe_saved_msr;
 static unsigned long kprobe_status, kprobe_saved_msr;
 static struct kprobe *kprobe_prev;
 static struct kprobe *kprobe_prev;
@@ -54,6 +56,15 @@ int arch_prepare_kprobe(struct kprobe *p)
 		printk("Cannot register a kprobe on rfid or mtmsrd\n");
 		printk("Cannot register a kprobe on rfid or mtmsrd\n");
 		ret = -EINVAL;
 		ret = -EINVAL;
 	}
 	}
+
+	/* insn must be on a special executable page on ppc64 */
+	if (!ret) {
+		up(&kprobe_mutex);
+		p->ainsn.insn = get_insn_slot();
+		down(&kprobe_mutex);
+		if (!p->ainsn.insn)
+			ret = -ENOMEM;
+	}
 	return ret;
 	return ret;
 }
 }
 
 
@@ -79,16 +90,22 @@ void arch_disarm_kprobe(struct kprobe *p)
 
 
 void arch_remove_kprobe(struct kprobe *p)
 void arch_remove_kprobe(struct kprobe *p)
 {
 {
+	up(&kprobe_mutex);
+	free_insn_slot(p->ainsn.insn);
+	down(&kprobe_mutex);
 }
 }
 
 
 static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
 {
+	kprobe_opcode_t insn = *p->ainsn.insn;
+
 	regs->msr |= MSR_SE;
 	regs->msr |= MSR_SE;
-	/*single step inline if it a breakpoint instruction*/
-	if (p->opcode == BREAKPOINT_INSTRUCTION)
+
+	/* single step inline if it is a trap variant */
+	if (IS_TW(insn) || IS_TD(insn) || IS_TWI(insn) || IS_TDI(insn))
 		regs->nip = (unsigned long)p->addr;
 		regs->nip = (unsigned long)p->addr;
 	else
 	else
-		regs->nip = (unsigned long)&p->ainsn.insn;
+		regs->nip = (unsigned long)p->ainsn.insn;
 }
 }
 
 
 static inline void save_previous_kprobe(void)
 static inline void save_previous_kprobe(void)
@@ -105,6 +122,23 @@ static inline void restore_previous_kprobe(void)
 	kprobe_saved_msr = kprobe_saved_msr_prev;
 }

+void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri;
+
+	if ((ri = get_free_rp_inst(rp)) != NULL) {
+		ri->rp = rp;
+		ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *)regs->link;
+
+		/* Replace the return addr with trampoline addr */
+		regs->link = (unsigned long)kretprobe_trampoline;
+		add_rp_inst(ri);
+	} else {
+		rp->nmissed++;
+	}
+}
+
 static inline int kprobe_handler(struct pt_regs *regs)
 {
 	struct kprobe *p;
@@ -194,6 +228,78 @@ no_kprobe:
 	return ret;
 }

+/*
+ * Function return probe trampoline:
+ * 	- init_kprobes() establishes a probepoint here
+ * 	- When the probed function returns, this probe
+ * 		causes the handlers to fire
+ */
+void kretprobe_trampoline_holder(void)
+{
+	asm volatile(".global kretprobe_trampoline\n"
+			"kretprobe_trampoline:\n"
+			"nop\n");
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+        struct kretprobe_instance *ri = NULL;
+        struct hlist_head *head;
+        struct hlist_node *node, *tmp;
+	unsigned long orig_ret_address = 0;
+	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+
+        head = kretprobe_inst_table_head(current);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more then one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+         *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+                if (ri->task != current)
+			/* another task is sharing our hash bucket */
+                        continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->nip = orig_ret_address;
+
+	unlock_kprobes();
+
+        /*
+         * By returning a non-zero value, we are telling
+         * kprobe_handler() that we have handled unlocking
+         * and re-enabling preemption.
+         */
+        return 1;
+}
+
 /*
  * Called after single-stepping.  p->addr is the address of the
  * instruction whose first byte has been replaced by the "breakpoint"
@@ -205,9 +311,10 @@ no_kprobe:
 static void resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
 	int ret;
+	unsigned int insn = *p->ainsn.insn;

 	regs->nip = (unsigned long)p->addr;
-	ret = emulate_step(regs, p->ainsn.insn[0]);
+	ret = emulate_step(regs, insn);
 	if (ret == 0)
 		regs->nip = (unsigned long)p->addr + 4;
 }
@@ -331,3 +438,13 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs));
 	return 1;
 }
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init(void)
+{
+	return register_kprobe(&trampoline_p);
+}

+ 1 - 0
arch/ppc64/kernel/ppc_ksyms.c

@@ -75,6 +75,7 @@ EXPORT_SYMBOL(giveup_fpu);
 EXPORT_SYMBOL(giveup_altivec);
 #endif
 EXPORT_SYMBOL(__flush_icache_range);
+EXPORT_SYMBOL(flush_dcache_range);

 #ifdef CONFIG_SMP
 #ifdef CONFIG_PPC_ISERIES

+ 4 - 0
arch/ppc64/kernel/process.c

@@ -36,6 +36,7 @@
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
 #include <linux/utsname.h>
+#include <linux/kprobes.h>

 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -307,6 +308,8 @@ void show_regs(struct pt_regs * regs)

 void exit_thread(void)
 {
+	kprobe_flush_task(current);
+
 #ifndef CONFIG_SMP
 	if (last_task_used_math == current)
 		last_task_used_math = NULL;
@@ -321,6 +324,7 @@ void flush_thread(void)
 {
 	struct thread_info *t = current_thread_info();

+	kprobe_flush_task(current);
 	if (t->flags & _TIF_ABI_PENDING)
 		t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT);


+ 1 - 0
arch/ppc64/kernel/time.c

@@ -91,6 +91,7 @@ unsigned long tb_to_xs;
 unsigned      tb_to_us;
 unsigned long processor_freq;
 DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL_GPL(rtc_lock);

 unsigned long tb_to_ns_scale;
 unsigned long tb_to_ns_shift;

+ 1 - 1
arch/sparc64/kernel/auxio.c

@@ -16,7 +16,7 @@
 #include <asm/ebus.h>
 #include <asm/auxio.h>

-/* This cannot be static, as it is referenced in entry.S */
+/* This cannot be static, as it is referenced in irq.c */
 void __iomem *auxio_register = NULL;

 enum auxio_type {

+ 4 - 112
arch/sparc64/kernel/entry.S

@@ -271,8 +271,9 @@ cplus_fptrap_insn_1:
 	fmuld		%f0, %f2, %f26
 	faddd		%f0, %f2, %f28
 	fmuld		%f0, %f2, %f30
+	membar		#Sync
 	b,pt		%xcc, fpdis_exit
-	 membar		#Sync
+	 nop
 2:	andcc		%g5, FPRS_DU, %g0
 	bne,pt		%icc, 3f
 	 fzero		%f32
@@ -301,8 +302,9 @@ cplus_fptrap_insn_2:
 	fmuld		%f32, %f34, %f58
 	faddd		%f32, %f34, %f60
 	fmuld		%f32, %f34, %f62
+	membar		#Sync
 	ba,pt		%xcc, fpdis_exit
-	 membar		#Sync
+	 nop
 3:	mov		SECONDARY_CONTEXT, %g3
 	add		%g6, TI_FPREGS, %g1
 	ldxa		[%g3] ASI_DMMU, %g5
@@ -699,116 +701,6 @@ utrap_ill:
 	ba,pt		%xcc, rtrap
 	 clr		%l6

-#ifdef CONFIG_BLK_DEV_FD
-	.globl		floppy_hardint
-floppy_hardint:
-	wr		%g0, (1 << 11), %clear_softint
-	sethi		%hi(doing_pdma), %g1
-	ld		[%g1 + %lo(doing_pdma)], %g2
-	brz,pn		%g2, floppy_dosoftint
-	 sethi		%hi(fdc_status), %g3
-	ldx		[%g3 + %lo(fdc_status)], %g3
-	sethi		%hi(pdma_vaddr), %g5
-	ldx		[%g5 + %lo(pdma_vaddr)], %g4
-	sethi		%hi(pdma_size), %g5
-	ldx		[%g5 + %lo(pdma_size)], %g5
-
-next_byte:
-	lduba		[%g3] ASI_PHYS_BYPASS_EC_E, %g7
-	andcc		%g7, 0x80, %g0
-	be,pn		%icc, floppy_fifo_emptied
-	 andcc		%g7, 0x20, %g0
-	be,pn		%icc, floppy_overrun
-	 andcc		%g7, 0x40, %g0
-	be,pn		%icc, floppy_write
-	 sub		%g5, 1, %g5
-
-	inc		%g3
-	lduba		[%g3] ASI_PHYS_BYPASS_EC_E, %g7
-	dec		%g3
-	orcc		%g0, %g5, %g0
-	stb		%g7, [%g4]
-	bne,pn		%xcc, next_byte
-	 add		%g4, 1, %g4
-
-	b,pt		%xcc, floppy_tdone
-	 nop
-
-floppy_write:
-	ldub		[%g4], %g7
-	orcc		%g0, %g5, %g0
-	inc		%g3
-	stba		%g7, [%g3] ASI_PHYS_BYPASS_EC_E
-	dec		%g3
-	bne,pn		%xcc, next_byte
-	 add		%g4, 1, %g4
-
-floppy_tdone:
-	sethi		%hi(pdma_vaddr), %g1
-	stx		%g4, [%g1 + %lo(pdma_vaddr)]
-	sethi		%hi(pdma_size), %g1
-	stx		%g5, [%g1 + %lo(pdma_size)]
-	sethi		%hi(auxio_register), %g1
-	ldx		[%g1 + %lo(auxio_register)], %g7
-	lduba		[%g7] ASI_PHYS_BYPASS_EC_E, %g5
-	or		%g5, AUXIO_AUX1_FTCNT, %g5
-/*	andn		%g5, AUXIO_AUX1_MASK, %g5 */
-	stba		%g5, [%g7] ASI_PHYS_BYPASS_EC_E
-	andn		%g5, AUXIO_AUX1_FTCNT, %g5
-/*	andn		%g5, AUXIO_AUX1_MASK, %g5 */
-
-	nop; nop;  nop; nop;  nop; nop;
-	nop; nop;  nop; nop;  nop; nop;
-
-	stba		%g5, [%g7] ASI_PHYS_BYPASS_EC_E
-	sethi		%hi(doing_pdma), %g1
-	b,pt		%xcc, floppy_dosoftint
-	 st		%g0, [%g1 + %lo(doing_pdma)]
-
-floppy_fifo_emptied:
-	sethi		%hi(pdma_vaddr), %g1
-	stx		%g4, [%g1 + %lo(pdma_vaddr)]
-	sethi		%hi(pdma_size), %g1
-	stx		%g5, [%g1 + %lo(pdma_size)]
-	sethi		%hi(irq_action), %g1
-	or		%g1, %lo(irq_action), %g1
-	ldx		[%g1 + (11 << 3)], %g3		! irqaction[floppy_irq]
-	ldx		[%g3 + 0x08], %g4		! action->flags>>48==ino
-	sethi		%hi(ivector_table), %g3
-	srlx		%g4, 48, %g4
-	or		%g3, %lo(ivector_table), %g3
-	sllx		%g4, 5, %g4
-	ldx		[%g3 + %g4], %g4		! &ivector_table[ino]
-	ldx		[%g4 + 0x10], %g4		! bucket->iclr
-	stwa		%g0, [%g4] ASI_PHYS_BYPASS_EC_E	! ICLR_IDLE
-	membar		#Sync				! probably not needed...
-	retry
-
-floppy_overrun:
-	sethi		%hi(pdma_vaddr), %g1
-	stx		%g4, [%g1 + %lo(pdma_vaddr)]
-	sethi		%hi(pdma_size), %g1
-	stx		%g5, [%g1 + %lo(pdma_size)]
-	sethi		%hi(doing_pdma), %g1
-	st		%g0, [%g1 + %lo(doing_pdma)]
-
-floppy_dosoftint:
-	rdpr		%pil, %g2
-	wrpr		%g0, 15, %pil
-	sethi		%hi(109f), %g7
-	b,pt		%xcc, etrap_irq
-109:	 or		%g7, %lo(109b), %g7
-
-	mov		11, %o0
-	mov		0, %o1
-	call		sparc_floppy_irq
-	 add		%sp, PTREGS_OFF, %o2
-
-	b,pt		%xcc, rtrap_irq
-	 nop
-
-#endif /* CONFIG_BLK_DEV_FD */
-
 	/* XXX Here is stuff we still need to write... -DaveM XXX */
 	.globl		netbsd_syscall
 netbsd_syscall:

+ 50 - 121
arch/sparc64/kernel/irq.c

@@ -37,6 +37,7 @@
 #include <asm/uaccess.h>
 #include <asm/cache.h>
 #include <asm/cpudata.h>
+#include <asm/auxio.h>

 #ifdef CONFIG_SMP
 static void distribute_irqs(void);
@@ -834,137 +835,65 @@ void handler_irq(int irq, struct pt_regs *regs)
 }

 #ifdef CONFIG_BLK_DEV_FD
-extern void floppy_interrupt(int irq, void *dev_cookie, struct pt_regs *regs);
+extern irqreturn_t floppy_interrupt(int, void *, struct pt_regs *);;

-void sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs)
-{
-	struct irqaction *action = *(irq + irq_action);
-	struct ino_bucket *bucket;
-	int cpu = smp_processor_id();
-
-	irq_enter();
-	kstat_this_cpu.irqs[irq]++;
-
-	*(irq_work(cpu, irq)) = 0;
-	bucket = get_ino_in_irqaction(action) + ivector_table;
-
-	bucket->flags |= IBF_INPROGRESS;
-
-	floppy_interrupt(irq, dev_cookie, regs);
-	upa_writel(ICLR_IDLE, bucket->iclr);
-
-	bucket->flags &= ~IBF_INPROGRESS;
-
-	irq_exit();
-}
-#endif
-
-/* The following assumes that the branch lies before the place we
- * are branching to.  This is the case for a trap vector...
- * You have been warned.
- */
-#define SPARC_BRANCH(dest_addr, inst_addr) \
-          (0x10800000 | ((((dest_addr)-(inst_addr))>>2)&0x3fffff))
-
-#define SPARC_NOP (0x01000000)
+/* XXX No easy way to include asm/floppy.h XXX */
+extern unsigned char *pdma_vaddr;
+extern unsigned long pdma_size;
+extern volatile int doing_pdma;
+extern unsigned long fdc_status;

-static void install_fast_irq(unsigned int cpu_irq,
-			     irqreturn_t (*handler)(int, void *, struct pt_regs *))
+irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs)
 {
-	extern unsigned long sparc64_ttable_tl0;
-	unsigned long ttent = (unsigned long) &sparc64_ttable_tl0;
-	unsigned int *insns;
-
-	ttent += 0x820;
-	ttent += (cpu_irq - 1) << 5;
-	insns = (unsigned int *) ttent;
-	insns[0] = SPARC_BRANCH(((unsigned long) handler),
-				((unsigned long)&insns[0]));
-	insns[1] = SPARC_NOP;
-	__asm__ __volatile__("membar #StoreStore; flush %0" : : "r" (ttent));
-}
-
-int request_fast_irq(unsigned int irq,
-		     irqreturn_t (*handler)(int, void *, struct pt_regs *),
-		     unsigned long irqflags, const char *name, void *dev_id)
-{
-	struct irqaction *action;
-	struct ino_bucket *bucket = __bucket(irq);
-	unsigned long flags;
-
-	/* No pil0 dummy buckets allowed here. */
-	if (bucket < &ivector_table[0] ||
-	    bucket >= &ivector_table[NUM_IVECS]) {
-		unsigned int *caller;
-
-		__asm__ __volatile__("mov %%i7, %0" : "=r" (caller));
-		printk(KERN_CRIT "request_fast_irq: Old style IRQ registry attempt "
-		       "from %p, irq %08x.\n", caller, irq);
-		return -EINVAL;
-	}	
-	
-	if (!handler)
-		return -EINVAL;
+	if (likely(doing_pdma)) {
+		void __iomem *stat = (void __iomem *) fdc_status;
+		unsigned char *vaddr = pdma_vaddr;
+		unsigned long size = pdma_size;
+		u8 val;
+
+		while (size) {
+			val = readb(stat);
+			if (unlikely(!(val & 0x80))) {
+				pdma_vaddr = vaddr;
+				pdma_size = size;
+				return IRQ_HANDLED;
+			}
+			if (unlikely(!(val & 0x20))) {
+				pdma_vaddr = vaddr;
+				pdma_size = size;
+				doing_pdma = 0;
+				goto main_interrupt;
+			}
+			if (val & 0x40) {
+				/* read */
+				*vaddr++ = readb(stat + 1);
+			} else {
+				unsigned char data = *vaddr++;

-	if ((bucket->pil == 0) || (bucket->pil == 14)) {
-		printk("request_fast_irq: Trying to register shared IRQ 0 or 14.\n");
-		return -EBUSY;
-	}
+				/* write */
+				writeb(data, stat + 1);
+			}
+			size--;
+		}

-	spin_lock_irqsave(&irq_action_lock, flags);
+		pdma_vaddr = vaddr;
+		pdma_size = size;

-	action = *(bucket->pil + irq_action);
-	if (action) {
-		if (action->flags & SA_SHIRQ)
-			panic("Trying to register fast irq when already shared.\n");
-		if (irqflags & SA_SHIRQ)
-			panic("Trying to register fast irq as shared.\n");
-		printk("request_fast_irq: Trying to register yet already owned.\n");
-		spin_unlock_irqrestore(&irq_action_lock, flags);
-		return -EBUSY;
-	}
+		/* Send Terminal Count pulse to floppy controller. */
+		val = readb(auxio_register);
+		val |= AUXIO_AUX1_FTCNT;
+		writeb(val, auxio_register);
+		val &= AUXIO_AUX1_FTCNT;
+		writeb(val, auxio_register);

-	/*
-	 * We do not check for SA_SAMPLE_RANDOM in this path. Neither do we
-	 * support smp intr affinity in this path.
-	 */
-	if (irqflags & SA_STATIC_ALLOC) {
-		if (static_irq_count < MAX_STATIC_ALLOC)
-			action = &static_irqaction[static_irq_count++];
-		else
-			printk("Request for IRQ%d (%s) SA_STATIC_ALLOC failed "
-			       "using kmalloc\n", bucket->pil, name);
-	}
-	if (action == NULL)
-		action = (struct irqaction *)kmalloc(sizeof(struct irqaction),
-						     GFP_ATOMIC);
-	if (!action) {
-		spin_unlock_irqrestore(&irq_action_lock, flags);
-		return -ENOMEM;
+		doing_pdma = 0;
 	}
-	install_fast_irq(bucket->pil, handler);

-	bucket->irq_info = action;
-	bucket->flags |= IBF_ACTIVE;
-
-	action->handler = handler;
-	action->flags = irqflags;
-	action->dev_id = NULL;
-	action->name = name;
-	action->next = NULL;
-	put_ino_in_irqaction(action, irq);
-	put_smpaff_in_irqaction(action, CPU_MASK_NONE);
-
-	*(bucket->pil + irq_action) = action;
-	enable_irq(irq);
-
-	spin_unlock_irqrestore(&irq_action_lock, flags);
-
-#ifdef CONFIG_SMP
-	distribute_irqs();
-#endif
-	return 0;
+main_interrupt:
+	return floppy_interrupt(irq, dev_cookie, regs);
 }
+EXPORT_SYMBOL(sparc_floppy_irq);
+#endif

 /* We really don't need these at all on the Sparc.  We only have
  * stubs here because they are exported to modules.

+ 8 - 4
arch/sparc64/kernel/semaphore.c

@@ -32,8 +32,9 @@ static __inline__ int __sem_update_count(struct semaphore *sem, int incr)
 "	add	%1, %4, %1\n"
 "	cas	[%3], %0, %1\n"
 "	cmp	%0, %1\n"
+"	membar	#StoreLoad | #StoreStore\n"
 "	bne,pn	%%icc, 1b\n"
-"	 membar #StoreLoad | #StoreStore\n"
+"	 nop\n"
 	: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
 	: "r" (&sem->count), "r" (incr), "m" (sem->count)
 	: "cc");
@@ -71,8 +72,9 @@ void up(struct semaphore *sem)
 "	cmp	%%g1, %%g7\n"
 "	bne,pn	%%icc, 1b\n"
 "	 addcc	%%g7, 1, %%g0\n"
+"	membar	#StoreLoad | #StoreStore\n"
 "	ble,pn	%%icc, 3f\n"
-"	 membar	#StoreLoad | #StoreStore\n"
+"	 nop\n"
 "2:\n"
 "	.subsection 2\n"
 "3:	mov	%0, %%g1\n"
@@ -128,8 +130,9 @@ void __sched down(struct semaphore *sem)
 "	cmp	%%g1, %%g7\n"
 "	bne,pn	%%icc, 1b\n"
 "	 cmp	%%g7, 1\n"
+"	membar	#StoreLoad | #StoreStore\n"
 "	bl,pn	%%icc, 3f\n"
-"	 membar	#StoreLoad | #StoreStore\n"
+"	 nop\n"
 "2:\n"
 "	.subsection 2\n"
 "3:	mov	%0, %%g1\n"
@@ -233,8 +236,9 @@ int __sched down_interruptible(struct semaphore *sem)
 "	cmp	%%g1, %%g7\n"
 "	bne,pn	%%icc, 1b\n"
 "	 cmp	%%g7, 1\n"
+"	membar	#StoreLoad | #StoreStore\n"
 "	bl,pn	%%icc, 3f\n"
-"	 membar	#StoreLoad | #StoreStore\n"
+"	 nop\n"
 "2:\n"
 "	.subsection 2\n"
 "3:	mov	%2, %%g1\n"

+ 0 - 1
arch/sparc64/kernel/sparc64_ksyms.c

@@ -227,7 +227,6 @@ EXPORT_SYMBOL(__flush_dcache_range);

 EXPORT_SYMBOL(mostek_lock);
 EXPORT_SYMBOL(mstk48t02_regs);
-EXPORT_SYMBOL(request_fast_irq);
 #ifdef CONFIG_SUN_AUXIO
 EXPORT_SYMBOL(auxio_set_led);
 EXPORT_SYMBOL(auxio_set_lte);

+ 2 - 1
arch/sparc64/kernel/trampoline.S

@@ -98,8 +98,9 @@ startup_continue:

 	sethi		%hi(prom_entry_lock), %g2
 1:	ldstub		[%g2 + %lo(prom_entry_lock)], %g1
+	membar		#StoreLoad | #StoreStore
 	brnz,pn		%g1, 1b
-	 membar		#StoreLoad | #StoreStore
+	 nop

 	sethi		%hi(p1275buf), %g2
 	or		%g2, %lo(p1275buf), %g2

+ 53 - 50
arch/sparc64/lib/U1memcpy.S

@@ -87,14 +87,17 @@
 #define LOOP_CHUNK3(src, dest, len, branch_dest)		\
 	MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)

+#define DO_SYNC			membar	#Sync;
 #define STORE_SYNC(dest, fsrc)				\
 	EX_ST(STORE_BLK(%fsrc, %dest));			\
-	add			%dest, 0x40, %dest;
+	add			%dest, 0x40, %dest;	\
+	DO_SYNC

 #define STORE_JUMP(dest, fsrc, target)			\
 	EX_ST(STORE_BLK(%fsrc, %dest));			\
 	add			%dest, 0x40, %dest;	\
-	ba,pt			%xcc, target;
+	ba,pt			%xcc, target;		\
+	 nop;

 #define FINISH_VISCHUNK(dest, f0, f1, left)	\
 	subcc			%left, 8, %left;\
@@ -239,17 +242,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f0, %f2, %f48
 1:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
-	STORE_JUMP(o0, f48, 40f) membar #Sync
+	STORE_JUMP(o0, f48, 40f)
 2:	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
-	STORE_JUMP(o0, f48, 48f) membar #Sync
+	STORE_JUMP(o0, f48, 48f)
 3:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
-	STORE_JUMP(o0, f48, 56f) membar #Sync
+	STORE_JUMP(o0, f48, 56f)

 1:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -260,17 +263,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f2, %f4, %f48
 1:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
-	STORE_JUMP(o0, f48, 41f) membar #Sync
+	STORE_JUMP(o0, f48, 41f)
 2:	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
-	STORE_JUMP(o0, f48, 49f) membar #Sync
+	STORE_JUMP(o0, f48, 49f)
 3:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
-	STORE_JUMP(o0, f48, 57f) membar #Sync
+	STORE_JUMP(o0, f48, 57f)

 1:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -281,17 +284,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f4, %f6, %f48
 1:	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
-	STORE_JUMP(o0, f48, 42f) membar #Sync
+	STORE_JUMP(o0, f48, 42f)
 2:	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
-	STORE_JUMP(o0, f48, 50f) membar #Sync
+	STORE_JUMP(o0, f48, 50f)
 3:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
-	STORE_JUMP(o0, f48, 58f) membar #Sync
+	STORE_JUMP(o0, f48, 58f)

 1:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -302,17 +305,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f6, %f8, %f48
 1:	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
-	STORE_JUMP(o0, f48, 43f) membar #Sync
+	STORE_JUMP(o0, f48, 43f)
 2:	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
-	STORE_JUMP(o0, f48, 51f) membar #Sync
+	STORE_JUMP(o0, f48, 51f)
 3:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
-	STORE_JUMP(o0, f48, 59f) membar #Sync
+	STORE_JUMP(o0, f48, 59f)

 1:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -323,17 +326,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f8, %f10, %f48
 1:	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
-	STORE_JUMP(o0, f48, 44f) membar #Sync
+	STORE_JUMP(o0, f48, 44f)
 2:	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
-	STORE_JUMP(o0, f48, 52f) membar #Sync
+	STORE_JUMP(o0, f48, 52f)
 3:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
-	STORE_JUMP(o0, f48, 60f) membar #Sync
+	STORE_JUMP(o0, f48, 60f)

 1:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -344,17 +347,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f10, %f12, %f48
 1:	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
-	STORE_JUMP(o0, f48, 45f) membar #Sync
+	STORE_JUMP(o0, f48, 45f)
 2:	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
-	STORE_JUMP(o0, f48, 53f) membar #Sync
+	STORE_JUMP(o0, f48, 53f)
 3:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
-	STORE_JUMP(o0, f48, 61f) membar #Sync
+	STORE_JUMP(o0, f48, 61f)

 1:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -365,17 +368,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f12, %f14, %f48
 1:	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
-	STORE_JUMP(o0, f48, 46f) membar #Sync
+	STORE_JUMP(o0, f48, 46f)
 2:	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
-	STORE_JUMP(o0, f48, 54f) membar #Sync
+	STORE_JUMP(o0, f48, 54f)
 3:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
-	STORE_JUMP(o0, f48, 62f) membar #Sync
+	STORE_JUMP(o0, f48, 62f)

 1:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -386,17 +389,17 @@ FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f14, %f16, %f48
 1:	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
-	STORE_JUMP(o0, f48, 47f) membar #Sync
+	STORE_JUMP(o0, f48, 47f)
 2:	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
-	STORE_JUMP(o0, f48, 55f) membar #Sync
+	STORE_JUMP(o0, f48, 55f)
 3:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
-	STORE_JUMP(o0, f48, 63f) membar #Sync
+	STORE_JUMP(o0, f48, 63f)

 40:	FINISH_VISCHUNK(o0, f0,  f2,  g3)
 41:	FINISH_VISCHUNK(o0, f2,  f4,  g3)

+ 13 - 2
arch/sparc64/lib/VISsave.S

@@ -72,7 +72,11 @@ vis1:	ldub		[%g6 + TI_FPSAVED], %g3

 	stda		%f48, [%g3 + %g1] ASI_BLK_P
 5:	membar		#Sync
-	jmpl		%g7 + %g0, %g0
+	ba,pt		%xcc, 80f
+	 nop
+
+	.align		32
+80:	jmpl		%g7 + %g0, %g0
 	 nop

 6:	ldub		[%g3 + TI_FPSAVED], %o5
@@ -87,8 +91,11 @@ vis1:	ldub		[%g6 + TI_FPSAVED], %g3
 	stda		%f32, [%g2 + %g1] ASI_BLK_P
 	stda		%f48, [%g3 + %g1] ASI_BLK_P
 	membar		#Sync
-	jmpl		%g7 + %g0, %g0
+	ba,pt		%xcc, 80f
+	 nop

+	.align		32
+80:	jmpl		%g7 + %g0, %g0
 	 nop

 	.align		32
@@ -126,6 +133,10 @@ VISenterhalf:
 	stda		%f0, [%g2 + %g1] ASI_BLK_P
 	stda		%f16, [%g3 + %g1] ASI_BLK_P
 	membar		#Sync
+	ba,pt		%xcc, 4f
+	 nop
+
+	.align		32
 4:	and		%o5, FPRS_DU, %o5
 	jmpl		%g7 + %g0, %g0
 	 wr		%o5, FPRS_FEF, %fprs

+ 26 - 16
arch/sparc64/lib/atomic.S

@@ -7,18 +7,6 @@
 #include <linux/config.h>
 #include <asm/asi.h>

-	/* On SMP we need to use memory barriers to ensure
-	 * correct memory operation ordering, nop these out
-	 * for uniprocessor.
-	 */
-#ifdef CONFIG_SMP
-#define ATOMIC_PRE_BARRIER	membar #StoreLoad | #LoadLoad
-#define ATOMIC_POST_BARRIER	membar #StoreLoad | #StoreStore
-#else
-#define ATOMIC_PRE_BARRIER	nop
-#define ATOMIC_POST_BARRIER	nop
-#endif
-
 	.text

 	/* Two versions of the atomic routines, one that
@@ -52,6 +40,24 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */
 	 nop
 	.size	atomic_sub, .-atomic_sub

+	/* On SMP we need to use memory barriers to ensure
+	 * correct memory operation ordering, nop these out
+	 * for uniprocessor.
+	 */
+#ifdef CONFIG_SMP
+
+#define ATOMIC_PRE_BARRIER	membar #StoreLoad | #LoadLoad;
+#define ATOMIC_POST_BARRIER	\
+	ba,pt %xcc, 80b;	\
+	membar #StoreLoad | #StoreStore
+
+80:	retl
+	 nop
+#else
+#define ATOMIC_PRE_BARRIER
+#define ATOMIC_POST_BARRIER
+#endif
+
 	.globl	atomic_add_ret
 	.type	atomic_add_ret,#function
 atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
@@ -62,9 +68,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	cmp	%g1, %g7
 	bne,pn	%icc, 1b
 	 add	%g7, %o0, %g7
+	sra	%g7, 0, %o0
 	ATOMIC_POST_BARRIER
 	retl
-	 sra	%g7, 0, %o0
+	 nop
 	.size	atomic_add_ret, .-atomic_add_ret

 	.globl	atomic_sub_ret
@@ -77,9 +84,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
 	cmp	%g1, %g7
 	bne,pn	%icc, 1b
 	 sub	%g7, %o0, %g7
+	sra	%g7, 0, %o0
 	ATOMIC_POST_BARRIER
 	retl
-	 sra	%g7, 0, %o0
+	 nop
 	.size	atomic_sub_ret, .-atomic_sub_ret

 	.globl	atomic64_add
@@ -118,9 +126,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	cmp	%g1, %g7
 	bne,pn	%xcc, 1b
 	 add	%g7, %o0, %g7
+	mov	%g7, %o0
 	ATOMIC_POST_BARRIER
 	retl
-	 mov	%g7, %o0
+	 nop
 	.size	atomic64_add_ret, .-atomic64_add_ret

 	.globl	atomic64_sub_ret
@@ -133,7 +142,8 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
 	cmp	%g1, %g7
 	bne,pn	%xcc, 1b
 	 sub	%g7, %o0, %g7
+	mov	%g7, %o0
 	ATOMIC_POST_BARRIER
 	retl
-	 mov	%g7, %o0
+	 nop
 	.size	atomic64_sub_ret, .-atomic64_sub_ret

+ 20 - 11
arch/sparc64/lib/bitops.S

@@ -7,20 +7,26 @@
 #include <linux/config.h>
 #include <asm/asi.h>

+	.text
+
 	/* On SMP we need to use memory barriers to ensure
 	 * correct memory operation ordering, nop these out
 	 * for uniprocessor.
 	 */
+
 #ifdef CONFIG_SMP
 #define BITOP_PRE_BARRIER	membar #StoreLoad | #LoadLoad
-#define BITOP_POST_BARRIER	membar #StoreLoad | #StoreStore
+#define BITOP_POST_BARRIER	\
+	ba,pt	%xcc, 80b;	\
+	membar #StoreLoad | #StoreStore
+
+80:	retl
+	 nop
 #else
-#define BITOP_PRE_BARRIER	nop
-#define BITOP_POST_BARRIER	nop
+#define BITOP_PRE_BARRIER
+#define BITOP_POST_BARRIER
 #endif

-	.text
-
 	.globl	test_and_set_bit
 	.type	test_and_set_bit,#function
 test_and_set_bit:	/* %o0=nr, %o1=addr */
@@ -37,10 +43,11 @@ test_and_set_bit:	/* %o0=nr, %o1=addr */
 	cmp	%g7, %g1
 	bne,pn	%xcc, 1b
 	 and	%g7, %o2, %g2
-	BITOP_POST_BARRIER
 	clr	%o0
+	movrne	%g2, 1, %o0
+	BITOP_POST_BARRIER
 	retl
-	 movrne	%g2, 1, %o0
+	 nop
 	.size	test_and_set_bit, .-test_and_set_bit

 	.globl	test_and_clear_bit
@@ -59,10 +66,11 @@ test_and_clear_bit:	/* %o0=nr, %o1=addr */
 	cmp	%g7, %g1
 	bne,pn	%xcc, 1b
 	 and	%g7, %o2, %g2
-	BITOP_POST_BARRIER
 	clr	%o0
+	movrne	%g2, 1, %o0
+	BITOP_POST_BARRIER
 	retl
-	 movrne	%g2, 1, %o0
+	 nop
 	.size	test_and_clear_bit, .-test_and_clear_bit

 	.globl	test_and_change_bit
@@ -81,10 +89,11 @@ test_and_change_bit:	/* %o0=nr, %o1=addr */
 	cmp	%g7, %g1
 	bne,pn	%xcc, 1b
 	 and	%g7, %o2, %g2
-	BITOP_POST_BARRIER
 	clr	%o0
+	movrne	%g2, 1, %o0
+	BITOP_POST_BARRIER
 	retl
-	 movrne	%g2, 1, %o0
+	 nop
 	.size	test_and_change_bit, .-test_and_change_bit

 	.globl	set_bit

+ 4 - 2
arch/sparc64/lib/debuglocks.c

@@ -252,8 +252,9 @@ wlock_again:
 "		andn	%%g1, %%g3, %%g7\n"
 "		casx	[%0], %%g1, %%g7\n"
 "		cmp	%%g1, %%g7\n"
+"		membar	#StoreLoad | #StoreStore\n"
 "		bne,pn	%%xcc, 1b\n"
-"		 membar	#StoreLoad | #StoreStore"
+"		 nop"
 		: /* no outputs */
 		: "r" (&(rw->lock))
 		: "g3", "g1", "g7", "cc", "memory");
@@ -351,8 +352,9 @@ int _do_write_trylock (rwlock_t *rw, char *str)
 "		andn	%%g1, %%g3, %%g7\n"
 "		casx	[%0], %%g1, %%g7\n"
 "		cmp	%%g1, %%g7\n"
+"		membar	#StoreLoad | #StoreStore\n"
 "		bne,pn	%%xcc, 1b\n"
-"		 membar	#StoreLoad | #StoreStore"
+"		 nop"
 		: /* no outputs */
 		: "r" (&(rw->lock))
 		: "g3", "g1", "g7", "cc", "memory");

+ 4 - 2
arch/sparc64/lib/dec_and_lock.S

@@ -48,8 +48,9 @@ start_to_zero:
 #endif
 to_zero:
 	ldstub	[%o1], %g3
+	membar	#StoreLoad | #StoreStore
 	brnz,pn	%g3, spin_on_lock
-	 membar	#StoreLoad | #StoreStore
+	 nop
 loop2:	cas	[%o0], %g2, %g7		/* ASSERT(g7 == 0) */
 	cmp	%g2, %g7

@@ -71,8 +72,9 @@ loop2:	cas	[%o0], %g2, %g7		/* ASSERT(g7 == 0) */
 	 nop
 spin_on_lock:
 	ldub	[%o1], %g3
+	membar	#LoadLoad
 	brnz,pt	%g3, spin_on_lock
-	 membar	#LoadLoad
+	 nop
 	ba,pt	%xcc, to_zero
 	 nop
 	nop

+ 10 - 5
arch/sparc64/lib/rwsem.S

@@ -17,8 +17,9 @@ __down_read:
 	bne,pn		%icc, 1b
 	 add		%g7, 1, %g7
 	cmp		%g7, 0
+	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
-	 membar		#StoreLoad | #StoreStore
+	 nop
 2:
 	retl
 	 nop
@@ -57,8 +58,9 @@ __down_write:
 	cmp		%g3, %g7
 	bne,pn		%icc, 1b
 	 cmp		%g7, 0
+	membar		#StoreLoad | #StoreStore
 	bne,pn		%icc, 3f
-	 membar		#StoreLoad | #StoreStore
+	 nop
 2:	retl
 	 nop
 3:
@@ -97,8 +99,9 @@ __up_read:
 	cmp		%g1, %g7
 	bne,pn		%icc, 1b
 	 cmp		%g7, 0
+	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
-	 membar		#StoreLoad | #StoreStore
+	 nop
 2:	retl
 	 nop
 3:	sethi		%hi(RWSEM_ACTIVE_MASK), %g1
@@ -126,8 +129,9 @@ __up_write:
 	bne,pn		%icc, 1b
 	 sub		%g7, %g1, %g7
 	cmp		%g7, 0
+	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
-	 membar		#StoreLoad | #StoreStore
+	 nop
 2:
 	retl
 	 nop
@@ -151,8 +155,9 @@ __downgrade_write:
 	bne,pn		%icc, 1b
 	 sub		%g7, %g1, %g7
 	cmp		%g7, 0
+	membar		#StoreLoad | #StoreStore
 	bl,pn		%icc, 3f
-	 membar		#StoreLoad | #StoreStore
+	 nop
 2:
 	retl
 	 nop

+ 4 - 2
arch/sparc64/mm/init.c

@@ -136,8 +136,9 @@ static __inline__ void set_dcache_dirty(struct page *page, int this_cpu)
 			     "or	%%g1, %0, %%g1\n\t"
 			     "casx	[%2], %%g7, %%g1\n\t"
 			     "cmp	%%g7, %%g1\n\t"
+			     "membar	#StoreLoad | #StoreStore\n\t"
 			     "bne,pn	%%xcc, 1b\n\t"
-			     " membar	#StoreLoad | #StoreStore"
+			     " nop"
 			     : /* no outputs */
 			     : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags)
 			     : "g1", "g7");
@@ -157,8 +158,9 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
 			     " andn	%%g7, %1, %%g1\n\t"
 			     "casx	[%2], %%g7, %%g1\n\t"
 			     "cmp	%%g7, %%g1\n\t"
+			     "membar	#StoreLoad | #StoreStore\n\t"
 			     "bne,pn	%%xcc, 1b\n\t"
-			     " membar	#StoreLoad | #StoreStore\n"
+			     " nop\n"
 			     "2:"
 			     : /* no outputs */
 			     : "r" (cpu), "r" (mask), "r" (&page->flags),

+ 2 - 1
arch/sparc64/mm/ultra.S

@@ -266,8 +266,9 @@ __cheetah_flush_tlb_pending:	/* 22 insns */
 	 andn		%o3, 1, %o3
 	stxa		%g0, [%o3] ASI_IMMU_DEMAP
 2:	stxa		%g0, [%o3] ASI_DMMU_DEMAP	
+	membar		#Sync
 	brnz,pt		%o1, 1b
-	 membar		#Sync
+	 nop
 	stxa		%g2, [%o4] ASI_DMMU
 	flush		%g6
 	wrpr		%g0, 0, %tl

+ 66 - 170
arch/x86_64/kernel/kprobes.c

@@ -38,7 +38,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/preempt.h>
-#include <linux/moduleloader.h>
+
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/kdebug.h>
@@ -51,8 +51,6 @@ static struct kprobe *kprobe_prev;
 static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev;
 static struct pt_regs jprobe_saved_regs;
 static long *jprobe_saved_rsp;
-static kprobe_opcode_t *get_insn_slot(void);
-static void free_insn_slot(kprobe_opcode_t *slot);
 void jprobe_return_end(void);

 /* copy of the kernel stack at the probe fire time */
@@ -274,48 +272,23 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 		regs->rip = (unsigned long)p->ainsn.insn;
 }

-struct task_struct  *arch_get_kprobe_task(void *ptr)
-{
-	return ((struct thread_info *) (((unsigned long) ptr) &
-					(~(THREAD_SIZE -1))))->task;
-}
-
 void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
 {
 	unsigned long *sara = (unsigned long *)regs->rsp;
-	struct kretprobe_instance *ri;
-	static void *orig_ret_addr;
+        struct kretprobe_instance *ri;
+
+        if ((ri = get_free_rp_inst(rp)) != NULL) {
+                ri->rp = rp;
+                ri->task = current;
+		ri->ret_addr = (kprobe_opcode_t *) *sara;

-	/*
-	 * Save the return address when the return probe hits
-	 * the first time, and use it to populate the (krprobe
-	 * instance)->ret_addr for subsequent return probes at
-	 * the same addrress since stack address would have
-	 * the kretprobe_trampoline by then.
-	 */
-	if (((void*) *sara) != kretprobe_trampoline)
-		orig_ret_addr = (void*) *sara;
-
-	if ((ri = get_free_rp_inst(rp)) != NULL) {
-		ri->rp = rp;
-		ri->stack_addr = sara;
-		ri->ret_addr = orig_ret_addr;
-		add_rp_inst(ri);
 		/* Replace the return addr with trampoline addr */
 		*sara = (unsigned long) &kretprobe_trampoline;
-	} else {
-		rp->nmissed++;
-	}
-}

-void arch_kprobe_flush_task(struct task_struct *tk)
-{
-	struct kretprobe_instance *ri;
-	while ((ri = get_rp_inst_tsk(tk)) != NULL) {
-		*((unsigned long *)(ri->stack_addr)) =
-					(unsigned long) ri->ret_addr;
-		recycle_rp_inst(ri);
-	}
+                add_rp_inst(ri);
+        } else {
+                rp->nmissed++;
+        }
 }

 /*
@@ -428,36 +401,59 @@ no_kprobe:
  */
 int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
-	struct task_struct *tsk;
-	struct kretprobe_instance *ri;
-	struct hlist_head *head;
-	struct hlist_node *node;
-	unsigned long *sara = (unsigned long *)regs->rsp - 1;
-
-	tsk = arch_get_kprobe_task(sara);
-	head = kretprobe_inst_table_head(tsk);
-
-	hlist_for_each_entry(ri, node, head, hlist) {
-		if (ri->stack_addr == sara && ri->rp) {
-			if (ri->rp->handler)
-				ri->rp->handler(ri, regs);
-		}
-	}
-	return 0;
-}
+        struct kretprobe_instance *ri = NULL;
+        struct hlist_head *head;
+        struct hlist_node *node, *tmp;
+	unsigned long orig_ret_address = 0;
+	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;

-void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs,
-						unsigned long flags)
-{
-	struct kretprobe_instance *ri;
-	/* RA already popped */
-	unsigned long *sara = ((unsigned long *)regs->rsp) - 1;
+        head = kretprobe_inst_table_head(current);

-	while ((ri = get_rp_inst(sara))) {
-		regs->rip = (unsigned long)ri->ret_addr;
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because an multiple functions in the call path
+	 * have a return probe installed on them, and/or more then one return
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+         *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+                if (ri->task != current)
+			/* another task is sharing our hash bucket */
+                        continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
 		recycle_rp_inst(ri);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
 	}
-	regs->eflags &= ~TF_MASK;
+
+	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+	regs->rip = orig_ret_address;
+
+	unlock_kprobes();
+	preempt_enable_no_resched();
+
+        /*
+         * By returning a non-zero value, we are telling
+         * kprobe_handler() that we have handled unlocking
+         * and re-enabling preemption.
+         */
+        return 1;
 }

 /*
@@ -550,8 +546,7 @@ int post_kprobe_handler(struct pt_regs *regs)
 		current_kprobe->post_handler(current_kprobe, regs, 0);
 	}

-	if (current_kprobe->post_handler != trampoline_post_handler)
-		resume_execution(current_kprobe, regs);
+	resume_execution(current_kprobe, regs);
 	regs->eflags |= kprobe_saved_rflags;

 	/* Restore the original saved kprobes variables and continue. */
@@ -682,111 +677,12 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }

-/*
- * kprobe->ainsn.insn points to the copy of the instruction to be single-stepped.
- * By default on x86_64, pages we get from kmalloc or vmalloc are not
- * executable.  Single-stepping an instruction on such a page yields an
- * oops.  So instead of storing the instruction copies in their respective
- * kprobe objects, we allocate a page, map it executable, and store all the
- * instruction copies there.  (We can allocate additional pages if somebody
- * inserts a huge number of probes.)  Each page can hold up to INSNS_PER_PAGE
- * instruction slots, each of which is MAX_INSN_SIZE*sizeof(kprobe_opcode_t)
- * bytes.
- */
-#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE*sizeof(kprobe_opcode_t)))
-struct kprobe_insn_page {
-	struct hlist_node hlist;
-	kprobe_opcode_t *insns;		/* page of instruction slots */
-	char slot_used[INSNS_PER_PAGE];
-	int nused;
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
 };

-static struct hlist_head kprobe_insn_pages;
-
-/**
- * get_insn_slot() - Find a slot on an executable page for an instruction.
- * We allocate an executable page if there's no room on existing ones.
- */
-static kprobe_opcode_t *get_insn_slot(void)
-{
-	struct kprobe_insn_page *kip;
-	struct hlist_node *pos;
-
-	hlist_for_each(pos, &kprobe_insn_pages) {
-		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
-		if (kip->nused < INSNS_PER_PAGE) {
-			int i;
-			for (i = 0; i < INSNS_PER_PAGE; i++) {
-				if (!kip->slot_used[i]) {
-					kip->slot_used[i] = 1;
-					kip->nused++;
-					return kip->insns + (i*MAX_INSN_SIZE);
-				}
-			}
-			/* Surprise!  No unused slots.  Fix kip->nused. */
-			kip->nused = INSNS_PER_PAGE;
-		}
-	}
-
-	/* All out of space.  Need to allocate a new page. Use slot 0.*/
-	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
-	if (!kip) {
-		return NULL;
-	}
-
-	/*
-	 * For the %rip-relative displacement fixups to be doable, we
-	 * need our instruction copy to be within +/- 2GB of any data it
-	 * might access via %rip.  That is, within 2GB of where the
-	 * kernel image and loaded module images reside.  So we allocate
-	 * a page in the module loading area.
-	 */
-	kip->insns = module_alloc(PAGE_SIZE);
-	if (!kip->insns) {
-		kfree(kip);
-		return NULL;
-	}
-	INIT_HLIST_NODE(&kip->hlist);
-	hlist_add_head(&kip->hlist, &kprobe_insn_pages);
-	memset(kip->slot_used, 0, INSNS_PER_PAGE);
-	kip->slot_used[0] = 1;
-	kip->nused = 1;
-	return kip->insns;
-}
-
-/**
- * free_insn_slot() - Free instruction slot obtained from get_insn_slot().
- */
-static void free_insn_slot(kprobe_opcode_t *slot)
+int __init arch_init(void)
 {
-	struct kprobe_insn_page *kip;
-	struct hlist_node *pos;
-
-	hlist_for_each(pos, &kprobe_insn_pages) {
-		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
-		if (kip->insns <= slot
-		    && slot < kip->insns+(INSNS_PER_PAGE*MAX_INSN_SIZE)) {
-			int i = (slot - kip->insns) / MAX_INSN_SIZE;
-			kip->slot_used[i] = 0;
-			kip->nused--;
-			if (kip->nused == 0) {
-				/*
-				 * Page is no longer in use.  Free it unless
-				 * it's the last one.  We keep the last one
-				 * so as not to have to set it up again the
-				 * next time somebody inserts a probe.
-				 */
-				hlist_del(&kip->hlist);
-				if (hlist_empty(&kprobe_insn_pages)) {
-					INIT_HLIST_NODE(&kip->hlist);
-					hlist_add_head(&kip->hlist,
-						&kprobe_insn_pages);
-				} else {
-					module_free(NULL, kip->insns);
-					kfree(kip);
-				}
-			}
-			return;
-		}
-	}
+	return register_kprobe(&trampoline_p);
 }

+ 29 - 0
arch/x86_64/kernel/process.c

@@ -481,6 +481,33 @@ out:
 	return err;
 }

+/*
+ * This function selects if the context switch from prev to next
+ * has to tweak the TSC disable bit in the cr4.
+ */
+static inline void disable_tsc(struct task_struct *prev_p,
+			       struct task_struct *next_p)
+{
+	struct thread_info *prev, *next;
+
+	/*
+	 * gcc should eliminate the ->thread_info dereference if
+	 * has_secure_computing returns 0 at compile time (SECCOMP=n).
+	 */
+	prev = prev_p->thread_info;
+	next = next_p->thread_info;
+
+	if (has_secure_computing(prev) || has_secure_computing(next)) {
+		/* slow path here */
+		if (has_secure_computing(prev) &&
+		    !has_secure_computing(next)) {
+			write_cr4(read_cr4() & ~X86_CR4_TSD);
+		} else if (!has_secure_computing(prev) &&
+			   has_secure_computing(next))
+			write_cr4(read_cr4() | X86_CR4_TSD);
+	}
+}
+
 /*
  * This special macro can be used to load a debugging register
  */
@@ -599,6 +626,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *
 		}
 	}

+	disable_tsc(prev_p, next_p);
+
 	return prev_p;
 }


+ 3 - 2
drivers/block/as-iosched.c

@@ -1806,7 +1806,8 @@ static void as_put_request(request_queue_t *q, struct request *rq)
 	rq->elevator_private = NULL;
 }

-static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+static int as_set_request(request_queue_t *q, struct request *rq,
+			  struct bio *bio, int gfp_mask)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
@@ -1827,7 +1828,7 @@ static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
 	return 1;
 }

-static int as_may_queue(request_queue_t *q, int rw)
+static int as_may_queue(request_queue_t *q, int rw, struct bio *bio)
 {
 	int ret = ELV_MQUEUE_MAY;
 	struct as_data *ad = q->elevator->elevator_data;

+ 7 - 10
drivers/block/cciss.c

@@ -1,6 +1,6 @@
 /*
  *    Disk Array driver for HP SA 5xxx and 6xxx Controllers
- *    Copyright 2000, 2002 Hewlett-Packard Development Company, L.P.
+ *    Copyright 2000, 2005 Hewlett-Packard Development Company, L.P.
  *
  *    This program is free software; you can redistribute it and/or modify
  *    it under the terms of the GNU General Public License as published by
@@ -54,7 +54,7 @@
 MODULE_AUTHOR("Hewlett-Packard Company");
 MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 2.6.6");
 MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
-			" SA6i P600 P800 E400");
+			" SA6i P600 P800 E400 E300");
 MODULE_LICENSE("GPL");

 #include "cciss_cmd.h"
@@ -85,8 +85,10 @@ static const struct pci_device_id cciss_pci_device_id[] = {
 		0x103C, 0x3225, 0, 0, 0},
 	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSB,
 		0x103c, 0x3223, 0, 0, 0},
-	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSB,
+	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC,
 		0x103c, 0x3231, 0, 0, 0},
+	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC,
+		0x103c, 0x3233, 0, 0, 0},
 	{0,}
 };
 MODULE_DEVICE_TABLE(pci, cciss_pci_device_id);
@@ -110,6 +112,7 @@ static struct board_type products[] = {
 	{ 0x3225103C, "Smart Array P600", &SA5_access},
 	{ 0x3225103C, "Smart Array P600", &SA5_access},
 	{ 0x3223103C, "Smart Array P800", &SA5_access},
 	{ 0x3223103C, "Smart Array P800", &SA5_access},
 	{ 0x3231103C, "Smart Array E400", &SA5_access},
 	{ 0x3231103C, "Smart Array E400", &SA5_access},
+	{ 0x3233103C, "Smart Array E300", &SA5_access},
 };
 };
 
 
 /* How long to wait (in millesconds) for board to go into simple mode */
 /* How long to wait (in millesconds) for board to go into simple mode */
@@ -635,6 +638,7 @@ static int cciss_ioctl(struct inode *inode, struct file *filep,
 		cciss_pci_info_struct pciinfo;
 		cciss_pci_info_struct pciinfo;
 
 
 		if (!arg) return -EINVAL;
 		if (!arg) return -EINVAL;
+		pciinfo.domain = pci_domain_nr(host->pdev->bus);
 		pciinfo.bus = host->pdev->bus->number;
 		pciinfo.bus = host->pdev->bus->number;
 		pciinfo.dev_fn = host->pdev->devfn;
 		pciinfo.dev_fn = host->pdev->devfn;
 		pciinfo.board_id = host->board_id;
 		pciinfo.board_id = host->board_id;
@@ -787,13 +791,6 @@ static int cciss_ioctl(struct inode *inode, struct file *filep,
  		luninfo.LunID = drv->LunID;
  		luninfo.LunID = drv->LunID;
  		luninfo.num_opens = drv->usage_count;
  		luninfo.num_opens = drv->usage_count;
  		luninfo.num_parts = 0;
  		luninfo.num_parts = 0;
- 		/* count partitions 1 to 15 with sizes > 0 */
- 		for (i = 0; i < MAX_PART - 1; i++) {
-			if (!disk->part[i])
-				continue;
-			if (disk->part[i]->nr_sects != 0)
-				luninfo.num_parts++;
-		}
  		if (copy_to_user(argp, &luninfo,
  		if (copy_to_user(argp, &luninfo,
  				sizeof(LogvolInfo_struct)))
  				sizeof(LogvolInfo_struct)))
  			return -EFAULT;
  			return -EFAULT;

+ 691 - 369
drivers/block/cfq-iosched.c

The changes to this file are not shown because the diff is too large.

+ 2 - 1
drivers/block/deadline-iosched.c

@@ -760,7 +760,8 @@ static void deadline_put_request(request_queue_t *q, struct request *rq)
 }
 
 static int
-deadline_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
+		     int gfp_mask)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq;

+ 5 - 4
drivers/block/elevator.c

@@ -486,12 +486,13 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
 	return NULL;
 }
 
-int elv_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
+		    int gfp_mask)
 {
 	elevator_t *e = q->elevator;
 
 	if (e->ops->elevator_set_req_fn)
-		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
+		return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
 
 	rq->elevator_private = NULL;
 	return 0;
@@ -505,12 +506,12 @@ void elv_put_request(request_queue_t *q, struct request *rq)
 		e->ops->elevator_put_req_fn(q, rq);
 }
 
-int elv_may_queue(request_queue_t *q, int rw)
+int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
 {
 	elevator_t *e = q->elevator;
 
 	if (e->ops->elevator_may_queue_fn)
-		return e->ops->elevator_may_queue_fn(q, rw);
+		return e->ops->elevator_may_queue_fn(q, rw, bio);
 
 	return ELV_MQUEUE_MAY;
 }

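Net effect of the elevator.c change: elv_set_request() and elv_may_queue() now forward the bio that triggered the allocation, so an I/O scheduler can inspect the submitting context (for example its io priority) before a request even exists. A sketch of a scheduler adopting the new hooks; the signatures and return codes come from the hunks above and below, while "myiosched" itself is hypothetical:

/* Wired into the scheduler's struct elevator_ops as
 * .elevator_set_req_fn and .elevator_may_queue_fn. */
static int myiosched_set_request(request_queue_t *q, struct request *rq,
				 struct bio *bio, int gfp_mask)
{
	/* bio can be NULL: blk_get_request() passes NULL, so any
	 * per-bio lookup (io context, priority) must tolerate that. */
	rq->elevator_private = NULL;
	return 0;
}

static int myiosched_may_queue(request_queue_t *q, int rw, struct bio *bio)
{
	/* A real scheduler would peek at the submitter here and could
	 * return ELV_MQUEUE_NO instead, as handled in get_request(). */
	return ELV_MQUEUE_MAY;
}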
+ 36 - 23
drivers/block/ll_rw_blk.c

@@ -276,6 +276,7 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 	rq->errors = 0;
 	rq->errors = 0;
 	rq->rq_status = RQ_ACTIVE;
 	rq->rq_status = RQ_ACTIVE;
 	rq->bio = rq->biotail = NULL;
 	rq->bio = rq->biotail = NULL;
+	rq->ioprio = 0;
 	rq->buffer = NULL;
 	rq->buffer = NULL;
 	rq->ref_count = 1;
 	rq->ref_count = 1;
 	rq->q = q;
 	rq->q = q;
@@ -1442,11 +1443,7 @@ void __generic_unplug_device(request_queue_t *q)
 	if (!blk_remove_plug(q))
 	if (!blk_remove_plug(q))
 		return;
 		return;
 
 
-	/*
-	 * was plugged, fire request_fn if queue has stuff to do
-	 */
-	if (elv_next_request(q))
-		q->request_fn(q);
+	q->request_fn(q);
 }
 }
 EXPORT_SYMBOL(__generic_unplug_device);
 EXPORT_SYMBOL(__generic_unplug_device);
 
 
@@ -1776,8 +1773,8 @@ static inline void blk_free_request(request_queue_t *q, struct request *rq)
 	mempool_free(rq, q->rq.rq_pool);
 	mempool_free(rq, q->rq.rq_pool);
 }
 }
 
 
-static inline struct request *blk_alloc_request(request_queue_t *q, int rw,
-						int gfp_mask)
+static inline struct request *
+blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
 {
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
 
@@ -1790,7 +1787,7 @@ static inline struct request *blk_alloc_request(request_queue_t *q, int rw,
 	 */
 	 */
 	rq->flags = rw;
 	rq->flags = rw;
 
 
-	if (!elv_set_request(q, rq, gfp_mask))
+	if (!elv_set_request(q, rq, bio, gfp_mask))
 		return rq;
 		return rq;
 
 
 	mempool_free(rq, q->rq.rq_pool);
 	mempool_free(rq, q->rq.rq_pool);
@@ -1872,7 +1869,8 @@ static void freed_request(request_queue_t *q, int rw)
 /*
 /*
  * Get a free request, queue_lock must not be held
  * Get a free request, queue_lock must not be held
  */
  */
-static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
+static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
+				   int gfp_mask)
 {
 {
 	struct request *rq = NULL;
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
 	struct request_list *rl = &q->rq;
@@ -1895,7 +1893,7 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 		}
 		}
 	}
 	}
 
 
-	switch (elv_may_queue(q, rw)) {
+	switch (elv_may_queue(q, rw, bio)) {
 		case ELV_MQUEUE_NO:
 		case ELV_MQUEUE_NO:
 			goto rq_starved;
 			goto rq_starved;
 		case ELV_MQUEUE_MAY:
 		case ELV_MQUEUE_MAY:
@@ -1920,7 +1918,7 @@ get_rq:
 		set_queue_congested(q, rw);
 		set_queue_congested(q, rw);
 	spin_unlock_irq(q->queue_lock);
 	spin_unlock_irq(q->queue_lock);
 
 
-	rq = blk_alloc_request(q, rw, gfp_mask);
+	rq = blk_alloc_request(q, rw, bio, gfp_mask);
 	if (!rq) {
 	if (!rq) {
 		/*
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
 		 * Allocation failed presumably due to memory. Undo anything
@@ -1961,7 +1959,8 @@ out:
  * No available requests for this queue, unplug the device and wait for some
  * No available requests for this queue, unplug the device and wait for some
  * requests to become available.
  * requests to become available.
  */
  */
-static struct request *get_request_wait(request_queue_t *q, int rw)
+static struct request *get_request_wait(request_queue_t *q, int rw,
+					struct bio *bio)
 {
 {
 	DEFINE_WAIT(wait);
 	DEFINE_WAIT(wait);
 	struct request *rq;
 	struct request *rq;
@@ -1972,7 +1971,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw)
 		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
 		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
 				TASK_UNINTERRUPTIBLE);
 				TASK_UNINTERRUPTIBLE);
 
 
-		rq = get_request(q, rw, GFP_NOIO);
+		rq = get_request(q, rw, bio, GFP_NOIO);
 
 
 		if (!rq) {
 		if (!rq) {
 			struct io_context *ioc;
 			struct io_context *ioc;
@@ -2003,9 +2002,9 @@ struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask)
 	BUG_ON(rw != READ && rw != WRITE);
 	BUG_ON(rw != READ && rw != WRITE);
 
 
 	if (gfp_mask & __GFP_WAIT)
 	if (gfp_mask & __GFP_WAIT)
-		rq = get_request_wait(q, rw);
+		rq = get_request_wait(q, rw, NULL);
 	else
 	else
-		rq = get_request(q, rw, gfp_mask);
+		rq = get_request(q, rw, NULL, gfp_mask);
 
 
 	return rq;
 	return rq;
 }
 }
@@ -2333,7 +2332,6 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
 		return;
 		return;
 
 
 	req->rq_status = RQ_INACTIVE;
 	req->rq_status = RQ_INACTIVE;
-	req->q = NULL;
 	req->rl = NULL;
 	req->rl = NULL;
 
 
 	/*
 	/*
@@ -2462,6 +2460,8 @@ static int attempt_merge(request_queue_t *q, struct request *req,
 		req->rq_disk->in_flight--;
 		req->rq_disk->in_flight--;
 	}
 	}
 
 
+	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
+
 	__blk_put_request(q, next);
 	__blk_put_request(q, next);
 	return 1;
 	return 1;
 }
 }
@@ -2514,11 +2514,13 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 {
 {
 	struct request *req, *freereq = NULL;
 	struct request *req, *freereq = NULL;
 	int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
 	int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
+	unsigned short prio;
 	sector_t sector;
 	sector_t sector;
 
 
 	sector = bio->bi_sector;
 	sector = bio->bi_sector;
 	nr_sectors = bio_sectors(bio);
 	nr_sectors = bio_sectors(bio);
 	cur_nr_sectors = bio_cur_sectors(bio);
 	cur_nr_sectors = bio_cur_sectors(bio);
+	prio = bio_prio(bio);
 
 
 	rw = bio_data_dir(bio);
 	rw = bio_data_dir(bio);
 	sync = bio_sync(bio);
 	sync = bio_sync(bio);
@@ -2559,6 +2561,7 @@ again:
 			req->biotail->bi_next = bio;
 			req->biotail->bi_next = bio;
 			req->biotail = bio;
 			req->biotail = bio;
 			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
 			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
+			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_back_merge(q, req))
 			if (!attempt_back_merge(q, req))
 				elv_merged_request(q, req);
 				elv_merged_request(q, req);
@@ -2583,6 +2586,7 @@ again:
 			req->hard_cur_sectors = cur_nr_sectors;
 			req->hard_cur_sectors = cur_nr_sectors;
 			req->sector = req->hard_sector = sector;
 			req->sector = req->hard_sector = sector;
 			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
 			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
+			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_front_merge(q, req))
 			if (!attempt_front_merge(q, req))
 				elv_merged_request(q, req);
 				elv_merged_request(q, req);
@@ -2610,7 +2614,7 @@ get_rq:
 		freereq = NULL;
 		freereq = NULL;
 	} else {
 	} else {
 		spin_unlock_irq(q->queue_lock);
 		spin_unlock_irq(q->queue_lock);
-		if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) {
+		if ((freereq = get_request(q, rw, bio, GFP_ATOMIC)) == NULL) {
 			/*
 			/*
 			 * READA bit set
 			 * READA bit set
 			 */
 			 */
@@ -2618,7 +2622,7 @@ get_rq:
 			if (bio_rw_ahead(bio))
 			if (bio_rw_ahead(bio))
 				goto end_io;
 				goto end_io;
 	
 	
-			freereq = get_request_wait(q, rw);
+			freereq = get_request_wait(q, rw, bio);
 		}
 		}
 		goto again;
 		goto again;
 	}
 	}
@@ -2646,6 +2650,7 @@ get_rq:
 	req->buffer = bio_data(bio);	/* see ->buffer comment above */
 	req->buffer = bio_data(bio);	/* see ->buffer comment above */
 	req->waiting = NULL;
 	req->waiting = NULL;
 	req->bio = req->biotail = bio;
 	req->bio = req->biotail = bio;
+	req->ioprio = prio;
 	req->rq_disk = bio->bi_bdev->bd_disk;
 	req->rq_disk = bio->bi_bdev->bd_disk;
 	req->start_time = jiffies;
 	req->start_time = jiffies;
 
 
@@ -2674,7 +2679,7 @@ static inline void blk_partition_remap(struct bio *bio)
 	if (bdev != bdev->bd_contains) {
 	if (bdev != bdev->bd_contains) {
 		struct hd_struct *p = bdev->bd_part;
 		struct hd_struct *p = bdev->bd_part;
 
 
-		switch (bio->bi_rw) {
+		switch (bio_data_dir(bio)) {
 		case READ:
 		case READ:
 			p->read_sectors += bio_sectors(bio);
 			p->read_sectors += bio_sectors(bio);
 			p->reads++;
 			p->reads++;
@@ -2693,6 +2698,7 @@ void blk_finish_queue_drain(request_queue_t *q)
 {
 {
 	struct request_list *rl = &q->rq;
 	struct request_list *rl = &q->rq;
 	struct request *rq;
 	struct request *rq;
+	int requeued = 0;
 
 
 	spin_lock_irq(q->queue_lock);
 	spin_lock_irq(q->queue_lock);
 	clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
 	clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
@@ -2701,9 +2707,13 @@ void blk_finish_queue_drain(request_queue_t *q)
 		rq = list_entry_rq(q->drain_list.next);
 		rq = list_entry_rq(q->drain_list.next);
 
 
 		list_del_init(&rq->queuelist);
 		list_del_init(&rq->queuelist);
-		__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
+		elv_requeue_request(q, rq);
+		requeued++;
 	}
 	}
 
 
+	if (requeued)
+		q->request_fn(q);
+
 	spin_unlock_irq(q->queue_lock);
 	spin_unlock_irq(q->queue_lock);
 
 
 	wake_up(&rl->wait[0]);
 	wake_up(&rl->wait[0]);
@@ -2900,7 +2910,7 @@ void submit_bio(int rw, struct bio *bio)
 
 
 	BIO_BUG_ON(!bio->bi_size);
 	BIO_BUG_ON(!bio->bi_size);
 	BIO_BUG_ON(!bio->bi_io_vec);
 	BIO_BUG_ON(!bio->bi_io_vec);
-	bio->bi_rw = rw;
+	bio->bi_rw |= rw;
 	if (rw & WRITE)
 	if (rw & WRITE)
 		mod_page_state(pgpgout, count);
 		mod_page_state(pgpgout, count);
 	else
 	else
@@ -3257,8 +3267,11 @@ void exit_io_context(void)
 	struct io_context *ioc;
 	struct io_context *ioc;
 
 
 	local_irq_save(flags);
 	local_irq_save(flags);
+	task_lock(current);
 	ioc = current->io_context;
 	ioc = current->io_context;
 	current->io_context = NULL;
 	current->io_context = NULL;
+	ioc->task = NULL;
+	task_unlock(current);
 	local_irq_restore(flags);
 	local_irq_restore(flags);
 
 
 	if (ioc->aic && ioc->aic->exit)
 	if (ioc->aic && ioc->aic->exit)
@@ -3293,12 +3306,12 @@ struct io_context *get_io_context(int gfp_flags)
 	ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
 	ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
 	if (ret) {
 	if (ret) {
 		atomic_set(&ret->refcount, 1);
 		atomic_set(&ret->refcount, 1);
-		ret->pid = tsk->pid;
+		ret->task = current;
+		ret->set_ioprio = NULL;
 		ret->last_waited = jiffies; /* doesn't matter... */
 		ret->last_waited = jiffies; /* doesn't matter... */
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->aic = NULL;
 		ret->aic = NULL;
 		ret->cic = NULL;
 		ret->cic = NULL;
-		spin_lock_init(&ret->lock);
 
 
 		local_irq_save(flags);
 		local_irq_save(flags);
 
 

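Taken together, the ll_rw_blk.c hunks plumb an io priority through the block layer: __make_request() copies bio_prio(bio) into req->ioprio, merges keep ioprio_best() of the two sides, and submit_bio() now ORs the direction bits into bi_rw instead of overwriting the field. A fragment illustrating the submitter's side; bio_set_prio() and IOPRIO_PRIO_VALUE() are assumed helpers from the ioprio series, and the assumption is that the priority rides in the upper bits of bi_rw, which is why the |= matters:

/* Fragment, not a complete driver: pack a best-effort priority into the
 * bio before submission.  Because submit_bio() now does bi_rw |= rw,
 * the priority bits survive and __make_request() copies them into
 * req->ioprio as shown above. */
static void submit_prioritised_write(struct bio *bio)
{
	bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0));
	submit_bio(WRITE, bio);
}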
+ 9 - 1
drivers/block/swim3.c

@@ -253,7 +253,7 @@ static int floppy_revalidate(struct gendisk *disk);
 static int swim3_add_device(struct device_node *swims);
 static int swim3_add_device(struct device_node *swims);
 int swim3_init(void);
 int swim3_init(void);
 
 
-#ifndef CONFIG_PMAC_PBOOK
+#ifndef CONFIG_PMAC_MEDIABAY
 #define check_media_bay(which, what)	1
 #define check_media_bay(which, what)	1
 #endif
 #endif
 
 
@@ -297,9 +297,11 @@ static void do_fd_request(request_queue_t * q)
 	int i;
 	int i;
 	for(i=0;i<floppy_count;i++)
 	for(i=0;i<floppy_count;i++)
 	{
 	{
+#ifdef CONFIG_PMAC_MEDIABAY
 		if (floppy_states[i].media_bay &&
 		if (floppy_states[i].media_bay &&
 			check_media_bay(floppy_states[i].media_bay, MB_FD))
 			check_media_bay(floppy_states[i].media_bay, MB_FD))
 			continue;
 			continue;
+#endif /* CONFIG_PMAC_MEDIABAY */
 		start_request(&floppy_states[i]);
 		start_request(&floppy_states[i]);
 	}
 	}
 	sti();
 	sti();
@@ -856,8 +858,10 @@ static int floppy_ioctl(struct inode *inode, struct file *filp,
 	if ((cmd & 0x80) && !capable(CAP_SYS_ADMIN))
 	if ((cmd & 0x80) && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 		return -EPERM;
 
 
+#ifdef CONFIG_PMAC_MEDIABAY
 	if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
 	if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
 		return -ENXIO;
 		return -ENXIO;
+#endif
 
 
 	switch (cmd) {
 	switch (cmd) {
 	case FDEJECT:
 	case FDEJECT:
@@ -881,8 +885,10 @@ static int floppy_open(struct inode *inode, struct file *filp)
 	int n, err = 0;
 	int n, err = 0;
 
 
 	if (fs->ref_count == 0) {
 	if (fs->ref_count == 0) {
+#ifdef CONFIG_PMAC_MEDIABAY
 		if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
 		if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
 			return -ENXIO;
 			return -ENXIO;
+#endif
 		out_8(&sw->setup, S_IBM_DRIVE | S_FCLK_DIV2);
 		out_8(&sw->setup, S_IBM_DRIVE | S_FCLK_DIV2);
 		out_8(&sw->control_bic, 0xff);
 		out_8(&sw->control_bic, 0xff);
 		out_8(&sw->mode, 0x95);
 		out_8(&sw->mode, 0x95);
@@ -967,8 +973,10 @@ static int floppy_revalidate(struct gendisk *disk)
 	struct swim3 __iomem *sw;
 	struct swim3 __iomem *sw;
 	int ret, n;
 	int ret, n;
 
 
+#ifdef CONFIG_PMAC_MEDIABAY
 	if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
 	if (fs->media_bay && check_media_bay(fs->media_bay, MB_FD))
 		return -ENXIO;
 		return -ENXIO;
+#endif
 
 
 	sw = fs->swim3;
 	sw = fs->swim3;
 	grab_drive(fs, revalidating, 0);
 	grab_drive(fs, revalidating, 0);

+ 4 - 3
drivers/block/sx8.c

@@ -26,6 +26,7 @@
 #include <linux/delay.h>
 #include <linux/time.h>
 #include <linux/hdreg.h>
+#include <linux/dma-mapping.h>
 #include <asm/io.h>
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
@@ -1582,9 +1583,9 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_out;
 
 #if IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */
-	rc = pci_set_dma_mask(pdev, 0xffffffffffffffffULL);
+	rc = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
 	if (!rc) {
-		rc = pci_set_consistent_dma_mask(pdev, 0xffffffffffffffffULL);
+		rc = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
 		if (rc) {
 			printk(KERN_ERR DRV_NAME "(%s): consistent DMA mask failure\n",
 				pci_name(pdev));
@@ -1593,7 +1594,7 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 		pci_dac = 1;
 	} else {
 #endif
-		rc = pci_set_dma_mask(pdev, 0xffffffffULL);
+		rc = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
 		if (rc) {
 			printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n",
 				pci_name(pdev));

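The sx8.c change swaps the hand-rolled 0xffffffff... masks for DMA_64BIT_MASK and DMA_32BIT_MASK from <linux/dma-mapping.h>. For reference, the usual probe-time pattern those constants are used in; sketch only, with the error paths trimmed, mirroring what carm_init_one() does above:

/* Try 64-bit DMA addressing first, fall back to 32-bit, give up if even
 * that fails. */
static int example_setup_dma(struct pci_dev *pdev)
{
	if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK) &&
	    !pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK))
		return 1;			/* 64-bit addressing available */

	if (pci_set_dma_mask(pdev, DMA_32BIT_MASK))
		return -EIO;			/* no usable DMA mask at all */

	return 0;				/* 32-bit fallback */
}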
+ 9 - 0
drivers/bluetooth/bluecard_cs.c

@@ -1089,6 +1089,14 @@ static int bluecard_event(event_t event, int priority, event_callback_args_t *ar
 	return 0;
 }
 
+static struct pcmcia_device_id bluecard_ids[] = {
+	PCMCIA_DEVICE_PROD_ID12("BlueCard", "LSE041", 0xbaf16fbf, 0x657cc15e),
+	PCMCIA_DEVICE_PROD_ID12("BTCFCARD", "LSE139", 0xe3987764, 0x2524b59c),
+	PCMCIA_DEVICE_PROD_ID12("WSS", "LSE039", 0x0a0736ec, 0x24e6dfab),
+	PCMCIA_DEVICE_NULL
+};
+MODULE_DEVICE_TABLE(pcmcia, bluecard_ids);
+
 static struct pcmcia_driver bluecard_driver = {
 	.owner		= THIS_MODULE,
 	.drv		= {
@@ -1096,6 +1104,7 @@ static struct pcmcia_driver bluecard_driver = {
 	},
 	.attach		= bluecard_attach,
 	.detach		= bluecard_detach,
+	.id_table	= bluecard_ids,
 };
 
 static int __init init_bluecard_cs(void)

+ 7 - 0
drivers/bluetooth/bt3c_cs.c

@@ -935,6 +935,12 @@ static int bt3c_event(event_t event, int priority, event_callback_args_t *args)
 	return 0;
 }
 
+static struct pcmcia_device_id bt3c_ids[] = {
+	PCMCIA_DEVICE_PROD_ID13("3COM", "Bluetooth PC Card", 0xefce0a31, 0xd4ce9b02),
+	PCMCIA_DEVICE_NULL
+};
+MODULE_DEVICE_TABLE(pcmcia, bt3c_ids);
+
 static struct pcmcia_driver bt3c_driver = {
 	.owner		= THIS_MODULE,
 	.drv		= {
@@ -942,6 +948,7 @@ static struct pcmcia_driver bt3c_driver = {
 	},
 	.attach		= bt3c_attach,
 	.detach		= bt3c_detach,
+	.id_table	= bt3c_ids,
 };
 
 static int __init init_bt3c_cs(void)

+ 7 - 0
drivers/bluetooth/btuart_cs.c

@@ -855,6 +855,12 @@ static int btuart_event(event_t event, int priority, event_callback_args_t *args
 	return 0;
 }
 
+static struct pcmcia_device_id btuart_ids[] = {
+	/* don't use this driver. Use serial_cs + hci_uart instead */
+	PCMCIA_DEVICE_NULL
+};
+MODULE_DEVICE_TABLE(pcmcia, btuart_ids);
+
 static struct pcmcia_driver btuart_driver = {
 	.owner		= THIS_MODULE,
 	.drv		= {
@@ -862,6 +868,7 @@ static struct pcmcia_driver btuart_driver = {
 	},
 	.attach		= btuart_attach,
 	.detach		= btuart_detach,
+	.id_table	= btuart_ids,
 };
 
 static int __init init_btuart_cs(void)

+ 8 - 0
drivers/bluetooth/dtl1_cs.c

@@ -807,6 +807,13 @@ static int dtl1_event(event_t event, int priority, event_callback_args_t *args)
 	return 0;
 }
 
+static struct pcmcia_device_id dtl1_ids[] = {
+	PCMCIA_DEVICE_PROD_ID12("Nokia Mobile Phones", "DTL-1", 0xe1bfdd64, 0xe168480d),
+	PCMCIA_DEVICE_PROD_ID12("Socket", "CF", 0xb38bcc2e, 0x44ebf863),
+	PCMCIA_DEVICE_NULL
+};
+MODULE_DEVICE_TABLE(pcmcia, dtl1_ids);
+
 static struct pcmcia_driver dtl1_driver = {
 	.owner		= THIS_MODULE,
 	.drv		= {
@@ -814,6 +821,7 @@ static struct pcmcia_driver dtl1_driver = {
 	},
 	.attach		= dtl1_attach,
 	.detach		= dtl1_detach,
+	.id_table	= dtl1_ids,
 };
 
 static int __init init_dtl1_cs(void)

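The four Bluetooth PCMCIA drivers above (and the other PCMCIA drivers further down) all gain the same two pieces: a pcmcia_device_id table exported with MODULE_DEVICE_TABLE(), and an .id_table pointer in their pcmcia_driver. That is what lets the PCMCIA core match inserted cards to drivers and lets userspace hotplug autoload the right module. The generic shape, with made-up IDs and a hypothetical "example_cs" driver:

static struct pcmcia_device_id example_ids[] = {
	PCMCIA_DEVICE_PROD_ID12("Vendor", "Product", 0x12345678, 0x9abcdef0),
	PCMCIA_DEVICE_MANF_CARD(0x0101, 0x0202),
	PCMCIA_DEVICE_NULL
};
MODULE_DEVICE_TABLE(pcmcia, example_ids);

static struct pcmcia_driver example_driver = {
	.owner		= THIS_MODULE,
	.drv		= {
		.name	= "example_cs",
	},
	.attach		= example_attach,	/* hypothetical, as in the drivers above */
	.detach		= example_detach,
	.id_table	= example_ids,
};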
+ 0 - 3
drivers/char/misc.c

@@ -308,9 +308,6 @@ static int __init misc_init(void)
 #endif
 #ifdef CONFIG_BVME6000
 	rtc_DP8570A_init();
-#endif
-#ifdef CONFIG_PMAC_PBOOK
-	pmu_device_init();
 #endif
 	if (register_chrdev(MISC_MAJOR,"misc",&misc_fops)) {
 		printk("unable to get major %d for misc devices\n",

+ 8 - 1
drivers/char/pcmcia/synclink_cs.c

@@ -581,7 +581,7 @@ static dev_link_t *mgslpc_attach(void)
 
 
     /* Interrupt setup */
     /* Interrupt setup */
     link->irq.Attributes = IRQ_TYPE_EXCLUSIVE;
     link->irq.Attributes = IRQ_TYPE_EXCLUSIVE;
-    link->irq.IRQInfo1   = IRQ_INFO2_VALID | IRQ_LEVEL_ID;
+    link->irq.IRQInfo1   = IRQ_LEVEL_ID;
     link->irq.Handler = NULL;
     link->irq.Handler = NULL;
     
     
     link->conf.Attributes = 0;
     link->conf.Attributes = 0;
@@ -3081,6 +3081,12 @@ void mgslpc_remove_device(MGSLPC_INFO *remove_info)
 	}
 	}
 }
 }
 
 
+static struct pcmcia_device_id mgslpc_ids[] = {
+	PCMCIA_DEVICE_MANF_CARD(0x02c5, 0x0050),
+	PCMCIA_DEVICE_NULL
+};
+MODULE_DEVICE_TABLE(pcmcia, mgslpc_ids);
+
 static struct pcmcia_driver mgslpc_driver = {
 static struct pcmcia_driver mgslpc_driver = {
 	.owner		= THIS_MODULE,
 	.owner		= THIS_MODULE,
 	.drv		= {
 	.drv		= {
@@ -3088,6 +3094,7 @@ static struct pcmcia_driver mgslpc_driver = {
 	},
 	},
 	.attach		= mgslpc_attach,
 	.attach		= mgslpc_attach,
 	.detach		= mgslpc_detach,
 	.detach		= mgslpc_detach,
+	.id_table	= mgslpc_ids,
 };
 };
 
 
 static struct tty_operations mgslpc_ops = {
 static struct tty_operations mgslpc_ops = {

+ 6 - 0
drivers/ide/Kconfig

@@ -606,6 +606,12 @@ config BLK_DEV_IT8172
 	  <http://www.ite.com.tw/ia/brief_it8172bsp.htm>; picture of the
 	  board at <http://www.mvista.com/partners/semiconductor/ite.html>.
 
+config BLK_DEV_IT821X
+	tristate "IT821X IDE support"
+	help
+	  This driver adds support for the ITE 8211 IDE controller and the
+	  IT 8212 IDE RAID controller in both RAID and pass-through mode.
+
 config BLK_DEV_NS87415
 	tristate "NS87415 chipset support"
 	help

+ 4 - 0
drivers/ide/ide-disk.c

@@ -119,6 +119,10 @@ static int lba_capacity_is_ok (struct hd_driveid *id)
 {
 	unsigned long lba_sects, chs_sects, head, tail;
 
+	/* No non-LBA info .. so valid! */
+	if (id->cyls == 0)
+		return 1;
+
 	/*
 	 * The ATA spec tells large drives to return
 	 * C/H/S = 16383/16/63 independent of their size.

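The new early return in lba_capacity_is_ok() accepts a drive that reports no legacy CHS geometry at all (id->cyls == 0): with nothing to cross-check the LBA capacity against, the LBA figure is taken at face value. Condensed control flow of the function after the hunk above; the full CHS/LBA comparison is elided and only summarised in the comment:

static int lba_capacity_is_ok_condensed(struct hd_driveid *id)
{
	if (id->cyls == 0)	/* no non-LBA info .. so valid */
		return 1;

	/* ... otherwise cross-check id->lba_capacity against the C/H/S
	 * product, allowing for the 16383/16/63 values the ATA spec
	 * mandates for large drives (elided here) ... */
	return 0;		/* placeholder for the real comparison */
}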
+ 0 - 1
drivers/ide/ide-dma.c

@@ -132,7 +132,6 @@ static const struct drive_list_entry drive_blacklist [] = {
 	{ "SAMSUNG CD-ROM SC-148C",	"ALL"		},
 	{ "SAMSUNG CD-ROM SC",	"ALL"		},
 	{ "SanDisk SDP3B-64"	,	"ALL"		},
-	{ "SAMSUNG CD-ROM SN-124",	"ALL"		},
 	{ "ATAPI CD-ROM DRIVE 40X MAXIMUM",	"ALL"		},
 	{ "_NEC DV5800A",               "ALL"           },
 	{ NULL			,	NULL		}

+ 2 - 1
drivers/ide/ide-iops.c

@@ -1181,7 +1181,8 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 		pre_reset(drive);
 		SELECT_DRIVE(drive);
 		udelay (20);
-		hwif->OUTB(WIN_SRST, IDE_COMMAND_REG);
+		hwif->OUTBSYNC(drive, WIN_SRST, IDE_COMMAND_REG);
+		ndelay(400);
 		hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
 		hwgroup->polling = 1;
 		__ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20, NULL);

+ 35 - 0
drivers/ide/legacy/ide-cs.c

@@ -457,6 +457,40 @@ int ide_event(event_t event, int priority,
     return 0;
     return 0;
 } /* ide_event */
 } /* ide_event */
 
 
+static struct pcmcia_device_id ide_ids[] = {
+	PCMCIA_DEVICE_FUNC_ID(4),
+	PCMCIA_DEVICE_MANF_CARD(0x0032, 0x0704),
+	PCMCIA_DEVICE_MANF_CARD(0x00a4, 0x002d),
+	PCMCIA_DEVICE_MANF_CARD(0x2080, 0x0001),
+	PCMCIA_DEVICE_MANF_CARD(0x0045, 0x0401),
+	PCMCIA_DEVICE_PROD_ID123("Caravelle", "PSC-IDE ", "PSC000", 0x8c36137c, 0xd0693ab8, 0x2768a9f0),
+	PCMCIA_DEVICE_PROD_ID123("CDROM", "IDE", "MCD-601p", 0x1b9179ca, 0xede88951, 0x0d902f74),
+	PCMCIA_DEVICE_PROD_ID123("PCMCIA", "IDE CARD", "F1", 0x281f1c5d, 0x1907960c, 0xf7fde8b9),
+	PCMCIA_DEVICE_PROD_ID12("ARGOSY", "CD-ROM", 0x78f308dc, 0x66536591),
+	PCMCIA_DEVICE_PROD_ID12("ARGOSY", "PnPIDE", 0x78f308dc, 0x0c694728),
+	PCMCIA_DEVICE_PROD_ID12("CNF CD-M", "CD-ROM", 0x7d93b852, 0x66536591),
+	PCMCIA_DEVICE_PROD_ID12("Creative Technology Ltd.", "PCMCIA CD-ROM Interface Card", 0xff8c8a45, 0xfe8020c4),
+	PCMCIA_DEVICE_PROD_ID12("Digital Equipment Corporation.", "Digital Mobile Media CD-ROM", 0x17692a66, 0xef1dcbde),
+	PCMCIA_DEVICE_PROD_ID12("EXP", "CD", 0x6f58c983, 0xaae5994f),
+	PCMCIA_DEVICE_PROD_ID12("EXP   ", "CD-ROM", 0x0a5c52fd, 0x66536591),
+	PCMCIA_DEVICE_PROD_ID12("EXP   ", "PnPIDE", 0x0a5c52fd, 0x0c694728),
+	PCMCIA_DEVICE_PROD_ID12("FREECOM", "PCCARD-IDE", 0x5714cbf7, 0x48e0ab8e),
+	PCMCIA_DEVICE_PROD_ID12("IBM", "IBM17JSSFP20", 0xb569a6e5, 0xf2508753),
+	PCMCIA_DEVICE_PROD_ID12("IO DATA", "CBIDE2      ", 0x547e66dc, 0x8671043b),
+	PCMCIA_DEVICE_PROD_ID12("IO DATA", "PCIDE", 0x547e66dc, 0x5c5ab149),
+	PCMCIA_DEVICE_PROD_ID12("IO DATA", "PCIDEII", 0x547e66dc, 0xb3662674),
+	PCMCIA_DEVICE_PROD_ID12("LOOKMEET", "CBIDE2      ", 0xe37be2b5, 0x8671043b),
+	PCMCIA_DEVICE_PROD_ID12(" ", "NinjaATA-", 0x3b6e20c8, 0xebe0bd79),
+	PCMCIA_DEVICE_PROD_ID12("PCMCIA", "CD-ROM", 0x281f1c5d, 0x66536591),
+	PCMCIA_DEVICE_PROD_ID12("PCMCIA", "PnPIDE", 0x281f1c5d, 0x0c694728),
+	PCMCIA_DEVICE_PROD_ID12("SHUTTLE TECHNOLOGY LTD.", "PCCARD-IDE/ATAPI Adapter", 0x4a3f0ba0, 0x322560e1),
+	PCMCIA_DEVICE_PROD_ID12("TOSHIBA", "MK2001MPL", 0xb4585a1a, 0x3489e003),
+	PCMCIA_DEVICE_PROD_ID12("WIT", "IDE16", 0x244e5994, 0x3e232852),
+	PCMCIA_DEVICE_PROD_ID1("STI Flash", 0xe4a13209),
+	PCMCIA_DEVICE_NULL,
+};
+MODULE_DEVICE_TABLE(pcmcia, ide_ids);
+
 static struct pcmcia_driver ide_cs_driver = {
 static struct pcmcia_driver ide_cs_driver = {
 	.owner		= THIS_MODULE,
 	.owner		= THIS_MODULE,
 	.drv		= {
 	.drv		= {
@@ -464,6 +498,7 @@ static struct pcmcia_driver ide_cs_driver = {
 	},
 	},
 	.attach		= ide_attach,
 	.attach		= ide_attach,
 	.detach		= ide_detach,
 	.detach		= ide_detach,
+	.id_table       = ide_ids,
 };
 };
 
 
 static int __init init_ide_cs(void)
 static int __init init_ide_cs(void)

+ 1 - 0
drivers/ide/pci/Makefile

@@ -12,6 +12,7 @@ obj-$(CONFIG_BLK_DEV_HPT34X)		+= hpt34x.o
 obj-$(CONFIG_BLK_DEV_HPT366)		+= hpt366.o
 #obj-$(CONFIG_BLK_DEV_HPT37X)		+= hpt37x.o
 obj-$(CONFIG_BLK_DEV_IT8172)		+= it8172.o
+obj-$(CONFIG_BLK_DEV_IT821X)		+= it821x.o
 obj-$(CONFIG_BLK_DEV_NS87415)		+= ns87415.o
 obj-$(CONFIG_BLK_DEV_OPTI621)		+= opti621.o
 obj-$(CONFIG_BLK_DEV_PDC202XX_OLD)	+= pdc202xx_old.o

+ 48 - 25
drivers/ide/pci/generic.c

@@ -39,6 +39,17 @@
 
 
 #include <asm/io.h>
 #include <asm/io.h>
 
 
+static int ide_generic_all;		/* Set to claim all devices */
+
+static int __init ide_generic_all_on(char *unused)
+{
+	ide_generic_all = 1;
+	printk(KERN_INFO "IDE generic will claim all unknown PCI IDE storage controllers.\n");
+	return 1;
+}
+
+__setup("all-generic-ide", ide_generic_all_on);
+
 static void __devinit init_hwif_generic (ide_hwif_t *hwif)
 static void __devinit init_hwif_generic (ide_hwif_t *hwif)
 {
 {
 	switch(hwif->pci_dev->device) {
 	switch(hwif->pci_dev->device) {
@@ -78,79 +89,85 @@ static void __devinit init_hwif_generic (ide_hwif_t *hwif)
 
 
 static ide_pci_device_t generic_chipsets[] __devinitdata = {
 static ide_pci_device_t generic_chipsets[] __devinitdata = {
 	{	/* 0 */
 	{	/* 0 */
+		.name		= "Unknown",
+		.init_hwif	= init_hwif_generic,
+		.channels	= 2,
+		.autodma	= AUTODMA,
+		.bootable	= ON_BOARD,
+	},{	/* 1 */
 		.name		= "NS87410",
 		.name		= "NS87410",
 		.init_hwif	= init_hwif_generic,
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.channels	= 2,
 		.autodma	= AUTODMA,
 		.autodma	= AUTODMA,
 		.enablebits	= {{0x43,0x08,0x08}, {0x47,0x08,0x08}},
 		.enablebits	= {{0x43,0x08,0x08}, {0x47,0x08,0x08}},
 		.bootable	= ON_BOARD,
 		.bootable	= ON_BOARD,
-        },{	/* 1 */
+        },{	/* 2 */
 		.name		= "SAMURAI",
 		.name		= "SAMURAI",
 		.init_hwif	= init_hwif_generic,
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.channels	= 2,
 		.autodma	= AUTODMA,
 		.autodma	= AUTODMA,
 		.bootable	= ON_BOARD,
 		.bootable	= ON_BOARD,
-	},{	/* 2 */
+	},{	/* 3 */
 		.name		= "HT6565",
 		.name		= "HT6565",
 		.init_hwif	= init_hwif_generic,
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.channels	= 2,
 		.autodma	= AUTODMA,
 		.autodma	= AUTODMA,
 		.bootable	= ON_BOARD,
 		.bootable	= ON_BOARD,
-	},{	/* 3 */
+	},{	/* 4 */
 		.name		= "UM8673F",
 		.name		= "UM8673F",
 		.init_hwif	= init_hwif_generic,
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.channels	= 2,
 		.autodma	= NODMA,
 		.autodma	= NODMA,
 		.bootable	= ON_BOARD,
 		.bootable	= ON_BOARD,
-	},{	/* 4 */
+	},{	/* 5 */
 		.name		= "UM8886A",
 		.name		= "UM8886A",
 		.init_hwif	= init_hwif_generic,
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.channels	= 2,
 		.autodma	= NODMA,
 		.autodma	= NODMA,
 		.bootable	= ON_BOARD,
 		.bootable	= ON_BOARD,
-	},{	/* 5 */
+	},{	/* 6 */
 		.name		= "UM8886BF",
 		.name		= "UM8886BF",
 		.init_hwif	= init_hwif_generic,
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.channels	= 2,
 		.autodma	= NODMA,
 		.autodma	= NODMA,
 		.bootable	= ON_BOARD,
 		.bootable	= ON_BOARD,
-	},{	/* 6 */
+	},{	/* 7 */
 		.name		= "HINT_IDE",
 		.name		= "HINT_IDE",
 		.init_hwif	= init_hwif_generic,
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.channels	= 2,
 		.autodma	= AUTODMA,
 		.autodma	= AUTODMA,
 		.bootable	= ON_BOARD,
 		.bootable	= ON_BOARD,
-	},{	/* 7 */
+	},{	/* 8 */
 		.name		= "VIA_IDE",
 		.name		= "VIA_IDE",
 		.init_hwif	= init_hwif_generic,
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.channels	= 2,
 		.autodma	= NOAUTODMA,
 		.autodma	= NOAUTODMA,
 		.bootable	= ON_BOARD,
 		.bootable	= ON_BOARD,
-	},{	/* 8 */
+	},{	/* 9 */
 		.name		= "OPTI621V",
 		.name		= "OPTI621V",
 		.init_hwif	= init_hwif_generic,
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.channels	= 2,
 		.autodma	= NOAUTODMA,
 		.autodma	= NOAUTODMA,
 		.bootable	= ON_BOARD,
 		.bootable	= ON_BOARD,
-	},{	/* 9 */
+	},{	/* 10 */
 		.name		= "VIA8237SATA",
 		.name		= "VIA8237SATA",
 		.init_hwif	= init_hwif_generic,
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.channels	= 2,
 		.autodma	= AUTODMA,
 		.autodma	= AUTODMA,
 		.bootable	= OFF_BOARD,
 		.bootable	= OFF_BOARD,
-	},{	/* 10 */
+	},{	/* 11 */
 		.name 		= "Piccolo0102",
 		.name 		= "Piccolo0102",
 		.init_hwif	= init_hwif_generic,
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.channels	= 2,
 		.autodma	= NOAUTODMA,
 		.autodma	= NOAUTODMA,
 		.bootable	= ON_BOARD,
 		.bootable	= ON_BOARD,
-	},{	/* 11 */
+	},{	/* 12 */
 		.name 		= "Piccolo0103",
 		.name 		= "Piccolo0103",
 		.init_hwif	= init_hwif_generic,
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.channels	= 2,
 		.autodma	= NOAUTODMA,
 		.autodma	= NOAUTODMA,
 		.bootable	= ON_BOARD,
 		.bootable	= ON_BOARD,
-	},{	/* 12 */
+	},{	/* 13 */
 		.name 		= "Piccolo0105",
 		.name 		= "Piccolo0105",
 		.init_hwif	= init_hwif_generic,
 		.init_hwif	= init_hwif_generic,
 		.channels	= 2,
 		.channels	= 2,
@@ -174,6 +191,10 @@ static int __devinit generic_init_one(struct pci_dev *dev, const struct pci_devi
 	u16 command;
 	u16 command;
 	int ret = -ENODEV;
 	int ret = -ENODEV;
 
 
+	/* Don't use the generic entry unless instructed to do so */
+	if (id->driver_data == 0 && ide_generic_all == 0)
+			goto out;
+
 	if (dev->vendor == PCI_VENDOR_ID_UMC &&
 	if (dev->vendor == PCI_VENDOR_ID_UMC &&
 	    dev->device == PCI_DEVICE_ID_UMC_UM8886A &&
 	    dev->device == PCI_DEVICE_ID_UMC_UM8886A &&
 	    (!(PCI_FUNC(dev->devfn) & 1)))
 	    (!(PCI_FUNC(dev->devfn) & 1)))
@@ -195,21 +216,23 @@ out:
 }
 }
 
 
 static struct pci_device_id generic_pci_tbl[] = {
 static struct pci_device_id generic_pci_tbl[] = {
-	{ PCI_VENDOR_ID_NS,     PCI_DEVICE_ID_NS_87410,            PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
-	{ PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_SAMURAI_IDE,  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1},
-	{ PCI_VENDOR_ID_HOLTEK, PCI_DEVICE_ID_HOLTEK_6565,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2},
-	{ PCI_VENDOR_ID_UMC,    PCI_DEVICE_ID_UMC_UM8673F,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 3},
-	{ PCI_VENDOR_ID_UMC,    PCI_DEVICE_ID_UMC_UM8886A,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4},
-	{ PCI_VENDOR_ID_UMC,    PCI_DEVICE_ID_UMC_UM8886BF,        PCI_ANY_ID, PCI_ANY_ID, 0, 0, 5},
-	{ PCI_VENDOR_ID_HINT,   PCI_DEVICE_ID_HINT_VXPROII_IDE,    PCI_ANY_ID, PCI_ANY_ID, 0, 0, 6},
-	{ PCI_VENDOR_ID_VIA,    PCI_DEVICE_ID_VIA_82C561,          PCI_ANY_ID, PCI_ANY_ID, 0, 0, 7},
-	{ PCI_VENDOR_ID_OPTI,   PCI_DEVICE_ID_OPTI_82C558,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8},
+	{ PCI_VENDOR_ID_NS,     PCI_DEVICE_ID_NS_87410,            PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1},
+	{ PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_SAMURAI_IDE,  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2},
+	{ PCI_VENDOR_ID_HOLTEK, PCI_DEVICE_ID_HOLTEK_6565,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 3},
+	{ PCI_VENDOR_ID_UMC,    PCI_DEVICE_ID_UMC_UM8673F,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4},
+	{ PCI_VENDOR_ID_UMC,    PCI_DEVICE_ID_UMC_UM8886A,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 5},
+	{ PCI_VENDOR_ID_UMC,    PCI_DEVICE_ID_UMC_UM8886BF,        PCI_ANY_ID, PCI_ANY_ID, 0, 0, 6},
+	{ PCI_VENDOR_ID_HINT,   PCI_DEVICE_ID_HINT_VXPROII_IDE,    PCI_ANY_ID, PCI_ANY_ID, 0, 0, 7},
+	{ PCI_VENDOR_ID_VIA,    PCI_DEVICE_ID_VIA_82C561,          PCI_ANY_ID, PCI_ANY_ID, 0, 0, 8},
+	{ PCI_VENDOR_ID_OPTI,   PCI_DEVICE_ID_OPTI_82C558,         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 9},
 #ifdef CONFIG_BLK_DEV_IDE_SATA
 #ifdef CONFIG_BLK_DEV_IDE_SATA
-	{ PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8237_SATA,	   PCI_ANY_ID, PCI_ANY_ID, 0, 0, 9},
+	{ PCI_VENDOR_ID_VIA,	PCI_DEVICE_ID_VIA_8237_SATA,	   PCI_ANY_ID, PCI_ANY_ID, 0, 0, 10},
 #endif
 #endif
-	{ PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO,     PCI_ANY_ID, PCI_ANY_ID, 0, 0, 10},
-	{ PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1,   PCI_ANY_ID, PCI_ANY_ID, 0, 0, 11},
-	{ PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2,   PCI_ANY_ID, PCI_ANY_ID, 0, 0, 12},
+	{ PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO,     PCI_ANY_ID, PCI_ANY_ID, 0, 0, 11},
+	{ PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1,   PCI_ANY_ID, PCI_ANY_ID, 0, 0, 12},
+	{ PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2,   PCI_ANY_ID, PCI_ANY_ID, 0, 0, 13},
+	/* Must come last. If you add entries adjust this table appropriately and the init_one code */
+	{ PCI_ANY_ID,		PCI_ANY_ID,			   PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL, 0},
 	{ 0, },
 	{ 0, },
 };
 };
 MODULE_DEVICE_TABLE(pci, generic_pci_tbl);
 MODULE_DEVICE_TABLE(pci, generic_pci_tbl);

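The generic.c rework adds a catch-all entry (PCI class PCI_CLASS_STORAGE_IDE, driver_data 0) that is only honoured when the machine is booted with all-generic-ide on the kernel command line; the existing specific entries shift their driver_data up by one so that index 0 can mean "unknown". A sketch of how the guard and the driver_data index presumably meet inside the probe routine; indexing generic_chipsets[] by id->driver_data and the ide_setup_pci_device() call are assumptions consistent with the renumbering above, not lines taken from this diff:

static int __devinit generic_init_one_sketch(struct pci_dev *dev,
					     const struct pci_device_id *id)
{
	ide_pci_device_t *d = &generic_chipsets[id->driver_data];

	/* Entry 0 is the class-based catch-all: claim it only when the
	 * user explicitly asked for it on the command line. */
	if (id->driver_data == 0 && ide_generic_all == 0)
		return -ENODEV;

	return ide_setup_pci_device(dev, d);
}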
+ 228 - 242
drivers/ide/pci/hpt366.c

@@ -10,6 +10,11 @@
  * donation of an ABit BP6 mainboard, processor, and memory acellerated
  * donation of an ABit BP6 mainboard, processor, and memory acellerated
  * development and support.
  * development and support.
  *
  *
+ *
+ * Highpoint have their own driver (source except for the raid part)
+ * available from http://www.highpoint-tech.com/hpt3xx-opensource-v131.tgz
+ * This may be useful to anyone wanting to work on the mainstream hpt IDE.
+ *
  * Note that final HPT370 support was done by force extraction of GPL.
  * Note that final HPT370 support was done by force extraction of GPL.
  *
  *
  * - add function for getting/setting power status of drive
  * - add function for getting/setting power status of drive
@@ -446,44 +451,29 @@ static struct chipset_bus_clock_list_entry sixty_six_base_hpt374[] = {
 #define F_LOW_PCI_50	0x2d
 #define F_LOW_PCI_50	0x2d
 #define F_LOW_PCI_66	0x42
 #define F_LOW_PCI_66	0x42
 
 
-/* FIXME: compare with driver's code before removing */
-#if 0
-		if (hpt_minimum_revision(dev, 3)) {
-			u8 cbl;
-			cbl = inb(iobase + 0x7b);
-			outb(cbl | 1, iobase + 0x7b);
-			outb(cbl & ~1, iobase + 0x7b);
-			cbl = inb(iobase + 0x7a);
-			p += sprintf(p, "Cable:          ATA-%d"
-					"                          ATA-%d\n",
-				(cbl & 0x02) ? 33 : 66,
-				(cbl & 0x01) ? 33 : 66);
-			p += sprintf(p, "\n");
-		}
-		{
-			u8 c2, c3;
-			/* older revs don't have these registers mapped 
-			 * into io space */
-			pci_read_config_byte(dev, 0x43, &c0);
-			pci_read_config_byte(dev, 0x47, &c1);
-			pci_read_config_byte(dev, 0x4b, &c2);
-			pci_read_config_byte(dev, 0x4f, &c3);
-
-			p += sprintf(p, "Mode:           %s             %s"
-					"           %s              %s\n",
-				(c0 & 0x10) ? "UDMA" : (c0 & 0x20) ? "DMA " : 
-					(c0 & 0x80) ? "PIO " : "off ",
-				(c1 & 0x10) ? "UDMA" : (c1 & 0x20) ? "DMA " :
-					(c1 & 0x80) ? "PIO " : "off ",
-				(c2 & 0x10) ? "UDMA" : (c2 & 0x20) ? "DMA " :
-					(c2 & 0x80) ? "PIO " : "off ",
-				(c3 & 0x10) ? "UDMA" : (c3 & 0x20) ? "DMA " :
-					(c3 & 0x80) ? "PIO " : "off ");
-		}
-	}
-#endif
+/*
+ *	Hold all the highpoint quirks and revision information in one
+ *	place.
+ */
 
 
-static u32 hpt_revision (struct pci_dev *dev)
+struct hpt_info
+{
+	u8 max_mode;		/* Speeds allowed */
+	int revision;		/* Chipset revision */
+	int flags;		/* Chipset properties */
+#define PLL_MODE	1
+#define IS_372N		2
+				/* Speed table */
+	struct chipset_bus_clock_list_entry *speed;
+};
+
+/*
+ *	This wants fixing so that we do everything not by classrev
+ *	(which breaks on the newest chips) but by creating an
+ *	enumeration of chip variants and using that
+ */
+
+static __devinit u32 hpt_revision (struct pci_dev *dev)
 {
 {
 	u32 class_rev;
 	u32 class_rev;
 	pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
 	pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
@@ -507,37 +497,33 @@ static u32 hpt_revision (struct pci_dev *dev)
 	return class_rev;
 	return class_rev;
 }
 }
 
 
-static u32 hpt_minimum_revision (struct pci_dev *dev, int revision)
-{
-	unsigned int class_rev = hpt_revision(dev);
-	revision--;
-	return ((int) (class_rev > revision) ? 1 : 0);
-}
-
 static int check_in_drive_lists(ide_drive_t *drive, const char **list);
 static int check_in_drive_lists(ide_drive_t *drive, const char **list);
 
 
 static u8 hpt3xx_ratemask (ide_drive_t *drive)
 static u8 hpt3xx_ratemask (ide_drive_t *drive)
 {
 {
-	struct pci_dev *dev	= HWIF(drive)->pci_dev;
+	ide_hwif_t *hwif	= drive->hwif;
+	struct hpt_info *info	= ide_get_hwifdata(hwif);
 	u8 mode			= 0;
 	u8 mode			= 0;
 
 
-	if (hpt_minimum_revision(dev, 8)) {		/* HPT374 */
+	/* FIXME: TODO - move this to set info->mode once at boot */
+
+	if (info->revision >= 8) {		/* HPT374 */
 		mode = (HPT374_ALLOW_ATA133_6) ? 4 : 3;
 		mode = (HPT374_ALLOW_ATA133_6) ? 4 : 3;
-	} else if (hpt_minimum_revision(dev, 7)) {	/* HPT371 */
+	} else if (info->revision >= 7) {	/* HPT371 */
 		mode = (HPT371_ALLOW_ATA133_6) ? 4 : 3;
 		mode = (HPT371_ALLOW_ATA133_6) ? 4 : 3;
-	} else if (hpt_minimum_revision(dev, 6)) {	/* HPT302 */
+	} else if (info->revision >= 6) {	/* HPT302 */
 		mode = (HPT302_ALLOW_ATA133_6) ? 4 : 3;
 		mode = (HPT302_ALLOW_ATA133_6) ? 4 : 3;
-	} else if (hpt_minimum_revision(dev, 5)) {	/* HPT372 */
+	} else if (info->revision >= 5) {	/* HPT372 */
 		mode = (HPT372_ALLOW_ATA133_6) ? 4 : 3;
 		mode = (HPT372_ALLOW_ATA133_6) ? 4 : 3;
-	} else if (hpt_minimum_revision(dev, 4)) {	/* HPT370A */
+	} else if (info->revision >= 4) {	/* HPT370A */
 		mode = (HPT370_ALLOW_ATA100_5) ? 3 : 2;
 		mode = (HPT370_ALLOW_ATA100_5) ? 3 : 2;
-	} else if (hpt_minimum_revision(dev, 3)) {	/* HPT370 */
+	} else if (info->revision >= 3) {	/* HPT370 */
 		mode = (HPT370_ALLOW_ATA100_5) ? 3 : 2;
 		mode = (HPT370_ALLOW_ATA100_5) ? 3 : 2;
 		mode = (check_in_drive_lists(drive, bad_ata33)) ? 0 : mode;
 		mode = (check_in_drive_lists(drive, bad_ata33)) ? 0 : mode;
 	} else {				/* HPT366 and HPT368 */
 	} else {				/* HPT366 and HPT368 */
 		mode = (check_in_drive_lists(drive, bad_ata33)) ? 0 : 2;
 		mode = (check_in_drive_lists(drive, bad_ata33)) ? 0 : 2;
 	}
 	}
-	if (!eighty_ninty_three(drive) && (mode))
+	if (!eighty_ninty_three(drive) && mode)
 		mode = min(mode, (u8)1);
 		mode = min(mode, (u8)1);
 	return mode;
 	return mode;
 }
 }
@@ -549,7 +535,8 @@ static u8 hpt3xx_ratemask (ide_drive_t *drive)
  
  
 static u8 hpt3xx_ratefilter (ide_drive_t *drive, u8 speed)
 static u8 hpt3xx_ratefilter (ide_drive_t *drive, u8 speed)
 {
 {
-	struct pci_dev *dev	= HWIF(drive)->pci_dev;
+	ide_hwif_t *hwif	= drive->hwif;
+	struct hpt_info *info	= ide_get_hwifdata(hwif);
 	u8 mode			= hpt3xx_ratemask(drive);
 	u8 mode			= hpt3xx_ratemask(drive);
 
 
 	if (drive->media != ide_disk)
 	if (drive->media != ide_disk)
@@ -561,7 +548,7 @@ static u8 hpt3xx_ratefilter (ide_drive_t *drive, u8 speed)
 			break;
 			break;
 		case 0x03:
 		case 0x03:
 			speed = min(speed, (u8)XFER_UDMA_5);
 			speed = min(speed, (u8)XFER_UDMA_5);
-			if (hpt_minimum_revision(dev, 5))
+			if (info->revision >= 5)
 				break;
 				break;
 			if (check_in_drive_lists(drive, bad_ata100_5))
 			if (check_in_drive_lists(drive, bad_ata100_5))
 				speed = min(speed, (u8)XFER_UDMA_4);
 				speed = min(speed, (u8)XFER_UDMA_4);
@@ -571,7 +558,7 @@ static u8 hpt3xx_ratefilter (ide_drive_t *drive, u8 speed)
 	/*
 	/*
 	 * CHECK ME, Does this need to be set to 5 ??
 	 * CHECK ME, Does this need to be set to 5 ??
 	 */
 	 */
-			if (hpt_minimum_revision(dev, 3))
+			if (info->revision >= 3)
 				break;
 				break;
 			if ((check_in_drive_lists(drive, bad_ata66_4)) ||
 			if ((check_in_drive_lists(drive, bad_ata66_4)) ||
 			    (!(HPT366_ALLOW_ATA66_4)))
 			    (!(HPT366_ALLOW_ATA66_4)))
@@ -585,7 +572,7 @@ static u8 hpt3xx_ratefilter (ide_drive_t *drive, u8 speed)
 	/*
 	/*
 	 * CHECK ME, Does this need to be set to 5 ??
 	 * CHECK ME, Does this need to be set to 5 ??
 	 */
 	 */
-			if (hpt_minimum_revision(dev, 3))
+			if (info->revision >= 3)
 				break;
 				break;
 			if (check_in_drive_lists(drive, bad_ata33))
 			if (check_in_drive_lists(drive, bad_ata33))
 				speed = min(speed, (u8)XFER_MW_DMA_2);
 				speed = min(speed, (u8)XFER_MW_DMA_2);
@@ -624,11 +611,12 @@ static unsigned int pci_bus_clock_list (u8 speed, struct chipset_bus_clock_list_
 
 
 static int hpt36x_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 static int hpt36x_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 {
 {
-	struct pci_dev *dev	= HWIF(drive)->pci_dev;
+	ide_hwif_t *hwif	= drive->hwif;
+	struct pci_dev *dev	= hwif->pci_dev;
+	struct hpt_info	*info	= ide_get_hwifdata(hwif);
 	u8 speed		= hpt3xx_ratefilter(drive, xferspeed);
 	u8 speed		= hpt3xx_ratefilter(drive, xferspeed);
-//	u8 speed		= ide_rate_filter(hpt3xx_ratemask(drive), xferspeed);
 	u8 regtime		= (drive->select.b.unit & 0x01) ? 0x44 : 0x40;
 	u8 regtime		= (drive->select.b.unit & 0x01) ? 0x44 : 0x40;
-	u8 regfast		= (HWIF(drive)->channel) ? 0x55 : 0x51;
+	u8 regfast		= (hwif->channel) ? 0x55 : 0x51;
 	u8 drive_fast		= 0;
 	u8 drive_fast		= 0;
 	u32 reg1 = 0, reg2	= 0;
 	u32 reg1 = 0, reg2	= 0;
 
 
@@ -636,16 +624,11 @@ static int hpt36x_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 	 * Disable the "fast interrupt" prediction.
 	 * Disable the "fast interrupt" prediction.
 	 */
 	 */
 	pci_read_config_byte(dev, regfast, &drive_fast);
 	pci_read_config_byte(dev, regfast, &drive_fast);
-#if 0
-	if (drive_fast & 0x02)
-		pci_write_config_byte(dev, regfast, drive_fast & ~0x20);
-#else
 	if (drive_fast & 0x80)
 	if (drive_fast & 0x80)
 		pci_write_config_byte(dev, regfast, drive_fast & ~0x80);
 		pci_write_config_byte(dev, regfast, drive_fast & ~0x80);
-#endif
 
 
-	reg2 = pci_bus_clock_list(speed,
-		(struct chipset_bus_clock_list_entry *) pci_get_drvdata(dev));
+	reg2 = pci_bus_clock_list(speed, info->speed);
+
 	/*
 	/*
 	 * Disable on-chip PIO FIFO/buffer
 	 * Disable on-chip PIO FIFO/buffer
 	 *  (to avoid problems handling I/O errors later)
 	 *  (to avoid problems handling I/O errors later)
@@ -665,10 +648,11 @@ static int hpt36x_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 
 
 static int hpt370_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 static int hpt370_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 {
 {
-	struct pci_dev *dev = HWIF(drive)->pci_dev;
+	ide_hwif_t *hwif	= drive->hwif;
+	struct pci_dev *dev = hwif->pci_dev;
+	struct hpt_info	*info	= ide_get_hwifdata(hwif);
 	u8 speed	= hpt3xx_ratefilter(drive, xferspeed);
 	u8 speed	= hpt3xx_ratefilter(drive, xferspeed);
-//	u8 speed	= ide_rate_filter(hpt3xx_ratemask(drive), xferspeed);
-	u8 regfast	= (HWIF(drive)->channel) ? 0x55 : 0x51;
+	u8 regfast	= (drive->hwif->channel) ? 0x55 : 0x51;
 	u8 drive_pci	= 0x40 + (drive->dn * 4);
 	u8 drive_pci	= 0x40 + (drive->dn * 4);
 	u8 new_fast	= 0, drive_fast = 0;
 	u8 new_fast	= 0, drive_fast = 0;
 	u32 list_conf	= 0, drive_conf = 0;
 	u32 list_conf	= 0, drive_conf = 0;
@@ -693,17 +677,13 @@ static int hpt370_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 	if (new_fast != drive_fast)
 	if (new_fast != drive_fast)
 		pci_write_config_byte(dev, regfast, new_fast);
 		pci_write_config_byte(dev, regfast, new_fast);
 
 
-	list_conf = pci_bus_clock_list(speed, 
-				       (struct chipset_bus_clock_list_entry *)
-				       pci_get_drvdata(dev));
+	list_conf = pci_bus_clock_list(speed, info->speed);
 
 
 	pci_read_config_dword(dev, drive_pci, &drive_conf);
 	pci_read_config_dword(dev, drive_pci, &drive_conf);
 	list_conf = (list_conf & ~conf_mask) | (drive_conf & conf_mask);
 	list_conf = (list_conf & ~conf_mask) | (drive_conf & conf_mask);
 	
 	
-	if (speed < XFER_MW_DMA_0) {
+	if (speed < XFER_MW_DMA_0)
 		list_conf &= ~0x80000000; /* Disable on-chip PIO FIFO/buffer */
 		list_conf &= ~0x80000000; /* Disable on-chip PIO FIFO/buffer */
-	}
-
 	pci_write_config_dword(dev, drive_pci, list_conf);
 	pci_write_config_dword(dev, drive_pci, list_conf);
 
 
 	return ide_config_drive_speed(drive, speed);
 	return ide_config_drive_speed(drive, speed);
@@ -711,10 +691,11 @@ static int hpt370_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 
 
 static int hpt372_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 static int hpt372_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 {
 {
-	struct pci_dev *dev	= HWIF(drive)->pci_dev;
+	ide_hwif_t *hwif	= drive->hwif;
+	struct pci_dev *dev	= hwif->pci_dev;
+	struct hpt_info	*info	= ide_get_hwifdata(hwif);
 	u8 speed	= hpt3xx_ratefilter(drive, xferspeed);
 	u8 speed	= hpt3xx_ratefilter(drive, xferspeed);
-//	u8 speed	= ide_rate_filter(hpt3xx_ratemask(drive), xferspeed);
-	u8 regfast	= (HWIF(drive)->channel) ? 0x55 : 0x51;
+	u8 regfast	= (drive->hwif->channel) ? 0x55 : 0x51;
 	u8 drive_fast	= 0, drive_pci = 0x40 + (drive->dn * 4);
 	u8 drive_fast	= 0, drive_pci = 0x40 + (drive->dn * 4);
 	u32 list_conf	= 0, drive_conf = 0;
 	u32 list_conf	= 0, drive_conf = 0;
 	u32 conf_mask	= (speed >= XFER_MW_DMA_0) ? 0xc0000000 : 0x30070000;
 	u32 conf_mask	= (speed >= XFER_MW_DMA_0) ? 0xc0000000 : 0x30070000;
@@ -726,10 +707,8 @@ static int hpt372_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 	pci_read_config_byte(dev, regfast, &drive_fast);
 	pci_read_config_byte(dev, regfast, &drive_fast);
 	drive_fast &= ~0x07;
 	drive_fast &= ~0x07;
 	pci_write_config_byte(dev, regfast, drive_fast);
 	pci_write_config_byte(dev, regfast, drive_fast);
-					
-	list_conf = pci_bus_clock_list(speed,
-			(struct chipset_bus_clock_list_entry *)
-					pci_get_drvdata(dev));
+
+	list_conf = pci_bus_clock_list(speed, info->speed);
 	pci_read_config_dword(dev, drive_pci, &drive_conf);
 	pci_read_config_dword(dev, drive_pci, &drive_conf);
 	list_conf = (list_conf & ~conf_mask) | (drive_conf & conf_mask);
 	list_conf = (list_conf & ~conf_mask) | (drive_conf & conf_mask);
 	if (speed < XFER_MW_DMA_0)
 	if (speed < XFER_MW_DMA_0)
@@ -741,19 +720,14 @@ static int hpt372_tune_chipset(ide_drive_t *drive, u8 xferspeed)
 
 
 static int hpt3xx_tune_chipset (ide_drive_t *drive, u8 speed)
 static int hpt3xx_tune_chipset (ide_drive_t *drive, u8 speed)
 {
 {
-	struct pci_dev *dev	= HWIF(drive)->pci_dev;
+	ide_hwif_t *hwif	= drive->hwif;
+	struct hpt_info	*info	= ide_get_hwifdata(hwif);
 
 
-	if (hpt_minimum_revision(dev, 8))
+	if (info->revision >= 8)
 		return hpt372_tune_chipset(drive, speed); /* not a typo */
 		return hpt372_tune_chipset(drive, speed); /* not a typo */
-#if 0
-	else if (hpt_minimum_revision(dev, 7))
-		hpt371_tune_chipset(drive, speed);
-	else if (hpt_minimum_revision(dev, 6))
-		hpt302_tune_chipset(drive, speed);
-#endif
-	else if (hpt_minimum_revision(dev, 5))
+	else if (info->revision >= 5)
 		return hpt372_tune_chipset(drive, speed);
 		return hpt372_tune_chipset(drive, speed);
-	else if (hpt_minimum_revision(dev, 3))
+	else if (info->revision >= 3)
 		return hpt370_tune_chipset(drive, speed);
 		return hpt370_tune_chipset(drive, speed);
 	else	/* hpt368: hpt_minimum_revision(dev, 2) */
 	else	/* hpt368: hpt_minimum_revision(dev, 2) */
 		return hpt36x_tune_chipset(drive, speed);
 		return hpt36x_tune_chipset(drive, speed);
@@ -779,8 +753,14 @@ static void hpt3xx_tune_drive (ide_drive_t *drive, u8 pio)
 static int config_chipset_for_dma (ide_drive_t *drive)
 static int config_chipset_for_dma (ide_drive_t *drive)
 {
 {
 	u8 speed = ide_dma_speed(drive, hpt3xx_ratemask(drive));
 	u8 speed = ide_dma_speed(drive, hpt3xx_ratemask(drive));
+	ide_hwif_t *hwif = drive->hwif;
+	struct hpt_info	*info	= ide_get_hwifdata(hwif);
 
 
-	if (!(speed))
+	if (!speed)
+		return 0;
+
+	/* If we don't have any timings we can't do a lot */
+	if (info->speed == NULL)
 		return 0;
 		return 0;
 
 
 	(void) hpt3xx_tune_chipset(drive, speed);
 	(void) hpt3xx_tune_chipset(drive, speed);
@@ -794,7 +774,7 @@ static int hpt3xx_quirkproc (ide_drive_t *drive)
 
 
 static void hpt3xx_intrproc (ide_drive_t *drive)
 static void hpt3xx_intrproc (ide_drive_t *drive)
 {
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 
 
 	if (drive->quirk_list)
 	if (drive->quirk_list)
 		return;
 		return;
@@ -804,24 +784,26 @@ static void hpt3xx_intrproc (ide_drive_t *drive)
 
 
 static void hpt3xx_maskproc (ide_drive_t *drive, int mask)
 static void hpt3xx_maskproc (ide_drive_t *drive, int mask)
 {
 {
-	struct pci_dev *dev = HWIF(drive)->pci_dev;
+	ide_hwif_t *hwif = drive->hwif;
+	struct hpt_info *info = ide_get_hwifdata(hwif);
+	struct pci_dev *dev = hwif->pci_dev;
 
 
 	if (drive->quirk_list) {
 	if (drive->quirk_list) {
-		if (hpt_minimum_revision(dev,3)) {
+		if (info->revision >= 3) {
 			u8 reg5a = 0;
 			u8 reg5a = 0;
 			pci_read_config_byte(dev, 0x5a, &reg5a);
 			pci_read_config_byte(dev, 0x5a, &reg5a);
 			if (((reg5a & 0x10) >> 4) != mask)
 			if (((reg5a & 0x10) >> 4) != mask)
 				pci_write_config_byte(dev, 0x5a, mask ? (reg5a | 0x10) : (reg5a & ~0x10));
 				pci_write_config_byte(dev, 0x5a, mask ? (reg5a | 0x10) : (reg5a & ~0x10));
 		} else {
 		} else {
 			if (mask) {
 			if (mask) {
-				disable_irq(HWIF(drive)->irq);
+				disable_irq(hwif->irq);
 			} else {
 			} else {
-				enable_irq(HWIF(drive)->irq);
+				enable_irq(hwif->irq);
 			}
 			}
 		}
 		}
 	} else {
 	} else {
 		if (IDE_CONTROL_REG)
 		if (IDE_CONTROL_REG)
-			HWIF(drive)->OUTB(mask ? (drive->ctl | 2) :
+			hwif->OUTB(mask ? (drive->ctl | 2) :
 						 (drive->ctl & ~2),
 						 (drive->ctl & ~2),
 						 IDE_CONTROL_REG);
 						 IDE_CONTROL_REG);
 	}
 	}
@@ -829,12 +811,12 @@ static void hpt3xx_maskproc (ide_drive_t *drive, int mask)
 
 
 static int hpt366_config_drive_xfer_rate (ide_drive_t *drive)
 static int hpt366_config_drive_xfer_rate (ide_drive_t *drive)
 {
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct hd_driveid *id	= drive->id;
 	struct hd_driveid *id	= drive->id;
 
 
 	drive->init_speed = 0;
 	drive->init_speed = 0;
 
 
-	if (id && (id->capability & 1) && drive->autodma) {
+	if ((id->capability & 1) && drive->autodma) {
 
 
 		if (ide_use_dma(drive)) {
 		if (ide_use_dma(drive)) {
 			if (config_chipset_for_dma(drive))
 			if (config_chipset_for_dma(drive))
@@ -868,15 +850,6 @@ static int hpt366_ide_dma_lostirq (ide_drive_t *drive)
 		drive->name, __FUNCTION__, reg50h, reg52h, reg5ah);
 		drive->name, __FUNCTION__, reg50h, reg52h, reg5ah);
 	if (reg5ah & 0x10)
 	if (reg5ah & 0x10)
 		pci_write_config_byte(dev, 0x5a, reg5ah & ~0x10);
 		pci_write_config_byte(dev, 0x5a, reg5ah & ~0x10);
-#if 0
-	/* how about we flush and reset, mmmkay? */
-	pci_write_config_byte(dev, 0x51, 0x1F);
-	/* fall through to a reset */
-	case dma_start:
-	case ide_dma_end:
-	/* reset the chips state over and over.. */
-	pci_write_config_byte(dev, 0x51, 0x13);
-#endif
 	return __ide_dma_lostirq(drive);
 	return __ide_dma_lostirq(drive);
 }
 }
 
 
@@ -919,7 +892,7 @@ static void hpt370_lostirq_timeout (ide_drive_t *drive)
 	u8 dma_stat = 0, dma_cmd = 0;
 	u8 dma_stat = 0, dma_cmd = 0;
 
 
 	pci_read_config_byte(HWIF(drive)->pci_dev, reginfo, &bfifo);
 	pci_read_config_byte(HWIF(drive)->pci_dev, reginfo, &bfifo);
-	printk("%s: %d bytes in FIFO\n", drive->name, bfifo);
+	printk(KERN_DEBUG "%s: %d bytes in FIFO\n", drive->name, bfifo);
 	hpt370_clear_engine(drive);
 	hpt370_clear_engine(drive);
 	/* get dma command mode */
 	/* get dma command mode */
 	dma_cmd = hwif->INB(hwif->dma_command);
 	dma_cmd = hwif->INB(hwif->dma_command);
@@ -1047,15 +1020,6 @@ static void hpt372n_rw_disk(ide_drive_t *drive, struct request *rq)
 
 
 static void hpt3xx_reset (ide_drive_t *drive)
 static void hpt3xx_reset (ide_drive_t *drive)
 {
 {
-#if 0
-	unsigned long high_16	= pci_resource_start(HWIF(drive)->pci_dev, 4);
-	u8 reset	= (HWIF(drive)->channel) ? 0x80 : 0x40;
-	u8 reg59h	= 0;
-
-	pci_read_config_byte(HWIF(drive)->pci_dev, 0x59, &reg59h);
-	pci_write_config_byte(HWIF(drive)->pci_dev, 0x59, reg59h|reset);
-	pci_write_config_byte(HWIF(drive)->pci_dev, 0x59, reg59h);
-#endif
 }
 }
 
 
 static int hpt3xx_tristate (ide_drive_t * drive, int state)
 static int hpt3xx_tristate (ide_drive_t * drive, int state)
@@ -1065,8 +1029,6 @@ static int hpt3xx_tristate (ide_drive_t * drive, int state)
 	u8 reg59h = 0, reset	= (hwif->channel) ? 0x80 : 0x40;
 	u8 regXXh = 0, state_reg= (hwif->channel) ? 0x57 : 0x53;
 
-//	hwif->bus_state = state;
-
 	pci_read_config_byte(dev, 0x59, &reg59h);
 	pci_read_config_byte(dev, state_reg, &regXXh);
 
@@ -1093,7 +1055,7 @@ static int hpt3xx_tristate (ide_drive_t * drive, int state)
 #define TRISTATE_BIT  0x8000
 static int hpt370_busproc(ide_drive_t * drive, int state)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= hwif->pci_dev;
 	u8 tristate = 0, resetmask = 0, bus_reg = 0;
 	u16 tri_reg;
@@ -1148,33 +1110,44 @@ static int hpt370_busproc(ide_drive_t * drive, int state)
 	return 0;
 }
 
-static int __devinit init_hpt37x(struct pci_dev *dev)
+static void __devinit hpt366_clocking(ide_hwif_t *hwif)
 {
+	u32 reg1	= 0;
+	struct hpt_info *info = ide_get_hwifdata(hwif);
+
+	pci_read_config_dword(hwif->pci_dev, 0x40, &reg1);
+
+	/* detect bus speed by looking at control reg timing: */
+	switch((reg1 >> 8) & 7) {
+		case 5:
+			info->speed = forty_base_hpt366;
+			break;
+		case 9:
+			info->speed = twenty_five_base_hpt366;
+			break;
+		case 7:
+		default:
+			info->speed = thirty_three_base_hpt366;
+			break;
+	}
+}
+
+static void __devinit hpt37x_clocking(ide_hwif_t *hwif)
+{
+	struct hpt_info *info = ide_get_hwifdata(hwif);
+	struct pci_dev *dev = hwif->pci_dev;
 	int adjust, i;
 	u16 freq;
 	u32 pll;
 	u8 reg5bh;
-	u8 reg5ah = 0;
-	unsigned long dmabase = pci_resource_start(dev, 4);
-	u8 did, rid;	
-	int is_372n = 0;
 	
-	pci_read_config_byte(dev, 0x5a, &reg5ah);
-	/* interrupt force enable */
-	pci_write_config_byte(dev, 0x5a, (reg5ah & ~0x10));
-
-	if(dmabase)
-	{
-		did = inb(dmabase + 0x22);
-		rid = inb(dmabase + 0x28);
-	
-		if((did == 4 && rid == 6) || (did == 5 && rid > 1))
-			is_372n = 1;
-	}
-
 	/*
 	 * default to pci clock. make sure MA15/16 are set to output
-	 * to prevent drives having problems with 40-pin cables.
+	 * to prevent drives having problems with 40-pin cables. Needed
+	 * for some drives such as IBM-DTLA which will not enter ready
+	 * state on reset when PDIAG is a input.
+	 *
+	 * ToDo: should we set 0x21 when using PLL mode ?
 	 */
 	pci_write_config_byte(dev, 0x5b, 0x23);
 
@@ -1197,9 +1170,7 @@ static int __devinit init_hpt37x(struct pci_dev *dev)
 	 * Currently we always set up the PLL for the 372N
 	 */
 	 
-	pci_set_drvdata(dev, NULL);
-	
-	if(is_372n)
+	if(info->flags & IS_372N)
 	{
 		printk(KERN_INFO "hpt: HPT372N detected, using 372N timing.\n");
 		if(freq < 0x55)
@@ -1227,39 +1198,38 @@ static int __devinit init_hpt37x(struct pci_dev *dev)
 			pll = F_LOW_PCI_66;
 	
 		if (pll == F_LOW_PCI_33) {
-			if (hpt_minimum_revision(dev,8))
-				pci_set_drvdata(dev, (void *) thirty_three_base_hpt374);
-			else if (hpt_minimum_revision(dev,5))
-				pci_set_drvdata(dev, (void *) thirty_three_base_hpt372);
-			else if (hpt_minimum_revision(dev,4))
-				pci_set_drvdata(dev, (void *) thirty_three_base_hpt370a);
+			if (info->revision >= 8)
+				info->speed = thirty_three_base_hpt374;
+			else if (info->revision >= 5)
+				info->speed = thirty_three_base_hpt372;
+			else if (info->revision >= 4)
+				info->speed = thirty_three_base_hpt370a;
 			else
-				pci_set_drvdata(dev, (void *) thirty_three_base_hpt370);
-			printk("HPT37X: using 33MHz PCI clock\n");
+				info->speed = thirty_three_base_hpt370;
+			printk(KERN_DEBUG "HPT37X: using 33MHz PCI clock\n");
 		} else if (pll == F_LOW_PCI_40) {
 			/* Unsupported */
 		} else if (pll == F_LOW_PCI_50) {
-			if (hpt_minimum_revision(dev,8))
-				pci_set_drvdata(dev, (void *) fifty_base_hpt370a);
-			else if (hpt_minimum_revision(dev,5))
-				pci_set_drvdata(dev, (void *) fifty_base_hpt372);
-			else if (hpt_minimum_revision(dev,4))
-				pci_set_drvdata(dev, (void *) fifty_base_hpt370a);
+			if (info->revision >= 8)
+				info->speed = fifty_base_hpt370a;
+			else if (info->revision >= 5)
+				info->speed = fifty_base_hpt372;
+			else if (info->revision >= 4)
+				info->speed = fifty_base_hpt370a;
 			else
-				pci_set_drvdata(dev, (void *) fifty_base_hpt370a);
-			printk("HPT37X: using 50MHz PCI clock\n");
+				info->speed = fifty_base_hpt370a;
+			printk(KERN_DEBUG "HPT37X: using 50MHz PCI clock\n");
 		} else {
-			if (hpt_minimum_revision(dev,8))
-			{
+			if (info->revision >= 8) {
 				printk(KERN_ERR "HPT37x: 66MHz timings are not supported.\n");
 			}
-			else if (hpt_minimum_revision(dev,5))
-				pci_set_drvdata(dev, (void *) sixty_six_base_hpt372);
-			else if (hpt_minimum_revision(dev,4))
-				pci_set_drvdata(dev, (void *) sixty_six_base_hpt370a);
+			else if (info->revision >= 5)
+				info->speed = sixty_six_base_hpt372;
+			else if (info->revision >= 4)
+				info->speed = sixty_six_base_hpt370a;
 			else
-				pci_set_drvdata(dev, (void *) sixty_six_base_hpt370);
-			printk("HPT37X: using 66MHz PCI clock\n");
+				info->speed = sixty_six_base_hpt370;
+			printk(KERN_DEBUG "HPT37X: using 66MHz PCI clock\n");
 		}
 	}
 	
@@ -1269,11 +1239,19 @@ static int __devinit init_hpt37x(struct pci_dev *dev)
 	 * result in slow reads when using a 33MHz PCI clock. we also
 	 * don't like to use the PLL because it will cause glitches
 	 * on PRST/SRST when the HPT state engine gets reset.
+	 *
+	 * ToDo: Use 66MHz PLL when ATA133 devices are present on a
+	 * 372 device so we can get ATA133 support
 	 */
-	if (pci_get_drvdata(dev)) 
+	if (info->speed)
 		goto init_hpt37X_done;
+
+	info->flags |= PLL_MODE;
 	
 	/*
+	 * FIXME: make this work correctly, esp with 372N as per
+	 * reference driver code.
+	 *
 	 * adjust PLL based upon PCI clock, enable it, and wait for
 	 * stabilization.
 	 */
@@ -1298,14 +1276,14 @@ static int __devinit init_hpt37x(struct pci_dev *dev)
 				pci_write_config_dword(dev, 0x5c, 
 						       pll & ~0x100);
 				pci_write_config_byte(dev, 0x5b, 0x21);
-				if (hpt_minimum_revision(dev,8))
-					pci_set_drvdata(dev, (void *) fifty_base_hpt370a);
-				else if (hpt_minimum_revision(dev,5))
-					pci_set_drvdata(dev, (void *) fifty_base_hpt372);
-				else if (hpt_minimum_revision(dev,4))
-					pci_set_drvdata(dev, (void *) fifty_base_hpt370a);
+				if (info->revision >= 8)
+					info->speed = fifty_base_hpt370a;
+				else if (info->revision >= 5)
+					info->speed = fifty_base_hpt372;
+				else if (info->revision >= 4)
+					info->speed = fifty_base_hpt370a;
 				else
-					pci_set_drvdata(dev, (void *) fifty_base_hpt370a);
+					info->speed = fifty_base_hpt370a;
 				printk("HPT37X: using 50MHz internal PLL\n");
 				goto init_hpt37X_done;
 			}
@@ -1318,10 +1296,22 @@ pll_recal:
 	} 
 
 init_hpt37X_done:
+	if (!info->speed)
+		printk(KERN_ERR "HPT37X%s: unknown bus timing [%d %d].\n",
+			(info->flags & IS_372N)?"N":"", pll, freq);
 	/* reset state engine */
 	pci_write_config_byte(dev, 0x50, 0x37); 
 	pci_write_config_byte(dev, 0x54, 0x37); 
 	udelay(100);
+}
+
+static int __devinit init_hpt37x(struct pci_dev *dev)
+{
+	u8 reg5ah;
+
+	pci_read_config_byte(dev, 0x5a, &reg5ah);
+	/* interrupt force enable */
+	pci_write_config_byte(dev, 0x5a, (reg5ah & ~0x10));
 	return 0;
 }
 
@@ -1338,59 +1328,27 @@ static int __devinit init_hpt366(struct pci_dev *dev)
 		pci_write_config_byte(dev, 0x51, drive_fast & ~0x80);
 	pci_read_config_dword(dev, 0x40, &reg1);
 									
-	/* detect bus speed by looking at control reg timing: */
-	switch((reg1 >> 8) & 7) {
-		case 5:
-			pci_set_drvdata(dev, (void *) forty_base_hpt366);
-			break;
-		case 9:
-			pci_set_drvdata(dev, (void *) twenty_five_base_hpt366);
-			break;
-		case 7:
-		default:
-			pci_set_drvdata(dev, (void *) thirty_three_base_hpt366);
-			break;
-	}
-
-	if (!pci_get_drvdata(dev))
-	{
-		printk(KERN_ERR "hpt366: unknown bus timing.\n");
-		pci_set_drvdata(dev, NULL);
-	}
 	return 0;
 }
 
 static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const char *name)
 {
 	int ret = 0;
-	u8 test = 0;
-
+	/* FIXME: Not portable */
 	if (dev->resource[PCI_ROM_RESOURCE].start)
 		pci_write_config_byte(dev, PCI_ROM_ADDRESS,
 			dev->resource[PCI_ROM_RESOURCE].start | PCI_ROM_ADDRESS_ENABLE);
 
-	pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &test);
-	if (test != (L1_CACHE_BYTES / 4))
-		pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE,
-			(L1_CACHE_BYTES / 4));
-
-	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &test);
-	if (test != 0x78)
-		pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x78);
+	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, (L1_CACHE_BYTES / 4));
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x78);
+	pci_write_config_byte(dev, PCI_MIN_GNT, 0x08);
+	pci_write_config_byte(dev, PCI_MAX_LAT, 0x08);
 
-	pci_read_config_byte(dev, PCI_MIN_GNT, &test);
-	if (test != 0x08)
-		pci_write_config_byte(dev, PCI_MIN_GNT, 0x08);
-
-	pci_read_config_byte(dev, PCI_MAX_LAT, &test);
-	if (test != 0x08)
-		pci_write_config_byte(dev, PCI_MAX_LAT, 0x08);
-
-	if (hpt_minimum_revision(dev, 3)) {
+	if (hpt_revision(dev) >= 3)
 		ret = init_hpt37x(dev);
-	} else {
-		ret =init_hpt366(dev);
-	}
+	else
+		ret = init_hpt366(dev);
+
 	if (ret)
 		return ret;
 
@@ -1400,27 +1358,16 @@ static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev, const cha
 static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 {
 	struct pci_dev *dev		= hwif->pci_dev;
+	struct hpt_info *info		= ide_get_hwifdata(hwif);
 	u8 ata66 = 0, regmask		= (hwif->channel) ? 0x01 : 0x02;
-	u8 did, rid;
-	unsigned long dmabase		= hwif->dma_base;
-	int is_372n = 0;
 	
-	if(dmabase)
-	{
-		did = inb(dmabase + 0x22);
-		rid = inb(dmabase + 0x28);
-	
-		if((did == 4 && rid == 6) || (did == 5 && rid > 1))
-			is_372n = 1;
-	}
-		
 	hwif->tuneproc			= &hpt3xx_tune_drive;
 	hwif->speedproc			= &hpt3xx_tune_chipset;
 	hwif->quirkproc			= &hpt3xx_quirkproc;
 	hwif->intrproc			= &hpt3xx_intrproc;
 	hwif->maskproc			= &hpt3xx_maskproc;
 	
-	if(is_372n)
+	if(info->flags & IS_372N)
 		hwif->rw_disk = &hpt372n_rw_disk;
 
 	/*
@@ -1428,7 +1375,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 	 * address lines to access an external eeprom.  To read valid
 	 * cable detect state the pins must be enabled as inputs.
 	 */
-	if (hpt_minimum_revision(dev, 8) && PCI_FUNC(dev->devfn) & 1) {
+	if (info->revision >= 8 && (PCI_FUNC(dev->devfn) & 1)) {
 		/*
 		 * HPT374 PCI function 1
 		 * - set bit 15 of reg 0x52 to enable TCBLID as input
@@ -1443,7 +1390,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 		pci_read_config_byte(dev, 0x5a, &ata66);
 		pci_write_config_word(dev, 0x52, mcr3);
 		pci_write_config_word(dev, 0x56, mcr6);
-	} else if (hpt_minimum_revision(dev, 3)) {
+	} else if (info->revision >= 3) {
 		/*
 		 * HPT370/372 and 374 pcifn 0
 		 * - clear bit 0 of 0x5b to enable P/SCBLID as inputs
@@ -1470,7 +1417,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 		hwif->serialized = hwif->mate->serialized = 1;
 #endif
 
-	if (hpt_minimum_revision(dev,3)) {
+	if (info->revision >= 3) {
 		u8 reg5ah = 0;
 			pci_write_config_byte(dev, 0x5a, reg5ah & ~0x10);
 		/*
@@ -1480,8 +1427,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 		 */
 		hwif->resetproc	= &hpt3xx_reset;
 		hwif->busproc	= &hpt370_busproc;
-//		hwif->drives[0].autotune = hwif->drives[1].autotune = 1;
-	} else if (hpt_minimum_revision(dev,2)) {
+	} else if (info->revision >= 2) {
 		hwif->resetproc	= &hpt3xx_reset;
 		hwif->busproc	= &hpt3xx_tristate;
 	} else {
@@ -1502,18 +1448,18 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 		hwif->udma_four = ((ata66 & regmask) ? 0 : 1);
 	hwif->ide_dma_check = &hpt366_config_drive_xfer_rate;
 
-	if (hpt_minimum_revision(dev,8)) {
+	if (info->revision >= 8) {
 		hwif->ide_dma_test_irq = &hpt374_ide_dma_test_irq;
 		hwif->ide_dma_end = &hpt374_ide_dma_end;
-	} else if (hpt_minimum_revision(dev,5)) {
+	} else if (info->revision >= 5) {
 		hwif->ide_dma_test_irq = &hpt374_ide_dma_test_irq;
 		hwif->ide_dma_end = &hpt374_ide_dma_end;
-	} else if (hpt_minimum_revision(dev,3)) {
+	} else if (info->revision >= 3) {
 		hwif->dma_start = &hpt370_ide_dma_start;
 		hwif->ide_dma_end = &hpt370_ide_dma_end;
 		hwif->ide_dma_timeout = &hpt370_ide_dma_timeout;
 		hwif->ide_dma_lostirq = &hpt370_ide_dma_lostirq;
-	} else if (hpt_minimum_revision(dev,2))
+	} else if (info->revision >= 2)
 		hwif->ide_dma_lostirq = &hpt366_ide_dma_lostirq;
 	else
 		hwif->ide_dma_lostirq = &hpt366_ide_dma_lostirq;
@@ -1526,6 +1472,7 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 
 static void __devinit init_dma_hpt366(ide_hwif_t *hwif, unsigned long dmabase)
 {
+	struct hpt_info	*info	= ide_get_hwifdata(hwif);
 	u8 masterdma	= 0, slavedma = 0;
 	u8 dma_new	= 0, dma_old = 0;
 	u8 primary	= hwif->channel ? 0x4b : 0x43;
@@ -1535,8 +1482,7 @@ static void __devinit init_dma_hpt366(ide_hwif_t *hwif, unsigned long dmabase)
 	if (!dmabase)
 		return;
 		
-	if(pci_get_drvdata(hwif->pci_dev) == NULL)
-	{
+	if(info->speed == NULL) {
 		printk(KERN_WARNING "hpt: no known IDE timings, disabling DMA.\n");
 		return;
 	}
@@ -1559,6 +1505,40 @@ static void __devinit init_dma_hpt366(ide_hwif_t *hwif, unsigned long dmabase)
 	ide_setup_dma(hwif, dmabase, 8);
 }
 
+/*
+ *	We "borrow" this hook in order to set the data structures
+ *	up early enough before dma or init_hwif calls are made.
+ */
+
+static void __devinit init_iops_hpt366(ide_hwif_t *hwif)
+{
+	struct hpt_info *info = kmalloc(sizeof(struct hpt_info), GFP_KERNEL);
+	unsigned long dmabase = pci_resource_start(hwif->pci_dev, 4);
+	u8 did, rid;
+
+	if(info == NULL) {
+		printk(KERN_WARNING "hpt366: out of memory.\n");
+		return;
+	}
+	memset(info, 0, sizeof(struct hpt_info));
+	ide_set_hwifdata(hwif, info);
+
+	if(dmabase) {
+		did = inb(dmabase + 0x22);
+		rid = inb(dmabase + 0x28);
+
+		if((did == 4 && rid == 6) || (did == 5 && rid > 1))
+			info->flags |= IS_372N;
+	}
+
+	info->revision = hpt_revision(hwif->pci_dev);
+
+	if (info->revision >= 3)
+		hpt37x_clocking(hwif);
+	else
+		hpt366_clocking(hwif);
+}
+
 static int __devinit init_setup_hpt374(struct pci_dev *dev, ide_pci_device_t *d)
 {
 	struct pci_dev *findev = NULL;
@@ -1646,6 +1626,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
 		.name		= "HPT366",
 		.init_setup	= init_setup_hpt366,
 		.init_chipset	= init_chipset_hpt366,
+		.init_iops	= init_iops_hpt366,
 		.init_hwif	= init_hwif_hpt366,
 		.init_dma	= init_dma_hpt366,
 		.channels	= 2,
@@ -1656,6 +1637,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
 		.name		= "HPT372A",
 		.init_setup	= init_setup_hpt37x,
 		.init_chipset	= init_chipset_hpt366,
+		.init_iops	= init_iops_hpt366,
 		.init_hwif	= init_hwif_hpt366,
 		.init_dma	= init_dma_hpt366,
 		.channels	= 2,
@@ -1665,6 +1647,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
 		.name		= "HPT302",
 		.init_setup	= init_setup_hpt37x,
 		.init_chipset	= init_chipset_hpt366,
+		.init_iops	= init_iops_hpt366,
 		.init_hwif	= init_hwif_hpt366,
 		.init_dma	= init_dma_hpt366,
 		.channels	= 2,
@@ -1674,6 +1657,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
 		.name		= "HPT371",
 		.init_setup	= init_setup_hpt37x,
 		.init_chipset	= init_chipset_hpt366,
+		.init_iops	= init_iops_hpt366,
 		.init_hwif	= init_hwif_hpt366,
 		.init_dma	= init_dma_hpt366,
 		.channels	= 2,
@@ -1683,6 +1667,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
 		.name		= "HPT374",
 		.init_setup	= init_setup_hpt374,
 		.init_chipset	= init_chipset_hpt366,
+		.init_iops	= init_iops_hpt366,
 		.init_hwif	= init_hwif_hpt366,
 		.init_dma	= init_dma_hpt366,
 		.channels	= 2,	/* 4 */
@@ -1692,6 +1677,7 @@ static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
 		.name		= "HPT372N",
 		.init_setup	= init_setup_hpt37x,
 		.init_chipset	= init_chipset_hpt366,
+		.init_iops	= init_iops_hpt366,
 		.init_hwif	= init_hwif_hpt366,
 		.init_dma	= init_dma_hpt366,
 		.channels	= 2,	/* 4 */
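The hpt366 hunks above stop parking the timing table in pci_set_drvdata() and instead attach a driver-private struct hpt_info to the interface with ide_set_hwifdata(), filled in early by the new init_iops hook so that init_hwif/init_dma can read it back with ide_get_hwifdata(). A minimal sketch of that pattern, using a hypothetical example_info struct and hook name (the real driver uses hpt_info and init_iops_hpt366 exactly as shown in the diff):

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/ide.h>

/* Hypothetical per-interface private data, mirroring struct hpt_info above. */
struct example_info {
	void	*speed;		/* selected bus-clock timing table */
	u8	revision;	/* chip revision, used for the >= checks */
	u8	flags;		/* e.g. IS_372N, PLL_MODE */
};

/*
 * init_iops-style hook: it runs before init_hwif and init_dma, so the later
 * hooks can pull the data back with ide_get_hwifdata() instead of re-probing
 * the chip or overloading pci_get_drvdata().
 */
static void __devinit init_iops_example(ide_hwif_t *hwif)
{
	struct example_info *info = kmalloc(sizeof(*info), GFP_KERNEL);

	if (info == NULL)
		return;		/* later hooks must tolerate missing data */
	memset(info, 0, sizeof(*info));
	ide_set_hwifdata(hwif, info);
}

static void example_consumer(ide_hwif_t *hwif)
{
	struct example_info *info = ide_get_hwifdata(hwif);

	if (info == NULL || info->speed == NULL)
		printk(KERN_WARNING "example: no known IDE timings, disabling DMA.\n");
}

The real init_iops_hpt366() additionally records the HPT372N detection and the chip revision, which is why the later hooks no longer repeat the dmabase probing that the old code did in two places.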

+ 812 - 0
drivers/ide/pci/it821x.c

@@ -0,0 +1,812 @@
+
+/*
+ * linux/drivers/ide/pci/it821x.c		Version 0.09	December 2004
+ *
+ * Copyright (C) 2004		Red Hat <alan@redhat.com>
+ *
+ *  May be copied or modified under the terms of the GNU General Public License
+ *  Based in part on the ITE vendor provided SCSI driver.
+ *
+ *  Documentation available from
+ * 	http://www.ite.com.tw/pc/IT8212F_V04.pdf
+ *  Some other documents are NDA.
+ *
+ *  The ITE8212 isn't exactly a standard IDE controller. It has two
+ *  modes. In pass through mode it is an IDE controller. In its smart
+ *  mode it's actually quite a capable hardware raid controller disguised
+ *  as an IDE controller. Smart mode only understands DMA read/write and
+ *  identify, none of the fancier commands apply. The IT8211 is identical
+ *  in other respects but lacks the raid mode.
+ *
+ *  Errata:
+ *  o	Rev 0x10 also requires master/slave hold the same DMA timings and
+ *	cannot do ATAPI MWDMA.
+ *  o	The identify data for raid volumes lacks CHS info (technically ok)
+ *	but also fails to set the LBA28 and other bits. We fix these in
+ *	the IDE probe quirk code.
+ *  o	If you write LBA48 sized I/O's (ie > 256 sector) in smart mode
+ *	raid then the controller firmware dies
+ *  o	Smart mode without RAID doesn't clear all the necessary identify
+ *	bits to reduce the command set to the one used
+ *
+ *  This has a few impacts on the driver
+ *  - In pass through mode we do all the work you would expect
+ *  - In smart mode the clocking set up is done by the controller generally
+ *    but we must watch the other limits and filter.
+ *  - There are a few extra vendor commands that actually talk to the
+ *    controller but only work PIO with no IRQ.
+ *
+ *  Vendor areas of the identify block in smart mode are used for the
+ *  timing and policy set up. Each HDD in raid mode also has a serial
+ *  block on the disk. The hardware extra commands are get/set chip status,
+ *  rebuild, get rebuild status.
+ *
+ *  In Linux the driver supports pass through mode as if the device was
+ *  just another IDE controller. If the smart mode is running then
+ *  volumes are managed by the controller firmware and each IDE "disk"
+ *  is a raid volume. Even more cute - the controller can do automated
+ *  hotplug and rebuild.
+ *
+ *  The pass through controller itself is a little demented. It has a
+ *  flaw that it has a single set of PIO/MWDMA timings per channel so
+ *  non UDMA devices restrict each others performance. It also has a
+ *  single clock source per channel so mixed UDMA100/133 performance
+ *  isn't perfect and we have to pick a clock. Thankfully none of this
+ *  matters in smart mode. ATAPI DMA is not currently supported.
+ *
+ *  It seems the smart mode is a win for RAID1/RAID10 but otherwise not.
+ *
+ *  TODO
+ *	-	ATAPI UDMA is ok but not MWDMA it seems
+ *	-	RAID configuration ioctls
+ *	-	Move to libata once it grows up
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/hdreg.h>
+#include <linux/ide.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+
+struct it821x_dev
+{
+	unsigned int smart:1,		/* Are we in smart raid mode */
+		timing10:1;		/* Rev 0x10 */
+	u8	clock_mode;		/* 0, ATA_50 or ATA_66 */
+	u8	want[2][2];		/* Mode/Pri log for master slave */
+	/* We need these for switching the clock when DMA goes on/off
+	   The high byte is the 66Mhz timing */
+	u16	pio[2];			/* Cached PIO values */
+	u16	mwdma[2];		/* Cached MWDMA values */
+	u16	udma[2];		/* Cached UDMA values (per drive) */
+};
+
+#define ATA_66		0
+#define ATA_50		1
+#define ATA_ANY		2
+
+#define UDMA_OFF	0
+#define MWDMA_OFF	0
+
+/*
+ *	We allow users to force the card into non raid mode without
+ *	flashing the alternative BIOS. This is also neccessary right now
+ *	flashing the alternative BIOS. This is also necessary right now
+ *	device.
+ */
+
+static int it8212_noraid;
+
+/**
+ *	it821x_program	-	program the PIO/MWDMA registers
+ *	@drive: drive to tune
+ *
+ *	Program the PIO/MWDMA timing for this channel according to the
+ *	current clock.
+ */
+
+static void it821x_program(ide_drive_t *drive, u16 timing)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int channel = hwif->channel;
+	u8 conf;
+
+	/* Program PIO/MWDMA timing bits */
+	if(itdev->clock_mode == ATA_66)
+		conf = timing >> 8;
+	else
+		conf = timing & 0xFF;
+	pci_write_config_byte(hwif->pci_dev, 0x54 + 4 * channel, conf);
+}
+
+/**
+ *	it821x_program_udma	-	program the UDMA registers
+ *	@drive: drive to tune
+ *
+ *	Program the UDMA timing for this drive according to the
+ *	current clock.
+ */
+
+static void it821x_program_udma(ide_drive_t *drive, u16 timing)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int channel = hwif->channel;
+	int unit = drive->select.b.unit;
+	u8 conf;
+
+	/* Program UDMA timing bits */
+	if(itdev->clock_mode == ATA_66)
+		conf = timing >> 8;
+	else
+		conf = timing & 0xFF;
+	if(itdev->timing10 == 0)
+		pci_write_config_byte(hwif->pci_dev, 0x56 + 4 * channel + unit, conf);
+	else {
+		pci_write_config_byte(hwif->pci_dev, 0x56 + 4 * channel, conf);
+		pci_write_config_byte(hwif->pci_dev, 0x56 + 4 * channel + 1, conf);
+	}
+}
+
+
+/**
+ *	it821x_clock_strategy
+ *	@hwif: hardware interface
+ *
+ *	Select between the 50 and 66Mhz base clocks to get the best
+ *	results for this interface.
+ */
+
+static void it821x_clock_strategy(ide_drive_t *drive)
+{
+	ide_hwif_t *hwif = drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+
+	u8 unit = drive->select.b.unit;
+	ide_drive_t *pair = &hwif->drives[1-unit];
+
+	int clock, altclock;
+	u8 v;
+	int sel = 0;
+
+	if(itdev->want[0][0] > itdev->want[1][0]) {
+		clock = itdev->want[0][1];
+		altclock = itdev->want[1][1];
+	} else {
+		clock = itdev->want[1][1];
+		altclock = itdev->want[0][1];
+	}
+
+	/* Master doesn't care does the slave ? */
+	if(clock == ATA_ANY)
+		clock = altclock;
+
+	/* Nobody cares - keep the same clock */
+	if(clock == ATA_ANY)
+		return;
+	/* No change */
+	if(clock == itdev->clock_mode)
+		return;
+
+	/* Load this into the controller ? */
+	if(clock == ATA_66)
+		itdev->clock_mode = ATA_66;
+	else {
+		itdev->clock_mode = ATA_50;
+		sel = 1;
+	}
+	pci_read_config_byte(hwif->pci_dev, 0x50, &v);
+	v &= ~(1 << (1 + hwif->channel));
+	v |= sel << (1 + hwif->channel);
+	pci_write_config_byte(hwif->pci_dev, 0x50, v);
+
+	/*
+	 *	Reprogram the UDMA/PIO of the pair drive for the switch
+	 *	MWDMA will be dealt with by the dma switcher
+	 */
+	if(pair && itdev->udma[1-unit] != UDMA_OFF) {
+		it821x_program_udma(pair, itdev->udma[1-unit]);
+		it821x_program(pair, itdev->pio[1-unit]);
+	}
+	/*
+	 *	Reprogram the UDMA/PIO of our drive for the switch.
+	 *	MWDMA will be dealt with by the dma switcher
+	 */
+	if(itdev->udma[unit] != UDMA_OFF) {
+		it821x_program_udma(drive, itdev->udma[unit]);
+		it821x_program(drive, itdev->pio[unit]);
+	}
+}
+
+/**
+ *	it821x_ratemask	-	Compute available modes
+ *	@drive: IDE drive
+ *
+ *	Compute the available speeds for the devices on the interface. This
+ *	is all modes to ATA133 clipped by drive cable setup.
+ */
+
+static u8 it821x_ratemask (ide_drive_t *drive)
+{
+	u8 mode	= 4;
+	if (!eighty_ninty_three(drive))
+		mode = min(mode, (u8)1);
+	return mode;
+}
+
+/**
+ *	it821x_tuneproc	-	tune a drive
+ *	@drive: drive to tune
+ *	@mode_wanted: the target operating mode
+ *
+ *	Load the timing settings for this device mode into the
+ *	controller. By the time we are called the mode has been
+ *	modified as neccessary to handle the absence of seperate
+ *	modified as necessary to handle the absence of separate
+ *
+ *	This code is only used in pass through mode.
+ */
+
+static void it821x_tuneproc (ide_drive_t *drive, byte mode_wanted)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int unit = drive->select.b.unit;
+
+	/* Spec says 89 ref driver uses 88 */
+	static u16 pio[]	= { 0xAA88, 0xA382, 0xA181, 0x3332, 0x3121 };
+	static u8 pio_want[]    = { ATA_66, ATA_66, ATA_66, ATA_66, ATA_ANY };
+
+	if(itdev->smart)
+		return;
+
+	/* We prefer 66Mhz clock for PIO 0-3, don't care for PIO4 */
+	itdev->want[unit][1] = pio_want[mode_wanted];
+	itdev->want[unit][0] = 1;	/* PIO is lowest priority */
+	itdev->pio[unit] = pio[mode_wanted];
+	it821x_clock_strategy(drive);
+	it821x_program(drive, itdev->pio[unit]);
+}
+
+/**
+ *	it821x_tune_mwdma	-	tune a channel for MWDMA
+ *	@drive: drive to set up
+ *	@mode_wanted: the target operating mode
+ *
+ *	Load the timing settings for this device mode into the
+ *	controller when doing MWDMA in pass through mode. The caller
+ *	must manage the whole lack of per device MWDMA/PIO timings and
+ *	the shared MWDMA/PIO timing register.
+ */
+
+static void it821x_tune_mwdma (ide_drive_t *drive, byte mode_wanted)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = (void *)ide_get_hwifdata(hwif);
+	int unit = drive->select.b.unit;
+	int channel = hwif->channel;
+	u8 conf;
+
+	static u16 dma[]	= { 0x8866, 0x3222, 0x3121 };
+	static u8 mwdma_want[]	= { ATA_ANY, ATA_66, ATA_ANY };
+
+	itdev->want[unit][1] = mwdma_want[mode_wanted];
+	itdev->want[unit][0] = 2;	/* MWDMA is low priority */
+	itdev->mwdma[unit] = dma[mode_wanted];
+	itdev->udma[unit] = UDMA_OFF;
+
+	/* UDMA bits off - Revision 0x10 do them in pairs */
+	pci_read_config_byte(hwif->pci_dev, 0x50, &conf);
+	if(itdev->timing10)
+		conf |= channel ? 0x60: 0x18;
+	else
+		conf |= 1 << (3 + 2 * channel + unit);
+	pci_write_config_byte(hwif->pci_dev, 0x50, conf);
+
+	it821x_clock_strategy(drive);
+	/* FIXME: do we need to program this ? */
+	/* it821x_program(drive, itdev->mwdma[unit]); */
+}
+
+/**
+ *	it821x_tune_udma	-	tune a channel for UDMA
+ *	@drive: drive to set up
+ *	@mode_wanted: the target operating mode
+ *
+ *	Load the timing settings for this device mode into the
+ *	controller when doing UDMA modes in pass through.
+ */
+
+static void it821x_tune_udma (ide_drive_t *drive, byte mode_wanted)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int unit = drive->select.b.unit;
+	int channel = hwif->channel;
+	u8 conf;
+
+	static u16 udma[]	= { 0x4433, 0x4231, 0x3121, 0x2121, 0x1111, 0x2211, 0x1111 };
+	static u8 udma_want[]	= { ATA_ANY, ATA_50, ATA_ANY, ATA_66, ATA_66, ATA_50, ATA_66 };
+
+	itdev->want[unit][1] = udma_want[mode_wanted];
+	itdev->want[unit][0] = 3;	/* UDMA is high priority */
+	itdev->mwdma[unit] = MWDMA_OFF;
+	itdev->udma[unit] = udma[mode_wanted];
+	if(mode_wanted >= 5)
+		itdev->udma[unit] |= 0x8080;	/* UDMA 5/6 select on */
+
+	/* UDMA on. Again revision 0x10 must do the pair */
+	pci_read_config_byte(hwif->pci_dev, 0x50, &conf);
+	if(itdev->timing10)
+		conf &= channel ? 0x9F: 0xE7;
+	else
+		conf &= ~ (1 << (3 + 2 * channel + unit));
+	pci_write_config_byte(hwif->pci_dev, 0x50, conf);
+
+	it821x_clock_strategy(drive);
+	it821x_program_udma(drive, itdev->udma[unit]);
+
+}
+
+/**
+ *	config_it821x_chipset_for_pio	-	set drive timings
+ *	@drive: drive to tune
+ *	@set_speed: true to also program the drive with the chosen speed
+ *
+ *	Compute the best pio mode we can for a given device. We must
+ *	pick a speed that does not cause problems with the other device
+ *	on the cable.
+ */
+
+static void config_it821x_chipset_for_pio (ide_drive_t *drive, byte set_speed)
+{
+	u8 unit = drive->select.b.unit;
+	ide_hwif_t *hwif = drive->hwif;
+	ide_drive_t *pair = &hwif->drives[1-unit];
+	u8 speed = 0, set_pio	= ide_get_best_pio_mode(drive, 255, 5, NULL);
+	u8 pair_pio;
+
+	/* We have to deal with this mess in pairs */
+	if(pair != NULL) {
+		pair_pio = ide_get_best_pio_mode(pair, 255, 5, NULL);
+		/* Trim PIO to the slowest of the master/slave */
+		if(pair_pio < set_pio)
+			set_pio = pair_pio;
+	}
+	it821x_tuneproc(drive, set_pio);
+	speed = XFER_PIO_0 + set_pio;
+	/* XXX - We trim to the lowest of the pair so the other drive
+	   will always be fine at this point until we do hotplug passthru */
+
+	if (set_speed)
+		(void) ide_config_drive_speed(drive, speed);
+}
+
+/**
+ *	it821x_dma_start	-	DMA hook
+ *	@drive: drive for DMA
+ *
+ *	The IT821x has a single timing register for MWDMA and for PIO
+ *	operations. As we flip back and forth we have to reload the
+ *	clock. In addition the rev 0x10 device only works if the same
+ *	timing value is loaded into the master and slave UDMA clock
+ * 	so we must also reload that.
+ *
+ *	FIXME: we could figure out in advance if we need to do reloads
+ */
+
+static void it821x_dma_start(ide_drive_t *drive)
+{
+	ide_hwif_t *hwif = drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int unit = drive->select.b.unit;
+	if(itdev->mwdma[unit] != MWDMA_OFF)
+		it821x_program(drive, itdev->mwdma[unit]);
+	else if(itdev->udma[unit] != UDMA_OFF && itdev->timing10)
+		it821x_program_udma(drive, itdev->udma[unit]);
+	ide_dma_start(drive);
+}
+
+/**
+ *	it821x_dma_end	-	DMA hook
+ *	@drive: drive for DMA stop
+ *
+ *	The IT821x has a single timing register for MWDMA and for PIO
+ *	operations. As we flip back and forth we have to reload the
+ *	clock.
+ */
+
+static int it821x_dma_end(ide_drive_t *drive)
+{
+	ide_hwif_t *hwif = drive->hwif;
+	int unit = drive->select.b.unit;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int ret = __ide_dma_end(drive);
+	if(itdev->mwdma[unit] != MWDMA_OFF)
+		it821x_program(drive, itdev->pio[unit]);
+	return ret;
+}
+
+
+/**
+ *	it821x_tune_chipset	-	set controller timings
+ *	@drive: Drive to set up
+ *	@xferspeed: speed we want to achieve
+ *
+ *	Tune the ITE chipset for the desired mode. If we can't achieve
+ *	the desired mode then tune for a lower one, but ultimately
+ *	make the thing work.
+ */
+
+static int it821x_tune_chipset (ide_drive_t *drive, byte xferspeed)
+{
+
+	ide_hwif_t *hwif	= drive->hwif;
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	u8 speed		= ide_rate_filter(it821x_ratemask(drive), xferspeed);
+
+	if(!itdev->smart) {
+		switch(speed) {
+			case XFER_PIO_4:
+			case XFER_PIO_3:
+			case XFER_PIO_2:
+			case XFER_PIO_1:
+			case XFER_PIO_0:
+				it821x_tuneproc(drive, (speed - XFER_PIO_0));
+				break;
+			/* MWDMA tuning is really hard because our MWDMA and PIO
+			   timings are kept in the same place. We can switch in the
+			   host dma on/off callbacks */
+			case XFER_MW_DMA_2:
+			case XFER_MW_DMA_1:
+			case XFER_MW_DMA_0:
+				it821x_tune_mwdma(drive, (speed - XFER_MW_DMA_0));
+				break;
+			case XFER_UDMA_6:
+			case XFER_UDMA_5:
+			case XFER_UDMA_4:
+			case XFER_UDMA_3:
+			case XFER_UDMA_2:
+			case XFER_UDMA_1:
+			case XFER_UDMA_0:
+				it821x_tune_udma(drive, (speed - XFER_UDMA_0));
+				break;
+			default:
+				return 1;
+		}
+	}
+	/*
+	 *	In smart mode the clocking is done by the host controller
+	 * 	snooping the mode we picked. The rest of it is not our problem
+	 */
+	return ide_config_drive_speed(drive, speed);
+}
+
+/**
+ *	config_chipset_for_dma	-	configure for DMA
+ *	@drive: drive to configure
+ *
+ *	Called by the IDE layer when it wants the timings set up.
+ */
+
+static int config_chipset_for_dma (ide_drive_t *drive)
+{
+	u8 speed	= ide_dma_speed(drive, it821x_ratemask(drive));
+
+	config_it821x_chipset_for_pio(drive, !speed);
+	it821x_tune_chipset(drive, speed);
+	return ide_dma_enable(drive);
+}
+
+/**
+ *	it821x_config_drive_for_dma	-	set up for DMA transfers
+ *	@drive: drive we are going to set up
+ *
+ *	Set up the drive for DMA, tune the controller and drive as
+ *	required. If the drive isn't suitable for DMA or we hit
+ *	other problems then we will drop down to PIO and set up
+ *	PIO appropriately
+ */
+
+static int it821x_config_drive_for_dma (ide_drive_t *drive)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+
+	if (ide_use_dma(drive)) {
+		if (config_chipset_for_dma(drive))
+			return hwif->ide_dma_on(drive);
+	}
+	config_it821x_chipset_for_pio(drive, 1);
+	return hwif->ide_dma_off_quietly(drive);
+}
+
+/**
+ *	ata66_it821x	-	check for 80 pin cable
+ *	@hwif: interface to check
+ *
+ *	Check for the presence of an ATA66 capable cable on the
+ *	interface. Problematic as it seems some cards don't have
+ *	the needed logic onboard.
+ */
+
+static unsigned int __devinit ata66_it821x(ide_hwif_t *hwif)
+{
+	/* The reference driver also only does disk side */
+	return 1;
+}
+
+/**
+ *	it821x_fixups	-	post init callback
+ *	@hwif: interface
+ *
+ *	This callback is run after the drives have been probed but
+ *	before anything gets attached. It allows drivers to do any
+ *	final tuning that is needed, or fixups to work around bugs.
+ */
+
+static void __devinit it821x_fixups(ide_hwif_t *hwif)
+{
+	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
+	int i;
+
+	if(!itdev->smart) {
+		/*
+		 *	If we are in pass through mode then not much
+		 *	needs to be done, but we do bother to clear the
+		 *	IRQ mask as we may well be in PIO (eg rev 0x10)
+		 *	for now and we know unmasking is safe on this chipset.
+		 */
+		for (i = 0; i < 2; i++) {
+			ide_drive_t *drive = &hwif->drives[i];
+			if(drive->present)
+				drive->unmask = 1;
+		}
+		return;
+	}
+	/*
+	 *	Perform fixups on smart mode. We need to "lose" some
+	 *	capabilities the firmware lacks but does not filter, and
+	 *	also patch up some capability bits that it forgets to set
+	 *	in RAID mode.
+	 */
+
+	for(i = 0; i < 2; i++) {
+		ide_drive_t *drive = &hwif->drives[i];
+		struct hd_driveid *id;
+		u16 *idbits;
+
+		if(!drive->present)
+			continue;
+		id = drive->id;
+		idbits = (u16 *)drive->id;
+
+		/* Check for RAID v native */
+		if(strstr(id->model, "Integrated Technology Express")) {
+			/* In raid mode the ident block is slightly buggy
+			   We need to set the bits so that the IDE layer knows
+			   LBA28. LBA48 and DMA ar valid */
+			   LBA28, LBA48 and DMA are valid */
+			id->command_set_2 |= 0x0400;	/* LBA48 valid */
+			id->cfs_enable_2 |= 0x0400;	/* LBA48 on */
+			/* Reporting logic */
+			printk(KERN_INFO "%s: IT8212 %sRAID %d volume",
+				drive->name,
+				idbits[147] ? "Bootable ":"",
+				idbits[129]);
+				if(idbits[129] != 1)
+					printk("(%dK stripe)", idbits[146]);
+				printk(".\n");
+			/* Now the core code will have wrongly decided no DMA
+			   so we need to fix this */
+			hwif->ide_dma_off_quietly(drive);
+#ifdef CONFIG_IDEDMA_ONLYDISK
+			if (drive->media == ide_disk)
+#endif
+				hwif->ide_dma_check(drive);
+		} else {
+			/* Non RAID volume. Fixups to stop the core code
+			   doing unsupported things */
+			id->field_valid &= 1;
+			id->queue_depth = 0;
+			id->command_set_1 = 0;
+			id->command_set_2 &= 0xC400;
+			id->cfsse &= 0xC000;
+			id->cfs_enable_1 = 0;
+			id->cfs_enable_2 &= 0xC400;
+			id->csf_default &= 0xC000;
+			id->word127 = 0;
+			id->dlf = 0;
+			id->csfo = 0;
+			id->cfa_power = 0;
+			printk(KERN_INFO "%s: Performing identify fixups.\n",
+				drive->name);
+		}
+	}
+
+}
+
+/**
+ *	init_hwif_it821x	-	set up hwif structs
+ *	@hwif: interface to set up
+ *
+ *	We do the basic set up of the interface structure. The IT8212
+ *	requires several custom handlers so we override the default
+ *	ide DMA handlers appropriately
+ */
+
+static void __devinit init_hwif_it821x(ide_hwif_t *hwif)
+{
+	struct it821x_dev *idev = kmalloc(sizeof(struct it821x_dev), GFP_KERNEL);
+	u8 conf;
+
+	if(idev == NULL) {
+		printk(KERN_ERR "it821x: out of memory, falling back to legacy behaviour.\n");
+		goto fallback;
+	}
+	memset(idev, 0, sizeof(struct it821x_dev));
+	ide_set_hwifdata(hwif, idev);
+
+	pci_read_config_byte(hwif->pci_dev, 0x50, &conf);
+	if(conf & 1) {
+		idev->smart = 1;
+		hwif->atapi_dma = 0;
+		/* Long I/O's although allowed in LBA48 space cause the
+		   onboard firmware to enter the twilight zone */
+		hwif->rqsize = 256;
+	}
+
+	/* Pull the current clocks from 0x50 also */
+	if (conf & (1 << (1 + hwif->channel)))
+		idev->clock_mode = ATA_50;
+	else
+		idev->clock_mode = ATA_66;
+
+	idev->want[0][1] = ATA_ANY;
+	idev->want[1][1] = ATA_ANY;
+
+	/*
+	 *	Not in the docs but according to the reference driver
+	 *	this is neccessary.
+	 *	this is necessary.
+
+	pci_read_config_byte(hwif->pci_dev, 0x08, &conf);
+	if(conf == 0x10) {
+		idev->timing10 = 1;
+		hwif->atapi_dma = 0;
+		if(!idev->smart)
+			printk(KERN_WARNING "it821x: Revision 0x10, workarounds activated.\n");
+	}
+
+	hwif->speedproc = &it821x_tune_chipset;
+	hwif->tuneproc	= &it821x_tuneproc;
+
+	/* MWDMA/PIO clock switching for pass through mode */
+	if(!idev->smart) {
+		hwif->dma_start = &it821x_dma_start;
+		hwif->ide_dma_end = &it821x_dma_end;
+	}
+
+	hwif->drives[0].autotune = 1;
+	hwif->drives[1].autotune = 1;
+
+	if (!hwif->dma_base)
+		goto fallback;
+
+	hwif->ultra_mask = 0x7f;
+	hwif->mwdma_mask = 0x07;
+	hwif->swdma_mask = 0x07;
+
+	hwif->ide_dma_check = &it821x_config_drive_for_dma;
+	if (!(hwif->udma_four))
+		hwif->udma_four = ata66_it821x(hwif);
+
+	/*
+	 *	The BIOS often doesn't set up DMA on this controller
+	 *	so we always do it.
+	 */
+
+	hwif->autodma = 1;
+	hwif->drives[0].autodma = hwif->autodma;
+	hwif->drives[1].autodma = hwif->autodma;
+	return;
+fallback:
+	hwif->autodma = 0;
+	return;
+}
+
+static void __devinit it8212_disable_raid(struct pci_dev *dev)
+{
+	/* Reset local CPU, and set BIOS not ready */
+	pci_write_config_byte(dev, 0x5E, 0x01);
+
+	/* Set to bypass mode, and reset PCI bus */
+	pci_write_config_byte(dev, 0x50, 0x00);
+	pci_write_config_word(dev, PCI_COMMAND,
+			      PCI_COMMAND_PARITY | PCI_COMMAND_IO |
+			      PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+	pci_write_config_word(dev, 0x40, 0xA0F3);
+
+	pci_write_config_dword(dev,0x4C, 0x02040204);
+	pci_write_config_byte(dev, 0x42, 0x36);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0);
+}
+
+static unsigned int __devinit init_chipset_it821x(struct pci_dev *dev, const char *name)
+{
+	u8 conf;
+	static char *mode[2] = { "pass through", "smart" };
+
+	/* Force the card into bypass mode if so requested */
+	if (it8212_noraid) {
+		printk(KERN_INFO "it8212: forcing bypass mode.\n");
+		it8212_disable_raid(dev);
+	}
+	pci_read_config_byte(dev, 0x50, &conf);
+	printk(KERN_INFO "it821x: controller in %s mode.\n", mode[conf & 1]);
+	return 0;
+}
+
+
+#define DECLARE_ITE_DEV(name_str)			\
+	{						\
+		.name		= name_str,		\
+		.init_chipset	= init_chipset_it821x,	\
+		.init_hwif	= init_hwif_it821x,	\
+		.channels	= 2,			\
+		.autodma	= AUTODMA,		\
+		.bootable	= ON_BOARD,		\
+		.fixup	 	= it821x_fixups		\
+	}
+
+static ide_pci_device_t it821x_chipsets[] __devinitdata = {
+	/* 0 */ DECLARE_ITE_DEV("IT8212"),
+};
+
+/**
+ *	it821x_init_one	-	pci layer discovery entry
+ *	@dev: PCI device
+ *	@id: ident table entry
+ *
+ *	Called by the PCI code when it finds an ITE821x controller.
+ *	We then use the IDE PCI generic helper to do most of the work.
+ */
+
+static int __devinit it821x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	ide_setup_pci_device(dev, &it821x_chipsets[id->driver_data]);
+	return 0;
+}
+
+static struct pci_device_id it821x_pci_tbl[] = {
+	{ PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8211,  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8212,  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{ 0, },
+};
+
+MODULE_DEVICE_TABLE(pci, it821x_pci_tbl);
+
+static struct pci_driver driver = {
+	.name		= "ITE821x IDE",
+	.id_table	= it821x_pci_tbl,
+	.probe		= it821x_init_one,
+};
+
+static int __init it821x_ide_init(void)
+{
+	return ide_pci_register_driver(&driver);
+}
+
+module_init(it821x_ide_init);
+
+module_param_named(noraid, it8212_noraid, int, S_IRUGO);
+MODULE_PARM_DESC(noraid, "Force card into bypass mode");
+
+MODULE_AUTHOR("Alan Cox");
+MODULE_DESCRIPTION("PCI driver module for the ITE 821x");
+MODULE_LICENSE("GPL");
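Throughout it821x.c above, each cached PIO/MWDMA/UDMA setting is a 16-bit value whose high byte is the 66MHz-clock timing and whose low byte is the 50MHz-clock timing; it821x_program() and it821x_program_udma() then write one byte or the other depending on the channel's current clock_mode. A small standalone sketch of that selection, with a hypothetical helper name and a value taken from the pio[] table above:

#include <stdint.h>
#include <stdio.h>

#define ATA_66	0
#define ATA_50	1

/* Hypothetical helper mirroring the byte selection in it821x_program():
 * high byte = 66MHz timing, low byte = 50MHz timing. */
static uint8_t select_timing_byte(uint16_t cached, int clock_mode)
{
	return (clock_mode == ATA_66) ? (cached >> 8) : (cached & 0xFF);
}

int main(void)
{
	uint16_t pio4 = 0x3121;		/* PIO 4 entry from the pio[] table */

	printf("66MHz byte: 0x%02x, 50MHz byte: 0x%02x\n",
	       select_timing_byte(pio4, ATA_66),
	       select_timing_byte(pio4, ATA_50));
	return 0;
}

This is also why it821x_clock_strategy() has to reprogram both drives after switching clocks: the cached values stay the same, but a different byte of each one must be loaded into the chip.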

+ 5 - 5
drivers/ide/pci/serverworks.c

@@ -442,7 +442,7 @@ static unsigned int __devinit init_chipset_svwks (struct pci_dev *dev, const cha
 	return (dev->irq) ? dev->irq : 0;
 }
 
-static unsigned int __init ata66_svwks_svwks (ide_hwif_t *hwif)
+static unsigned int __devinit ata66_svwks_svwks (ide_hwif_t *hwif)
 {
 	return 1;
 }
@@ -454,7 +454,7 @@ static unsigned int __init ata66_svwks_svwks (ide_hwif_t *hwif)
  * Bit 14 clear = primary IDE channel does not have 80-pin cable.
  * Bit 14 set   = primary IDE channel has 80-pin cable.
  */
-static unsigned int __init ata66_svwks_dell (ide_hwif_t *hwif)
+static unsigned int __devinit ata66_svwks_dell (ide_hwif_t *hwif)
 {
 	struct pci_dev *dev = hwif->pci_dev;
 	if (dev->subsystem_vendor == PCI_VENDOR_ID_DELL &&
@@ -472,7 +472,7 @@ static unsigned int __init ata66_svwks_dell (ide_hwif_t *hwif)
  *
  * WARNING: this only works on Alpine hardware!
  */
-static unsigned int __init ata66_svwks_cobalt (ide_hwif_t *hwif)
+static unsigned int __devinit ata66_svwks_cobalt (ide_hwif_t *hwif)
 {
 	struct pci_dev *dev = hwif->pci_dev;
 	if (dev->subsystem_vendor == PCI_VENDOR_ID_SUN &&
@@ -483,7 +483,7 @@ static unsigned int __init ata66_svwks_cobalt (ide_hwif_t *hwif)
 	return 0;
 }
 
-static unsigned int __init ata66_svwks (ide_hwif_t *hwif)
+static unsigned int __devinit ata66_svwks (ide_hwif_t *hwif)
 {
 	struct pci_dev *dev = hwif->pci_dev;
 
@@ -573,7 +573,7 @@ static int __devinit init_setup_svwks (struct pci_dev *dev, ide_pci_device_t *d)
 	return ide_setup_pci_device(dev, d);
 }
 
-static int __init init_setup_csb6 (struct pci_dev *dev, ide_pci_device_t *d)
+static int __devinit init_setup_csb6 (struct pci_dev *dev, ide_pci_device_t *d)
 {
 	if (!(PCI_FUNC(dev->devfn) & 1)) {
 		d->bootable = NEVER_BOARD;
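The serverworks.c hunks only retag helpers from __init to __devinit. The distinction matters because __init code is discarded once booting finishes, while these functions are reached from the PCI probe path, which can also run after boot (for example when a hot-plugged device appears with CONFIG_HOTPLUG enabled). A hedged sketch of the convention, with hypothetical function names:

#include <linux/init.h>
#include <linux/ide.h>

/*
 * __init: discarded after boot.  Only safe for code that can never run
 * again, such as a __setup() command-line parser.
 */
static int __init example_parse_option(char *str)
{
	return 1;
}

/*
 * __devinit: kept as long as device probing can still happen after boot,
 * which is why the cable-detect helpers called from the probe-time
 * init_hwif path are tagged this way in the hunks above.
 */
static unsigned int __devinit example_ata66_detect(ide_hwif_t *hwif)
{
	return 1;	/* pretend an 80-wire cable was detected */
}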

+ 4 - 4
drivers/ide/ppc/pmac.c

@@ -1324,9 +1324,9 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif)
 	/* XXX FIXME: Media bay stuff need re-organizing */
 	if (np->parent && np->parent->name
 	    && strcasecmp(np->parent->name, "media-bay") == 0) {
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PMAC_MEDIABAY
 		media_bay_set_ide_infos(np->parent, pmif->regbase, pmif->irq, hwif->index);
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PMAC_MEDIABAY */
 		pmif->mediabay = 1;
 		if (!bidp)
 			pmif->aapl_bus_id = 1;
@@ -1382,10 +1382,10 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif)
 	       hwif->index, model_name[pmif->kind], pmif->aapl_bus_id,
 	       pmif->mediabay ? " (mediabay)" : "", hwif->irq);
 			
-#ifdef CONFIG_PMAC_PBOOK
+#ifdef CONFIG_PMAC_MEDIABAY
 	if (pmif->mediabay && check_media_bay_by_base(pmif->regbase, MB_CD) == 0)
 		hwif->noprobe = 0;
-#endif /* CONFIG_PMAC_PBOOK */
+#endif /* CONFIG_PMAC_MEDIABAY */
 
 	hwif->sg_max_nents = MAX_DCMDS;
 

+ 5 - 5
drivers/ieee1394/ohci1394.c

@@ -3538,8 +3538,8 @@ static void ohci1394_pci_remove(struct pci_dev *pdev)
 
 static int ohci1394_pci_resume (struct pci_dev *pdev)
 {
-#ifdef CONFIG_PMAC_PBOOK
-	{
+#ifdef CONFIG_PPC_PMAC
+	if (_machine == _MACH_Pmac) {
 		struct device_node *of_node;
 
 		/* Re-enable 1394 */
@@ -3547,7 +3547,7 @@ static int ohci1394_pci_resume (struct pci_dev *pdev)
 		if (of_node)
 			pmac_call_feature (PMAC_FTR_1394_ENABLE, of_node, 0, 1);
 	}
-#endif
+#endif /* CONFIG_PPC_PMAC */
 
 	pci_enable_device(pdev);
 
@@ -3557,8 +3557,8 @@ static int ohci1394_pci_resume (struct pci_dev *pdev)
 
 static int ohci1394_pci_suspend (struct pci_dev *pdev, pm_message_t state)
 {
-#ifdef CONFIG_PMAC_PBOOK
-	{
+#ifdef CONFIG_PPC_PMAC
+	if (_machine == _MACH_Pmac) {
 		struct device_node *of_node;
 
 		/* Disable 1394 */

+ 2 - 2
drivers/infiniband/core/packer.c

@@ -96,7 +96,7 @@ void ib_pack(const struct ib_field        *desc,
 			else
 				val = 0;
 
-			mask = cpu_to_be64(((1ull << desc[i].size_bits) - 1) << shift);
+			mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift);
 			addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words);
 			*addr = (*addr & ~mask) | (cpu_to_be64(val) & mask);
 		} else {
@@ -176,7 +176,7 @@ void ib_unpack(const struct ib_field        *desc,
 			__be64 *addr;
 
 			shift = 64 - desc[i].offset_bits - desc[i].size_bits;
-			mask = ((1ull << desc[i].size_bits) - 1) << shift;
+			mask = (~0ull >> (64 - desc[i].size_bits)) << shift;
 			addr = (__be64 *) buf + desc[i].offset_words;
 			val = (be64_to_cpup(addr) & mask) >> shift;
 			value_write(desc[i].struct_offset_bytes,
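The ib_pack()/ib_unpack() change above swaps (1ull << size_bits) - 1 for ~0ull >> (64 - size_bits). The old form invokes undefined behaviour when a field is exactly 64 bits wide, because the shift count then equals the operand width; the new form simply degrades to an all-ones mask. A standalone sketch with a hypothetical helper name, showing the mask for a 16-bit field at bit offset 8 and for a full 64-bit field:

#include <stdint.h>
#include <stdio.h>

/* Mask construction as in the fixed ib_pack()/ib_unpack(): valid for any
 * field width from 1 to 64 bits. */
static uint64_t field_mask(unsigned int size_bits, unsigned int shift)
{
	return (~0ull >> (64 - size_bits)) << shift;
}

int main(void)
{
	printf("%016llx\n", (unsigned long long)field_mask(16, 8));	/* 0000000000ffff00 */
	printf("%016llx\n", (unsigned long long)field_mask(64, 0));	/* ffffffffffffffff */
	return 0;
}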

+ 13 - 5
drivers/infiniband/core/sa_query.c

@@ -507,7 +507,13 @@ retry:
 		spin_unlock_irqrestore(&idr_lock, flags);
 	}
 
-	return ret;
+	/*
+	 * It's not safe to dereference query any more, because the
+	 * send may already have completed and freed the query in
+	 * another context.  So use wr.wr_id, which has a copy of the
+	 * query's id.
+	 */
+	return ret ? ret : wr.wr_id;
 }
 
 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
@@ -598,14 +604,15 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
 		rec, query->sa_query.mad->data);
 
 	*sa_query = &query->sa_query;
+
 	ret = send_mad(&query->sa_query, timeout_ms);
-	if (ret) {
+	if (ret < 0) {
 		*sa_query = NULL;
 		kfree(query->sa_query.mad);
 		kfree(query);
 	}
 
-	return ret ? ret : query->sa_query.id;
+	return ret;
 }
 EXPORT_SYMBOL(ib_sa_path_rec_get);
 
@@ -674,14 +681,15 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
 		rec, query->sa_query.mad->data);
 
 	*sa_query = &query->sa_query;
+
 	ret = send_mad(&query->sa_query, timeout_ms);
-	if (ret) {
+	if (ret < 0) {
 		*sa_query = NULL;
 		kfree(query->sa_query.mad);
 		kfree(query);
 	}
 
-	return ret ? ret : query->sa_query.id;
+	return ret;
 }
 EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
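The sa_query.c change makes send_mad() hand back the request id on success, taken from the local wr.wr_id, and the callers now return that value directly. Reading query->sa_query.id after send_mad() was racy: the send may already have completed and freed the query in another context. A hedged sketch of the general pattern, with hypothetical names:

/* Hypothetical request object handed to an asynchronous sender. */
struct async_request {
	int id;
};

/*
 * Capture anything the caller still needs (here the id) in a local
 * variable before posting: once the request is on the asynchronous
 * path, the completion handler may free it at any moment.
 */
static int example_submit(struct async_request *req,
			  int (*post)(struct async_request *))
{
	int id = req->id;	/* like wr.wr_id in send_mad() above */
	int ret = post(req);	/* req may already be freed from here on */

	return ret ? ret : id;	/* never dereference req past this point */
}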
 
 

+ 1 - 0
drivers/infiniband/hw/mthca/mthca_av.c

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU

+ 227 - 304
drivers/infiniband/hw/mthca/mthca_cmd.c

@@ -431,6 +431,36 @@ static int mthca_cmd_imm(struct mthca_dev *dev,
 				      timeout, status);
 }
 
+int mthca_cmd_init(struct mthca_dev *dev)
+{
+	sema_init(&dev->cmd.hcr_sem, 1);
+	sema_init(&dev->cmd.poll_sem, 1);
+	dev->cmd.use_events = 0;
+
+	dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE,
+			   MTHCA_HCR_SIZE);
+	if (!dev->hcr) {
+		mthca_err(dev, "Couldn't map command register.");
+		return -ENOMEM;
+	}
+
+	dev->cmd.pool = pci_pool_create("mthca_cmd", dev->pdev,
+					MTHCA_MAILBOX_SIZE,
+					MTHCA_MAILBOX_SIZE, 0);
+	if (!dev->cmd.pool) {
+		iounmap(dev->hcr);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void mthca_cmd_cleanup(struct mthca_dev *dev)
+{
+	pci_pool_destroy(dev->cmd.pool);
+	iounmap(dev->hcr);
+}
+
 /*
  * Switch to using events to issue FW commands (should be called after
  * event queue to command events has been initialized).
@@ -489,6 +519,33 @@ void mthca_cmd_use_polling(struct mthca_dev *dev)
 	up(&dev->cmd.poll_sem);
 }
 
+struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
+					  unsigned int gfp_mask)
+{
+	struct mthca_mailbox *mailbox;
+
+	mailbox = kmalloc(sizeof *mailbox, gfp_mask);
+	if (!mailbox)
+		return ERR_PTR(-ENOMEM);
+
+	mailbox->buf = pci_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma);
+	if (!mailbox->buf) {
+		kfree(mailbox);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return mailbox;
+}
+
+void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox)
+{
+	if (!mailbox)
+		return;
+
+	pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma);
+	kfree(mailbox);
+}
+
 int mthca_SYS_EN(struct mthca_dev *dev, u8 *status)
 {
 	u64 out;
@@ -513,20 +570,20 @@ int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status)
 static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
 			 u64 virt, u8 *status)
 {
-	u32 *inbox;
-	dma_addr_t indma;
+	struct mthca_mailbox *mailbox;
 	struct mthca_icm_iter iter;
+	__be64 *pages;
 	int lg;
 	int nent = 0;
 	int i;
 	int err = 0;
 	int ts = 0, tc = 0;
 
-	inbox = pci_alloc_consistent(dev->pdev, PAGE_SIZE, &indma);
-	if (!inbox)
-		return -ENOMEM;
-
-	memset(inbox, 0, PAGE_SIZE);
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	memset(mailbox->buf, 0, MTHCA_MAILBOX_SIZE);
+	pages = mailbox->buf;
 
 	for (mthca_icm_first(icm, &iter);
 	     !mthca_icm_last(&iter);
@@ -546,19 +603,17 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
 		}
 		for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i, ++nent) {
 			if (virt != -1) {
-				*((__be64 *) (inbox + nent * 4)) =
-					cpu_to_be64(virt);
+				pages[nent * 2] = cpu_to_be64(virt);
 				virt += 1 << lg;
 			}

-			*((__be64 *) (inbox + nent * 4 + 2)) =
-				cpu_to_be64((mthca_icm_addr(&iter) +
-					     (i << lg)) | (lg - 12));
+			pages[nent * 2 + 1] = cpu_to_be64((mthca_icm_addr(&iter) +
+							   (i << lg)) | (lg - 12));
 			ts += 1 << (lg - 10);
 			++tc;

-			if (nent == PAGE_SIZE / 16) {
-				err = mthca_cmd(dev, indma, nent, 0, op,
+			if (nent == MTHCA_MAILBOX_SIZE / 16) {
+				err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
 						CMD_TIME_CLASS_B, status);
 				if (err || *status)
 					goto out;
@@ -568,7 +623,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
 	}

 	if (nent)
-		err = mthca_cmd(dev, indma, nent, 0, op,
+		err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
 				CMD_TIME_CLASS_B, status);

 	switch (op) {
@@ -585,7 +640,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
 	}

 out:
-	pci_free_consistent(dev->pdev, PAGE_SIZE, inbox, indma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }

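For orientation on the mthca_map_cmd() changes above: each 16-byte mailbox entry is a pair of big-endian 64-bit words, the target virtual address (when one is supplied) followed by the chunk's DMA address with log2(chunk size) minus 12 folded into the low bits. A sketch of a single entry, with made-up values:

	__be64 *pages = mailbox->buf;	/* MTHCA_MAILBOX_SIZE / 16 entries per command */
	u64 va  = 0x100000ULL;		/* hypothetical ICM virtual address */
	u64 dma = 0x3f000000ULL;	/* hypothetical DMA address of the chunk */
	int lg  = 18;			/* chunk is 1 << 18 bytes */

	pages[0] = cpu_to_be64(va);
	pages[1] = cpu_to_be64(dma | (lg - 12));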
@@ -606,8 +661,8 @@ int mthca_RUN_FW(struct mthca_dev *dev, u8 *status)

 int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u32 *outbox;
-	dma_addr_t outdma;
 	int err = 0;
 	u8 lg;

@@ -625,12 +680,12 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
 #define QUERY_FW_EQ_ARM_BASE_OFFSET    0x40
 #define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48

-	outbox = pci_alloc_consistent(dev->pdev, QUERY_FW_OUT_SIZE, &outdma);
-	if (!outbox) {
-		return -ENOMEM;
-	}
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	outbox = mailbox->buf;
-	err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_FW,
+	err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_FW,
 			    CMD_TIME_CLASS_A, status);

 	if (err)
@@ -681,15 +736,15 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
 	}

 out:
-	pci_free_consistent(dev->pdev, QUERY_FW_OUT_SIZE, outbox, outdma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }

 int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u8 info;
 	u32 *outbox;
-	dma_addr_t outdma;
 	int err = 0;

 #define ENABLE_LAM_OUT_SIZE         0x100
@@ -700,11 +755,12 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
 #define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4)
 #define ENABLE_LAM_INFO_ECC_MASK    0x3

-	outbox = pci_alloc_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, &outdma);
-	if (!outbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	outbox = mailbox->buf;
-	err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_ENABLE_LAM,
+	err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_ENABLE_LAM,
 			    CMD_TIME_CLASS_C, status);

 	if (err)
@@ -733,7 +789,7 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
 		  (unsigned long long) dev->ddr_end);

 out:
-	pci_free_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, outbox, outdma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }

@@ -744,9 +800,9 @@ int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status)

 int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u8 info;
 	u32 *outbox;
-	dma_addr_t outdma;
 	int err = 0;

 #define QUERY_DDR_OUT_SIZE         0x100
@@ -757,11 +813,12 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
 #define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4)
 #define QUERY_DDR_INFO_ECC_MASK    0x3

-	outbox = pci_alloc_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, &outdma);
-	if (!outbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	outbox = mailbox->buf;
-	err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DDR,
+	err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DDR,
 			    CMD_TIME_CLASS_A, status);

 	if (err)
@@ -787,15 +844,15 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
 		  (unsigned long long) dev->ddr_end);

 out:
-	pci_free_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, outbox, outdma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }

 int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 			struct mthca_dev_lim *dev_lim, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u32 *outbox;
-	dma_addr_t outdma;
 	u8 field;
 	u16 size;
 	int err;
@@ -860,11 +917,12 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 #define QUERY_DEV_LIM_LAMR_OFFSET           0x9f
 #define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET     0xa0

-	outbox = pci_alloc_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, &outdma);
-	if (!outbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	outbox = mailbox->buf;
-	err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DEV_LIM,
+	err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DEV_LIM,
 			    CMD_TIME_CLASS_A, status);

 	if (err)
@@ -1020,15 +1078,15 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 	}

 out:
-	pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }

 int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
 			struct mthca_adapter *adapter, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u32 *outbox;
-	dma_addr_t outdma;
 	int err;

 #define QUERY_ADAPTER_OUT_SIZE             0x100
@@ -1037,23 +1095,24 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
 #define QUERY_ADAPTER_REVISION_ID_OFFSET   0x08
 #define QUERY_ADAPTER_INTA_PIN_OFFSET      0x10

-	outbox = pci_alloc_consistent(dev->pdev, QUERY_ADAPTER_OUT_SIZE, &outdma);
-	if (!outbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	outbox = mailbox->buf;
-	err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_ADAPTER,
+	err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_ADAPTER,
 			    CMD_TIME_CLASS_A, status);

 	if (err)
 		goto out;

-	MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET);
-	MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET);
+	MTHCA_GET(adapter->vendor_id, outbox,   QUERY_ADAPTER_VENDOR_ID_OFFSET);
+	MTHCA_GET(adapter->device_id, outbox,   QUERY_ADAPTER_DEVICE_ID_OFFSET);
 	MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET);
-	MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
+	MTHCA_GET(adapter->inta_pin, outbox,    QUERY_ADAPTER_INTA_PIN_OFFSET);

 out:
-	pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }

@@ -1061,8 +1120,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
 		   struct mthca_init_hca_param *param,
 		   u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u32 *inbox;
-	dma_addr_t indma;
 	int err;

 #define INIT_HCA_IN_SIZE             	 0x200
@@ -1102,9 +1161,10 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
 #define  INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10)
 #define  INIT_HCA_UAR_CTX_BASE_OFFSET    (INIT_HCA_UAR_OFFSET + 0x18)

-	inbox = pci_alloc_consistent(dev->pdev, INIT_HCA_IN_SIZE, &indma);
-	if (!inbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	inbox = mailbox->buf;

 	memset(inbox, 0, INIT_HCA_IN_SIZE);

@@ -1167,10 +1227,9 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
 		MTHCA_PUT(inbox, param->uarc_base,   INIT_HCA_UAR_CTX_BASE_OFFSET);
 	}

-	err = mthca_cmd(dev, indma, 0, 0, CMD_INIT_HCA,
-			HZ, status);
+	err = mthca_cmd(dev, mailbox->dma, 0, 0, CMD_INIT_HCA, HZ, status);
-	pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }

@@ -1178,8 +1237,8 @@ int mthca_INIT_IB(struct mthca_dev *dev,
 		  struct mthca_init_ib_param *param,
 		  int port, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u32 *inbox;
-	dma_addr_t indma;
 	int err;
 	u32 flags;

@@ -1199,9 +1258,10 @@ int mthca_INIT_IB(struct mthca_dev *dev,
 #define INIT_IB_NODE_GUID_OFFSET 0x18
 #define INIT_IB_SI_GUID_OFFSET   0x20

-	inbox = pci_alloc_consistent(dev->pdev, INIT_IB_IN_SIZE, &indma);
-	if (!inbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	inbox = mailbox->buf;

 	memset(inbox, 0, INIT_IB_IN_SIZE);

@@ -1221,10 +1281,10 @@ int mthca_INIT_IB(struct mthca_dev *dev,
 	MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET);
 	MTHCA_PUT(inbox, param->si_guid,   INIT_IB_SI_GUID_OFFSET);

-	err = mthca_cmd(dev, indma, port, 0, CMD_INIT_IB,
+	err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_INIT_IB,
 			CMD_TIME_CLASS_A, status);

-	pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }

@@ -1241,8 +1301,8 @@ int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status)
 int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
 		 int port, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u32 *inbox;
-	dma_addr_t indma;
 	int err;
 	u32 flags = 0;

@@ -1253,9 +1313,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
 #define SET_IB_CAP_MASK_OFFSET 0x04
 #define SET_IB_SI_GUID_OFFSET  0x08

-	inbox = pci_alloc_consistent(dev->pdev, SET_IB_IN_SIZE, &indma);
-	if (!inbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	inbox = mailbox->buf;

 	memset(inbox, 0, SET_IB_IN_SIZE);

@@ -1266,10 +1327,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
 	MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET);
 	MTHCA_PUT(inbox, param->si_guid,  SET_IB_SI_GUID_OFFSET);

-	err = mthca_cmd(dev, indma, port, 0, CMD_SET_IB,
+	err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_SET_IB,
 			CMD_TIME_CLASS_B, status);

-	pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 }

@@ -1280,20 +1341,22 @@ int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *st

 int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status)
 {
+	struct mthca_mailbox *mailbox;
 	u64 *inbox;
-	dma_addr_t indma;
 	int err;

-	inbox = pci_alloc_consistent(dev->pdev, 16, &indma);
-	if (!inbox)
-		return -ENOMEM;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	inbox = mailbox->buf;

 	inbox[0] = cpu_to_be64(virt);
 	inbox[1] = cpu_to_be64(dma_addr);

-	err = mthca_cmd(dev, indma, 1, 0, CMD_MAP_ICM, CMD_TIME_CLASS_B, status);
+	err = mthca_cmd(dev, mailbox->dma, 1, 0, CMD_MAP_ICM,
+			CMD_TIME_CLASS_B, status);
-	pci_free_consistent(dev->pdev, 16, inbox, indma);
+	mthca_free_mailbox(dev, mailbox);

 	if (!err)
 		mthca_dbg(dev, "Mapped page at %llx to %llx for ICM.\n",
@@ -1338,69 +1401,26 @@ int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
 	return 0;
 }

-int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
+int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int mpt_index, u8 *status)
 {
-	dma_addr_t indma;
-	int err;
-
-	indma = pci_map_single(dev->pdev, mpt_entry,
-			       MTHCA_MPT_ENTRY_SIZE,
-			       PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(indma))
-		return -ENOMEM;
-
-	err = mthca_cmd(dev, indma, mpt_index, 0, CMD_SW2HW_MPT,
-			CMD_TIME_CLASS_B, status);
-
-	pci_unmap_single(dev->pdev, indma,
-			 MTHCA_MPT_ENTRY_SIZE, PCI_DMA_TODEVICE);
-	return err;
+	return mthca_cmd(dev, mailbox->dma, mpt_index, 0, CMD_SW2HW_MPT,
+			 CMD_TIME_CLASS_B, status);
 }

-int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry,
+int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int mpt_index, u8 *status)
 {
-	dma_addr_t outdma = 0;
-	int err;
-
-	if (mpt_entry) {
-		outdma = pci_map_single(dev->pdev, mpt_entry,
-					MTHCA_MPT_ENTRY_SIZE,
-					PCI_DMA_FROMDEVICE);
-		if (pci_dma_mapping_error(outdma))
-			return -ENOMEM;
-	}
-
-	err = mthca_cmd_box(dev, 0, outdma, mpt_index, !mpt_entry,
-			    CMD_HW2SW_MPT,
-			    CMD_TIME_CLASS_B, status);
-
-	if (mpt_entry)
-		pci_unmap_single(dev->pdev, outdma,
-				 MTHCA_MPT_ENTRY_SIZE,
-				 PCI_DMA_FROMDEVICE);
-	return err;
+	return mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
+			     !mailbox, CMD_HW2SW_MPT,
+			     CMD_TIME_CLASS_B, status);
 }

-int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
+int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int num_mtt, u8 *status)
 {
-	dma_addr_t indma;
-	int err;
-
-	indma = pci_map_single(dev->pdev, mtt_entry,
-			       (num_mtt + 2) * 8,
-			       PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(indma))
-		return -ENOMEM;
-
-	err = mthca_cmd(dev, indma, num_mtt, 0, CMD_WRITE_MTT,
-			CMD_TIME_CLASS_B, status);
-
-	pci_unmap_single(dev->pdev, indma,
-			 (num_mtt + 2) * 8, PCI_DMA_TODEVICE);
-	return err;
+	return mthca_cmd(dev, mailbox->dma, num_mtt, 0, CMD_WRITE_MTT,
+			 CMD_TIME_CLASS_B, status);
 }

 int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status)
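With mthca_WRITE_MTT() now taking a mailbox, the caller is responsible for building the entry list itself. A rough sketch of that shape (buffer_list and list_len are hypothetical; the assumption that the first two 64-bit words are control words is inferred from the old (num_mtt + 2) * 8 mapping size, and the real builder lives in the memory-region code, which is not shown in this excerpt):

	struct mthca_mailbox *mailbox;
	__be64 *mtt_entry;
	u8 status;
	int err, i;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);
	mtt_entry = mailbox->buf;

	for (i = 0; i < list_len; ++i)		/* leave room for the two leading words */
		mtt_entry[i + 2] = cpu_to_be64(buffer_list[i]);

	err = mthca_WRITE_MTT(dev, mailbox, list_len, &status);
	mthca_free_mailbox(dev, mailbox);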
@@ -1418,92 +1438,38 @@ int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
 			 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status);
 }

-int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,
+int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int eq_num, u8 *status)
 {
-	dma_addr_t indma;
-	int err;
-
-	indma = pci_map_single(dev->pdev, eq_context,
-			       MTHCA_EQ_CONTEXT_SIZE,
-			       PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(indma))
-		return -ENOMEM;
-
-	err = mthca_cmd(dev, indma, eq_num, 0, CMD_SW2HW_EQ,
-			CMD_TIME_CLASS_A, status);
-
-	pci_unmap_single(dev->pdev, indma,
-			 MTHCA_EQ_CONTEXT_SIZE, PCI_DMA_TODEVICE);
-	return err;
+	return mthca_cmd(dev, mailbox->dma, eq_num, 0, CMD_SW2HW_EQ,
+			 CMD_TIME_CLASS_A, status);
 }

-int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context,
+int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int eq_num, u8 *status)
 {
-	dma_addr_t outdma = 0;
-	int err;
-
-	outdma = pci_map_single(dev->pdev, eq_context,
-				MTHCA_EQ_CONTEXT_SIZE,
-				PCI_DMA_FROMDEVICE);
-	if (pci_dma_mapping_error(outdma))
-		return -ENOMEM;
-
-	err = mthca_cmd_box(dev, 0, outdma, eq_num, 0,
-			    CMD_HW2SW_EQ,
-			    CMD_TIME_CLASS_A, status);
-
-	pci_unmap_single(dev->pdev, outdma,
-			 MTHCA_EQ_CONTEXT_SIZE,
-			 PCI_DMA_FROMDEVICE);
-	return err;
+	return mthca_cmd_box(dev, 0, mailbox->dma, eq_num, 0,
+			     CMD_HW2SW_EQ,
+			     CMD_TIME_CLASS_A, status);
 }

-int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context,
+int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int cq_num, u8 *status)
 {
-	dma_addr_t indma;
-	int err;
-
-	indma = pci_map_single(dev->pdev, cq_context,
-			       MTHCA_CQ_CONTEXT_SIZE,
-			       PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(indma))
-		return -ENOMEM;
-
-	err = mthca_cmd(dev, indma, cq_num, 0, CMD_SW2HW_CQ,
+	return mthca_cmd(dev, mailbox->dma, cq_num, 0, CMD_SW2HW_CQ,
 			CMD_TIME_CLASS_A, status);
-
-	pci_unmap_single(dev->pdev, indma,
-			 MTHCA_CQ_CONTEXT_SIZE, PCI_DMA_TODEVICE);
-	return err;
 }

-int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context,
+int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int cq_num, u8 *status)
 {
-	dma_addr_t outdma = 0;
-	int err;
-
-	outdma = pci_map_single(dev->pdev, cq_context,
-				MTHCA_CQ_CONTEXT_SIZE,
-				PCI_DMA_FROMDEVICE);
-	if (pci_dma_mapping_error(outdma))
-		return -ENOMEM;
-
-	err = mthca_cmd_box(dev, 0, outdma, cq_num, 0,
-			    CMD_HW2SW_CQ,
-			    CMD_TIME_CLASS_A, status);
-
-	pci_unmap_single(dev->pdev, outdma,
-			 MTHCA_CQ_CONTEXT_SIZE,
-			 PCI_DMA_FROMDEVICE);
-	return err;
+	return mthca_cmd_box(dev, 0, mailbox->dma, cq_num, 0,
+			     CMD_HW2SW_CQ,
+			     CMD_TIME_CLASS_A, status);
 }

 int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
-		    int is_ee, void *qp_context, u32 optmask,
+		    int is_ee, struct mthca_mailbox *mailbox, u32 optmask,
 		    u8 *status)
 {
 	static const u16 op[] = {
@@ -1520,36 +1486,34 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
 		[MTHCA_TRANS_ANY2RST]   = CMD_ERR2RST_QPEE
 	};
 	u8 op_mod = 0;
-
-	dma_addr_t indma;
+	int my_mailbox = 0;
 	int err;

 	if (trans < 0 || trans >= ARRAY_SIZE(op))
 		return -EINVAL;

 	if (trans == MTHCA_TRANS_ANY2RST) {
-		indma  = 0;
 		op_mod = 3;	/* don't write outbox, any->reset */

 		/* For debugging */
-		qp_context = pci_alloc_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE,
-						  &indma);
-		op_mod = 2;	/* write outbox, any->reset */
+		if (!mailbox) {
+			mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+			if (!IS_ERR(mailbox)) {
+				my_mailbox = 1;
+				op_mod     = 2;	/* write outbox, any->reset */
+			} else
+				mailbox = NULL;
+		}
 	} else {
-		indma = pci_map_single(dev->pdev, qp_context,
-				       MTHCA_QP_CONTEXT_SIZE,
-				       PCI_DMA_TODEVICE);
-		if (pci_dma_mapping_error(indma))
-			return -ENOMEM;
-
 		if (0) {
 			int i;
 			mthca_dbg(dev, "Dumping QP context:\n");
-			printk("  opt param mask: %08x\n", be32_to_cpup(qp_context));
+			printk("  opt param mask: %08x\n", be32_to_cpup(mailbox->buf));
 			for (i = 0; i < 0x100 / 4; ++i) {
 				if (i % 8 == 0)
 					printk("  [%02x] ", i * 4);
-				printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2]));
+				printk(" %08x",
+				       be32_to_cpu(((u32 *) mailbox->buf)[i + 2]));
 				if ((i + 1) % 8 == 0)
 					printk("\n");
 			}
@@ -1557,55 +1521,39 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
 	}

 	if (trans == MTHCA_TRANS_ANY2RST) {
-		err = mthca_cmd_box(dev, 0, indma, (!!is_ee << 24) | num,
-				    op_mod, op[trans], CMD_TIME_CLASS_C, status);
+		err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
+				    (!!is_ee << 24) | num, op_mod,
+				    op[trans], CMD_TIME_CLASS_C, status);
-		if (0) {
+		if (0 && mailbox) {
 			int i;
 			mthca_dbg(dev, "Dumping QP context:\n");
-			printk(" %08x\n", be32_to_cpup(qp_context));
+			printk(" %08x\n", be32_to_cpup(mailbox->buf));
 			for (i = 0; i < 0x100 / 4; ++i) {
 				if (i % 8 == 0)
 					printk("[%02x] ", i * 4);
-				printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2]));
+				printk(" %08x",
+				       be32_to_cpu(((u32 *) mailbox->buf)[i + 2]));
 				if ((i + 1) % 8 == 0)
 					printk("\n");
 			}
 		}

 	} else
-		err = mthca_cmd(dev, indma, (!!is_ee << 24) | num,
+		err = mthca_cmd(dev, mailbox->dma, (!!is_ee << 24) | num,
 				op_mod, op[trans], CMD_TIME_CLASS_C, status);

-	if (trans != MTHCA_TRANS_ANY2RST)
-		pci_unmap_single(dev->pdev, indma,
-				 MTHCA_QP_CONTEXT_SIZE, PCI_DMA_TODEVICE);
-	else
-		pci_free_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE,
-				    qp_context, indma);
+	if (my_mailbox)
+		mthca_free_mailbox(dev, mailbox);
+
 	return err;
 }

 int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
-		   void *qp_context, u8 *status)
+		   struct mthca_mailbox *mailbox, u8 *status)
 {
-	dma_addr_t outdma = 0;
-	int err;
-
-	outdma = pci_map_single(dev->pdev, qp_context,
-				MTHCA_QP_CONTEXT_SIZE,
-				PCI_DMA_FROMDEVICE);
-	if (pci_dma_mapping_error(outdma))
-		return -ENOMEM;
-
-	err = mthca_cmd_box(dev, 0, outdma, (!!is_ee << 24) | num, 0,
-			    CMD_QUERY_QPEE,
-			    CMD_TIME_CLASS_A, status);
-
-	pci_unmap_single(dev->pdev, outdma,
-			 MTHCA_QP_CONTEXT_SIZE,
-			 PCI_DMA_FROMDEVICE);
-	return err;
+	return mthca_cmd_box(dev, 0, mailbox->dma, (!!is_ee << 24) | num, 0,
+			     CMD_QUERY_QPEE, CMD_TIME_CLASS_A, status);
 }

 int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
@@ -1635,11 +1583,11 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
 }

 int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
-		  int port, struct ib_wc* in_wc, struct ib_grh* in_grh,
+		  int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
 		  void *in_mad, void *response_mad, u8 *status)
 {
-	void *box;
-	dma_addr_t dma;
+	struct mthca_mailbox *inmailbox, *outmailbox;
+	void *inbox;
 	int err;
 	u32 in_modifier = port;
 	u8 op_modifier = 0;
@@ -1653,11 +1601,18 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
 #define MAD_IFC_PKEY_OFFSET   0x10e
 #define MAD_IFC_GRH_OFFSET    0x140

-	box = pci_alloc_consistent(dev->pdev, MAD_IFC_BOX_SIZE, &dma);
-	if (!box)
-		return -ENOMEM;
+	inmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(inmailbox))
+		return PTR_ERR(inmailbox);
+	inbox = inmailbox->buf;
-	memcpy(box, in_mad, 256);
+	outmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(outmailbox)) {
+		mthca_free_mailbox(dev, inmailbox);
+		return PTR_ERR(outmailbox);
+	}
+
+	memcpy(inbox, in_mad, 256);

 	/*
 	 * Key check traps can't be generated unless we have in_wc to
@@ -1671,97 +1626,65 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
 	if (in_wc) {
 		u8 val;

-		memset(box + 256, 0, 256);
+		memset(inbox + 256, 0, 256);

-		MTHCA_PUT(box, in_wc->qp_num, 	  MAD_IFC_MY_QPN_OFFSET);
-		MTHCA_PUT(box, in_wc->src_qp, 	  MAD_IFC_RQPN_OFFSET);
+		MTHCA_PUT(inbox, in_wc->qp_num,     MAD_IFC_MY_QPN_OFFSET);
+		MTHCA_PUT(inbox, in_wc->src_qp,     MAD_IFC_RQPN_OFFSET);

 		val = in_wc->sl << 4;
-		MTHCA_PUT(box, val,               MAD_IFC_SL_OFFSET);
+		MTHCA_PUT(inbox, val,               MAD_IFC_SL_OFFSET);

 		val = in_wc->dlid_path_bits |
 			(in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
-		MTHCA_PUT(box, val,               MAD_IFC_GRH_OFFSET);
+		MTHCA_PUT(inbox, val,               MAD_IFC_GRH_OFFSET);

-		MTHCA_PUT(box, in_wc->slid,       MAD_IFC_RLID_OFFSET);
-		MTHCA_PUT(box, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
+		MTHCA_PUT(inbox, in_wc->slid,       MAD_IFC_RLID_OFFSET);
+		MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);

 		if (in_grh)
-			memcpy((u8 *) box + MAD_IFC_GRH_OFFSET, in_grh, 40);
+			memcpy(inbox + MAD_IFC_GRH_OFFSET, in_grh, 40);

 		op_modifier |= 0x10;

 		in_modifier |= in_wc->slid << 16;
 	}

-	err = mthca_cmd_box(dev, dma, dma + 512, in_modifier, op_modifier,
+	err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma,
+			    in_modifier, op_modifier,
 			    CMD_MAD_IFC, CMD_TIME_CLASS_C, status);

 	if (!err && !*status)
-		memcpy(response_mad, box + 512, 256);
+		memcpy(response_mad, outmailbox->buf, 256);

-	pci_free_consistent(dev->pdev, MAD_IFC_BOX_SIZE, box, dma);
+	mthca_free_mailbox(dev, inmailbox);
+	mthca_free_mailbox(dev, outmailbox);
 	return err;
 }

-int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm,
-		   u8 *status)
+int mthca_READ_MGM(struct mthca_dev *dev, int index,
+		   struct mthca_mailbox *mailbox, u8 *status)
 {
-	dma_addr_t outdma = 0;
-	int err;
-
-	outdma = pci_map_single(dev->pdev, mgm,
-				MTHCA_MGM_ENTRY_SIZE,
-				PCI_DMA_FROMDEVICE);
-	if (pci_dma_mapping_error(outdma))
-		return -ENOMEM;
-
-	err = mthca_cmd_box(dev, 0, outdma, index, 0,
-			    CMD_READ_MGM,
-			    CMD_TIME_CLASS_A, status);
-
-	pci_unmap_single(dev->pdev, outdma,
-			 MTHCA_MGM_ENTRY_SIZE,
-			 PCI_DMA_FROMDEVICE);
-	return err;
+	return mthca_cmd_box(dev, 0, mailbox->dma, index, 0,
+			     CMD_READ_MGM, CMD_TIME_CLASS_A, status);
 }

-int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm,
-		    u8 *status)
+int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
+		    struct mthca_mailbox *mailbox, u8 *status)
 {
-	dma_addr_t indma;
-	int err;
-
-	indma = pci_map_single(dev->pdev, mgm,
-			       MTHCA_MGM_ENTRY_SIZE,
-			       PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(indma))
-		return -ENOMEM;
-
-	err = mthca_cmd(dev, indma, index, 0, CMD_WRITE_MGM,
-			CMD_TIME_CLASS_A, status);
-
-	pci_unmap_single(dev->pdev, indma,
-			 MTHCA_MGM_ENTRY_SIZE, PCI_DMA_TODEVICE);
-	return err;
+	return mthca_cmd(dev, mailbox->dma, index, 0, CMD_WRITE_MGM,
+			 CMD_TIME_CLASS_A, status);
 }

-int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash,
-		    u8 *status)
+int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+		    u16 *hash, u8 *status)
 {
-	dma_addr_t indma;
 	u64 imm;
 	int err;

-	indma = pci_map_single(dev->pdev, gid, 16, PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(indma))
-		return -ENOMEM;
-
-	err = mthca_cmd_imm(dev, indma, &imm, 0, 0, CMD_MGID_HASH,
+	err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH,
 			    CMD_TIME_CLASS_A, status);
-	*hash = imm;
-	pci_unmap_single(dev->pdev, indma, 16, PCI_DMA_TODEVICE);
+	*hash = imm;
 	return err;
 }


+ 28 - 20
drivers/infiniband/hw/mthca/mthca_cmd.h

@@ -37,8 +37,7 @@

 #include <ib_verbs.h>

-#define MTHCA_CMD_MAILBOX_ALIGN 16UL
-#define MTHCA_CMD_MAILBOX_EXTRA (MTHCA_CMD_MAILBOX_ALIGN - 1)
+#define MTHCA_MAILBOX_SIZE 4096

 enum {
 	/* command completed successfully: */
@@ -112,6 +111,11 @@ enum {
 	DEV_LIM_FLAG_UD_MULTI           = 1 << 21,
 };

+struct mthca_mailbox {
+	dma_addr_t dma;
+	void      *buf;
+};
+
 struct mthca_dev_lim {
 	int max_srq_sz;
 	int max_qp_sz;
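The 4 KB MTHCA_MAILBOX_SIZE together with the pci_pool created in mthca_cmd_init() (size 4096, alignment 4096) replaces the old kmalloc-plus-MAILBOX_ALIGN() trick, which over-allocated by MTHCA_CMD_MAILBOX_EXTRA bytes just to reach 16-byte alignment. Old convention versus new, shown for contrast only (the old macros are removed by this patch):

	/* old: over-allocate, then align the pointer by hand */
	void *raw = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
			    GFP_KERNEL);
	struct mthca_cq_context *ctx = MAILBOX_ALIGN(raw);

	/* new: the pool buffer is already aligned and comes with its DMA address */
	struct mthca_mailbox *mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	struct mthca_cq_context *cq_context = mailbox->buf;	/* after an IS_ERR() check */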
@@ -235,11 +239,17 @@ struct mthca_set_ib_param {
 	u32 cap_mask;
 };

+int mthca_cmd_init(struct mthca_dev *dev);
+void mthca_cmd_cleanup(struct mthca_dev *dev);
 int mthca_cmd_use_events(struct mthca_dev *dev);
 void mthca_cmd_use_polling(struct mthca_dev *dev);
 void mthca_cmd_event(struct mthca_dev *dev, u16 token,
 		     u8  status, u64 out_param);

+struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
+					  unsigned int gfp_mask);
+void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox);
+
 int mthca_SYS_EN(struct mthca_dev *dev, u8 *status);
 int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status);
 int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
@@ -270,41 +280,39 @@ int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
 int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status);
 int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
 		       u8 *status);
-int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry,
+int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int mpt_index, u8 *status);
-int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry,
+int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int mpt_index, u8 *status);
-int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry,
+int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		    int num_mtt, u8 *status);
 int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status);
 int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
 		 int eq_num, u8 *status);
-int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context,
+int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int eq_num, u8 *status);
-int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context,
+int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int eq_num, u8 *status);
-int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context,
+int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int cq_num, u8 *status);
-int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context,
+int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
 		   int cq_num, u8 *status);
 int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num,
-		    int is_ee, void *qp_context, u32 optmask,
+		    int is_ee, struct mthca_mailbox *mailbox, u32 optmask,
 		    u8 *status);
 int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
-		   void *qp_context, u8 *status);
+		   struct mthca_mailbox *mailbox, u8 *status);
 int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
 			  u8 *status);
 int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
-		  int port, struct ib_wc* in_wc, struct ib_grh* in_grh,
+		  int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
 		  void *in_mad, void *response_mad, u8 *status);
-int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm,
-		   u8 *status);
-int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm,
-		    u8 *status);
-int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash,
-		    u8 *status);
+int mthca_READ_MGM(struct mthca_dev *dev, int index,
+		   struct mthca_mailbox *mailbox, u8 *status);
+int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
+		    struct mthca_mailbox *mailbox, u8 *status);
+int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+		    u16 *hash, u8 *status);
 int mthca_NOP(struct mthca_dev *dev, u8 *status);

-#define MAILBOX_ALIGN(x) ((void *) ALIGN((unsigned long) (x), MTHCA_CMD_MAILBOX_ALIGN))
-
 #endif /* MTHCA_CMD_H */
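As one more example of the converted prototypes above, hashing a multicast GID now goes through a mailbox as well, since the old interface DMA-mapped the 16-byte GID directly. A sketch (gid is a hypothetical caller-supplied 16-byte buffer):

	struct mthca_mailbox *mailbox;
	u16 hash;
	u8 status;
	int err;

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	memcpy(mailbox->buf, gid, 16);
	err = mthca_MGID_HASH(dev, mailbox, &hash, &status);
	mthca_free_mailbox(dev, mailbox);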

+ 49 - 52
drivers/infiniband/hw/mthca/mthca_cq.c

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -171,6 +172,17 @@ static inline void set_cqe_hw(struct mthca_cqe *cqe)
 	cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW;
 }

+static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr)
+{
+	__be32 *cqe = cqe_ptr;
+
+	(void) cqe;	/* avoid warning if mthca_dbg compiled away... */
+	mthca_dbg(dev, "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
+		  be32_to_cpu(cqe[0]), be32_to_cpu(cqe[1]), be32_to_cpu(cqe[2]),
+		  be32_to_cpu(cqe[3]), be32_to_cpu(cqe[4]), be32_to_cpu(cqe[5]),
+		  be32_to_cpu(cqe[6]), be32_to_cpu(cqe[7]));
+}
+
 /*
  * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index
  * should be correct before calling update_cons_index().
@@ -280,16 +292,12 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
 	int dbd;
 	u32 new_wqe;

-	if (1 && cqe->syndrome != SYNDROME_WR_FLUSH_ERR) {
-		int j;
-
-		mthca_dbg(dev, "%x/%d: error CQE -> QPN %06x, WQE @ %08x\n",
-			  cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
-			  be32_to_cpu(cqe->wqe));
-
-		for (j = 0; j < 8; ++j)
-			printk(KERN_DEBUG "  [%2x] %08x\n",
-			       j * 4, be32_to_cpu(((u32 *) cqe)[j]));
+	if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) {
+		mthca_dbg(dev, "local QP operation err "
+			  "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n",
+			  be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe),
+			  cq->cqn, cq->cons_index);
+		dump_cqe(dev, cqe);
 	}

 	/*
@@ -377,15 +385,6 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
 	return 0;
 }

-static void dump_cqe(struct mthca_cqe *cqe)
-{
-	int j;
-
-	for (j = 0; j < 8; ++j)
-		printk(KERN_DEBUG "  [%2x] %08x\n",
-		       j * 4, be32_to_cpu(((u32 *) cqe)[j]));
-}
-
 static inline int mthca_poll_one(struct mthca_dev *dev,
 				 struct mthca_cq *cq,
 				 struct mthca_qp **cur_qp,
@@ -414,8 +413,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
 		mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n",
 			  cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
 			  be32_to_cpu(cqe->wqe));
-
-		dump_cqe(cqe);
+		dump_cqe(dev, cqe);
 	}

 	is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
@@ -638,19 +636,19 @@ static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq)
 	int size;

 	if (cq->is_direct)
-		pci_free_consistent(dev->pdev,
-				    (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
-				    cq->queue.direct.buf,
-				    pci_unmap_addr(&cq->queue.direct,
-						   mapping));
+		dma_free_coherent(&dev->pdev->dev,
+				  (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
+				  cq->queue.direct.buf,
+				  pci_unmap_addr(&cq->queue.direct,
+						 mapping));
 	else {
 		size = (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE;
 		for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
 			if (cq->queue.page_list[i].buf)
-				pci_free_consistent(dev->pdev, PAGE_SIZE,
-						    cq->queue.page_list[i].buf,
-						    pci_unmap_addr(&cq->queue.page_list[i],
-								   mapping));
+				dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+						  cq->queue.page_list[i].buf,
+						  pci_unmap_addr(&cq->queue.page_list[i],
+								 mapping));

 		kfree(cq->queue.page_list);
 	}
@@ -670,8 +668,8 @@ static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size,
 		npages        = 1;
 		shift         = get_order(size) + PAGE_SHIFT;

-		cq->queue.direct.buf = pci_alloc_consistent(dev->pdev,
-							    size, &t);
+		cq->queue.direct.buf = dma_alloc_coherent(&dev->pdev->dev,
+							  size, &t, GFP_KERNEL);
 		if (!cq->queue.direct.buf)
 			return -ENOMEM;

@@ -709,7 +707,8 @@ static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size,

 		for (i = 0; i < npages; ++i) {
 			cq->queue.page_list[i].buf =
-				pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t);
+				dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+						   &t, GFP_KERNEL);
 			if (!cq->queue.page_list[i].buf)
 				goto err_free;

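The buffer-allocation changes in this file are a mechanical move from the PCI wrappers to the generic DMA API; for a PCI device the two calls below do the same work, except that the generic form takes an explicit gfp_t (this patch passes GFP_KERNEL on these sleepable paths). Shown for orientation only:

	void *buf;
	dma_addr_t t;

	buf = pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t);			/* old */
	buf = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, &t, GFP_KERNEL);	/* new */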
@@ -746,7 +745,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
 		  struct mthca_cq *cq)
 {
 	int size = nent * MTHCA_CQ_ENTRY_SIZE;
-	void *mailbox = NULL;
+	struct mthca_mailbox *mailbox;
 	struct mthca_cq_context *cq_context;
 	int err = -ENOMEM;
 	u8 status;
@@ -780,12 +779,11 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
 			goto err_out_ci;
 	}

-	mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
-			  GFP_KERNEL);
-	if (!mailbox)
-		goto err_out_mailbox;
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		goto err_out_arm;
-	cq_context = MAILBOX_ALIGN(mailbox);
+	cq_context = mailbox->buf;

 	err = mthca_alloc_cq_buf(dev, size, cq);
 	if (err)
@@ -816,7 +814,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
 		cq_context->state_db = cpu_to_be32(cq->arm_db_index);
 	}

-	err = mthca_SW2HW_CQ(dev, cq_context, cq->cqn, &status);
+	err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn, &status);
 	if (err) {
 		mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err);
 		goto err_out_free_mr;
@@ -840,7 +838,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,

 	cq->cons_index = 0;

-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);

 	return 0;

@@ -849,8 +847,9 @@ err_out_free_mr:
 	mthca_free_cq_buf(dev, cq);

 err_out_mailbox:
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
+err_out_arm:
 	if (mthca_is_memfree(dev))
 		mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);

@@ -870,28 +869,26 @@ err_out:
 void mthca_free_cq(struct mthca_dev *dev,
 		   struct mthca_cq *cq)
 {
-	void *mailbox;
+	struct mthca_mailbox *mailbox;
 	int err;
 	u8 status;

 	might_sleep();

-	mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
-			  GFP_KERNEL);
-	if (!mailbox) {
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox)) {
 		mthca_warn(dev, "No memory for mailbox to free CQ.\n");
 		return;
 	}

-	err = mthca_HW2SW_CQ(dev, MAILBOX_ALIGN(mailbox), cq->cqn, &status);
+	err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn, &status);
 	if (err)
 		mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err);
 	else if (status)
-		mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n",
-			   status);
+		mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status);

 	if (0) {
-		u32 *ctx = MAILBOX_ALIGN(mailbox);
+		u32 *ctx = mailbox->buf;
 		int j;

 		printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n",
@@ -919,11 +916,11 @@ void mthca_free_cq(struct mthca_dev *dev,
 	if (mthca_is_memfree(dev)) {
 		mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM,    cq->arm_db_index);
 		mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
-		mthca_table_put(dev, dev->cq_table.table, cq->cqn);
 	}

+	mthca_table_put(dev, dev->cq_table.table, cq->cqn);
 	mthca_free(&dev->cq_table.alloc, cq->cqn);
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 }

 int __devinit mthca_init_cq_table(struct mthca_dev *dev)

+ 10 - 2
drivers/infiniband/hw/mthca/mthca_dev.h

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -46,8 +47,8 @@

 #define DRV_NAME	"ib_mthca"
 #define PFX		DRV_NAME ": "
-#define DRV_VERSION	"0.06-pre"
-#define DRV_RELDATE	"November 8, 2004"
+#define DRV_VERSION	"0.06"
+#define DRV_RELDATE	"June 23, 2005"

 enum {
 	MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
@@ -98,6 +99,7 @@ enum {
 };

 struct mthca_cmd {
+	struct pci_pool          *pool;
 	int                       use_events;
 	struct semaphore          hcr_sem;
 	struct semaphore 	  poll_sem;
@@ -379,6 +381,12 @@ void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
 int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd);
 void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);

+struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
+void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt);
+int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+		    int start_index, u64 *buffer_list, int list_len);
+int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
+		   u64 iova, u64 total_size, u32 access, struct mthca_mr *mr);
 int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
 			   u32 access, struct mthca_mr *mr);
 int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
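The newly exported MTT helpers above suggest the following usage shape for in-driver callers; this is a sketch based only on the prototypes (buffer_list holding DMA addresses and the ERR_PTR return convention are assumptions, since the implementations are not part of this excerpt):

	struct mthca_mtt *mtt;
	int err;

	mtt = mthca_alloc_mtt(dev, npages);
	if (IS_ERR(mtt))			/* assumed to match the mailbox convention */
		return PTR_ERR(mtt);

	err = mthca_write_mtt(dev, mtt, 0, buffer_list, npages);
	if (err)
		mthca_free_mtt(dev, mtt);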

+ 1 - 0
drivers/infiniband/hw/mthca/mthca_doorbell.h

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU

+ 27 - 31
drivers/infiniband/hw/mthca/mthca_eq.c

@@ -469,7 +469,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 		PAGE_SIZE;
 	u64 *dma_list = NULL;
 	dma_addr_t t;
-	void *mailbox = NULL;
+	struct mthca_mailbox *mailbox;
 	struct mthca_eq_context *eq_context;
 	int err = -ENOMEM;
 	int i;
@@ -494,17 +494,16 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 	if (!dma_list)
 		goto err_out_free;

-	mailbox = kmalloc(sizeof *eq_context + MTHCA_CMD_MAILBOX_EXTRA,
-			  GFP_KERNEL);
-	if (!mailbox)
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
 		goto err_out_free;
-	eq_context = MAILBOX_ALIGN(mailbox);
+	eq_context = mailbox->buf;

 	for (i = 0; i < npages; ++i) {
-		eq->page_list[i].buf = pci_alloc_consistent(dev->pdev,
-							    PAGE_SIZE, &t);
+		eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
+							  PAGE_SIZE, &t, GFP_KERNEL);
 		if (!eq->page_list[i].buf)
-			goto err_out_free;
+			goto err_out_free_pages;

 		dma_list[i] = t;
 		pci_unmap_addr_set(&eq->page_list[i], mapping, t);
@@ -517,7 +516,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,

 	eq->eqn = mthca_alloc(&dev->eq_table.alloc);
 	if (eq->eqn == -1)
-		goto err_out_free;
+		goto err_out_free_pages;

 	err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
 				  dma_list, PAGE_SHIFT, npages,
@@ -548,7 +547,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 	eq_context->intr            = intr;
 	eq_context->lkey            = cpu_to_be32(eq->mr.ibmr.lkey);

-	err = mthca_SW2HW_EQ(dev, eq_context, eq->eqn, &status);
+	err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status);
 	if (err) {
 		mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err);
 		goto err_out_free_mr;
@@ -561,7 +560,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 	}

 	kfree(dma_list);
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);

 	eq->eqn_mask   = swab32(1 << eq->eqn);
 	eq->cons_index = 0;
@@ -579,17 +578,19 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 err_out_free_eq:
 	mthca_free(&dev->eq_table.alloc, eq->eqn);

- err_out_free:
+ err_out_free_pages:
 	for (i = 0; i < npages; ++i)
 		if (eq->page_list[i].buf)
-			pci_free_consistent(dev->pdev, PAGE_SIZE,
-					    eq->page_list[i].buf,
-					    pci_unmap_addr(&eq->page_list[i],
-							   mapping));
+			dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+					  eq->page_list[i].buf,
+					  pci_unmap_addr(&eq->page_list[i],
+							 mapping));
+
+	mthca_free_mailbox(dev, mailbox);
+ err_out_free:
 	kfree(eq->page_list);
 	kfree(dma_list);
-	kfree(mailbox);

 err_out:
 	return err;
@@ -598,25 +599,22 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 static void mthca_free_eq(struct mthca_dev *dev,
 			  struct mthca_eq *eq)
 {
-	void *mailbox = NULL;
+	struct mthca_mailbox *mailbox;
 	int err;
 	u8 status;
 	int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
 		PAGE_SIZE;
 	int i;

-	mailbox = kmalloc(sizeof (struct mthca_eq_context) + MTHCA_CMD_MAILBOX_EXTRA,
-			  GFP_KERNEL);
-	if (!mailbox)
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
 		return;

-	err = mthca_HW2SW_EQ(dev, MAILBOX_ALIGN(mailbox),
-			     eq->eqn, &status);
+	err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status);
 	if (err)
 		mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err);
 	if (status)
-		mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n",
-			   status);
+		mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status);

 	dev->eq_table.arm_mask &= ~eq->eqn_mask;

@@ -625,7 +623,7 @@ static void mthca_free_eq(struct mthca_dev *dev,
 		for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) {
 			if (i % 4 == 0)
 				printk("[%02x] ", i * 4);
-			printk(" %08x", be32_to_cpup(MAILBOX_ALIGN(mailbox) + i * 4));
+			printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
 			if ((i + 1) % 4 == 0)
 				printk("\n");
 		}
@@ -638,7 +636,7 @@ static void mthca_free_eq(struct mthca_dev *dev,
 				    pci_unmap_addr(&eq->page_list[i], mapping));

 	kfree(eq->page_list);
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 }

 static void mthca_free_irqs(struct mthca_dev *dev)
@@ -709,8 +707,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
 		if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
 					dev->fw.arbel.eq_arm_base) + 4, 4,
 				  &dev->eq_regs.arbel.eq_arm)) {
-			mthca_err(dev, "Couldn't map interrupt clear register, "
-				  "aborting.\n");
+			mthca_err(dev, "Couldn't map EQ arm register, aborting.\n");
 			mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
 					dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
 					dev->clr_base);
@@ -721,8 +718,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev)
 				  dev->fw.arbel.eq_set_ci_base,
 				  MTHCA_EQ_SET_CI_SIZE,
 				  &dev->eq_regs.arbel.eq_set_ci_base)) {
-			mthca_err(dev, "Couldn't map interrupt clear register, "
-				  "aborting.\n");
+			mthca_err(dev, "Couldn't map EQ CI register, aborting.\n");
 			mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
 					      dev->fw.arbel.eq_arm_base) + 4, 4,
 					dev->eq_regs.arbel.eq_arm);

+ 12 - 20
drivers/infiniband/hw/mthca/mthca_main.c

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  *
  * This software is available to you under a choice of one of two
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -69,7 +70,7 @@ MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero");
 #endif /* CONFIG_PCI_MSI */
 #endif /* CONFIG_PCI_MSI */
 
 
 static const char mthca_version[] __devinitdata =
 static const char mthca_version[] __devinitdata =
-	"ib_mthca: Mellanox InfiniBand HCA driver v"
+	DRV_NAME ": Mellanox InfiniBand HCA driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
 	DRV_VERSION " (" DRV_RELDATE ")\n";
 
 
 static struct mthca_profile default_profile = {
 static struct mthca_profile default_profile = {
@@ -927,13 +928,13 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
 	 */
 	 */
 	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
 	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
 	    pci_resource_len(pdev, 0) != 1 << 20) {
 	    pci_resource_len(pdev, 0) != 1 << 20) {
-		dev_err(&pdev->dev, "Missing DCS, aborting.");
+		dev_err(&pdev->dev, "Missing DCS, aborting.\n");
 		err = -ENODEV;
 		err = -ENODEV;
 		goto err_disable_pdev;
 		goto err_disable_pdev;
 	}
 	}
 	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) ||
 	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) ||
 	    pci_resource_len(pdev, 2) != 1 << 23) {
 	    pci_resource_len(pdev, 2) != 1 << 23) {
-		dev_err(&pdev->dev, "Missing UAR, aborting.");
+		dev_err(&pdev->dev, "Missing UAR, aborting.\n");
 		err = -ENODEV;
 		err = -ENODEV;
 		goto err_disable_pdev;
 		goto err_disable_pdev;
 	}
 	}
@@ -1004,25 +1005,18 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
 	    !pci_enable_msi(pdev))
 	    !pci_enable_msi(pdev))
 		mdev->mthca_flags |= MTHCA_FLAG_MSI;
 		mdev->mthca_flags |= MTHCA_FLAG_MSI;
 
 
-	sema_init(&mdev->cmd.hcr_sem, 1);
-	sema_init(&mdev->cmd.poll_sem, 1);
-	mdev->cmd.use_events = 0;
-
-	mdev->hcr = ioremap(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, MTHCA_HCR_SIZE);
-	if (!mdev->hcr) {
-		mthca_err(mdev, "Couldn't map command register, "
-			  "aborting.\n");
-		err = -ENOMEM;
+	if (mthca_cmd_init(mdev)) {
+		mthca_err(mdev, "Failed to init command interface, aborting.\n");
 		goto err_free_dev;
 		goto err_free_dev;
 	}
 	}
 
 
 	err = mthca_tune_pci(mdev);
 	err = mthca_tune_pci(mdev);
 	if (err)
 	if (err)
-		goto err_iounmap;
+		goto err_cmd;
 
 
 	err = mthca_init_hca(mdev);
 	err = mthca_init_hca(mdev);
 	if (err)
 	if (err)
-		goto err_iounmap;
+		goto err_cmd;
 
 
 	if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) {
 	if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) {
 		mthca_warn(mdev, "HCA FW version %x.%x.%x is old (%x.%x.%x is current).\n",
 		mthca_warn(mdev, "HCA FW version %x.%x.%x is old (%x.%x.%x is current).\n",
@@ -1070,8 +1064,8 @@ err_cleanup:
 err_close:
 err_close:
 	mthca_close_hca(mdev);
 	mthca_close_hca(mdev);
 
 
-err_iounmap:
-	iounmap(mdev->hcr);
+err_cmd:
+	mthca_cmd_cleanup(mdev);
 
 
 err_free_dev:
 err_free_dev:
 	if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
 	if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
@@ -1118,10 +1112,8 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev)
 		iounmap(mdev->kar);
 		iounmap(mdev->kar);
 		mthca_uar_free(mdev, &mdev->driver_uar);
 		mthca_uar_free(mdev, &mdev->driver_uar);
 		mthca_cleanup_uar_table(mdev);
 		mthca_cleanup_uar_table(mdev);
-
 		mthca_close_hca(mdev);
 		mthca_close_hca(mdev);
-
-		iounmap(mdev->hcr);
+		mthca_cmd_cleanup(mdev);
 
 
 		if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
 		if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
 			pci_disable_msix(pdev);
 			pci_disable_msix(pdev);
@@ -1163,7 +1155,7 @@ static struct pci_device_id mthca_pci_table[] = {
 MODULE_DEVICE_TABLE(pci, mthca_pci_table);
 MODULE_DEVICE_TABLE(pci, mthca_pci_table);
 
 
 static struct pci_driver mthca_driver = {
 static struct pci_driver mthca_driver = {
-	.name		= "ib_mthca",
+	.name		= DRV_NAME,
 	.id_table	= mthca_pci_table,
 	.id_table	= mthca_pci_table,
 	.probe		= mthca_init_one,
 	.probe		= mthca_init_one,
 	.remove		= __devexit_p(mthca_remove_one)
 	.remove		= __devexit_p(mthca_remove_one)

+ 32 - 31
drivers/infiniband/hw/mthca/mthca_mcg.c

@@ -66,22 +66,23 @@ static const u8 zero_gid[16];	/* automatically initialized to 0 */
  * entry in hash chain and *mgm holds end of hash chain.
  * entry in hash chain and *mgm holds end of hash chain.
  */
  */
 static int find_mgm(struct mthca_dev *dev,
 static int find_mgm(struct mthca_dev *dev,
-		    u8 *gid, struct mthca_mgm *mgm,
+		    u8 *gid, struct mthca_mailbox *mgm_mailbox,
 		    u16 *hash, int *prev, int *index)
 		    u16 *hash, int *prev, int *index)
 {
 {
-	void *mailbox;
+	struct mthca_mailbox *mailbox;
+	struct mthca_mgm *mgm = mgm_mailbox->buf;
 	u8 *mgid;
 	u8 *mgid;
 	int err;
 	int err;
 	u8 status;
 	u8 status;
 
 
-	mailbox = kmalloc(16 + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
-	if (!mailbox)
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
 		return -ENOMEM;
 		return -ENOMEM;
-	mgid = MAILBOX_ALIGN(mailbox);
+	mgid = mailbox->buf;
 
 
 	memcpy(mgid, gid, 16);
 	memcpy(mgid, gid, 16);
 
 
-	err = mthca_MGID_HASH(dev, mgid, hash, &status);
+	err = mthca_MGID_HASH(dev, mailbox, hash, &status);
 	if (err)
 	if (err)
 		goto out;
 		goto out;
 	if (status) {
 	if (status) {
@@ -103,7 +104,7 @@ static int find_mgm(struct mthca_dev *dev,
 	*prev  = -1;
 	*prev  = -1;
 
 
 	do {
 	do {
-		err = mthca_READ_MGM(dev, *index, mgm, &status);
+		err = mthca_READ_MGM(dev, *index, mgm_mailbox, &status);
 		if (err)
 		if (err)
 			goto out;
 			goto out;
 		if (status) {
 		if (status) {
@@ -129,14 +130,14 @@ static int find_mgm(struct mthca_dev *dev,
 	*index = -1;
 	*index = -1;
 
 
  out:
  out:
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 	return err;
 }
 }
 
 
 int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_dev *dev = to_mdev(ibqp->device);
-	void *mailbox;
+	struct mthca_mailbox *mailbox;
 	struct mthca_mgm *mgm;
 	struct mthca_mgm *mgm;
 	u16 hash;
 	u16 hash;
 	int index, prev;
 	int index, prev;
@@ -145,15 +146,15 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	int err;
 	int err;
 	u8 status;
 	u8 status;
 
 
-	mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
-	if (!mailbox)
-		return -ENOMEM;
-	mgm = MAILBOX_ALIGN(mailbox);
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	mgm = mailbox->buf;
 
 
 	if (down_interruptible(&dev->mcg_table.sem))
 	if (down_interruptible(&dev->mcg_table.sem))
 		return -EINTR;
 		return -EINTR;
 
 
-	err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index);
+	err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
 	if (err)
 	if (err)
 		goto out;
 		goto out;
 
 
@@ -170,7 +171,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 			goto out;
 			goto out;
 		}
 		}
 
 
-		err = mthca_READ_MGM(dev, index, mgm, &status);
+		err = mthca_READ_MGM(dev, index, mailbox, &status);
 		if (err)
 		if (err)
 			goto out;
 			goto out;
 		if (status) {
 		if (status) {
@@ -195,7 +196,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		goto out;
 		goto out;
 	}
 	}
 
 
-	err = mthca_WRITE_MGM(dev, index, mgm, &status);
+	err = mthca_WRITE_MGM(dev, index, mailbox, &status);
 	if (err)
 	if (err)
 		goto out;
 		goto out;
 	if (status) {
 	if (status) {
@@ -206,7 +207,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	if (!link)
 	if (!link)
 		goto out;
 		goto out;
 
 
-	err = mthca_READ_MGM(dev, prev, mgm, &status);
+	err = mthca_READ_MGM(dev, prev, mailbox, &status);
 	if (err)
 	if (err)
 		goto out;
 		goto out;
 	if (status) {
 	if (status) {
@@ -217,7 +218,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 
 
 	mgm->next_gid_index = cpu_to_be32(index << 5);
 	mgm->next_gid_index = cpu_to_be32(index << 5);
 
 
-	err = mthca_WRITE_MGM(dev, prev, mgm, &status);
+	err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
 	if (err)
 	if (err)
 		goto out;
 		goto out;
 	if (status) {
 	if (status) {
@@ -227,14 +228,14 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 
 
  out:
  out:
 	up(&dev->mcg_table.sem);
 	up(&dev->mcg_table.sem);
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 	return err;
 }
 }
 
 
 int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_dev *dev = to_mdev(ibqp->device);
-	void *mailbox;
+	struct mthca_mailbox *mailbox;
 	struct mthca_mgm *mgm;
 	struct mthca_mgm *mgm;
 	u16 hash;
 	u16 hash;
 	int prev, index;
 	int prev, index;
@@ -242,15 +243,15 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	int err;
 	int err;
 	u8 status;
 	u8 status;
 
 
-	mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
-	if (!mailbox)
-		return -ENOMEM;
-	mgm = MAILBOX_ALIGN(mailbox);
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	mgm = mailbox->buf;
 
 
 	if (down_interruptible(&dev->mcg_table.sem))
 	if (down_interruptible(&dev->mcg_table.sem))
 		return -EINTR;
 		return -EINTR;
 
 
-	err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index);
+	err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
 	if (err)
 	if (err)
 		goto out;
 		goto out;
 
 
@@ -285,7 +286,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	mgm->qp[loc]   = mgm->qp[i - 1];
 	mgm->qp[loc]   = mgm->qp[i - 1];
 	mgm->qp[i - 1] = 0;
 	mgm->qp[i - 1] = 0;
 
 
-	err = mthca_WRITE_MGM(dev, index, mgm, &status);
+	err = mthca_WRITE_MGM(dev, index, mailbox, &status);
 	if (err)
 	if (err)
 		goto out;
 		goto out;
 	if (status) {
 	if (status) {
@@ -304,7 +305,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		if (be32_to_cpu(mgm->next_gid_index) >> 5) {
 		if (be32_to_cpu(mgm->next_gid_index) >> 5) {
 			err = mthca_READ_MGM(dev,
 			err = mthca_READ_MGM(dev,
 					     be32_to_cpu(mgm->next_gid_index) >> 5,
 					     be32_to_cpu(mgm->next_gid_index) >> 5,
-					     mgm, &status);
+					     mailbox, &status);
 			if (err)
 			if (err)
 				goto out;
 				goto out;
 			if (status) {
 			if (status) {
@@ -316,7 +317,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		} else
 		} else
 			memset(mgm->gid, 0, 16);
 			memset(mgm->gid, 0, 16);
 
 
-		err = mthca_WRITE_MGM(dev, index, mgm, &status);
+		err = mthca_WRITE_MGM(dev, index, mailbox, &status);
 		if (err)
 		if (err)
 			goto out;
 			goto out;
 		if (status) {
 		if (status) {
@@ -327,7 +328,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	} else {
 	} else {
 		/* Remove entry from AMGM */
 		/* Remove entry from AMGM */
 		index = be32_to_cpu(mgm->next_gid_index) >> 5;
 		index = be32_to_cpu(mgm->next_gid_index) >> 5;
-		err = mthca_READ_MGM(dev, prev, mgm, &status);
+		err = mthca_READ_MGM(dev, prev, mailbox, &status);
 		if (err)
 		if (err)
 			goto out;
 			goto out;
 		if (status) {
 		if (status) {
@@ -338,7 +339,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 
 
 		mgm->next_gid_index = cpu_to_be32(index << 5);
 		mgm->next_gid_index = cpu_to_be32(index << 5);
 
 
-		err = mthca_WRITE_MGM(dev, prev, mgm, &status);
+		err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
 		if (err)
 		if (err)
 			goto out;
 			goto out;
 		if (status) {
 		if (status) {
@@ -350,7 +351,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 
 
  out:
  out:
 	up(&dev->mcg_table.sem);
 	up(&dev->mcg_table.sem);
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 	return err;
 }
 }
 
 

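Note: in the mthca_mcg.c hunks above, find_mgm() and the attach/detach paths now hand the whole mailbox to mthca_READ_MGM()/mthca_WRITE_MGM() and edit the MGM entry through mailbox->buf. A sketch of that read-modify-write flow under those assumptions (example_relink_mgm is hypothetical; the next_gid_index update mirrors the detach path above):

static int example_relink_mgm(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
			      int index, int next_index)
{
	struct mthca_mgm *mgm = mailbox->buf;	/* entry is edited through the mailbox buffer */
	u8 status;
	int err;

	err = mthca_READ_MGM(dev, index, mailbox, &status);
	if (err)
		return err;
	if (status)
		return -EINVAL;

	mgm->next_gid_index = cpu_to_be32(next_index << 5);	/* modify in place */

	err = mthca_WRITE_MGM(dev, index, mailbox, &status);
	if (!err && status)
		err = -EINVAL;
	return err;
}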
+ 9 - 1
drivers/infiniband/hw/mthca/mthca_memfree.c

@@ -179,9 +179,14 @@ out:
 
 void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
 {
-	int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
+	int i;
 	u8 status;
 
+	if (!mthca_is_memfree(dev))
+		return;
+
+	i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
+
 	down(&table->mutex);
 
 	if (--table->icm[i]->refcount == 0) {
@@ -256,6 +261,9 @@ void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
 {
 	int i;
 
+	if (!mthca_is_memfree(dev))
+		return;
+
 	for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size)
 		mthca_table_put(dev, table, i);
 }

+ 182 - 185
drivers/infiniband/hw/mthca/mthca_mr.c

@@ -40,6 +40,12 @@
 #include "mthca_cmd.h"
 #include "mthca_cmd.h"
 #include "mthca_memfree.h"
 #include "mthca_memfree.h"
 
 
+struct mthca_mtt {
+	struct mthca_buddy *buddy;
+	int                 order;
+	u32                 first_seg;
+};
+
 /*
 /*
  * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
  * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
  */
  */
@@ -173,8 +179,8 @@ static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy)
 	kfree(buddy->bits);
 	kfree(buddy->bits);
 }
 }
 
 
-static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order,
-			   struct mthca_buddy *buddy)
+static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
+				 struct mthca_buddy *buddy)
 {
 {
 	u32 seg = mthca_buddy_alloc(buddy, order);
 	u32 seg = mthca_buddy_alloc(buddy, order);
 
 
@@ -191,14 +197,102 @@ static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order,
 	return seg;
 	return seg;
 }
 }
 
 
-static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order,
-			   struct mthca_buddy* buddy)
+static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
+					   struct mthca_buddy *buddy)
 {
 {
-	mthca_buddy_free(buddy, seg, order);
+	struct mthca_mtt *mtt;
+	int i;
 
 
-	if (mthca_is_memfree(dev))
-		mthca_table_put_range(dev, dev->mr_table.mtt_table, seg,
-				      seg + (1 << order) - 1);
+	if (size <= 0)
+		return ERR_PTR(-EINVAL);
+
+	mtt = kmalloc(sizeof *mtt, GFP_KERNEL);
+	if (!mtt)
+		return ERR_PTR(-ENOMEM);
+
+	mtt->buddy = buddy;
+	mtt->order = 0;
+	for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1)
+		++mtt->order;
+
+	mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
+	if (mtt->first_seg == -1) {
+		kfree(mtt);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return mtt;
+}
+
+struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size)
+{
+	return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy);
+}
+
+void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
+{
+	if (!mtt)
+		return;
+
+	mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order);
+
+	mthca_table_put_range(dev, dev->mr_table.mtt_table,
+			      mtt->first_seg,
+			      mtt->first_seg + (1 << mtt->order) - 1);
+
+	kfree(mtt);
+}
+
+int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+		    int start_index, u64 *buffer_list, int list_len)
+{
+	struct mthca_mailbox *mailbox;
+	u64 *mtt_entry;
+	int err = 0;
+	u8 status;
+	int i;
+
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	mtt_entry = mailbox->buf;
+
+	while (list_len > 0) {
+		mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
+					   mtt->first_seg * MTHCA_MTT_SEG_SIZE +
+					   start_index * 8);
+		mtt_entry[1] = 0;
+		for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
+			mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
+						       MTHCA_MTT_FLAG_PRESENT);
+
+		/*
+		 * If we have an odd number of entries to write, add
+		 * one more dummy entry for firmware efficiency.
+		 */
+		if (i & 1)
+			mtt_entry[i + 2] = 0;
+
+		err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status);
+		if (err) {
+			mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
+			goto out;
+		}
+		if (status) {
+			mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
+				   status);
+			err = -EINVAL;
+			goto out;
+		}
+
+		list_len    -= i;
+		start_index += i;
+		buffer_list += i;
+	}
+
+out:
+	mthca_free_mailbox(dev, mailbox);
+	return err;
 }
 }
 
 
 static inline u32 tavor_hw_index_to_key(u32 ind)
 static inline u32 tavor_hw_index_to_key(u32 ind)
@@ -237,91 +331,18 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
 		return tavor_key_to_hw_index(key);
 		return tavor_key_to_hw_index(key);
 }
 }
 
 
-int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
-			   u32 access, struct mthca_mr *mr)
+int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
+		   u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
 {
 {
-	void *mailbox = NULL;
+	struct mthca_mailbox *mailbox;
 	struct mthca_mpt_entry *mpt_entry;
 	struct mthca_mpt_entry *mpt_entry;
 	u32 key;
 	u32 key;
+	int i;
 	int err;
 	int err;
 	u8 status;
 	u8 status;
 
 
 	might_sleep();
 	might_sleep();
 
 
-	mr->order = -1;
-	key = mthca_alloc(&dev->mr_table.mpt_alloc);
-	if (key == -1)
-		return -ENOMEM;
-	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
-
-	if (mthca_is_memfree(dev)) {
-		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
-		if (err)
-			goto err_out_mpt_free;
-	}
-
-	mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
-			  GFP_KERNEL);
-	if (!mailbox) {
-		err = -ENOMEM;
-		goto err_out_table;
-	}
-	mpt_entry = MAILBOX_ALIGN(mailbox);
-
-	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
-				       MTHCA_MPT_FLAG_MIO         |
-				       MTHCA_MPT_FLAG_PHYSICAL    |
-				       MTHCA_MPT_FLAG_REGION      |
-				       access);
-	mpt_entry->page_size = 0;
-	mpt_entry->key       = cpu_to_be32(key);
-	mpt_entry->pd        = cpu_to_be32(pd);
-	mpt_entry->start     = 0;
-	mpt_entry->length    = ~0ULL;
-
-	memset(&mpt_entry->lkey, 0,
-	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
-
-	err = mthca_SW2HW_MPT(dev, mpt_entry,
-			      key & (dev->limits.num_mpts - 1),
-			      &status);
-	if (err) {
-		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
-		goto err_out_table;
-	} else if (status) {
-		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
-			   status);
-		err = -EINVAL;
-		goto err_out_table;
-	}
-
-	kfree(mailbox);
-	return err;
-
-err_out_table:
-	if (mthca_is_memfree(dev))
-		mthca_table_put(dev, dev->mr_table.mpt_table, key);
-
-err_out_mpt_free:
-	mthca_free(&dev->mr_table.mpt_alloc, key);
-	kfree(mailbox);
-	return err;
-}
-
-int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
-			u64 *buffer_list, int buffer_size_shift,
-			int list_len, u64 iova, u64 total_size,
-			u32 access, struct mthca_mr *mr)
-{
-	void *mailbox;
-	u64 *mtt_entry;
-	struct mthca_mpt_entry *mpt_entry;
-	u32 key;
-	int err = -ENOMEM;
-	u8 status;
-	int i;
-
-	might_sleep();
 	WARN_ON(buffer_size_shift >= 32);
 	WARN_ON(buffer_size_shift >= 32);
 
 
 	key = mthca_alloc(&dev->mr_table.mpt_alloc);
 	key = mthca_alloc(&dev->mr_table.mpt_alloc);
@@ -335,75 +356,33 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
 			goto err_out_mpt_free;
 			goto err_out_mpt_free;
 	}
 	}
 
 
-	for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
-	     i < list_len;
-	     i <<= 1, ++mr->order)
-		; /* nothing */
-
-	mr->first_seg = mthca_alloc_mtt(dev, mr->order,
-				       	&dev->mr_table.mtt_buddy);
-	if (mr->first_seg == -1)
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox)) {
+		err = PTR_ERR(mailbox);
 		goto err_out_table;
 		goto err_out_table;
-
-	/*
-	 * If list_len is odd, we add one more dummy entry for
-	 * firmware efficiency.
-	 */
-	mailbox = kmalloc(max(sizeof *mpt_entry,
-			      (size_t) 8 * (list_len + (list_len & 1) + 2)) +
-			  MTHCA_CMD_MAILBOX_EXTRA,
-			  GFP_KERNEL);
-	if (!mailbox)
-		goto err_out_free_mtt;
-
-	mtt_entry = MAILBOX_ALIGN(mailbox);
-
-	mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
-				   mr->first_seg * MTHCA_MTT_SEG_SIZE);
-	mtt_entry[1] = 0;
-	for (i = 0; i < list_len; ++i)
-		mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
-					       MTHCA_MTT_FLAG_PRESENT);
-	if (list_len & 1) {
-		mtt_entry[i + 2] = 0;
-		++list_len;
-	}
-
-	if (0) {
-		mthca_dbg(dev, "Dumping MPT entry\n");
-		for (i = 0; i < list_len + 2; ++i)
-			printk(KERN_ERR "[%2d] %016llx\n",
-			       i, (unsigned long long) be64_to_cpu(mtt_entry[i]));
-	}
-
-	err = mthca_WRITE_MTT(dev, mtt_entry, list_len, &status);
-	if (err) {
-		mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
-		goto err_out_mailbox_free;
-	}
-	if (status) {
-		mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
-			   status);
-		err = -EINVAL;
-		goto err_out_mailbox_free;
 	}
 	}
-
-	mpt_entry = MAILBOX_ALIGN(mailbox);
+	mpt_entry = mailbox->buf;
 
 
 	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
 	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
 				       MTHCA_MPT_FLAG_MIO         |
 				       MTHCA_MPT_FLAG_MIO         |
 				       MTHCA_MPT_FLAG_REGION      |
 				       MTHCA_MPT_FLAG_REGION      |
 				       access);
 				       access);
+	if (!mr->mtt)
+		mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL);
 
 
 	mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
 	mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
 	mpt_entry->key       = cpu_to_be32(key);
 	mpt_entry->key       = cpu_to_be32(key);
 	mpt_entry->pd        = cpu_to_be32(pd);
 	mpt_entry->pd        = cpu_to_be32(pd);
 	mpt_entry->start     = cpu_to_be64(iova);
 	mpt_entry->start     = cpu_to_be64(iova);
 	mpt_entry->length    = cpu_to_be64(total_size);
 	mpt_entry->length    = cpu_to_be64(total_size);
+
 	memset(&mpt_entry->lkey, 0,
 	memset(&mpt_entry->lkey, 0,
 	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
 	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
-	mpt_entry->mtt_seg   = cpu_to_be64(dev->mr_table.mtt_base +
-					   mr->first_seg * MTHCA_MTT_SEG_SIZE);
+
+	if (mr->mtt)
+		mpt_entry->mtt_seg =
+			cpu_to_be64(dev->mr_table.mtt_base +
+				    mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE);
 
 
 	if (0) {
 	if (0) {
 		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
 		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
@@ -416,45 +395,70 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
 		}
 		}
 	}
 	}
 
 
-	err = mthca_SW2HW_MPT(dev, mpt_entry,
+	err = mthca_SW2HW_MPT(dev, mailbox,
 			      key & (dev->limits.num_mpts - 1),
 			      key & (dev->limits.num_mpts - 1),
 			      &status);
 			      &status);
-	if (err)
+	if (err) {
 		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
 		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
-	else if (status) {
+		goto err_out_mailbox;
+	} else if (status) {
 		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
 		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
 			   status);
 			   status);
 		err = -EINVAL;
 		err = -EINVAL;
+		goto err_out_mailbox;
 	}
 	}
 
 
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 	return err;
 	return err;
 
 
-err_out_mailbox_free:
-	kfree(mailbox);
-
-err_out_free_mtt:
-	mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy);
+err_out_mailbox:
+	mthca_free_mailbox(dev, mailbox);
 
 
 err_out_table:
 err_out_table:
-	if (mthca_is_memfree(dev))
-		mthca_table_put(dev, dev->mr_table.mpt_table, key);
+	mthca_table_put(dev, dev->mr_table.mpt_table, key);
 
 
 err_out_mpt_free:
 err_out_mpt_free:
 	mthca_free(&dev->mr_table.mpt_alloc, key);
 	mthca_free(&dev->mr_table.mpt_alloc, key);
 	return err;
 	return err;
 }
 }
 
 
-/* Free mr or fmr */
-static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order,
-			      u32 first_seg, struct mthca_buddy *buddy)
+int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
+			   u32 access, struct mthca_mr *mr)
 {
 {
-	if (order >= 0)
-		mthca_free_mtt(dev, first_seg, order, buddy);
+	mr->mtt = NULL;
+	return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr);
+}
 
 
-	if (mthca_is_memfree(dev))
-		mthca_table_put(dev, dev->mr_table.mpt_table,
-				arbel_key_to_hw_index(lkey));
+int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
+			u64 *buffer_list, int buffer_size_shift,
+			int list_len, u64 iova, u64 total_size,
+			u32 access, struct mthca_mr *mr)
+{
+	int err;
+
+	mr->mtt = mthca_alloc_mtt(dev, list_len);
+	if (IS_ERR(mr->mtt))
+		return PTR_ERR(mr->mtt);
+
+	err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len);
+	if (err) {
+		mthca_free_mtt(dev, mr->mtt);
+		return err;
+	}
+
+	err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova,
+			     total_size, access, mr);
+	if (err)
+		mthca_free_mtt(dev, mr->mtt);
+
+	return err;
+}
+
+/* Free mr or fmr */
+static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
+{
+	mthca_table_put(dev, dev->mr_table.mpt_table,
+			arbel_key_to_hw_index(lkey));
 
 
 	mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
 	mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
 }
 }
@@ -476,15 +480,15 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
 		mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
 		mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
 			   status);
 			   status);
 
 
-	mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg,
-			  &dev->mr_table.mtt_buddy);
+	mthca_free_region(dev, mr->ibmr.lkey);
+	mthca_free_mtt(dev, mr->mtt);
 }
 }
 
 
 int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 		    u32 access, struct mthca_fmr *mr)
 		    u32 access, struct mthca_fmr *mr)
 {
 {
 	struct mthca_mpt_entry *mpt_entry;
 	struct mthca_mpt_entry *mpt_entry;
-	void *mailbox;
+	struct mthca_mailbox *mailbox;
 	u64 mtt_seg;
 	u64 mtt_seg;
 	u32 key, idx;
 	u32 key, idx;
 	u8 status;
 	u8 status;
@@ -522,31 +526,24 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 		mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
 		mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
 		       	sizeof *(mr->mem.tavor.mpt) * idx;
 		       	sizeof *(mr->mem.tavor.mpt) * idx;
 
 
-	for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
-	     i < list_len;
-	     i <<= 1, ++mr->order)
-		; /* nothing */
-
-	mr->first_seg = mthca_alloc_mtt(dev, mr->order,
-				       	dev->mr_table.fmr_mtt_buddy);
-	if (mr->first_seg == -1)
+	mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
+	if (IS_ERR(mr->mtt))
 		goto err_out_table;
 		goto err_out_table;
 
 
-	mtt_seg = mr->first_seg * MTHCA_MTT_SEG_SIZE;
+	mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE;
 
 
 	if (mthca_is_memfree(dev)) {
 	if (mthca_is_memfree(dev)) {
 		mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
 		mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
-						      mr->first_seg);
+						      mr->mtt->first_seg);
 		BUG_ON(!mr->mem.arbel.mtts);
 		BUG_ON(!mr->mem.arbel.mtts);
 	} else
 	} else
 		mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
 		mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
 
 
-	mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
-			  GFP_KERNEL);
-	if (!mailbox)
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
 		goto err_out_free_mtt;
 		goto err_out_free_mtt;
 
 
-	mpt_entry = MAILBOX_ALIGN(mailbox);
+	mpt_entry = mailbox->buf;
 
 
 	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
 	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
 				       MTHCA_MPT_FLAG_MIO         |
 				       MTHCA_MPT_FLAG_MIO         |
@@ -571,7 +568,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 		}
 		}
 	}
 	}
 
 
-	err = mthca_SW2HW_MPT(dev, mpt_entry,
+	err = mthca_SW2HW_MPT(dev, mailbox,
 			      key & (dev->limits.num_mpts - 1),
 			      key & (dev->limits.num_mpts - 1),
 			      &status);
 			      &status);
 	if (err) {
 	if (err) {
@@ -585,19 +582,17 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 		goto err_out_mailbox_free;
 		goto err_out_mailbox_free;
 	}
 	}
 
 
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 	return 0;
 	return 0;
 
 
 err_out_mailbox_free:
 err_out_mailbox_free:
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 
 
 err_out_free_mtt:
 err_out_free_mtt:
-	mthca_free_mtt(dev, mr->first_seg, mr->order,
-		       dev->mr_table.fmr_mtt_buddy);
+	mthca_free_mtt(dev, mr->mtt);
 
 
 err_out_table:
 err_out_table:
-	if (mthca_is_memfree(dev))
-		mthca_table_put(dev, dev->mr_table.mpt_table, key);
+	mthca_table_put(dev, dev->mr_table.mpt_table, key);
 
 
 err_out_mpt_free:
 err_out_mpt_free:
 	mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
 	mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
@@ -609,8 +604,9 @@ int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
 	if (fmr->maps)
 	if (fmr->maps)
 		return -EBUSY;
 		return -EBUSY;
 
 
-	mthca_free_region(dev, fmr->ibmr.lkey, fmr->order, fmr->first_seg,
-			  dev->mr_table.fmr_mtt_buddy);
+	mthca_free_region(dev, fmr->ibmr.lkey);
+	mthca_free_mtt(dev, fmr->mtt);
+
 	return 0;
 	return 0;
 }
 }
 
 
@@ -826,7 +822,8 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
 	if (dev->limits.reserved_mtts) {
 	if (dev->limits.reserved_mtts) {
 		i = fls(dev->limits.reserved_mtts - 1);
 		i = fls(dev->limits.reserved_mtts - 1);
 
 
-		if (mthca_alloc_mtt(dev, i, dev->mr_table.fmr_mtt_buddy) == -1) {
+		if (mthca_alloc_mtt_range(dev, i,
+					  dev->mr_table.fmr_mtt_buddy) == -1) {
 			mthca_warn(dev, "MTT table of order %d is too small.\n",
 			mthca_warn(dev, "MTT table of order %d is too small.\n",
 				  dev->mr_table.fmr_mtt_buddy->max_order);
 				  dev->mr_table.fmr_mtt_buddy->max_order);
 			err = -ENOMEM;
 			err = -ENOMEM;

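Note: the mthca_mr.c rework above splits memory registration into mthca_alloc_mtt(), mthca_write_mtt() and mthca_mr_alloc(), with mthca_free_mtt() accepting NULL. A hypothetical caller strings them together the same way the new mthca_mr_alloc_phys() does; sketch only, assuming PAGE_SHIFT-sized pages:

static int example_register(struct mthca_dev *dev, u32 pd, u64 *pages, int npages,
			    u64 iova, u64 length, u32 access, struct mthca_mr *mr)
{
	int err;

	mr->mtt = mthca_alloc_mtt(dev, npages);		/* size the MTT for the page list */
	if (IS_ERR(mr->mtt))
		return PTR_ERR(mr->mtt);

	err = mthca_write_mtt(dev, mr->mtt, 0, pages, npages);
	if (!err)
		err = mthca_mr_alloc(dev, pd, PAGE_SHIFT, iova, length, access, mr);

	if (err)
		mthca_free_mtt(dev, mr->mtt);		/* NULL-safe, mirrors mthca_mr_alloc_phys() */
	return err;
}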
+ 3 - 1
drivers/infiniband/hw/mthca/mthca_provider.c

@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -52,7 +53,7 @@ static int mthca_query_device(struct ib_device *ibdev,
 	if (!in_mad || !out_mad)
 		goto out;
 
-	memset(props, 0, sizeof props);
+	memset(props, 0, sizeof *props);
 
 	props->fw_ver              = mdev->fw_ver;
 
@@ -558,6 +559,7 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd       *pd,
 				  convert_access(acc), mr);
 
 	if (err) {
+		kfree(page_list);
 		kfree(mr);
 		return ERR_PTR(err);
 	}

+ 7 - 7
drivers/infiniband/hw/mthca/mthca_provider.h

@@ -54,18 +54,18 @@ struct mthca_uar {
 	int           index;
 };
 
+struct mthca_mtt;
+
 struct mthca_mr {
-	struct ib_mr ibmr;
-	int order;
-	u32 first_seg;
+	struct ib_mr      ibmr;
+	struct mthca_mtt *mtt;
 };
 
 struct mthca_fmr {
-	struct ib_fmr ibmr;
+	struct ib_fmr      ibmr;
 	struct ib_fmr_attr attr;
-	int order;
-	u32 first_seg;
-	int maps;
+	struct mthca_mtt  *mtt;
+	int                maps;
 	union {
 		struct {
 			struct mthca_mpt_entry __iomem *mpt;

+ 109 - 30
drivers/infiniband/hw/mthca/mthca_qp.c

@@ -357,6 +357,9 @@ static const struct {
 				[UD]  = (IB_QP_PKEY_INDEX |
 				[UD]  = (IB_QP_PKEY_INDEX |
 					 IB_QP_PORT       |
 					 IB_QP_PORT       |
 					 IB_QP_QKEY),
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_PKEY_INDEX |
+					 IB_QP_PORT       |
+					 IB_QP_ACCESS_FLAGS),
 				[RC]  = (IB_QP_PKEY_INDEX |
 				[RC]  = (IB_QP_PKEY_INDEX |
 					 IB_QP_PORT       |
 					 IB_QP_PORT       |
 					 IB_QP_ACCESS_FLAGS),
 					 IB_QP_ACCESS_FLAGS),
@@ -378,6 +381,9 @@ static const struct {
 				[UD]  = (IB_QP_PKEY_INDEX |
 				[UD]  = (IB_QP_PKEY_INDEX |
 					 IB_QP_PORT       |
 					 IB_QP_PORT       |
 					 IB_QP_QKEY),
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_PKEY_INDEX |
+					 IB_QP_PORT       |
+					 IB_QP_ACCESS_FLAGS),
 				[RC]  = (IB_QP_PKEY_INDEX |
 				[RC]  = (IB_QP_PKEY_INDEX |
 					 IB_QP_PORT       |
 					 IB_QP_PORT       |
 					 IB_QP_ACCESS_FLAGS),
 					 IB_QP_ACCESS_FLAGS),
@@ -388,6 +394,11 @@ static const struct {
 		[IB_QPS_RTR]   = {
 		[IB_QPS_RTR]   = {
 			.trans = MTHCA_TRANS_INIT2RTR,
 			.trans = MTHCA_TRANS_INIT2RTR,
 			.req_param = {
 			.req_param = {
+				[UC]  = (IB_QP_AV                  |
+					 IB_QP_PATH_MTU            |
+					 IB_QP_DEST_QPN            |
+					 IB_QP_RQ_PSN              |
+					 IB_QP_MAX_DEST_RD_ATOMIC),
 				[RC]  = (IB_QP_AV                  |
 				[RC]  = (IB_QP_AV                  |
 					 IB_QP_PATH_MTU            |
 					 IB_QP_PATH_MTU            |
 					 IB_QP_DEST_QPN            |
 					 IB_QP_DEST_QPN            |
@@ -398,6 +409,9 @@ static const struct {
 			.opt_param = {
 			.opt_param = {
 				[UD]  = (IB_QP_PKEY_INDEX |
 				[UD]  = (IB_QP_PKEY_INDEX |
 					 IB_QP_QKEY),
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_ALT_PATH     |
+					 IB_QP_ACCESS_FLAGS |
+					 IB_QP_PKEY_INDEX),
 				[RC]  = (IB_QP_ALT_PATH     |
 				[RC]  = (IB_QP_ALT_PATH     |
 					 IB_QP_ACCESS_FLAGS |
 					 IB_QP_ACCESS_FLAGS |
 					 IB_QP_PKEY_INDEX),
 					 IB_QP_PKEY_INDEX),
@@ -413,6 +427,8 @@ static const struct {
 			.trans = MTHCA_TRANS_RTR2RTS,
 			.trans = MTHCA_TRANS_RTR2RTS,
 			.req_param = {
 			.req_param = {
 				[UD]  = IB_QP_SQ_PSN,
 				[UD]  = IB_QP_SQ_PSN,
+				[UC]  = (IB_QP_SQ_PSN            |
+					 IB_QP_MAX_QP_RD_ATOMIC),
 				[RC]  = (IB_QP_TIMEOUT           |
 				[RC]  = (IB_QP_TIMEOUT           |
 					 IB_QP_RETRY_CNT         |
 					 IB_QP_RETRY_CNT         |
 					 IB_QP_RNR_RETRY         |
 					 IB_QP_RNR_RETRY         |
@@ -423,6 +439,11 @@ static const struct {
 			.opt_param = {
 			.opt_param = {
 				[UD]  = (IB_QP_CUR_STATE             |
 				[UD]  = (IB_QP_CUR_STATE             |
 					 IB_QP_QKEY),
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_CUR_STATE             |
+					 IB_QP_ALT_PATH              |
+					 IB_QP_ACCESS_FLAGS          |
+					 IB_QP_PKEY_INDEX            |
+					 IB_QP_PATH_MIG_STATE),
 				[RC]  = (IB_QP_CUR_STATE             |
 				[RC]  = (IB_QP_CUR_STATE             |
 					 IB_QP_ALT_PATH              |
 					 IB_QP_ALT_PATH              |
 					 IB_QP_ACCESS_FLAGS          |
 					 IB_QP_ACCESS_FLAGS          |
@@ -442,6 +463,9 @@ static const struct {
 			.opt_param = {
 			.opt_param = {
 				[UD]  = (IB_QP_CUR_STATE             |
 				[UD]  = (IB_QP_CUR_STATE             |
 					 IB_QP_QKEY),
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_ACCESS_FLAGS          |
+					 IB_QP_ALT_PATH              |
+					 IB_QP_PATH_MIG_STATE),
 				[RC]  = (IB_QP_ACCESS_FLAGS          |
 				[RC]  = (IB_QP_ACCESS_FLAGS          |
 					 IB_QP_ALT_PATH              |
 					 IB_QP_ALT_PATH              |
 					 IB_QP_PATH_MIG_STATE        |
 					 IB_QP_PATH_MIG_STATE        |
@@ -462,6 +486,10 @@ static const struct {
 			.opt_param = {
 			.opt_param = {
 				[UD]  = (IB_QP_CUR_STATE             |
 				[UD]  = (IB_QP_CUR_STATE             |
 					 IB_QP_QKEY),
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_CUR_STATE             |
+					 IB_QP_ALT_PATH              |
+					 IB_QP_ACCESS_FLAGS          |
+					 IB_QP_PATH_MIG_STATE),
 				[RC]  = (IB_QP_CUR_STATE             |
 				[RC]  = (IB_QP_CUR_STATE             |
 					 IB_QP_ALT_PATH              |
 					 IB_QP_ALT_PATH              |
 					 IB_QP_ACCESS_FLAGS          |
 					 IB_QP_ACCESS_FLAGS          |
@@ -476,6 +504,14 @@ static const struct {
 			.opt_param = {
 			.opt_param = {
 				[UD]  = (IB_QP_PKEY_INDEX            |
 				[UD]  = (IB_QP_PKEY_INDEX            |
 					 IB_QP_QKEY),
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_AV                    |
+					 IB_QP_MAX_QP_RD_ATOMIC      |
+					 IB_QP_MAX_DEST_RD_ATOMIC    |
+					 IB_QP_CUR_STATE             |
+					 IB_QP_ALT_PATH              |
+					 IB_QP_ACCESS_FLAGS          |
+					 IB_QP_PKEY_INDEX            |
+					 IB_QP_PATH_MIG_STATE),
 				[RC]  = (IB_QP_AV                    |
 				[RC]  = (IB_QP_AV                    |
 					 IB_QP_TIMEOUT               |
 					 IB_QP_TIMEOUT               |
 					 IB_QP_RETRY_CNT             |
 					 IB_QP_RETRY_CNT             |
@@ -501,6 +537,7 @@ static const struct {
 			.opt_param = {
 			.opt_param = {
 				[UD]  = (IB_QP_CUR_STATE             |
 				[UD]  = (IB_QP_CUR_STATE             |
 					 IB_QP_QKEY),
 					 IB_QP_QKEY),
+				[UC]  = (IB_QP_CUR_STATE),
 				[RC]  = (IB_QP_CUR_STATE             |
 				[RC]  = (IB_QP_CUR_STATE             |
 					 IB_QP_MIN_RNR_TIMER),
 					 IB_QP_MIN_RNR_TIMER),
 				[MLX] = (IB_QP_CUR_STATE             |
 				[MLX] = (IB_QP_CUR_STATE             |
@@ -552,7 +589,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
 	struct mthca_qp *qp = to_mqp(ibqp);
 	enum ib_qp_state cur_state, new_state;
 	enum ib_qp_state cur_state, new_state;
-	void *mailbox = NULL;
+	struct mthca_mailbox *mailbox;
 	struct mthca_qp_param *qp_param;
 	struct mthca_qp_param *qp_param;
 	struct mthca_qp_context *qp_context;
 	struct mthca_qp_context *qp_context;
 	u32 req_param, opt_param;
 	u32 req_param, opt_param;
@@ -609,10 +646,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
-	mailbox = kmalloc(sizeof (*qp_param) + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL);
-	if (!mailbox)
-		return -ENOMEM;
-	qp_param = MAILBOX_ALIGN(mailbox);
+	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	qp_param = mailbox->buf;
 	qp_context = &qp_param->context;
 	qp_context = &qp_param->context;
 	memset(qp_param, 0, sizeof *qp_param);
 	memset(qp_param, 0, sizeof *qp_param);
 
 
@@ -683,7 +720,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	if (attr_mask & IB_QP_AV) {
 	if (attr_mask & IB_QP_AV) {
 		qp_context->pri_path.g_mylmc     = attr->ah_attr.src_path_bits & 0x7f;
 		qp_context->pri_path.g_mylmc     = attr->ah_attr.src_path_bits & 0x7f;
 		qp_context->pri_path.rlid        = cpu_to_be16(attr->ah_attr.dlid);
 		qp_context->pri_path.rlid        = cpu_to_be16(attr->ah_attr.dlid);
-		qp_context->pri_path.static_rate = (!!attr->ah_attr.static_rate) << 3;
+		qp_context->pri_path.static_rate = !!attr->ah_attr.static_rate;
 		if (attr->ah_attr.ah_flags & IB_AH_GRH) {
 		if (attr->ah_attr.ah_flags & IB_AH_GRH) {
 			qp_context->pri_path.g_mylmc |= 1 << 7;
 			qp_context->pri_path.g_mylmc |= 1 << 7;
 			qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
 			qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
@@ -724,9 +761,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);
 	}
 	}
 
 
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
-		qp_context->params1 |= cpu_to_be32(min(attr->max_dest_rd_atomic ?
-						       ffs(attr->max_dest_rd_atomic) - 1 : 0,
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+		qp_context->params1 |= cpu_to_be32(min(attr->max_rd_atomic ?
+						       ffs(attr->max_rd_atomic) - 1 : 0,
 						       7) << 21);
 						       7) << 21);
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
 	}
 	}
@@ -764,10 +801,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		qp->atomic_rd_en = attr->qp_access_flags;
 		qp->atomic_rd_en = attr->qp_access_flags;
 	}
 	}
 
 
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
 		u8 rra_max;
 		u8 rra_max;
 
 
-		if (qp->resp_depth && !attr->max_rd_atomic) {
+		if (qp->resp_depth && !attr->max_dest_rd_atomic) {
 			/*
 			/*
 			 * Lowering our responder resources to zero.
 			 * Lowering our responder resources to zero.
 			 * Turn off RDMA/atomics as responder.
 			 * Turn off RDMA/atomics as responder.
@@ -778,7 +815,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 								MTHCA_QP_OPTPAR_RAE);
 								MTHCA_QP_OPTPAR_RAE);
 		}
 		}
 
 
-		if (!qp->resp_depth && attr->max_rd_atomic) {
+		if (!qp->resp_depth && attr->max_dest_rd_atomic) {
 			/*
 			/*
 			 * Increasing our responder resources from
 			 * Increasing our responder resources from
 			 * zero.  Turn on RDMA/atomics as appropriate.
 			 * zero.  Turn on RDMA/atomics as appropriate.
@@ -799,7 +836,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		}
 		}
 
 
 		for (rra_max = 0;
 		for (rra_max = 0;
-		     1 << rra_max < attr->max_rd_atomic &&
+		     1 << rra_max < attr->max_dest_rd_atomic &&
 			     rra_max < dev->qp_table.rdb_shift;
 			     rra_max < dev->qp_table.rdb_shift;
 		     ++rra_max)
 		     ++rra_max)
 			; /* nothing */
 			; /* nothing */
@@ -807,7 +844,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		qp_context->params2      |= cpu_to_be32(rra_max << 21);
 		qp_context->params2      |= cpu_to_be32(rra_max << 21);
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
 
 
-		qp->resp_depth = attr->max_rd_atomic;
+		qp->resp_depth = attr->max_dest_rd_atomic;
 	}
 	}
 
 
 	qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
 	qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
@@ -835,7 +872,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	}
 	}
 
 
 	err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
 	err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
-			      qp->qpn, 0, qp_param, 0, &status);
+			      qp->qpn, 0, mailbox, 0, &status);
 	if (status) {
 	if (status) {
 		mthca_warn(dev, "modify QP %d returned status %02x.\n",
 		mthca_warn(dev, "modify QP %d returned status %02x.\n",
 			   state_table[cur_state][new_state].trans, status);
 			   state_table[cur_state][new_state].trans, status);
@@ -845,7 +882,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	if (!err)
 	if (!err)
 		qp->state = new_state;
 		qp->state = new_state;
 
 
-	kfree(mailbox);
+	mthca_free_mailbox(dev, mailbox);
 
 
 	if (is_sqp(dev, qp))
 	if (is_sqp(dev, qp))
 		store_attrs(to_msqp(qp), attr, attr_mask);
 		store_attrs(to_msqp(qp), attr, attr_mask);
@@ -934,7 +971,8 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
 			mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n",
 			mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n",
 				  size, shift);
 				  size, shift);
 
 
-		qp->queue.direct.buf = pci_alloc_consistent(dev->pdev, size, &t);
+		qp->queue.direct.buf = dma_alloc_coherent(&dev->pdev->dev, size,
+							  &t, GFP_KERNEL);
 		if (!qp->queue.direct.buf)
 		if (!qp->queue.direct.buf)
 			goto err_out;
 			goto err_out;
 
 
@@ -973,7 +1011,8 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
 
 
 		for (i = 0; i < npages; ++i) {
 		for (i = 0; i < npages; ++i) {
 			qp->queue.page_list[i].buf =
 			qp->queue.page_list[i].buf =
-				pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t);
+				dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+						   &t, GFP_KERNEL);
 			if (!qp->queue.page_list[i].buf)
 			if (!qp->queue.page_list[i].buf)
 				goto err_out_free;
 				goto err_out_free;
 
 
@@ -996,16 +1035,15 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
 
 
  err_out_free:
  err_out_free:
 	if (qp->is_direct) {
 	if (qp->is_direct) {
-		pci_free_consistent(dev->pdev, size,
-				    qp->queue.direct.buf,
-				    pci_unmap_addr(&qp->queue.direct, mapping));
+		dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf,
+				  pci_unmap_addr(&qp->queue.direct, mapping));
 	} else
 	} else
 		for (i = 0; i < npages; ++i) {
 		for (i = 0; i < npages; ++i) {
 			if (qp->queue.page_list[i].buf)
 			if (qp->queue.page_list[i].buf)
-				pci_free_consistent(dev->pdev, PAGE_SIZE,
-						    qp->queue.page_list[i].buf,
-						    pci_unmap_addr(&qp->queue.page_list[i],
-								   mapping));
+				dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+						  qp->queue.page_list[i].buf,
+						  pci_unmap_addr(&qp->queue.page_list[i],
+								 mapping));
 
 
 		}
 		}
 
 
@@ -1073,11 +1111,12 @@ static void mthca_free_memfree(struct mthca_dev *dev,
 	if (mthca_is_memfree(dev)) {
 	if (mthca_is_memfree(dev)) {
 		mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
 		mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
 		mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
 		mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
-		mthca_table_put(dev, dev->qp_table.rdb_table,
-				qp->qpn << dev->qp_table.rdb_shift);
-		mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
-		mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
 	}
 	}
+
+	mthca_table_put(dev, dev->qp_table.rdb_table,
+			qp->qpn << dev->qp_table.rdb_shift);
+	mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
+	mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
 }
 }
 
 
 static void mthca_wq_init(struct mthca_wq* wq)
 static void mthca_wq_init(struct mthca_wq* wq)
@@ -1529,6 +1568,26 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 
 			break;
 			break;
 
 
+		case UC:
+			switch (wr->opcode) {
+			case IB_WR_RDMA_WRITE:
+			case IB_WR_RDMA_WRITE_WITH_IMM:
+				((struct mthca_raddr_seg *) wqe)->raddr =
+					cpu_to_be64(wr->wr.rdma.remote_addr);
+				((struct mthca_raddr_seg *) wqe)->rkey =
+					cpu_to_be32(wr->wr.rdma.rkey);
+				((struct mthca_raddr_seg *) wqe)->reserved = 0;
+				wqe += sizeof (struct mthca_raddr_seg);
+				size += sizeof (struct mthca_raddr_seg) / 16;
+				break;
+
+			default:
+				/* No extra segments required for sends */
+				break;
+			}
+
+			break;
+
 		case UD:
 		case UD:
 			((struct mthca_tavor_ud_seg *) wqe)->lkey =
 			((struct mthca_tavor_ud_seg *) wqe)->lkey =
 				cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
 				cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
@@ -1814,9 +1873,29 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 					sizeof (struct mthca_atomic_seg);
 					sizeof (struct mthca_atomic_seg);
 				break;
 				break;
 
 
+			case IB_WR_RDMA_READ:
+			case IB_WR_RDMA_WRITE:
+			case IB_WR_RDMA_WRITE_WITH_IMM:
+				((struct mthca_raddr_seg *) wqe)->raddr =
+					cpu_to_be64(wr->wr.rdma.remote_addr);
+				((struct mthca_raddr_seg *) wqe)->rkey =
+					cpu_to_be32(wr->wr.rdma.rkey);
+				((struct mthca_raddr_seg *) wqe)->reserved = 0;
+				wqe += sizeof (struct mthca_raddr_seg);
+				size += sizeof (struct mthca_raddr_seg) / 16;
+				break;
+
+			default:
+				/* No extra segments required for sends */
+				break;
+			}
+
+			break;
+
+		case UC:
+			switch (wr->opcode) {
 			case IB_WR_RDMA_WRITE:
 			case IB_WR_RDMA_WRITE:
 			case IB_WR_RDMA_WRITE_WITH_IMM:
 			case IB_WR_RDMA_WRITE_WITH_IMM:
-			case IB_WR_RDMA_READ:
 				((struct mthca_raddr_seg *) wqe)->raddr =
 				((struct mthca_raddr_seg *) wqe)->raddr =
 					cpu_to_be64(wr->wr.rdma.remote_addr);
 					cpu_to_be64(wr->wr.rdma.remote_addr);
 				((struct mthca_raddr_seg *) wqe)->rkey =
 				((struct mthca_raddr_seg *) wqe)->rkey =

+ 327 - 80
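Note: the mthca_qp.c hunks above also move WQE buffer allocation from pci_alloc_consistent() to dma_alloc_coherent(). The calls are equivalent; the DMA API variant takes the generic struct device and an explicit GFP mask. Illustrative sketch only (the example_* helpers are not part of the driver):

static void *example_alloc_queue(struct pci_dev *pdev, int size, dma_addr_t *dma)
{
	/* old: pci_alloc_consistent(pdev, size, dma); */
	return dma_alloc_coherent(&pdev->dev, size, dma, GFP_KERNEL);
}

static void example_free_queue(struct pci_dev *pdev, int size, void *buf, dma_addr_t dma)
{
	/* old: pci_free_consistent(pdev, size, buf, dma); */
	dma_free_coherent(&pdev->dev, size, buf, dma);
}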
drivers/input/evdev.c

@@ -21,6 +21,7 @@
 #include <linux/smp_lock.h>
 #include <linux/smp_lock.h>
 #include <linux/device.h>
 #include <linux/device.h>
 #include <linux/devfs_fs_kernel.h>
 #include <linux/devfs_fs_kernel.h>
+#include <linux/compat.h>
 
 
 struct evdev {
 struct evdev {
 	int exist;
 	int exist;
@@ -145,6 +146,41 @@ static int evdev_open(struct inode * inode, struct file * file)
 	return 0;
 	return 0;
 }
 }
 
 
+#ifdef CONFIG_COMPAT
+struct input_event_compat {
+	struct compat_timeval time;
+	__u16 type;
+	__u16 code;
+	__s32 value;
+};
+
+#ifdef CONFIG_X86_64
+#  define COMPAT_TEST test_thread_flag(TIF_IA32)
+#elif defined(CONFIG_IA64)
+#  define COMPAT_TEST IS_IA32_PROCESS(ia64_task_regs(current))
+#elif defined(CONFIG_ARCH_S390)
+#  define COMPAT_TEST test_thread_flag(TIF_31BIT)
+#else
+#  define COMPAT_TEST test_thread_flag(TIF_32BIT)
+#endif
+
+static ssize_t evdev_write_compat(struct file * file, const char __user * buffer, size_t count, loff_t *ppos)
+{
+	struct evdev_list *list = file->private_data;
+	struct input_event_compat event;
+	int retval = 0;
+
+	while (retval < count) {
+		if (copy_from_user(&event, buffer + retval, sizeof(struct input_event_compat)))
+			return -EFAULT;
+		input_event(list->evdev->handle.dev, event.type, event.code, event.value);
+		retval += sizeof(struct input_event_compat);
+	}
+
+	return retval;
+}
+#endif
+
 static ssize_t evdev_write(struct file * file, const char __user * buffer, size_t count, loff_t *ppos)
 static ssize_t evdev_write(struct file * file, const char __user * buffer, size_t count, loff_t *ppos)
 {
 {
 	struct evdev_list *list = file->private_data;
 	struct evdev_list *list = file->private_data;
@@ -153,6 +189,11 @@ static ssize_t evdev_write(struct file * file, const char __user * buffer, size_
 
 
 	if (!list->evdev->exist) return -ENODEV;
 	if (!list->evdev->exist) return -ENODEV;
 
 
+#ifdef CONFIG_COMPAT
+	if (COMPAT_TEST)
+		return evdev_write_compat(file, buffer, count, ppos);
+#endif
+
 	while (retval < count) {
 	while (retval < count) {
 
 
 		if (copy_from_user(&event, buffer + retval, sizeof(struct input_event)))
 		if (copy_from_user(&event, buffer + retval, sizeof(struct input_event)))
@@ -164,11 +205,56 @@ static ssize_t evdev_write(struct file * file, const char __user * buffer, size_
 	return retval;
 	return retval;
 }
 }
 
 
+#ifdef CONFIG_COMPAT
+static ssize_t evdev_read_compat(struct file * file, char __user * buffer, size_t count, loff_t *ppos)
+{
+	struct evdev_list *list = file->private_data;
+	int retval;
+
+	if (count < sizeof(struct input_event_compat))
+		return -EINVAL;
+
+	if (list->head == list->tail && list->evdev->exist && (file->f_flags & O_NONBLOCK))
+		return -EAGAIN;
+
+	retval = wait_event_interruptible(list->evdev->wait,
+		list->head != list->tail || (!list->evdev->exist));
+
+	if (retval)
+		return retval;
+
+	if (!list->evdev->exist)
+		return -ENODEV;
+
+	while (list->head != list->tail && retval + sizeof(struct input_event_compat) <= count) {
+		struct input_event *event = (struct input_event *) list->buffer + list->tail;
+		struct input_event_compat event_compat;
+		event_compat.time.tv_sec = event->time.tv_sec;
+		event_compat.time.tv_usec = event->time.tv_usec;
+		event_compat.type = event->type;
+		event_compat.code = event->code;
+		event_compat.value = event->value;
+
+		if (copy_to_user(buffer + retval, &event_compat,
+			sizeof(struct input_event_compat))) return -EFAULT;
+		list->tail = (list->tail + 1) & (EVDEV_BUFFER_SIZE - 1);
+		retval += sizeof(struct input_event_compat);
+	}
+
+	return retval;
+}
+#endif
+
 static ssize_t evdev_read(struct file * file, char __user * buffer, size_t count, loff_t *ppos)
 static ssize_t evdev_read(struct file * file, char __user * buffer, size_t count, loff_t *ppos)
 {
 {
 	struct evdev_list *list = file->private_data;
 	struct evdev_list *list = file->private_data;
 	int retval;
 	int retval;
 
 
+#ifdef CONFIG_COMPAT
+	if (COMPAT_TEST)
+		return evdev_read_compat(file, buffer, count, ppos);
+#endif
+
 	if (count < sizeof(struct input_event))
 	if (count < sizeof(struct input_event))
 		return -EINVAL;
 		return -EINVAL;
 
 
@@ -186,7 +272,7 @@ static ssize_t evdev_read(struct file * file, char __user * buffer, size_t count
 
 
 	while (list->head != list->tail && retval + sizeof(struct input_event) <= count) {
 	while (list->head != list->tail && retval + sizeof(struct input_event) <= count) {
 		if (copy_to_user(buffer + retval, list->buffer + list->tail,
 		if (copy_to_user(buffer + retval, list->buffer + list->tail,
-			 sizeof(struct input_event))) return -EFAULT;
+			sizeof(struct input_event))) return -EFAULT;
 		list->tail = (list->tail + 1) & (EVDEV_BUFFER_SIZE - 1);
 		list->tail = (list->tail + 1) & (EVDEV_BUFFER_SIZE - 1);
 		retval += sizeof(struct input_event);
 		retval += sizeof(struct input_event);
 	}
 	}
@@ -203,7 +289,7 @@ static unsigned int evdev_poll(struct file *file, poll_table *wait)
 		(list->evdev->exist ? 0 : (POLLHUP | POLLERR));
 		(list->evdev->exist ? 0 : (POLLHUP | POLLERR));
 }
 }
 
 
-static int evdev_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+static long evdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 {
 	struct evdev_list *list = file->private_data;
 	struct evdev_list *list = file->private_data;
 	struct evdev *evdev = list->evdev;
 	struct evdev *evdev = list->evdev;
@@ -285,109 +371,267 @@ static int evdev_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 
 
 		default:
 		default:
 
 
-			if (_IOC_TYPE(cmd) != 'E' || _IOC_DIR(cmd) != _IOC_READ)
+			if (_IOC_TYPE(cmd) != 'E')
 				return -EINVAL;
 				return -EINVAL;
 
 
-			if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0,0))) {
-
-				long *bits;
-				int len;
-
-				switch (_IOC_NR(cmd) & EV_MAX) {
-					case      0: bits = dev->evbit;  len = EV_MAX;  break;
-					case EV_KEY: bits = dev->keybit; len = KEY_MAX; break;
-					case EV_REL: bits = dev->relbit; len = REL_MAX; break;
-					case EV_ABS: bits = dev->absbit; len = ABS_MAX; break;
-					case EV_MSC: bits = dev->mscbit; len = MSC_MAX; break;
-					case EV_LED: bits = dev->ledbit; len = LED_MAX; break;
-					case EV_SND: bits = dev->sndbit; len = SND_MAX; break;
-					case EV_FF:  bits = dev->ffbit;  len = FF_MAX;  break;
-					default: return -EINVAL;
+			if (_IOC_DIR(cmd) == _IOC_READ) {
+
+				if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0,0))) {
+
+					long *bits;
+					int len;
+
+					switch (_IOC_NR(cmd) & EV_MAX) {
+						case      0: bits = dev->evbit;  len = EV_MAX;  break;
+						case EV_KEY: bits = dev->keybit; len = KEY_MAX; break;
+						case EV_REL: bits = dev->relbit; len = REL_MAX; break;
+						case EV_ABS: bits = dev->absbit; len = ABS_MAX; break;
+						case EV_MSC: bits = dev->mscbit; len = MSC_MAX; break;
+						case EV_LED: bits = dev->ledbit; len = LED_MAX; break;
+						case EV_SND: bits = dev->sndbit; len = SND_MAX; break;
+						case EV_FF:  bits = dev->ffbit;  len = FF_MAX;  break;
+						default: return -EINVAL;
+					}
+					len = NBITS(len) * sizeof(long);
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, bits, len) ? -EFAULT : len;
 				}
 				}
-				len = NBITS(len) * sizeof(long);
-				if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
-				return copy_to_user(p, bits, len) ? -EFAULT : len;
-			}
 
 
-			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGKEY(0))) {
-				int len;
-				len = NBITS(KEY_MAX) * sizeof(long);
-				if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
-				return copy_to_user(p, dev->key, len) ? -EFAULT : len;
-			}
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGKEY(0))) {
+					int len;
+					len = NBITS(KEY_MAX) * sizeof(long);
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->key, len) ? -EFAULT : len;
+				}
 
 
-			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGLED(0))) {
-				int len;
-				len = NBITS(LED_MAX) * sizeof(long);
-				if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
-				return copy_to_user(p, dev->led, len) ? -EFAULT : len;
-			}
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGLED(0))) {
+					int len;
+					len = NBITS(LED_MAX) * sizeof(long);
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->led, len) ? -EFAULT : len;
+				}
 
-			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0))) {
-				int len;
-				len = NBITS(SND_MAX) * sizeof(long);
-				if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
-				return copy_to_user(p, dev->snd, len) ? -EFAULT : len;
-			}
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0))) {
+					int len;
+					len = NBITS(SND_MAX) * sizeof(long);
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->snd, len) ? -EFAULT : len;
+				}
 
-			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0))) {
-				int len;
-				if (!dev->name) return -ENOENT;
-				len = strlen(dev->name) + 1;
-				if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
-				return copy_to_user(p, dev->name, len) ? -EFAULT : len;
-			}
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0))) {
+					int len;
+					if (!dev->name) return -ENOENT;
+					len = strlen(dev->name) + 1;
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->name, len) ? -EFAULT : len;
+				}
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGPHYS(0))) {
+					int len;
+					if (!dev->phys) return -ENOENT;
+					len = strlen(dev->phys) + 1;
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->phys, len) ? -EFAULT : len;
+				}
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGUNIQ(0))) {
+					int len;
+					if (!dev->uniq) return -ENOENT;
+					len = strlen(dev->uniq) + 1;
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->uniq, len) ? -EFAULT : len;
+				}
+
+				if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) {
+
+					int t = _IOC_NR(cmd) & ABS_MAX;
+
+					abs.value = dev->abs[t];
+					abs.minimum = dev->absmin[t];
+					abs.maximum = dev->absmax[t];
+					abs.fuzz = dev->absfuzz[t];
+					abs.flat = dev->absflat[t];
+
+					if (copy_to_user(p, &abs, sizeof(struct input_absinfo)))
+						return -EFAULT;
+
+					return 0;
+				}
 
-			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGPHYS(0))) {
-				int len;
-				if (!dev->phys) return -ENOENT;
-				len = strlen(dev->phys) + 1;
-				if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
-				return copy_to_user(p, dev->phys, len) ? -EFAULT : len;
 			}
 
-			if (_IOC_NR(cmd) == _IOC_NR(EVIOCGUNIQ(0))) {
-				int len;
-				if (!dev->uniq) return -ENOENT;
-				len = strlen(dev->uniq) + 1;
-				if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
-				return copy_to_user(p, dev->uniq, len) ? -EFAULT : len;
+			if (_IOC_DIR(cmd) == _IOC_WRITE) {
+
+				if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) {
+
+					int t = _IOC_NR(cmd) & ABS_MAX;
+
+					if (copy_from_user(&abs, p, sizeof(struct input_absinfo)))
+						return -EFAULT;
+
+					dev->abs[t] = abs.value;
+					dev->absmin[t] = abs.minimum;
+					dev->absmax[t] = abs.maximum;
+					dev->absfuzz[t] = abs.fuzz;
+					dev->absflat[t] = abs.flat;
+
+					return 0;
+				}
 			}
+	}
+	return -EINVAL;
+}
 
-			if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) {
+#ifdef CONFIG_COMPAT
+
+#define BITS_PER_LONG_COMPAT (sizeof(compat_long_t) * 8)
+#define NBITS_COMPAT(x) ((((x)-1)/BITS_PER_LONG_COMPAT)+1)
+#define OFF_COMPAT(x)  ((x)%BITS_PER_LONG_COMPAT)
+#define BIT_COMPAT(x)  (1UL<<OFF_COMPAT(x))
+#define LONG_COMPAT(x) ((x)/BITS_PER_LONG_COMPAT)
+#define test_bit_compat(bit, array) ((array[LONG_COMPAT(bit)] >> OFF_COMPAT(bit)) & 1)
+
+#ifdef __BIG_ENDIAN
+#define bit_to_user(bit, max) \
+do { \
+	int i; \
+	int len = NBITS_COMPAT((max)) * sizeof(compat_long_t); \
+	if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); \
+	for (i = 0; i < len / sizeof(compat_long_t); i++) \
+		if (copy_to_user((compat_long_t*) p + i, \
+				 (compat_long_t*) (bit) + i + 1 - ((i % 2) << 1), \
+				 sizeof(compat_long_t))) \
+			return -EFAULT; \
+	return len; \
+} while (0)
+#else
+#define bit_to_user(bit, max) \
+do { \
+	int len = NBITS_COMPAT((max)) * sizeof(compat_long_t); \
+	if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); \
+	return copy_to_user(p, (bit), len) ? -EFAULT : len; \
+} while (0)
+#endif
+
+static long evdev_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct evdev_list *list = file->private_data;
+	struct evdev *evdev = list->evdev;
+	struct input_dev *dev = evdev->handle.dev;
+	struct input_absinfo abs;
+	void __user *p = compat_ptr(arg);
 
-				int t = _IOC_NR(cmd) & ABS_MAX;
+	if (!evdev->exist) return -ENODEV;
 
-				abs.value = dev->abs[t];
-				abs.minimum = dev->absmin[t];
-				abs.maximum = dev->absmax[t];
-				abs.fuzz = dev->absfuzz[t];
-				abs.flat = dev->absflat[t];
+	switch (cmd) {
 
-				if (copy_to_user(p, &abs, sizeof(struct input_absinfo)))
-					return -EFAULT;
+		case EVIOCGVERSION:
+		case EVIOCGID:
+		case EVIOCGKEYCODE:
+		case EVIOCSKEYCODE:
+		case EVIOCSFF:
+		case EVIOCRMFF:
+		case EVIOCGEFFECTS:
+		case EVIOCGRAB:
+			return evdev_ioctl(file, cmd, (unsigned long) p);
 
-				return 0;
+		default:
+
+			if (_IOC_TYPE(cmd) != 'E')
+				return -EINVAL;
+
+			if (_IOC_DIR(cmd) == _IOC_READ) {
+
+				if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0,0))) {
+					long *bits;
+					int max;
+
+					switch (_IOC_NR(cmd) & EV_MAX) {
+						case      0: bits = dev->evbit;  max = EV_MAX;  break;
+						case EV_KEY: bits = dev->keybit; max = KEY_MAX; break;
+						case EV_REL: bits = dev->relbit; max = REL_MAX; break;
+						case EV_ABS: bits = dev->absbit; max = ABS_MAX; break;
+						case EV_MSC: bits = dev->mscbit; max = MSC_MAX; break;
+						case EV_LED: bits = dev->ledbit; max = LED_MAX; break;
+						case EV_SND: bits = dev->sndbit; max = SND_MAX; break;
+						case EV_FF:  bits = dev->ffbit;  max = FF_MAX;  break;
+						default: return -EINVAL;
+					}
+					bit_to_user(bits, max);
+				}
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGKEY(0)))
+					bit_to_user(dev->key, KEY_MAX);
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGLED(0)))
+					bit_to_user(dev->led, LED_MAX);
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0)))
+					bit_to_user(dev->snd, SND_MAX);
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0))) {
+					int len;
+					if (!dev->name) return -ENOENT;
+					len = strlen(dev->name) + 1;
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->name, len) ? -EFAULT : len;
+				}
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGPHYS(0))) {
+					int len;
+					if (!dev->phys) return -ENOENT;
+					len = strlen(dev->phys) + 1;
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->phys, len) ? -EFAULT : len;
+				}
+
+				if (_IOC_NR(cmd) == _IOC_NR(EVIOCGUNIQ(0))) {
+					int len;
+					if (!dev->uniq) return -ENOENT;
+					len = strlen(dev->uniq) + 1;
+					if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd);
+					return copy_to_user(p, dev->uniq, len) ? -EFAULT : len;
+				}
+
+				if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) {
+
+					int t = _IOC_NR(cmd) & ABS_MAX;
+
+					abs.value = dev->abs[t];
+					abs.minimum = dev->absmin[t];
+					abs.maximum = dev->absmax[t];
+					abs.fuzz = dev->absfuzz[t];
+					abs.flat = dev->absflat[t];
+
+					if (copy_to_user(p, &abs, sizeof(struct input_absinfo)))
+						return -EFAULT;
+
+					return 0;
+				}
 			}
 
-			if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) {
+			if (_IOC_DIR(cmd) == _IOC_WRITE) {
 
-				int t = _IOC_NR(cmd) & ABS_MAX;
+				if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) {
 
-				if (copy_from_user(&abs, p, sizeof(struct input_absinfo)))
-					return -EFAULT;
+					int t = _IOC_NR(cmd) & ABS_MAX;
 
-				dev->abs[t] = abs.value;
-				dev->absmin[t] = abs.minimum;
-				dev->absmax[t] = abs.maximum;
-				dev->absfuzz[t] = abs.fuzz;
-				dev->absflat[t] = abs.flat;
+					if (copy_from_user(&abs, p, sizeof(struct input_absinfo)))
+						return -EFAULT;
 
-				return 0;
+					dev->abs[t] = abs.value;
+					dev->absmin[t] = abs.minimum;
+					dev->absmax[t] = abs.maximum;
+					dev->absfuzz[t] = abs.fuzz;
+					dev->absflat[t] = abs.flat;
+
+					return 0;
+				}
 			}
 	}
 	return -EINVAL;
 }
+#endif
 
 static struct file_operations evdev_fops = {
 	.owner =	THIS_MODULE,
@@ -396,7 +640,10 @@ static struct file_operations evdev_fops = {
 	.poll =		evdev_poll,
 	.open =		evdev_open,
 	.release =	evdev_release,
-	.ioctl =	evdev_ioctl,
+	.unlocked_ioctl = evdev_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl =	evdev_ioctl_compat,
+#endif
 	.fasync =	evdev_fasync,
 	.flush =	evdev_flush
 };
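
The evdev hunks above move the driver from the BKL-protected .ioctl entry point to .unlocked_ioctl, with a CONFIG_COMPAT .compat_ioctl handler that widens 32-bit user pointers via compat_ptr(). A minimal sketch of that shape for a hypothetical driver (example_* names are illustrative and not part of this commit):

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/compat.h>

/* Hypothetical native handler: the prototype this commit switches
 * evdev_ioctl() to -- no inode argument, long return value, no BKL. */
static long example_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	void __user *p = (void __user *) arg;

	/* dispatch on cmd; use copy_to_user()/copy_from_user() on p */
	(void) p;
	return -EINVAL;
}

#ifdef CONFIG_COMPAT
/* 32-bit entry point: the user pointer arrives zero-extended and must
 * be converted with compat_ptr() before it can be reused. */
static long example_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
{
	return example_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif

static struct file_operations example_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= example_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= example_ioctl_compat,
#endif
};

Layout-insensitive commands can simply be forwarded, as evdev_ioctl_compat() above does for EVIOCGVERSION and friends; only layout-sensitive data (the long bitmaps handled by bit_to_user) needs per-word translation.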

+ 0 - 14
drivers/input/gameport/Kconfig

@@ -49,22 +49,8 @@ config GAMEPORT_EMU10K1
 	  To compile this driver as a module, choose M here: the
 	  module will be called emu10k1-gp.
 
-config GAMEPORT_VORTEX
-	tristate "Aureal Vortex, Vortex 2 gameport support"
-	depends on PCI
-	help
-	  Say Y here if you have an Aureal Vortex 1 or 2  card and want
-	  to use its gameport.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called vortex.
-
 config GAMEPORT_FM801
 	tristate "ForteMedia FM801 gameport support"
 	depends on PCI
 
-config GAMEPORT_CS461X
-	tristate "Crystal SoundFusion gameport support"
-	depends on PCI
-
 endif

+ 0 - 2
drivers/input/gameport/Makefile

@@ -5,9 +5,7 @@
 # Each configuration option enables a list of files.
 
 obj-$(CONFIG_GAMEPORT)		+= gameport.o
-obj-$(CONFIG_GAMEPORT_CS461X)	+= cs461x.o
 obj-$(CONFIG_GAMEPORT_EMU10K1)	+= emu10k1-gp.o
 obj-$(CONFIG_GAMEPORT_FM801)	+= fm801-gp.o
 obj-$(CONFIG_GAMEPORT_L4)	+= lightning.o
 obj-$(CONFIG_GAMEPORT_NS558)	+= ns558.o
-obj-$(CONFIG_GAMEPORT_VORTEX)	+= vortex.o

+ 0 - 322
drivers/input/gameport/cs461x.c

@@ -1,322 +0,0 @@
-/*
-	The all defines and part of code (such as cs461x_*) are
-	contributed from ALSA 0.5.8 sources.
-	See http://www.alsa-project.org/ for sources
-
-	Tested on Linux 686 2.4.0-test9, ALSA 0.5.8a and CS4610
-*/
-
-#include <asm/io.h>
-
-#include <linux/module.h>
-#include <linux/ioport.h>
-#include <linux/config.h>
-#include <linux/init.h>
-#include <linux/gameport.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-
-MODULE_AUTHOR("Victor Krapivin");
-MODULE_LICENSE("GPL");
-
-/*
-	These options are experimental
-
-#define CS461X_FULL_MAP
-*/
-
-
-#ifndef PCI_VENDOR_ID_CIRRUS
-#define PCI_VENDOR_ID_CIRRUS            0x1013
-#endif
-#ifndef PCI_DEVICE_ID_CIRRUS_4610
-#define PCI_DEVICE_ID_CIRRUS_4610       0x6001
-#endif
-#ifndef PCI_DEVICE_ID_CIRRUS_4612
-#define PCI_DEVICE_ID_CIRRUS_4612       0x6003
-#endif
-#ifndef PCI_DEVICE_ID_CIRRUS_4615
-#define PCI_DEVICE_ID_CIRRUS_4615       0x6004
-#endif
-
-/* Registers */
-
-#define BA0_JSPT                                0x00000480
-#define BA0_JSCTL                               0x00000484
-#define BA0_JSC1                                0x00000488
-#define BA0_JSC2                                0x0000048C
-#define BA0_JSIO                                0x000004A0
-
-/* Bits for JSPT */
-
-#define JSPT_CAX                                0x00000001
-#define JSPT_CAY                                0x00000002
-#define JSPT_CBX                                0x00000004
-#define JSPT_CBY                                0x00000008
-#define JSPT_BA1                                0x00000010
-#define JSPT_BA2                                0x00000020
-#define JSPT_BB1                                0x00000040
-#define JSPT_BB2                                0x00000080
-
-/* Bits for JSCTL */
-
-#define JSCTL_SP_MASK                           0x00000003
-#define JSCTL_SP_SLOW                           0x00000000
-#define JSCTL_SP_MEDIUM_SLOW                    0x00000001
-#define JSCTL_SP_MEDIUM_FAST                    0x00000002
-#define JSCTL_SP_FAST                           0x00000003
-#define JSCTL_ARE                               0x00000004
-
-/* Data register pairs masks */
-
-#define JSC1_Y1V_MASK                           0x0000FFFF
-#define JSC1_X1V_MASK                           0xFFFF0000
-#define JSC1_Y1V_SHIFT                          0
-#define JSC1_X1V_SHIFT                          16
-#define JSC2_Y2V_MASK                           0x0000FFFF
-#define JSC2_X2V_MASK                           0xFFFF0000
-#define JSC2_Y2V_SHIFT                          0
-#define JSC2_X2V_SHIFT                          16
-
-/* JS GPIO */
-
-#define JSIO_DAX                                0x00000001
-#define JSIO_DAY                                0x00000002
-#define JSIO_DBX                                0x00000004
-#define JSIO_DBY                                0x00000008
-#define JSIO_AXOE                               0x00000010
-#define JSIO_AYOE                               0x00000020
-#define JSIO_BXOE                               0x00000040
-#define JSIO_BYOE                               0x00000080
-
-/*
-   The card initialization code is obfuscated; the module cs461x
-   need to be loaded after ALSA modules initialized and something
-   played on the CS 4610 chip (see sources for details of CS4610
-   initialization code from ALSA)
-*/
-
-/* Card specific definitions */
-
-#define CS461X_BA0_SIZE         0x2000
-#define CS461X_BA1_DATA0_SIZE   0x3000
-#define CS461X_BA1_DATA1_SIZE   0x3800
-#define CS461X_BA1_PRG_SIZE     0x7000
-#define CS461X_BA1_REG_SIZE     0x0100
-
-#define BA1_SP_DMEM0                            0x00000000
-#define BA1_SP_DMEM1                            0x00010000
-#define BA1_SP_PMEM                             0x00020000
-#define BA1_SP_REG                              0x00030000
-
-#define BA1_DWORD_SIZE          (13 * 1024 + 512)
-#define BA1_MEMORY_COUNT        3
-
-/*
-   Only one CS461x card is still suppoted; the code requires
-   redesign to avoid this limitatuion.
-*/
-
-static unsigned long ba0_addr;
-static unsigned int __iomem *ba0;
-
-#ifdef CS461X_FULL_MAP
-static unsigned long ba1_addr;
-static union ba1_t {
-        struct {
-                unsigned int __iomem *data0;
-                unsigned int __iomem *data1;
-                unsigned int __iomem *pmem;
-                unsigned int __iomem *reg;
-        } name;
-        unsigned int __iomem *idx[4];
-} ba1;
-
-static void cs461x_poke(unsigned long reg, unsigned int val)
-{
-        writel(val, &ba1.idx[(reg >> 16) & 3][(reg >> 2) & 0x3fff]);
-}
-
-static unsigned int cs461x_peek(unsigned long reg)
-{
-        return readl(&ba1.idx[(reg >> 16) & 3][(reg >> 2) & 0x3fff]);
-}
-
-#endif
-
-static void cs461x_pokeBA0(unsigned long reg, unsigned int val)
-{
-        writel(val, &ba0[reg >> 2]);
-}
-
-static unsigned int cs461x_peekBA0(unsigned long reg)
-{
-        return readl(&ba0[reg >> 2]);
-}
-
-static int cs461x_free(struct pci_dev *pdev)
-{
-	struct gameport *port = pci_get_drvdata(pdev);
-
-	if (port)
-	    gameport_unregister_port(port);
-
-	if (ba0) iounmap(ba0);
-#ifdef CS461X_FULL_MAP
-	if (ba1.name.data0) iounmap(ba1.name.data0);
-	if (ba1.name.data1) iounmap(ba1.name.data1);
-	if (ba1.name.pmem)  iounmap(ba1.name.pmem);
-	if (ba1.name.reg)   iounmap(ba1.name.reg);
-#endif
-	return 0;
-}
-
-static void cs461x_gameport_trigger(struct gameport *gameport)
-{
-	cs461x_pokeBA0(BA0_JSPT, 0xFF);  //outb(gameport->io, 0xFF);
-}
-
-static unsigned char cs461x_gameport_read(struct gameport *gameport)
-{
-	return cs461x_peekBA0(BA0_JSPT); //inb(gameport->io);
-}
-
-static int cs461x_gameport_cooked_read(struct gameport *gameport, int *axes, int *buttons)
-{
-	unsigned js1, js2, jst;
-
-	js1 = cs461x_peekBA0(BA0_JSC1);
-	js2 = cs461x_peekBA0(BA0_JSC2);
-	jst = cs461x_peekBA0(BA0_JSPT);
-
-	*buttons = (~jst >> 4) & 0x0F;
-
-	axes[0] = ((js1 & JSC1_Y1V_MASK) >> JSC1_Y1V_SHIFT) & 0xFFFF;
-	axes[1] = ((js1 & JSC1_X1V_MASK) >> JSC1_X1V_SHIFT) & 0xFFFF;
-	axes[2] = ((js2 & JSC2_Y2V_MASK) >> JSC2_Y2V_SHIFT) & 0xFFFF;
-	axes[3] = ((js2 & JSC2_X2V_MASK) >> JSC2_X2V_SHIFT) & 0xFFFF;
-
-	for(jst=0;jst<4;++jst)
-		if(axes[jst]==0xFFFF) axes[jst] = -1;
-	return 0;
-}
-
-static int cs461x_gameport_open(struct gameport *gameport, int mode)
-{
-	switch (mode) {
-		case GAMEPORT_MODE_COOKED:
-		case GAMEPORT_MODE_RAW:
-			return 0;
-		default:
-			return -1;
-	}
-	return 0;
-}
-
-static struct pci_device_id cs461x_pci_tbl[] = {
-	{ PCI_VENDOR_ID_CIRRUS, 0x6001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Cirrus CS4610 */
-	{ PCI_VENDOR_ID_CIRRUS, 0x6003, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Cirrus CS4612 */
-	{ PCI_VENDOR_ID_CIRRUS, 0x6005, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Cirrus CS4615 */
-	{ 0, }
-};
-MODULE_DEVICE_TABLE(pci, cs461x_pci_tbl);
-
-static int __devinit cs461x_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-	int rc;
-	struct gameport* port;
-
-	rc = pci_enable_device(pdev);
-	if (rc) {
-		printk(KERN_ERR "cs461x: Cannot enable PCI gameport (bus %d, devfn %d) error=%d\n",
-			pdev->bus->number, pdev->devfn, rc);
-		return rc;
-	}
-
-	ba0_addr = pci_resource_start(pdev, 0);
-#ifdef CS461X_FULL_MAP
-	ba1_addr = pci_resource_start(pdev, 1);
-#endif
-	if (ba0_addr == 0 || ba0_addr == ~0
-#ifdef CS461X_FULL_MAP
-            || ba1_addr == 0 || ba1_addr == ~0
-#endif
-	    ) {
-                printk(KERN_ERR "cs461x: wrong address - ba0 = 0x%lx\n", ba0_addr);
-#ifdef CS461X_FULL_MAP
-                printk(KERN_ERR "cs461x: wrong address - ba1 = 0x%lx\n", ba1_addr);
-#endif
-		cs461x_free(pdev);
-                return -ENOMEM;
-        }
-
-	ba0 = ioremap(ba0_addr, CS461X_BA0_SIZE);
-#ifdef CS461X_FULL_MAP
-	ba1.name.data0 = ioremap(ba1_addr + BA1_SP_DMEM0, CS461X_BA1_DATA0_SIZE);
-	ba1.name.data1 = ioremap(ba1_addr + BA1_SP_DMEM1, CS461X_BA1_DATA1_SIZE);
-	ba1.name.pmem  = ioremap(ba1_addr + BA1_SP_PMEM, CS461X_BA1_PRG_SIZE);
-	ba1.name.reg   = ioremap(ba1_addr + BA1_SP_REG, CS461X_BA1_REG_SIZE);
-
-	if (ba0 == NULL || ba1.name.data0 == NULL ||
-            ba1.name.data1 == NULL || ba1.name.pmem == NULL ||
-            ba1.name.reg == NULL) {
-		cs461x_free(pdev);
-                return -ENOMEM;
-        }
-#else
-	if (ba0 == NULL) {
-		cs461x_free(pdev);
-		return -ENOMEM;
-	}
-#endif
-
-	if (!(port = gameport_allocate_port())) {
-		printk(KERN_ERR "cs461x: Memory allocation failed\n");
-		cs461x_free(pdev);
-		return -ENOMEM;
-	}
-
-	pci_set_drvdata(pdev, port);
-
-	port->open = cs461x_gameport_open;
-	port->trigger = cs461x_gameport_trigger;
-	port->read = cs461x_gameport_read;
-	port->cooked_read = cs461x_gameport_cooked_read;
-
-	gameport_set_name(port, "CS416x");
-	gameport_set_phys(port, "pci%s/gameport0", pci_name(pdev));
-	port->dev.parent = &pdev->dev;
-
-	cs461x_pokeBA0(BA0_JSIO, 0xFF); // ?
-	cs461x_pokeBA0(BA0_JSCTL, JSCTL_SP_MEDIUM_SLOW);
-
-	gameport_register_port(port);
-
-	return 0;
-}
-
-static void __devexit cs461x_pci_remove(struct pci_dev *pdev)
-{
-	cs461x_free(pdev);
-}
-
-static struct pci_driver cs461x_pci_driver = {
-        .name =         "CS461x_gameport",
-        .id_table =     cs461x_pci_tbl,
-        .probe =        cs461x_pci_probe,
-        .remove =       __devexit_p(cs461x_pci_remove),
-};
-
-static int __init cs461x_init(void)
-{
-        return pci_register_driver(&cs461x_pci_driver);
-}
-
-static void __exit cs461x_exit(void)
-{
-        pci_unregister_driver(&cs461x_pci_driver);
-}
-
-module_init(cs461x_init);
-module_exit(cs461x_exit);
-

+ 12 - 19
drivers/input/gameport/gameport.c

@@ -17,11 +17,10 @@
 #include <linux/init.h>
 #include <linux/gameport.h>
 #include <linux/wait.h>
-#include <linux/completion.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
+#include <linux/kthread.h>
 
 /*#include <asm/io.h>*/
 
@@ -238,8 +237,7 @@ struct gameport_event {
 static DEFINE_SPINLOCK(gameport_event_lock);	/* protects gameport_event_list */
 static LIST_HEAD(gameport_event_list);
 static DECLARE_WAIT_QUEUE_HEAD(gameport_wait);
-static DECLARE_COMPLETION(gameport_exited);
-static int gameport_pid;
+static struct task_struct *gameport_task;
 
 static void gameport_queue_event(void *object, struct module *owner,
 			      enum gameport_event_type event_type)
@@ -250,12 +248,12 @@ static void gameport_queue_event(void *object, struct module *owner,
 	spin_lock_irqsave(&gameport_event_lock, flags);
 
 	/*
- 	 * Scan event list for the other events for the same gameport port,
+	 * Scan event list for the other events for the same gameport port,
 	 * starting with the most recent one. If event is the same we
 	 * do not need add new one. If event is of different type we
 	 * need to add this event and should not look further because
 	 * we need to preseve sequence of distinct events.
- 	 */
+	 */
 	list_for_each_entry_reverse(event, &gameport_event_list, node) {
 		if (event->object == object) {
 			if (event->type == event_type)
@@ -432,20 +430,15 @@ static struct gameport *gameport_get_pending_child(struct gameport *parent)
 
 static int gameport_thread(void *nothing)
 {
-	lock_kernel();
-	daemonize("kgameportd");
-	allow_signal(SIGTERM);
-
 	do {
 		gameport_handle_events();
-		wait_event_interruptible(gameport_wait, !list_empty(&gameport_event_list));
+		wait_event_interruptible(gameport_wait,
+			kthread_should_stop() || !list_empty(&gameport_event_list));
 		try_to_freeze();
-	} while (!signal_pending(current));
+	} while (!kthread_should_stop());
 
 	printk(KERN_DEBUG "gameport: kgameportd exiting\n");
-
-	unlock_kernel();
-	complete_and_exit(&gameport_exited, 0);
+	return 0;
 }
 
 
@@ -773,9 +766,10 @@ void gameport_close(struct gameport *gameport)
 
 static int __init gameport_init(void)
 {
-	if (!(gameport_pid = kernel_thread(gameport_thread, NULL, CLONE_KERNEL))) {
+	gameport_task = kthread_run(gameport_thread, NULL, "kgameportd");
+	if (IS_ERR(gameport_task)) {
 		printk(KERN_ERR "gameport: Failed to start kgameportd\n");
-		return -1;
+		return PTR_ERR(gameport_task);
 	}
 
 	gameport_bus.dev_attrs = gameport_device_attrs;
@@ -789,8 +783,7 @@ static int __init gameport_init(void)
 static void __exit gameport_exit(void)
 {
 	bus_unregister(&gameport_bus);
-	kill_proc(gameport_pid, SIGTERM, 1);
-	wait_for_completion(&gameport_exited);
+	kthread_stop(gameport_task);
 }
 
 module_init(gameport_init);
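
The gameport.c hunks replace the kernel_thread()/daemonize()/kill_proc() lifecycle with the kthread API: the worker polls kthread_should_stop() instead of signals, and teardown uses kthread_stop() instead of a completion. A minimal sketch of the same pattern under hypothetical example_* names (not part of this commit):

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/module.h>

static DECLARE_WAIT_QUEUE_HEAD(example_wait);
static struct task_struct *example_task;

static int example_thread(void *unused)
{
	/* Sleep until woken; a real worker would also test its own
	 * work-pending condition alongside kthread_should_stop(). */
	while (!kthread_should_stop())
		wait_event_interruptible(example_wait, kthread_should_stop());
	return 0;
}

static int __init example_init(void)
{
	example_task = kthread_run(example_thread, NULL, "kexampled");
	if (IS_ERR(example_task))
		return PTR_ERR(example_task);	/* a real errno, not a bare -1 */
	return 0;
}

static void __exit example_exit(void)
{
	kthread_stop(example_task);	/* wakes the thread and waits for it to return */
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

kthread_stop() both sets the stop flag and wakes the task, so the wait_event_interruptible() condition becomes true and the loop exits cleanly, which is exactly how the converted gameport_thread() terminates.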

+ 6 - 6
drivers/input/gameport/ns558.c

@@ -258,18 +258,18 @@ static int __init ns558_init(void)
 {
 	int i = 0;
 
+	if (pnp_register_driver(&ns558_pnp_driver) >= 0)
+		pnp_registered = 1;
+
 /*
- * Probe ISA ports first so that PnP gets to choose free port addresses
- * not occupied by the ISA ports.
+ * Probe ISA ports after PnP, so that PnP ports that are already
+ * enabled get detected as PnP. This may be suboptimal in multi-device
+ * configurations, but saves hassle with simple setups.
  */
 
 	while (ns558_isa_portlist[i])
 		ns558_isa_probe(ns558_isa_portlist[i++]);
 
-	if (pnp_register_driver(&ns558_pnp_driver) >= 0)
-		pnp_registered = 1;
-
-
 	return (list_empty(&ns558_list) && !pnp_registered) ? -ENODEV : 0;
 }
 

+ 0 - 186
drivers/input/gameport/vortex.c

@@ -1,186 +0,0 @@
-/*
- * $Id: vortex.c,v 1.5 2002/07/01 15:39:30 vojtech Exp $
- *
- *  Copyright (c) 2000-2001 Vojtech Pavlik
- *
- *  Based on the work of:
- *	Raymond Ingles
- */
-
-/*
- * Trident 4DWave and Aureal Vortex gameport driver for Linux
- */
-
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
- */
-
-#include <asm/io.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/ioport.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/gameport.h>
-
-MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>");
-MODULE_DESCRIPTION("Aureal Vortex and Vortex2 gameport driver");
-MODULE_LICENSE("GPL");
-
-#define VORTEX_GCR		0x0c	/* Gameport control register */
-#define VORTEX_LEG		0x08	/* Legacy port location */
-#define VORTEX_AXD		0x10	/* Axes start */
-#define VORTEX_DATA_WAIT	20	/* 20 ms */
-
-struct vortex {
-	struct gameport *gameport;
-	struct pci_dev *dev;
-	unsigned char __iomem *base;
-	unsigned char __iomem *io;
-};
-
-static unsigned char vortex_read(struct gameport *gameport)
-{
-	struct vortex *vortex = gameport->port_data;
-	return readb(vortex->io + VORTEX_LEG);
-}
-
-static void vortex_trigger(struct gameport *gameport)
-{
-	struct vortex *vortex = gameport->port_data;
-	writeb(0xff, vortex->io + VORTEX_LEG);
-}
-
-static int vortex_cooked_read(struct gameport *gameport, int *axes, int *buttons)
-{
-	struct vortex *vortex = gameport->port_data;
-	int i;
-
-	*buttons = (~readb(vortex->base + VORTEX_LEG) >> 4) & 0xf;
-
-	for (i = 0; i < 4; i++) {
-		axes[i] = readw(vortex->io + VORTEX_AXD + i * sizeof(u32));
-		if (axes[i] == 0x1fff) axes[i] = -1;
-	}
-
-        return 0;
-}
-
-static int vortex_open(struct gameport *gameport, int mode)
-{
-	struct vortex *vortex = gameport->port_data;
-
-	switch (mode) {
-		case GAMEPORT_MODE_COOKED:
-			writeb(0x40, vortex->io + VORTEX_GCR);
-			msleep(VORTEX_DATA_WAIT);
-			return 0;
-		case GAMEPORT_MODE_RAW:
-			writeb(0x00, vortex->io + VORTEX_GCR);
-			return 0;
-		default:
-			return -1;
-	}
-
-	return 0;
-}
-
-static int __devinit vortex_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	struct vortex *vortex;
-	struct gameport *port;
-	int i;
-
-	vortex = kcalloc(1, sizeof(struct vortex), GFP_KERNEL);
-	port = gameport_allocate_port();
-	if (!vortex || !port) {
-		printk(KERN_ERR "vortex: Memory allocation failed.\n");
-		kfree(vortex);
-		gameport_free_port(port);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < 6; i++)
-		if (~pci_resource_flags(dev, i) & IORESOURCE_IO)
-			break;
-
-	pci_enable_device(dev);
-
-	vortex->dev = dev;
-	vortex->gameport = port;
-	vortex->base = ioremap(pci_resource_start(vortex->dev, i),
-				pci_resource_len(vortex->dev, i));
-	vortex->io = vortex->base + id->driver_data;
-
-	pci_set_drvdata(dev, vortex);
-
-	port->port_data = vortex;
-	port->fuzz = 64;
-
-	gameport_set_name(port, "AU88x0");
-	gameport_set_phys(port, "pci%s/gameport0", pci_name(dev));
-	port->dev.parent = &dev->dev;
-	port->read = vortex_read;
-	port->trigger = vortex_trigger;
-	port->cooked_read = vortex_cooked_read;
-	port->open = vortex_open;
-
-	gameport_register_port(port);
-
-	return 0;
-}
-
-static void __devexit vortex_remove(struct pci_dev *dev)
-{
-	struct vortex *vortex = pci_get_drvdata(dev);
-
-	gameport_unregister_port(vortex->gameport);
-	iounmap(vortex->base);
-	kfree(vortex);
-}
-
-static struct pci_device_id vortex_id_table[] = {
-	{ 0x12eb, 0x0001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0x11000 },
-	{ 0x12eb, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0x28800 },
-	{ 0 }
-};
-
-static struct pci_driver vortex_driver = {
-	.name =		"vortex_gameport",
-	.id_table =	vortex_id_table,
-	.probe =	vortex_probe,
-	.remove =	__devexit_p(vortex_remove),
-};
-
-static int __init vortex_init(void)
-{
-	return pci_register_driver(&vortex_driver);
-}
-
-static void __exit vortex_exit(void)
-{
-	pci_unregister_driver(&vortex_driver);
-}
-
-module_init(vortex_init);
-module_exit(vortex_exit);

+ 32 - 5
drivers/input/input.c

@@ -219,10 +219,24 @@ void input_release_device(struct input_handle *handle)
 
 int input_open_device(struct input_handle *handle)
 {
+	struct input_dev *dev = handle->dev;
+	int err;
+
+	err = down_interruptible(&dev->sem);
+	if (err)
+		return err;
+
 	handle->open++;
-	if (handle->dev->open)
-		return handle->dev->open(handle->dev);
-	return 0;
+
+	if (!dev->users++ && dev->open)
+		err = dev->open(dev);
+
+	if (err)
+		handle->open--;
+
+	up(&dev->sem);
+
+	return err;
 }
 
 int input_flush_device(struct input_handle* handle, struct file* file)
@@ -235,10 +249,17 @@ int input_flush_device(struct input_handle* handle, struct file* file)
 
 void input_close_device(struct input_handle *handle)
 {
+	struct input_dev *dev = handle->dev;
+
 	input_release_device(handle);
-	if (handle->dev->close)
-		handle->dev->close(handle->dev);
+
+	down(&dev->sem);
+
+	if (!--dev->users && dev->close)
+		dev->close(dev);
 	handle->open--;
+
+	up(&dev->sem);
 }
 
 static void input_link_handle(struct input_handle *handle)
@@ -415,6 +436,8 @@ void input_register_device(struct input_dev *dev)
 
 	set_bit(EV_SYN, dev->evbit);
 
+	init_MUTEX(&dev->sem);
+
 	/*
 	 * If delay and period are pre-set by the driver, then autorepeating
 	 * is handled by the driver itself and we don't do it in input.c.
@@ -674,6 +697,8 @@ static int input_handlers_read(char *buf, char **start, off_t pos, int count, in
 	return (count > cnt) ? cnt : count;
 }
 
+static struct file_operations input_fileops;
+
 static int __init input_proc_init(void)
 {
 	struct proc_dir_entry *entry;
@@ -688,6 +713,8 @@ static int __init input_proc_init(void)
 		return -ENOMEM;
 	}
 	entry->owner = THIS_MODULE;
+	input_fileops = *entry->proc_fops;
+	entry->proc_fops = &input_fileops;
 	entry->proc_fops->poll = input_devices_poll;
 	entry = create_proc_read_entry("handlers", 0, proc_bus_input_dir, input_handlers_read, NULL);
 	if (entry == NULL) {
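
The input.c change serializes open/close with a per-device semaphore and adds a dev->users count, so the underlying dev->open()/dev->close() methods run only for the first opener and the last closer. A minimal sketch of that refcounted-open pattern with hypothetical types (the real code lives in input_open_device()/input_close_device()):

#include <asm/semaphore.h>

struct example_dev {
	struct semaphore sem;		/* serializes open and close */
	int users;			/* open handles on this device */
	int (*open)(struct example_dev *);
	void (*close)(struct example_dev *);
};

/* Only the first opener triggers the device's own open() method. */
static int example_open(struct example_dev *dev)
{
	int err;

	err = down_interruptible(&dev->sem);
	if (err)
		return err;

	if (!dev->users++ && dev->open)
		err = dev->open(dev);
	if (err)
		dev->users--;		/* roll back the count on failure */

	up(&dev->sem);
	return err;
}

/* Only the last closer triggers the device's own close() method. */
static void example_close(struct example_dev *dev)
{
	down(&dev->sem);
	if (!--dev->users && dev->close)
		dev->close(dev);
	up(&dev->sem);
}

The semaphore has to be initialized before the first open, which is why the hunk above adds init_MUTEX(&dev->sem) to input_register_device().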

+ 91 - 25
drivers/input/joydev.c

@@ -285,48 +285,33 @@ static unsigned int joydev_poll(struct file *file, poll_table *wait)
 		(POLLIN | POLLRDNORM) : 0) | (list->joydev->exist ? 0 : (POLLHUP | POLLERR));
 }
 
-static int joydev_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+static int joydev_ioctl_common(struct joydev *joydev, unsigned int cmd, void __user *argp)
 {
-	struct joydev_list *list = file->private_data;
-	struct joydev *joydev = list->joydev;
 	struct input_dev *dev = joydev->handle.dev;
-	void __user *argp = (void __user *)arg;
 	int i, j;
 
-	if (!joydev->exist) return -ENODEV;
-
 	switch (cmd) {
 
 		case JS_SET_CAL:
 			return copy_from_user(&joydev->glue.JS_CORR, argp,
-				sizeof(struct JS_DATA_TYPE)) ? -EFAULT : 0;
+				sizeof(joydev->glue.JS_CORR)) ? -EFAULT : 0;
 		case JS_GET_CAL:
 			return copy_to_user(argp, &joydev->glue.JS_CORR,
-				sizeof(struct JS_DATA_TYPE)) ? -EFAULT : 0;
+				sizeof(joydev->glue.JS_CORR)) ? -EFAULT : 0;
 		case JS_SET_TIMEOUT:
-			return get_user(joydev->glue.JS_TIMEOUT, (int __user *) arg);
+			return get_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp);
 		case JS_GET_TIMEOUT:
-			return put_user(joydev->glue.JS_TIMEOUT, (int __user *) arg);
-		case JS_SET_TIMELIMIT:
-			return get_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg);
-		case JS_GET_TIMELIMIT:
-			return put_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg);
-		case JS_SET_ALL:
-			return copy_from_user(&joydev->glue, argp,
-						sizeof(struct JS_DATA_SAVE_TYPE)) ? -EFAULT : 0;
-		case JS_GET_ALL:
-			return copy_to_user(argp, &joydev->glue,
-						sizeof(struct JS_DATA_SAVE_TYPE)) ? -EFAULT : 0;
+			return put_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp);
 
 		case JSIOCGVERSION:
-			return put_user(JS_VERSION, (__u32 __user *) arg);
+			return put_user(JS_VERSION, (__u32 __user *) argp);
 		case JSIOCGAXES:
-			return put_user(joydev->nabs, (__u8 __user *) arg);
+			return put_user(joydev->nabs, (__u8 __user *) argp);
 		case JSIOCGBUTTONS:
-			return put_user(joydev->nkey, (__u8 __user *) arg);
+			return put_user(joydev->nkey, (__u8 __user *) argp);
 		case JSIOCSCORR:
 			if (copy_from_user(joydev->corr, argp,
-				      sizeof(struct js_corr) * joydev->nabs))
+				      sizeof(joydev->corr[0]) * joydev->nabs))
 			    return -EFAULT;
 			for (i = 0; i < joydev->nabs; i++) {
 				j = joydev->abspam[i];
@@ -335,7 +320,7 @@ static int joydev_ioctl(struct inode *inode, struct file *file, unsigned int cmd
 			return 0;
 		case JSIOCGCORR:
 			return copy_to_user(argp, joydev->corr,
-						sizeof(struct js_corr) * joydev->nabs) ? -EFAULT : 0;
+						sizeof(joydev->corr[0]) * joydev->nabs) ? -EFAULT : 0;
 		case JSIOCSAXMAP:
 			if (copy_from_user(joydev->abspam, argp, sizeof(__u8) * (ABS_MAX + 1)))
 				return -EFAULT;
@@ -371,6 +356,84 @@ static int joydev_ioctl(struct inode *inode, struct file *file, unsigned int cmd
 	return -EINVAL;
 }
 
+#ifdef CONFIG_COMPAT
+static long joydev_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct joydev_list *list = file->private_data;
+	struct joydev *joydev = list->joydev;
+	void __user *argp = (void __user *)arg;
+	s32 tmp32;
+	struct JS_DATA_SAVE_TYPE_32 ds32;
+	int err;
+
+	if (!joydev->exist) return -ENODEV;
+	switch(cmd) {
+	case JS_SET_TIMELIMIT:
+		err = get_user(tmp32, (s32 __user *) arg);
+		if (err == 0)
+			joydev->glue.JS_TIMELIMIT = tmp32;
+		break;
+	case JS_GET_TIMELIMIT:
+		tmp32 = joydev->glue.JS_TIMELIMIT;
+		err = put_user(tmp32, (s32 __user *) arg);
+		break;
+
+	case JS_SET_ALL:
+		err = copy_from_user(&ds32, argp,
+				     sizeof(ds32)) ? -EFAULT : 0;
+		if (err == 0) {
+			joydev->glue.JS_TIMEOUT    = ds32.JS_TIMEOUT;
+			joydev->glue.BUSY          = ds32.BUSY;
+			joydev->glue.JS_EXPIRETIME = ds32.JS_EXPIRETIME;
+			joydev->glue.JS_TIMELIMIT  = ds32.JS_TIMELIMIT;
+			joydev->glue.JS_SAVE       = ds32.JS_SAVE;
+			joydev->glue.JS_CORR       = ds32.JS_CORR;
+		}
+		break;
+
+	case JS_GET_ALL:
+		ds32.JS_TIMEOUT    = joydev->glue.JS_TIMEOUT;
+		ds32.BUSY          = joydev->glue.BUSY;
+		ds32.JS_EXPIRETIME = joydev->glue.JS_EXPIRETIME;
+		ds32.JS_TIMELIMIT  = joydev->glue.JS_TIMELIMIT;
+		ds32.JS_SAVE       = joydev->glue.JS_SAVE;
+		ds32.JS_CORR       = joydev->glue.JS_CORR;
+
+		err = copy_to_user(argp, &ds32,
+					  sizeof(ds32)) ? -EFAULT : 0;
+		break;
+
+	default:
+		err = joydev_ioctl_common(joydev, cmd, argp);
+	}
+	return err;
+}
+#endif /* CONFIG_COMPAT */
+
+static int joydev_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct joydev_list *list = file->private_data;
+	struct joydev *joydev = list->joydev;
+	void __user *argp = (void __user *)arg;
+
+	if (!joydev->exist) return -ENODEV;
+
+	switch(cmd) {
+		case JS_SET_TIMELIMIT:
+			return get_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg);
+		case JS_GET_TIMELIMIT:
+			return put_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg);
+		case JS_SET_ALL:
+			return copy_from_user(&joydev->glue, argp,
+						sizeof(joydev->glue)) ? -EFAULT : 0;
+		case JS_GET_ALL:
+			return copy_to_user(argp, &joydev->glue,
+						sizeof(joydev->glue)) ? -EFAULT : 0;
+		default:
+			return joydev_ioctl_common(joydev, cmd, argp);
+	}
+}
+
 static struct file_operations joydev_fops = {
 	.owner =	THIS_MODULE,
 	.read =		joydev_read,
@@ -379,6 +442,9 @@ static struct file_operations joydev_fops = {
 	.open =		joydev_open,
 	.release =	joydev_release,
 	.ioctl =	joydev_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl =	joydev_compat_ioctl,
+#endif
 	.fasync =	joydev_fasync,
 };
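
joydev's compat path cannot simply forward JS_SET_ALL/JS_GET_ALL, because struct JS_DATA_SAVE_TYPE contains long members whose size differs for 32-bit callers; the new joydev_compat_ioctl() therefore copies a fixed-layout mirror (JS_DATA_SAVE_TYPE_32) and assigns each field so it is widened to the native type. A sketch of that per-field translation technique with hypothetical structures, not the driver's actual ones:

#include <linux/types.h>
#include <linux/compat.h>
#include <asm/uaccess.h>

/* Native state: 'long' changes size between 32- and 64-bit kernels. */
struct example_state {
	long timelimit;
	int timeout;
};

/* Fixed-layout mirror of what a 32-bit caller actually passes in. */
struct example_state32 {
	compat_long_t timelimit;
	s32 timeout;
};

/* Copy the 32-bit image in, then assign field by field so each value
 * is widened to the kernel's native representation. */
static int example_set_state32(struct example_state *st, void __user *argp)
{
	struct example_state32 v32;

	if (copy_from_user(&v32, argp, sizeof(v32)))
		return -EFAULT;

	st->timelimit = v32.timelimit;
	st->timeout = v32.timeout;
	return 0;
}

Commands whose payload already has a fixed layout fall through to joydev_ioctl_common(), so the translation cost is paid only where the ABI actually differs.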
 

Some files were not shown because too many files changed in this diff