浏览代码

Merge branch 'merge-fixes' into devel

Russell King 17 年之前
父节点
当前提交
cf816ecb53
共有 100 个文件被更改,包括 3841 次插入3003 次删除
  1. 2 4
      Documentation/00-INDEX
  2. 11 0
      Documentation/ABI/obsolete/o2cb
  3. 10 0
      Documentation/ABI/stable/o2cb
  4. 89 0
      Documentation/ABI/testing/sysfs-ocfs2
  5. 3 2
      Documentation/DocBook/Makefile
  6. 447 0
      Documentation/DocBook/kgdb.tmpl
  7. 335 0
      Documentation/DocBook/mac80211.tmpl
  8. 10 6
      Documentation/debugging-via-ohci1394.txt
  9. 11 38
      Documentation/feature-removal-schedule.txt
  10. 3 3
      Documentation/filesystems/seq_file.txt
  11. 7 8
      Documentation/filesystems/xfs.txt
  12. 28 0
      Documentation/i386/boot.txt
  13. 4 43
      Documentation/ide/ide.txt
  14. 13 0
      Documentation/ide/warm-plug-howto.txt
  15. 35 2
      Documentation/kernel-parameters.txt
  16. 1 1
      Documentation/laptops/acer-wmi.txt
  17. 0 2
      Documentation/networking/00-INDEX
  18. 0 89
      Documentation/networking/bcm43xx.txt
  19. 4 4
      Documentation/networking/can.txt
  20. 0 621
      Documentation/networking/wan-router.txt
  21. 21 0
      Documentation/s390/s390dbf.txt
  22. 11 1
      Documentation/scsi/st.txt
  23. 0 0
      Documentation/timers/highres.txt
  24. 0 0
      Documentation/timers/hrtimers.txt
  25. 0 0
      Documentation/timers/timer_stats.txt
  26. 3 4
      Documentation/vm/hugetlbpage.txt
  27. 100 0
      Documentation/x86/pat.txt
  28. 5 0
      Documentation/x86_64/boot-options.txt
  29. 19 19
      MAINTAINERS
  30. 1 1
      Makefile
  31. 1 1
      arch/alpha/kernel/Makefile
  32. 0 9
      arch/alpha/kernel/alpha_ksyms.c
  33. 0 224
      arch/alpha/kernel/semaphore.c
  34. 1 1
      arch/arm/kernel/Makefile
  35. 0 221
      arch/arm/kernel/semaphore.c
  36. 0 1
      arch/avr32/Kconfig
  37. 1 1
      arch/avr32/kernel/Makefile
  38. 0 148
      arch/avr32/kernel/semaphore.c
  39. 0 4
      arch/blackfin/Kconfig
  40. 0 5
      arch/blackfin/kernel/bfin_ksyms.c
  41. 1 2
      arch/cris/kernel/Makefile
  42. 0 7
      arch/cris/kernel/crisksyms.c
  43. 0 129
      arch/cris/kernel/semaphore.c
  44. 1 1
      arch/frv/kernel/Makefile
  45. 0 1
      arch/frv/kernel/frv_ksyms.c
  46. 0 155
      arch/frv/kernel/semaphore.c
  47. 1 1
      arch/frv/kernel/traps.c
  48. 1 1
      arch/h8300/kernel/Makefile
  49. 0 1
      arch/h8300/kernel/h8300_ksyms.c
  50. 0 132
      arch/h8300/kernel/semaphore.c
  51. 14 0
      arch/ia64/Kconfig
  52. 39 17
      arch/ia64/hp/common/sba_iommu.c
  53. 1 1
      arch/ia64/hp/sim/simeth.c
  54. 4 19
      arch/ia64/hp/sim/simscsi.c
  55. 13 1
      arch/ia64/ia32/elfcore32.h
  56. 624 25
      arch/ia64/ia32/sys_ia32.c
  57. 1 1
      arch/ia64/kernel/Makefile
  58. 3 1
      arch/ia64/kernel/acpi.c
  59. 13 0
      arch/ia64/kernel/asm-offsets.c
  60. 36 20
      arch/ia64/kernel/crash.c
  61. 46 0
      arch/ia64/kernel/efi.c
  62. 65 0
      arch/ia64/kernel/entry.S
  63. 68 20
      arch/ia64/kernel/fsys.S
  64. 20 0
      arch/ia64/kernel/head.S
  65. 0 6
      arch/ia64/kernel/ia64_ksyms.c
  66. 1 1
      arch/ia64/kernel/irq_ia64.c
  67. 69 0
      arch/ia64/kernel/ivt.S
  68. 107 26
      arch/ia64/kernel/kprobes.c
  69. 54 6
      arch/ia64/kernel/mca.c
  70. 5 0
      arch/ia64/kernel/mca_asm.S
  71. 14 0
      arch/ia64/kernel/minstate.h
  72. 1 1
      arch/ia64/kernel/numa.c
  73. 4 4
      arch/ia64/kernel/patch.c
  74. 2 2
      arch/ia64/kernel/perfmon.c
  75. 0 30
      arch/ia64/kernel/process.c
  76. 895 322
      arch/ia64/kernel/ptrace.c
  77. 0 165
      arch/ia64/kernel/semaphore.c
  78. 29 2
      arch/ia64/kernel/setup.c
  79. 82 0
      arch/ia64/kernel/smp.c
  80. 1 1
      arch/ia64/kernel/smpboot.c
  81. 78 0
      arch/ia64/kernel/time.c
  82. 2 1
      arch/ia64/kernel/unaligned.c
  83. 1 3
      arch/ia64/mm/contig.c
  84. 6 11
      arch/ia64/mm/discontig.c
  85. 3 11
      arch/ia64/mm/init.c
  86. 3 1
      arch/ia64/mm/numa.c
  87. 342 15
      arch/ia64/mm/tlb.c
  88. 6 1
      arch/ia64/pci/pci.c
  89. 4 4
      arch/ia64/sn/kernel/xpc_main.c
  90. 1 1
      arch/ia64/sn/kernel/xpc_partition.c
  91. 1 1
      arch/m32r/kernel/Makefile
  92. 0 5
      arch/m32r/kernel/m32r_ksyms.c
  93. 0 185
      arch/m32r/kernel/semaphore.c
  94. 1 1
      arch/m68k/kernel/Makefile
  95. 0 6
      arch/m68k/kernel/m68k_ksyms.c
  96. 0 132
      arch/m68k/kernel/semaphore.c
  97. 1 1
      arch/m68k/lib/Makefile
  98. 0 53
      arch/m68k/lib/semaphore.S
  99. 1 1
      arch/m68knommu/kernel/Makefile
  100. 0 6
      arch/m68knommu/kernel/m68k_ksyms.c

+ 2 - 4
Documentation/00-INDEX

@@ -167,10 +167,8 @@ highuid.txt
 	- notes on the change from 16 bit to 32 bit user/group IDs.
 hpet.txt
 	- High Precision Event Timer Driver for Linux.
-hrtimer/
-	- info on the timer_stats debugging facility for timer (ab)use.
-hrtimers/
-	- info on the hrtimers subsystem for high-resolution kernel timers.
+timers/
+	- info on the timer related topics
 hw_random.txt
 	- info on Linux support for random number generator in i8xx chipsets.
 hwmon/

+ 11 - 0
Documentation/ABI/obsolete/o2cb

@@ -0,0 +1,11 @@
+What:		/sys/o2cb symlink
+Date:		Dec 2005
+KernelVersion:	2.6.16
+Contact:	ocfs2-devel@oss.oracle.com
+Description:	This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink will
+		be removed when new versions of ocfs2-tools which know to look
+		in /sys/fs/o2cb are sufficiently prevalent. Don't code new
+		software to look here, it should try /sys/fs/o2cb instead.
+		See Documentation/ABI/stable/o2cb for more information on usage.
+Users:		ocfs2-tools. It's sufficient to mail proposed changes to
+		ocfs2-devel@oss.oracle.com.

+ 10 - 0
Documentation/ABI/stable/o2cb

@@ -0,0 +1,10 @@
+What:		/sys/fs/o2cb/ (was /sys/o2cb)
+Date:		Dec 2005
+KernelVersion:	2.6.16
+Contact:	ocfs2-devel@oss.oracle.com
+Description:	Ocfs2-tools looks at 'interface-revision' for versioning
+		information. Each logmask/ file controls a set of debug prints
+		and can be written into with the strings "allow", "deny", or
+		"off". Reading the file returns the current state.
+Users:		ocfs2-tools. It's sufficient to mail proposed changes to
+		ocfs2-devel@oss.oracle.com.

+ 89 - 0
Documentation/ABI/testing/sysfs-ocfs2

@@ -0,0 +1,89 @@
+What:		/sys/fs/ocfs2/
+Date:		April 2008
+Contact:	ocfs2-devel@oss.oracle.com
+Description:
+		The /sys/fs/ocfs2 directory contains knobs used by the
+		ocfs2-tools to interact with the filesystem.
+
+What:		/sys/fs/ocfs2/max_locking_protocol
+Date:		April 2008
+Contact:	ocfs2-devel@oss.oracle.com
+Description:
+		The /sys/fs/ocfs2/max_locking_protocol file displays version
+		of ocfs2 locking supported by the filesystem.  This version
+		covers how ocfs2 uses distributed locking between cluster
+		nodes.
+
+		The protocol version has a major and minor number.  Two
+		cluster nodes can interoperate if they have an identical
+		major number and an overlapping minor number - thus,
+		a node with version 1.10 can interoperate with a node
+		sporting version 1.8, as long as both use the 1.8 protocol.
+
+		Reading from this file returns a single line, the major
+		number and minor number joined by a period, eg "1.10".
+
+		This file is read-only.  The value is compiled into the
+		driver.
+
+What:		/sys/fs/ocfs2/loaded_cluster_plugins
+Date:		April 2008
+Contact:	ocfs2-devel@oss.oracle.com
+Description:
+		The /sys/fs/ocfs2/loaded_cluster_plugins file describes
+		the available plugins to support ocfs2 cluster operation.
+		A cluster plugin is required to use ocfs2 in a cluster.
+		There are currently two available plugins:
+
+		* 'o2cb' - The classic o2cb cluster stack that ocfs2 has
+			used since its inception.
+		* 'user' - A plugin supporting userspace cluster software
+			in conjunction with fs/dlm.
+
+		Reading from this file returns the names of all loaded
+		plugins, one per line.
+
+		This file is read-only.  Its contents may change as
+		plugins are loaded or removed.
+
+What:		/sys/fs/ocfs2/active_cluster_plugin
+Date:		April 2008
+Contact:	ocfs2-devel@oss.oracle.com
+Description:
+		The /sys/fs/ocfs2/active_cluster_plugin displays which
+		cluster plugin is currently in use by the filesystem.
+		The active plugin will appear in the loaded_cluster_plugins
+		file as well.  Only one plugin can be used at a time.
+
+		Reading from this file returns the name of the active plugin
+		on a single line.
+
+		This file is read-only.  Which plugin is active depends on
+		the cluster stack in use.  The contents may change
+		when all filesystems are unmounted and the cluster stack
+		is changed.
+
+What:		/sys/fs/ocfs2/cluster_stack
+Date:		April 2008
+Contact:	ocfs2-devel@oss.oracle.com
+Description:
+		The /sys/fs/ocfs2/cluster_stack file contains the name
+		of current ocfs2 cluster stack.  This value is set by
+		userspace tools when bringing the cluster stack online.
+
+		Cluster stack names are 4 characters in length.
+
+		When the 'o2cb' cluster stack is used, the 'o2cb' cluster
+		plugin is active.  All other cluster stacks use the 'user'
+		cluster plugin.
+
+		Reading from this file returns the name of the current
+		cluster stack on a single line.
+
+		Writing a new stack name to this file changes the current
+		cluster stack unless there are mounted ocfs2 filesystems.
+		If there are mounted filesystems, attempts to change the
+		stack return an error.
+
+Users:
+	ocfs2-tools <ocfs2-tools-devel@oss.oracle.com>

+ 3 - 2
Documentation/DocBook/Makefile

@@ -9,9 +9,10 @@
 DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
 	    kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
 	    procfs-guide.xml writing_usb_driver.xml networking.xml \
-	    kernel-api.xml filesystems.xml lsm.xml usb.xml \
+	    kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
 	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
-	    genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml
+	    genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
+	    mac80211.xml
 
 ###
 # The build process is as follows (targets):

+ 447 - 0
Documentation/DocBook/kgdb.tmpl

@@ -0,0 +1,447 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="kgdbOnLinux">
+ <bookinfo>
+  <title>Using kgdb and the kgdb Internals</title>
+
+  <authorgroup>
+   <author>
+    <firstname>Jason</firstname>
+    <surname>Wessel</surname>
+    <affiliation>
+     <address>
+      <email>jason.wessel@windriver.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <authorgroup>
+   <author>
+    <firstname>Tom</firstname>
+    <surname>Rini</surname>
+    <affiliation>
+     <address>
+      <email>trini@kernel.crashing.org</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <authorgroup>
+   <author>
+    <firstname>Amit S.</firstname>
+    <surname>Kale</surname>
+    <affiliation>
+     <address>
+      <email>amitkale@linsyssoft.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2008</year>
+   <holder>Wind River Systems, Inc.</holder>
+  </copyright>
+  <copyright>
+   <year>2004-2005</year>
+   <holder>MontaVista Software, Inc.</holder>
+  </copyright>
+  <copyright>
+   <year>2004</year>
+   <holder>Amit S. Kale</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+   This file is licensed under the terms of the GNU General Public License
+   version 2. This program is licensed "as is" without any warranty of any
+   kind, whether express or implied.
+   </para>
+
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+  <chapter id="Introduction">
+    <title>Introduction</title>
+    <para>
+    kgdb is a source level debugger for linux kernel. It is used along
+    with gdb to debug a linux kernel.  The expectation is that gdb can
+    be used to "break in" to the kernel to inspect memory, variables
+    and look through a cal stack information similar to what an
+    application developer would use gdb for.  It is possible to place
+    breakpoints in kernel code and perform some limited execution
+    stepping.
+    </para>
+    <para>
+    Two machines are required for using kgdb. One of these machines is a
+    development machine and the other is a test machine.  The kernel
+    to be debugged runs on the test machine. The development machine
+    runs an instance of gdb against the vmlinux file which contains
+    the symbols (not boot image such as bzImage, zImage, uImage...).
+    In gdb the developer specifies the connection parameters and
+    connects to kgdb.  Depending on which kgdb I/O modules exist in
+    the kernel for a given architecture, it may be possible to debug
+    the test machine's kernel with the development machine using a
+    rs232 or ethernet connection.
+    </para>
+  </chapter>
+  <chapter id="CompilingAKernel">
+    <title>Compiling a kernel</title>
+    <para>
+    To enable <symbol>CONFIG_KGDB</symbol>, look under the "Kernel debugging"
+    and then select "KGDB: kernel debugging with remote gdb".
+    </para>
+    <para>
+    Next you should choose one of more I/O drivers to interconnect debugging
+    host and debugged target.  Early boot debugging requires a KGDB
+    I/O driver that supports early debugging and the driver must be
+    built into the kernel directly. Kgdb I/O driver configuration
+    takes place via kernel or module parameters, see following
+    chapter.
+    </para>
+    <para>
+    The kgdb test compile options are described in the kgdb test suite chapter.
+    </para>
+
+  </chapter>
+  <chapter id="EnableKGDB">
+   <title>Enable kgdb for debugging</title>
+   <para>
+   In order to use kgdb you must activate it by passing configuration
+   information to one of the kgdb I/O drivers.  If you do not pass any
+   configuration information kgdb will not do anything at all.  Kgdb
+   will only actively hook up to the kernel trap hooks if a kgdb I/O
+   driver is loaded and configured.  If you unconfigure a kgdb I/O
+   driver, kgdb will unregister all the kernel hook points.
+   </para>
+   <para>
+   All drivers can be reconfigured at run time, if
+   <symbol>CONFIG_SYSFS</symbol> and <symbol>CONFIG_MODULES</symbol>
+   are enabled, by echo'ing a new config string to
+   <constant>/sys/module/&lt;driver&gt;/parameter/&lt;option&gt;</constant>.
+   The driver can be unconfigured by passing an empty string.  You cannot
+   change the configuration while the debugger is attached.  Make sure
+   to detach the debugger with the <constant>detach</constant> command
+   prior to trying unconfigure a kgdb I/O driver.
+   </para>
+   <sect1 id="kgdbwait">
+   <title>Kernel parameter: kgdbwait</title>
+   <para>
+   The Kernel command line option <constant>kgdbwait</constant> makes
+   kgdb wait for a debugger connection during booting of a kernel.  You
+   can only use this option you compiled a kgdb I/O driver into the
+   kernel and you specified the I/O driver configuration as a kernel
+   command line option.  The kgdbwait parameter should always follow the
+   configuration parameter for the kgdb I/O driver in the kernel
+   command line else the I/O driver will not be configured prior to
+   asking the kernel to use it to wait.
+   </para>
+   <para>
+   The kernel will stop and wait as early as the I/O driver and
+   architecture will allow when you use this option.  If you build the
+   kgdb I/O driver as a kernel module kgdbwait will not do anything.
+   </para>
+   </sect1>
+  <sect1 id="kgdboc">
+  <title>Kernel parameter: kgdboc</title>
+  <para>
+  The kgdboc driver was originally an abbreviation meant to stand for
+  "kgdb over console".  Kgdboc is designed to work with a single
+  serial port. It was meant to cover the circumstance
+  where you wanted to use a serial console as your primary console as
+  well as using it to perform kernel debugging.  Of course you can
+  also use kgdboc without assigning a console to the same port.
+  </para>
+  <sect2 id="UsingKgdboc">
+  <title>Using kgdboc</title>
+  <para>
+  You can configure kgdboc via sysfs or a module or kernel boot line
+  parameter depending on if you build with CONFIG_KGDBOC as a module
+  or built-in.
+  <orderedlist>
+  <listitem><para>From the module load or build-in</para>
+  <para><constant>kgdboc=&lt;tty-device&gt;,[baud]</constant></para>
+  <para>
+  The example here would be if your console port was typically ttyS0, you would use something like <constant>kgdboc=ttyS0,115200</constant> or on the ARM Versatile AB you would likely use <constant>kgdboc=ttyAMA0,115200</constant>
+  </para>
+  </listitem>
+  <listitem><para>From sysfs</para>
+  <para><constant>echo ttyS0 &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para>
+  </listitem>
+  </orderedlist>
+  </para>
+  <para>
+  NOTE: Kgdboc does not support interrupting the target via the
+  gdb remote protocol.  You must manually send a sysrq-g unless you
+  have a proxy that splits console output to a terminal problem and
+  has a separate port for the debugger to connect to that sends the
+  sysrq-g for you.
+  </para>
+  <para>When using kgdboc with no debugger proxy, you can end up
+  connecting the debugger for one of two entry points.  If an
+  exception occurs after you have loaded kgdboc a message should print
+  on the console stating it is waiting for the debugger.  In case you
+  disconnect your terminal program and then connect the debugger in
+  its place.  If you want to interrupt the target system and forcibly
+  enter a debug session you have to issue a Sysrq sequence and then
+  type the letter <constant>g</constant>.  Then you disconnect the
+  terminal session and connect gdb.  Your options if you don't like
+  this are to hack gdb to send the sysrq-g for you as well as on the
+  initial connect, or to use a debugger proxy that allows an
+  unmodified gdb to do the debugging.
+  </para>
+  </sect2>
+  </sect1>
+  <sect1 id="kgdbcon">
+  <title>Kernel parameter: kgdbcon</title>
+  <para>
+  Kgdb supports using the gdb serial protocol to send console messages
+  to the debugger when the debugger is connected and running.  There
+  are two ways to activate this feature.
+  <orderedlist>
+  <listitem><para>Activate with the kernel command line option:</para>
+  <para><constant>kgdbcon</constant></para>
+  </listitem>
+  <listitem><para>Use sysfs before configuring an io driver</para>
+  <para>
+  <constant>echo 1 &gt; /sys/module/kgdb/parameters/kgdb_use_con</constant>
+  </para>
+  <para>
+  NOTE: If you do this after you configure the kgdb I/O driver, the
+  setting will not take effect until the next point the I/O is
+  reconfigured.
+  </para>
+  </listitem>
+  </orderedlist>
+  </para>
+  <para>
+  IMPORTANT NOTE: Using this option with kgdb over the console
+  (kgdboc) or kgdb over ethernet (kgdboe) is not supported.
+  </para>
+  </sect1>
+  </chapter>
+  <chapter id="ConnectingGDB">
+  <title>Connecting gdb</title>
+    <para>
+    If you are using kgdboc, you need to have used kgdbwait as a boot
+    argument, issued a sysrq-g, or the system you are going to debug
+    has already taken an exception and is waiting for the debugger to
+    attach before you can connect gdb.
+    </para>
+    <para>
+    If you are not using different kgdb I/O driver other than kgdboc,
+    you should be able to connect and the target will automatically
+    respond.
+    </para>
+    <para>
+    Example (using a serial port):
+    </para>
+    <programlisting>
+    % gdb ./vmlinux
+    (gdb) set remotebaud 115200
+    (gdb) target remote /dev/ttyS0
+    </programlisting>
+    <para>
+    Example (kgdb to a terminal server):
+    </para>
+    <programlisting>
+    % gdb ./vmlinux
+    (gdb) target remote udp:192.168.2.2:6443
+    </programlisting>
+    <para>
+    Example (kgdb over ethernet):
+    </para>
+    <programlisting>
+    % gdb ./vmlinux
+    (gdb) target remote udp:192.168.2.2:6443
+    </programlisting>
+    <para>
+    Once connected, you can debug a kernel the way you would debug an
+    application program.
+    </para>
+    <para>
+    If you are having problems connecting or something is going
+    seriously wrong while debugging, it will most often be the case
+    that you want to enable gdb to be verbose about its target
+    communications.  You do this prior to issuing the <constant>target
+    remote</constant> command by typing in: <constant>set remote debug 1</constant>
+    </para>
+  </chapter>
+  <chapter id="KGDBTestSuite">
+    <title>kgdb Test Suite</title>
+    <para>
+    When kgdb is enabled in the kernel config you can also elect to
+    enable the config parameter KGDB_TESTS.  Turning this on will
+    enable a special kgdb I/O module which is designed to test the
+    kgdb internal functions.
+    </para>
+    <para>
+    The kgdb tests are mainly intended for developers to test the kgdb
+    internals as well as a tool for developing a new kgdb architecture
+    specific implementation.  These tests are not really for end users
+    of the Linux kernel.  The primary source of documentation would be
+    to look in the drivers/misc/kgdbts.c file.
+    </para>
+    <para>
+    The kgdb test suite can also be configured at compile time to run
+    the core set of tests by setting the kernel config parameter
+    KGDB_TESTS_ON_BOOT.  This particular option is aimed at automated
+    regression testing and does not require modifying the kernel boot
+    config arguments.  If this is turned on, the kgdb test suite can
+    be disabled by specifying "kgdbts=" as a kernel boot argument.
+    </para>
+  </chapter>
+  <chapter id="CommonBackEndReq">
+  <title>KGDB Internals</title>
+  <sect1 id="kgdbArchitecture">
+    <title>Architecture Specifics</title>
+      <para>
+      Kgdb is organized into three basic components:
+      <orderedlist>
+      <listitem><para>kgdb core</para>
+      <para>
+      The kgdb core is found in kernel/kgdb.c.  It contains:
+      <itemizedlist>
+      <listitem><para>All the logic to implement the gdb serial protocol</para></listitem>
+      <listitem><para>A generic OS exception handler which includes sync'ing the processors into a stopped state on an multi cpu system.</para></listitem>
+      <listitem><para>The API to talk to the kgdb I/O drivers</para></listitem>
+      <listitem><para>The API to make calls to the arch specific kgdb implementation</para></listitem>
+      <listitem><para>The logic to perform safe memory reads and writes to memory while using the debugger</para></listitem>
+      <listitem><para>A full implementation for software breakpoints unless overridden by the arch</para></listitem>
+      </itemizedlist>
+      </para>
+      </listitem>
+      <listitem><para>kgdb arch specific implementation</para>
+      <para>
+      This implementation is generally found in arch/*/kernel/kgdb.c.
+      As an example, arch/x86/kernel/kgdb.c contains the specifics to
+      implement HW breakpoint as well as the initialization to
+      dynamically register and unregister for the trap handlers on
+      this architecture.  The arch specific portion implements:
+      <itemizedlist>
+      <listitem><para>contains an arch specific trap catcher which
+      invokes kgdb_handle_exception() to start kgdb about doing its
+      work</para></listitem>
+      <listitem><para>translation to and from gdb specific packet format to pt_regs</para></listitem>
+      <listitem><para>Registration and unregistration of architecture specific trap hooks</para></listitem>
+      <listitem><para>Any special exception handling and cleanup</para></listitem>
+      <listitem><para>NMI exception handling and cleanup</para></listitem>
+      <listitem><para>(optional)HW breakpoints</para></listitem>
+      </itemizedlist>
+      </para>
+      </listitem>
+      <listitem><para>kgdb I/O driver</para>
+      <para>
+      Each kgdb I/O driver has to provide an implemenation for the following:
+      <itemizedlist>
+      <listitem><para>configuration via builtin or module</para></listitem>
+      <listitem><para>dynamic configuration and kgdb hook registration calls</para></listitem>
+      <listitem><para>read and write character interface</para></listitem>
+      <listitem><para>A cleanup handler for unconfiguring from the kgdb core</para></listitem>
+      <listitem><para>(optional) Early debug methodology</para></listitem>
+      </itemizedlist>
+      Any given kgdb I/O driver has to operate very closely with the
+      hardware and must do it in such a way that does not enable
+      interrupts or change other parts of the system context without
+      completely restoring them. The kgdb core will repeatedly "poll"
+      a kgdb I/O driver for characters when it needs input.  The I/O
+      driver is expected to return immediately if there is no data
+      available.  Doing so allows for the future possibility to touch
+      watch dog hardware in such a way as to have a target system not
+      reset when these are enabled.
+      </para>
+      </listitem>
+      </orderedlist>
+      </para>
+      <para>
+      If you are intent on adding kgdb architecture specific support
+      for a new architecture, the architecture should define
+      <constant>HAVE_ARCH_KGDB</constant> in the architecture specific
+      Kconfig file.  This will enable kgdb for the architecture, and
+      at that point you must create an architecture specific kgdb
+      implementation.
+      </para>
+      <para>
+      There are a few flags which must be set on every architecture in
+      their &lt;asm/kgdb.h&gt; file.  These are:
+      <itemizedlist>
+        <listitem>
+	  <para>
+	  NUMREGBYTES: The size in bytes of all of the registers, so
+	  that we can ensure they will all fit into a packet.
+	  </para>
+	  <para>
+	  BUFMAX: The size in bytes of the buffer GDB will read into.
+	  This must be larger than NUMREGBYTES.
+	  </para>
+	  <para>
+	  CACHE_FLUSH_IS_SAFE: Set to 1 if it is always safe to call
+	  flush_cache_range or flush_icache_range.  On some architectures,
+	  these functions may not be safe to call on SMP since we keep other
+	  CPUs in a holding pattern.
+	  </para>
+	</listitem>
+      </itemizedlist>
+      </para>
+      <para>
+      There are also the following functions for the common backend,
+      found in kernel/kgdb.c, that must be supplied by the
+      architecture-specific backend unless marked as (optional), in
+      which case a default function maybe used if the architecture
+      does not need to provide a specific implementation.
+      </para>
+!Iinclude/linux/kgdb.h
+  </sect1>
+  <sect1 id="kgdbocDesign">
+  <title>kgdboc internals</title>
+  <para>
+  The kgdboc driver is actually a very thin driver that relies on the
+  underlying low level to the hardware driver having "polling hooks"
+  which the to which the tty driver is attached.  In the initial
+  implementation of kgdboc it the serial_core was changed to expose a
+  low level uart hook for doing polled mode reading and writing of a
+  single character while in an atomic context.  When kgdb makes an I/O
+  request to the debugger, kgdboc invokes a call back in the serial
+  core which in turn uses the call back in the uart driver.  It is
+  certainly possible to extend kgdboc to work with non-uart based
+  consoles in the future.
+  </para>
+  <para>
+  When using kgdboc with a uart, the uart driver must implement two callbacks in the <constant>struct uart_ops</constant>. Example from drivers/8250.c:<programlisting>
+#ifdef CONFIG_CONSOLE_POLL
+	.poll_get_char = serial8250_get_poll_char,
+	.poll_put_char = serial8250_put_poll_char,
+#endif
+  </programlisting>
+  Any implementation specifics around creating a polling driver use the
+  <constant>#ifdef CONFIG_CONSOLE_POLL</constant>, as shown above.
+  Keep in mind that polling hooks have to be implemented in such a way
+  that they can be called from an atomic context and have to restore
+  the state of the uart chip on return such that the system can return
+  to normal when the debugger detaches.  You need to be very careful
+  with any kind of lock you consider, because failing here is most
+  going to mean pressing the reset button.
+  </para>
+  </sect1>
+  </chapter>
+  <chapter id="credits">
+     <title>Credits</title>
+	<para>
+		The following people have contributed to this document:
+		<orderedlist>
+			<listitem><para>Amit Kale<email>amitkale@linsyssoft.com</email></para></listitem>
+			<listitem><para>Tom Rini<email>trini@kernel.crashing.org</email></para></listitem>
+		</orderedlist>
+                In March 2008 this document was completely rewritten by:
+		<itemizedlist>
+		<listitem><para>Jason Wessel<email>jason.wessel@windriver.com</email></para></listitem>
+		</itemizedlist>
+	</para>
+  </chapter>
+</book>
+

+ 335 - 0
Documentation/DocBook/mac80211.tmpl

@@ -0,0 +1,335 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="mac80211-developers-guide">
+  <bookinfo>
+    <title>The mac80211 subsystem for kernel developers</title>
+
+    <authorgroup>
+      <author>
+        <firstname>Johannes</firstname>
+        <surname>Berg</surname>
+        <affiliation>
+          <address><email>johannes@sipsolutions.net</email></address>
+        </affiliation>
+      </author>
+    </authorgroup>
+
+    <copyright>
+      <year>2007</year>
+      <year>2008</year>
+      <holder>Johannes Berg</holder>
+    </copyright>
+
+    <legalnotice>
+      <para>
+        This documentation is free software; you can redistribute
+        it and/or modify it under the terms of the GNU General Public
+        License version 2 as published by the Free Software Foundation.
+      </para>
+
+      <para>
+        This documentation is distributed in the hope that it will be
+        useful, but WITHOUT ANY WARRANTY; without even the implied
+        warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+        See the GNU General Public License for more details.
+      </para>
+
+      <para>
+        You should have received a copy of the GNU General Public
+        License along with this documentation; if not, write to the Free
+        Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+        MA 02111-1307 USA
+      </para>
+
+      <para>
+        For more details see the file COPYING in the source
+        distribution of Linux.
+      </para>
+    </legalnotice>
+
+    <abstract>
+!Pinclude/net/mac80211.h Introduction
+!Pinclude/net/mac80211.h Warning
+    </abstract>
+  </bookinfo>
+
+  <toc></toc>
+
+<!--
+Generally, this document shall be ordered by increasing complexity.
+It is important to note that readers should be able to read only
+the first few sections to get a working driver and only advanced
+usage should require reading the full document.
+-->
+
+  <part>
+    <title>The basic mac80211 driver interface</title>
+    <partintro>
+      <para>
+        You should read and understand the information contained
+        within this part of the book while implementing a driver.
+        In some chapters, advanced usage is noted, that may be
+        skipped at first.
+      </para>
+      <para>
+        This part of the book only covers station and monitor mode
+        functionality, additional information required to implement
+        the other modes is covered in the second part of the book.
+      </para>
+    </partintro>
+
+    <chapter id="basics">
+      <title>Basic hardware handling</title>
+      <para>TBD</para>
+      <para>
+        This chapter shall contain information on getting a hw
+        struct allocated and registered with mac80211.
+      </para>
+      <para>
+        Since it is required to allocate rates/modes before registering
+        a hw struct, this chapter shall also contain information on setting
+        up the rate/mode structs.
+      </para>
+      <para>
+        Additionally, some discussion about the callbacks and
+        the general programming model should be in here, including
+        the definition of ieee80211_ops which will be referred to
+        a lot.
+      </para>
+      <para>
+        Finally, a discussion of hardware capabilities should be done
+        with references to other parts of the book.
+      </para>
+<!-- intentionally multiple !F lines to get proper order -->
+!Finclude/net/mac80211.h ieee80211_hw
+!Finclude/net/mac80211.h ieee80211_hw_flags
+!Finclude/net/mac80211.h SET_IEEE80211_DEV
+!Finclude/net/mac80211.h SET_IEEE80211_PERM_ADDR
+!Finclude/net/mac80211.h ieee80211_ops
+!Finclude/net/mac80211.h ieee80211_alloc_hw
+!Finclude/net/mac80211.h ieee80211_register_hw
+!Finclude/net/mac80211.h ieee80211_get_tx_led_name
+!Finclude/net/mac80211.h ieee80211_get_rx_led_name
+!Finclude/net/mac80211.h ieee80211_get_assoc_led_name
+!Finclude/net/mac80211.h ieee80211_get_radio_led_name
+!Finclude/net/mac80211.h ieee80211_unregister_hw
+!Finclude/net/mac80211.h ieee80211_free_hw
+    </chapter>
+
+    <chapter id="phy-handling">
+      <title>PHY configuration</title>
+      <para>TBD</para>
+      <para>
+        This chapter should describe PHY handling including
+        start/stop callbacks and the various structures used.
+      </para>
+!Finclude/net/mac80211.h ieee80211_conf
+!Finclude/net/mac80211.h ieee80211_conf_flags
+    </chapter>
+
+    <chapter id="iface-handling">
+      <title>Virtual interfaces</title>
+      <para>TBD</para>
+      <para>
+        This chapter should describe virtual interface basics
+        that are relevant to the driver (VLANs, MGMT etc are not.)
+        It should explain the use of the add_iface/remove_iface
+        callbacks as well as the interface configuration callbacks.
+      </para>
+      <para>Things related to AP mode should be discussed there.</para>
+      <para>
+        Things related to supporting multiple interfaces should be
+        in the appropriate chapter, a BIG FAT note should be here about
+        this though and the recommendation to allow only a single
+        interface in STA mode at first!
+      </para>
+!Finclude/net/mac80211.h ieee80211_if_types
+!Finclude/net/mac80211.h ieee80211_if_init_conf
+!Finclude/net/mac80211.h ieee80211_if_conf
+    </chapter>
+
+    <chapter id="rx-tx">
+      <title>Receive and transmit processing</title>
+      <sect1>
+        <title>what should be here</title>
+        <para>TBD</para>
+        <para>
+          This should describe the receive and transmit
+          paths in mac80211/the drivers as well as
+          transmit status handling.
+        </para>
+      </sect1>
+      <sect1>
+        <title>Frame format</title>
+!Pinclude/net/mac80211.h Frame format
+      </sect1>
+      <sect1>
+        <title>Alignment issues</title>
+        <para>TBD</para>
+      </sect1>
+      <sect1>
+        <title>Calling into mac80211 from interrupts</title>
+!Pinclude/net/mac80211.h Calling mac80211 from interrupts
+      </sect1>
+      <sect1>
+        <title>functions/definitions</title>
+!Finclude/net/mac80211.h ieee80211_rx_status
+!Finclude/net/mac80211.h mac80211_rx_flags
+!Finclude/net/mac80211.h ieee80211_tx_control
+!Finclude/net/mac80211.h ieee80211_tx_status_flags
+!Finclude/net/mac80211.h ieee80211_rx
+!Finclude/net/mac80211.h ieee80211_rx_irqsafe
+!Finclude/net/mac80211.h ieee80211_tx_status
+!Finclude/net/mac80211.h ieee80211_tx_status_irqsafe
+!Finclude/net/mac80211.h ieee80211_rts_get
+!Finclude/net/mac80211.h ieee80211_rts_duration
+!Finclude/net/mac80211.h ieee80211_ctstoself_get
+!Finclude/net/mac80211.h ieee80211_ctstoself_duration
+!Finclude/net/mac80211.h ieee80211_generic_frame_duration
+!Finclude/net/mac80211.h ieee80211_get_hdrlen_from_skb
+!Finclude/net/mac80211.h ieee80211_get_hdrlen
+!Finclude/net/mac80211.h ieee80211_wake_queue
+!Finclude/net/mac80211.h ieee80211_stop_queue
+!Finclude/net/mac80211.h ieee80211_start_queues
+!Finclude/net/mac80211.h ieee80211_stop_queues
+!Finclude/net/mac80211.h ieee80211_wake_queues
+      </sect1>
+    </chapter>
+
+    <chapter id="filters">
+      <title>Frame filtering</title>
+!Pinclude/net/mac80211.h Frame filtering
+!Finclude/net/mac80211.h ieee80211_filter_flags
+    </chapter>
+  </part>
+
+  <part id="advanced">
+    <title>Advanced driver interface</title>
+    <partintro>
+      <para>
+       Information contained within this part of the book is
+       of interest only for advanced interaction of mac80211
+       with drivers to exploit more hardware capabilities and
+       improve performance.
+      </para>
+    </partintro>
+
+    <chapter id="hardware-crypto-offload">
+      <title>Hardware crypto acceleration</title>
+!Pinclude/net/mac80211.h Hardware crypto acceleration
+<!-- intentionally multiple !F lines to get proper order -->
+!Finclude/net/mac80211.h set_key_cmd
+!Finclude/net/mac80211.h ieee80211_key_conf
+!Finclude/net/mac80211.h ieee80211_key_alg
+!Finclude/net/mac80211.h ieee80211_key_flags
+    </chapter>
+
+    <chapter id="qos">
+      <title>Multiple queues and QoS support</title>
+      <para>TBD</para>
+!Finclude/net/mac80211.h ieee80211_tx_queue_params
+!Finclude/net/mac80211.h ieee80211_tx_queue_stats_data
+!Finclude/net/mac80211.h ieee80211_tx_queue
+    </chapter>
+
+    <chapter id="AP">
+      <title>Access point mode support</title>
+      <para>TBD</para>
+      <para>Some parts of the if_conf should be discussed here instead</para>
+      <para>
+        Insert notes about VLAN interfaces with hw crypto here or
+        in the hw crypto chapter.
+      </para>
+!Finclude/net/mac80211.h ieee80211_get_buffered_bc
+!Finclude/net/mac80211.h ieee80211_beacon_get
+    </chapter>
+
+    <chapter id="multi-iface">
+      <title>Supporting multiple virtual interfaces</title>
+      <para>TBD</para>
+      <para>
+        Note: WDS with identical MAC address should almost always be OK
+      </para>
+      <para>
+        Insert notes about having multiple virtual interfaces with
+        different MAC addresses here, note which configurations are
+        supported by mac80211, add notes about supporting hw crypto
+        with it.
+      </para>
+    </chapter>
+
+    <chapter id="hardware-scan-offload">
+      <title>Hardware scan offload</title>
+      <para>TBD</para>
+!Finclude/net/mac80211.h ieee80211_scan_completed
+    </chapter>
+  </part>
+
+  <part id="rate-control">
+    <title>Rate control interface</title>
+    <partintro>
+      <para>TBD</para>
+      <para>
+       This part of the book describes the rate control algorithm
+       interface and how it relates to mac80211 and drivers.
+      </para>
+    </partintro>
+    <chapter id="dummy">
+      <title>dummy chapter</title>
+      <para>TBD</para>
+    </chapter>
+  </part>
+
+  <part id="internal">
+    <title>Internals</title>
+    <partintro>
+      <para>TBD</para>
+      <para>
+       This part of the book describes mac80211 internals.
+      </para>
+    </partintro>
+
+    <chapter id="key-handling">
+      <title>Key handling</title>
+      <sect1>
+        <title>Key handling basics</title>
+!Pnet/mac80211/key.c Key handling basics
+      </sect1>
+      <sect1>
+        <title>MORE TBD</title>
+        <para>TBD</para>
+      </sect1>
+    </chapter>
+
+    <chapter id="rx-processing">
+      <title>Receive processing</title>
+      <para>TBD</para>
+    </chapter>
+
+    <chapter id="tx-processing">
+      <title>Transmit processing</title>
+      <para>TBD</para>
+    </chapter>
+
+    <chapter id="sta-info">
+      <title>Station info handling</title>
+      <sect1>
+        <title>Programming information</title>
+!Fnet/mac80211/sta_info.h sta_info
+!Fnet/mac80211/sta_info.h ieee80211_sta_info_flags
+      </sect1>
+      <sect1>
+        <title>STA information lifetime rules</title>
+!Pnet/mac80211/sta_info.c STA information lifetime rules
+      </sect1>
+    </chapter>
+
+    <chapter id="synchronisation">
+      <title>Synchronisation</title>
+      <para>TBD</para>
+      <para>Locking, lots of RCU</para>
+    </chapter>
+  </part>
+</book>

+ 10 - 6
Documentation/debugging-via-ohci1394.txt

@@ -41,15 +41,19 @@ to a working state and enables physical DMA by default for all remote nodes.
 This can be turned off by ohci1394's module parameter phys_dma=0.
 
 The alternative firewire-ohci driver in drivers/firewire uses filtered physical
-DMA, hence is not yet suitable for remote debugging.
+DMA by default, which is more secure but not suitable for remote debugging.
+Compile the driver with CONFIG_FIREWIRE_OHCI_REMOTE_DMA (Kernel hacking menu:
+Remote debugging over FireWire with firewire-ohci) to get unfiltered physical
+DMA.
 
-Because ohci1394 depends on the PCI enumeration to be completed, an
-initialization routine which runs pretty early (long before console_init()
-which makes the printk buffer appear on the console can be called) was written.
+Because ohci1394 and firewire-ohci depend on the PCI enumeration to be
+completed, an initialization routine which runs pretty early has been
+implemented for x86.  This routine runs long before console_init() can be
+called, i.e. before the printk buffer appears on the console.
 
 To activate it, enable CONFIG_PROVIDE_OHCI1394_DMA_INIT (Kernel hacking menu:
-Provide code for enabling DMA over FireWire early on boot) and pass the
-parameter "ohci1394_dma=early" to the recompiled kernel on boot.
+Remote debugging over FireWire early on boot) and pass the parameter
+"ohci1394_dma=early" to the recompiled kernel on boot.
 
 Tools
 -----

+ 11 - 38
Documentation/feature-removal-schedule.txt

@@ -203,16 +203,8 @@ Who:  linuxppc-dev@ozlabs.org
 
 ---------------------------
 
-What:   sk98lin network driver
-When:   Feburary 2008
-Why:    In kernel tree version of driver is unmaintained. Sk98lin driver
-	replaced by the skge driver. 
-Who:    Stephen Hemminger <shemminger@linux-foundation.org>
-
----------------------------
-
 What:	i386/x86_64 bzImage symlinks
-When:	April 2008
+When:	April 2010
 
 Why:	The i386/x86_64 merge provides a symlink to the old bzImage
 	location so not yet updated user space tools, e.g. package
@@ -221,8 +213,6 @@ Who:	Thomas Gleixner <tglx@linutronix.de>
 
 ---------------------------
 
----------------------------
-
 What:	i2c-i810, i2c-prosavage and i2c-savage4
 When:	May 2008
 Why:	These drivers are superseded by i810fb, intelfb and savagefb.
@@ -230,33 +220,6 @@ Who:	Jean Delvare <khali@linux-fr.org>
 
 ---------------------------
 
-What:	bcm43xx wireless network driver
-When:	2.6.26
-Files:	drivers/net/wireless/bcm43xx
-Why:	This driver's functionality has been replaced by the
-	mac80211-based b43 and b43legacy drivers.
-Who:	John W. Linville <linville@tuxdriver.com>
-
----------------------------
-
-What:	ieee80211 softmac wireless networking component
-When:	2.6.26 (or after removal of bcm43xx and port of zd1211rw to mac80211)
-Files:	net/ieee80211/softmac
-Why:	No in-kernel drivers will depend on it any longer.
-Who:	John W. Linville <linville@tuxdriver.com>
-
----------------------------
-
-What:	rc80211-simple rate control algorithm for mac80211
-When:	2.6.26
-Files:	net/mac80211/rc80211-simple.c
-Why:	This algorithm was provided for reference but always exhibited bad
-	responsiveness and performance and has some serious flaws. It has been
-	replaced by rc80211-pid.
-Who:	Stefano Brivio <stefano.brivio@polimi.it>
-
----------------------------
-
 What (Why):
 	- include/linux/netfilter_ipv4/ipt_TOS.h ipt_tos.h header files
 	  (superseded by xt_TOS/xt_tos target & match)
@@ -318,3 +281,13 @@ Why:	Not used in-tree. The current out-of-tree users used it to
 	code / infrastructure should be in the kernel and not in some
 	out-of-tree driver.
 Who:	Thomas Gleixner <tglx@linutronix.de>
+
+---------------------------
+
+What:	/sys/o2cb symlink
+When:	January 2010
+Why:	/sys/fs/o2cb is the proper location for this information - /sys/o2cb
+	exists as a symlink for backwards compatibility for old versions of
+	ocfs2-tools. 2 years should be sufficient time to phase in new versions
+	which know to look in /sys/fs/o2cb.
+Who:	ocfs2-devel@oss.oracle.com

+ 3 - 3
Documentation/filesystems/seq_file.txt

@@ -92,7 +92,7 @@ implementations; in most cases the start() function should check for a
 "past end of file" condition and return NULL if need be.
 
 For more complicated applications, the private field of the seq_file
-structure can be used. There is also a special value whch can be returned
+structure can be used. There is also a special value which can be returned
 by the start() function called SEQ_START_TOKEN; it can be used if you wish
 to instruct your show() function (described below) to print a header at the
 top of the output. SEQ_START_TOKEN should only be used if the offset is
@@ -146,7 +146,7 @@ the four functions we have just defined:
 This structure will be needed to tie our iterator to the /proc file in
 a little bit.
 
-It's worth noting that the interator value returned by start() and
+It's worth noting that the iterator value returned by start() and
 manipulated by the other functions is considered to be completely opaque by
 the seq_file code. It can thus be anything that is useful in stepping
 through the data to be output. Counters can be useful, but it could also be
@@ -262,7 +262,7 @@ routines useful:
 
 These helpers will interpret pos as a position within the list and iterate
 accordingly.  Your start() and next() functions need only invoke the
-seq_list_* helpers with a pointer to the appropriate list_head structure.  
+seq_list_* helpers with a pointer to the appropriate list_head structure.
 
 
 The extra-simple version

+ 7 - 8
Documentation/filesystems/xfs.txt

@@ -52,16 +52,15 @@ When mounting an XFS filesystem, the following options are accepted.
 	and also gets the setgid bit set if it is a directory itself.
 
   ihashsize=value
-	Sets the number of hash buckets available for hashing the
-	in-memory inodes of the specified mount point.  If a value
-	of zero is used, the value selected by the default algorithm
-	will be displayed in /proc/mounts.
+	In memory inode hashes have been removed, so this option has
+	no function as of August 2007. Option is deprecated.
 
   ikeep/noikeep
-	When inode clusters are emptied of inodes, keep them around
-	on the disk (ikeep) - this is the traditional XFS behaviour
-	and is still the default for now.  Using the noikeep option,
-	inode clusters are returned to the free space pool.
+	When ikeep is specified, XFS does not delete empty inode clusters
+	and keeps them around on disk. ikeep is the traditional XFS
+	behaviour. When noikeep is specified, empty inode clusters
+	are returned to the free space pool. The default is noikeep for
+	non-DMAPI mounts, while ikeep is the default when DMAPI is in use.
 
   inode64
 	Indicates that XFS is allowed to create inodes at any location

+ 28 - 0
Documentation/i386/boot.txt

@@ -170,6 +170,8 @@ Offset	Proto	Name		Meaning
 0238/4	2.06+	cmdline_size	Maximum size of the kernel command line
 023C/4	2.07+	hardware_subarch Hardware subarchitecture
 0240/8	2.07+	hardware_subarch_data Subarchitecture-specific data
+0248/4	2.08+	payload_offset	Offset of kernel payload
+024C/4	2.08+	payload_length	Length of kernel payload
 
 (1) For backwards compatibility, if the setup_sects field contains 0, the
     real value is 4.
@@ -512,6 +514,32 @@ Protocol:	2.07+
 
   A pointer to data that is specific to hardware subarch
 
+Field name:	payload_offset
+Type:		read
+Offset/size:	0x248/4
+Protocol:	2.08+
+
+  If non-zero then this field contains the offset from the end of the
+  real-mode code to the payload.
+
+  The payload may be compressed. The format of both the compressed and
+  uncompressed data should be determined using the standard magic
+  numbers. Currently only gzip compressed ELF is used.
+  
+Field name:	payload_length
+Type:		read
+Offset/size:	0x24c/4
+Protocol:	2.08+
+
+  The length of the payload.
+
+**** THE IMAGE CHECKSUM
+
+From boot protocol version 2.08 onwards the CRC-32 is calculated over
+the entire file using the characteristic polynomial 0x04C11DB7 and an
+initial remainder of 0xffffffff.  The checksum is appended to the
+file; therefore the CRC of the file up to the limit specified in the
+syssize field of the header is always 0.
 
 **** THE KERNEL COMMAND LINE
 

+ 4 - 43
Documentation/ide/ide.txt

@@ -71,29 +71,6 @@ This driver automatically probes for most IDE interfaces (including all PCI
 ones), for the drives/geometries attached to those interfaces, and for the IRQ
 lines being used by the interfaces (normally 14, 15 for ide0/ide1).
 
-For special cases, interfaces may be specified using kernel "command line"
-options.  For example,
-
-	ide3=0x168,0x36e,10	/* ioports 0x168-0x16f,0x36e, irq 10 */
-
-Normally the irq number need not be specified, as ide.c will probe for it:
-
-	ide3=0x168,0x36e	/* ioports 0x168-0x16f,0x36e */
-
-The standard port, and irq values are these:
-
-	ide0=0x1f0,0x3f6,14
-	ide1=0x170,0x376,15
-	ide2=0x1e8,0x3ee,11
-	ide3=0x168,0x36e,10
-
-Note that the first parameter reserves 8 contiguous ioports, whereas the
-second value denotes a single ioport. If in doubt, do a 'cat /proc/ioports'.
-
-In all probability the device uses these ports and IRQs if it is attached
-to the appropriate ide channel.  Pass the parameter for the correct ide
-channel to the kernel, as explained above.
-
 Any number of interfaces may share a single IRQ if necessary, at a slight
 performance penalty, whether on separate cards or a single VLB card.
 The IDE driver automatically detects and handles this.  However, this may
@@ -184,13 +161,6 @@ provided it is mounted with the default block size of 1024 (as above).
 Please pass on any feedback on any of this stuff to the maintainer,
 whose address can be found in linux/MAINTAINERS.
 
-Note that if BOTH hd.c and ide.c are configured into the kernel,
-hd.c will normally be allowed to control the primary IDE interface.
-This is useful for older hardware that may be incompatible with ide.c,
-and still allows newer hardware to run on the 2nd/3rd/4th IDE ports
-under control of ide.c.   To have ide.c also "take over" the primary
-IDE port in this situation, use the "command line" parameter:  ide0=0x1f0
-
 The IDE driver is modularized.  The high level disk/CD-ROM/tape/floppy
 drivers can always be compiled as loadable modules, the chipset drivers
 can only be compiled into the kernel, and the core code (ide.c) can be
@@ -206,7 +176,7 @@ When ide.c is used as a module, you can pass command line parameters to the
 driver using the "options=" keyword to insmod, while replacing any ',' with
 ';'.  For example:
 
-	insmod ide.o options="ide0=serialize ide1=serialize ide2=0x1e8;0x3ee;11"
+	insmod ide.o options="hda=nodma hdb=nodma"
 
 
 ================================================================================
@@ -247,21 +217,11 @@ Summary of ide driver parameters for kernel command line
 			  As for VLB, it is safest to not specify it.
 			  Bigger values are safer than smaller ones.
 
- "idex=base"		: probe for an interface at the addr specified,
-			  where "base" is usually 0x1f0 or 0x170
-			  and "ctl" is assumed to be "base"+0x206
-
- "idex=base,ctl"	: specify both base and ctl
-
- "idex=base,ctl,irq"	: specify base, ctl, and irq number
-
  "idex=serialize"	: do not overlap operations on idex. Please note
 			  that you will have to specify this option for
 			  both the respective primary and secondary channel
 			  to take effect.
 
- "idex=four"		: four drives on idex and ide(x^1) share same ports
-
  "idex=reset"		: reset interface after probe
 
  "idex=ata66"		: informs the interface that it has an 80c cable
@@ -269,8 +229,6 @@ Summary of ide driver parameters for kernel command line
 			  ability to bit test for detection is currently
 			  unknown.
 
- "ide=reverse"		: formerly called to pci sub-system, but now local.
-
  "ide=doubler"		: probe/support IDE doublers on Amiga
 
 There may be more options than shown -- use the source, Luke!
@@ -290,6 +248,9 @@ Also for legacy CMD640 host driver (cmd640) you need to use "probe_vlb"
 kernel paremeter to enable probing for VLB version of the chipset (PCI ones
 are detected automatically).
 
+You also need to use "probe" kernel parameter for ide-4drives driver
+(support for IDE generic chipset with four drives on one port).
+
 ================================================================================
 
 Some Terminology

+ 13 - 0
Documentation/ide/warm-plug-howto.txt

@@ -0,0 +1,13 @@
+
+IDE warm-plug HOWTO
+===================
+
+To warm-plug devices on a port 'idex':
+
+# echo -n "1" > /sys/class/ide_port/idex/delete_devices
+
+unplug old device(s) and plug new device(s)
+
+# echo -n "1" > /sys/class/ide_port/idex/scan
+
+done

+ 35 - 2
Documentation/kernel-parameters.txt

@@ -366,6 +366,12 @@ and is between 256 and 4096 characters. It is defined in the file
 			possible to determine what the correct size should be.
 			This option provides an override for these situations.
 
+	security=	[SECURITY] Choose a security module to enable at boot.
+			If this boot parameter is not specified, only the first
+			security module asking for security registration will be
+			loaded. An invalid security module name will be treated
+			as if no module has been chosen.
+
 	capability.disable=
 			[SECURITY] Disable capabilities.  This would normally
 			be used only if an alternative security model is to be
@@ -763,11 +769,11 @@ and is between 256 and 4096 characters. It is defined in the file
 			Format: <io>[,<membase>[,<icn_id>[,<icn_id2>]]]
 
 	ide=		[HW] (E)IDE subsystem
-			Format: ide=nodma or ide=doubler or ide=reverse
+			Format: ide=nodma or ide=doubler
 			See Documentation/ide/ide.txt.
 
 	ide?=		[HW] (E)IDE subsystem
-			Format: ide?=noprobe or chipset specific parameters.
+			Format: ide?=ata66 or chipset specific parameters.
 			See Documentation/ide/ide.txt.
 
 	idebus=		[HW] (E)IDE subsystem - VLB/PCI bus speed
@@ -812,6 +818,19 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	inttest=	[IA64]
 
+	iommu=		[x86]
+		off
+		force
+		noforce
+		biomerge
+		panic
+		nopanic
+		merge
+		nomerge
+		forcesac
+		soft
+
+
 	intel_iommu=	[DMAR] Intel IOMMU driver (DMAR) option
 		off
 			Disable intel iommu driver.
@@ -928,6 +947,11 @@ and is between 256 and 4096 characters. It is defined in the file
 	kstack=N	[X86-32,X86-64] Print N words from the kernel stack
 			in oops dumps.
 
+	kgdboc=		[HW] kgdb over consoles.
+			Requires a tty driver that supports console polling.
+			(only serial suported for now)
+			Format: <serial_device>[,baud]
+
 	l2cr=		[PPC]
 
 	lapic		[X86-32,APIC] Enable the local APIC even if BIOS
@@ -1134,6 +1158,11 @@ and is between 256 and 4096 characters. It is defined in the file
 			         or
 			         memmap=0x10000$0x18690000
 
+	memtest=	[KNL,X86_64] Enable memtest
+			Format: <integer>
+			range: 0,4 : pattern number
+			default : 0 <disable>
+
 	meye.*=		[HW] Set MotionEye Camera parameters
 			See Documentation/video4linux/meye.txt.
 
@@ -1339,6 +1368,10 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	nowb		[ARM]
 
+	nptcg=		[IA64] Override max number of concurrent global TLB
+			purges which is reported from either PAL_VM_SUMMARY or
+			SAL PALO.
+
 	numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
 			one of ['zone', 'node', 'default'] can be specified
 			This can be set from sysctl after boot.

+ 1 - 1
Documentation/laptops/acer-wmi.txt

@@ -80,7 +80,7 @@ once you enable the radio, will depend on your hardware and driver combination.
 e.g. With the BCM4318 on the Acer Aspire 5020 series:
 
 ndiswrapper: Light blinks on when transmitting
-bcm43xx/b43: Solid light, blinks off when transmitting
+b43: Solid light, blinks off when transmitting
 
 Wireless radio control is unconditionally enabled - all Acer laptops that support
 acer-wmi come with built-in wireless. However, should you feel so inclined to

+ 0 - 2
Documentation/networking/00-INDEX

@@ -100,8 +100,6 @@ tuntap.txt
 	- TUN/TAP device driver, allowing user space Rx/Tx of packets.
 vortex.txt
 	- info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards.
-wan-router.txt
-	- WAN router documentation
 wavelan.txt
 	- AT&T GIS (nee NCR) WaveLAN card: An Ethernet-like radio transceiver
 x25.txt

+ 0 - 89
Documentation/networking/bcm43xx.txt

@@ -1,89 +0,0 @@
-
-			BCM43xx Linux Driver Project
-			============================
-
-Introduction
-------------
-
-Many of the wireless devices found in modern notebook computers are
-based on the wireless chips produced by Broadcom. These devices have
-been a problem for Linux users as there is no open-source driver
-available. In addition, Broadcom has not released specifications
-for the device, and driver availability has been limited to the
-binary-only form used in the GPL versions of AP hardware such as the
-Linksys WRT54G, and the Windows and OS X drivers.  Before this project
-began, the only way to use these devices were to use the Windows or
-OS X drivers with either the Linuxant or ndiswrapper modules. There
-is a strong penalty if this method is used as loading the binary-only
-module "taints" the kernel, and no kernel developer will help diagnose
-any kernel problems.
-
-Development
------------
-
-This driver has been developed using
-a clean-room technique that is described at
-http://bcm-specs.sipsolutions.net/ReverseEngineeringProcess. For legal
-reasons, none of the clean-room crew works on the on the Linux driver,
-and none of the Linux developers sees anything but the specifications,
-which are the ultimate product of the reverse-engineering group.
-
-Software
---------
-
-Since the release of the 2.6.17 kernel, the bcm43xx driver has been
-distributed with the kernel source, and is prebuilt in most, if not
-all, distributions.  There is, however, additional software that is
-required. The firmware used by the chip is the intellectual property
-of Broadcom and they have not given the bcm43xx team redistribution
-rights to this firmware.  Since we cannot legally redistribute
-the firmware we cannot include it with the driver. Furthermore, it
-cannot be placed in the downloadable archives of any distributing
-organization; therefore, the user is responsible for obtaining the
-firmware and placing it in the appropriate location so that the driver
-can find it when initializing.
-
-To help with this process, the bcm43xx developers provide a separate
-program named bcm43xx-fwcutter to "cut" the firmware out of a
-Windows or OS X driver and write the extracted files to the proper
-location. This program is usually provided with the distribution;
-however, it may be downloaded from
-
-http://developer.berlios.de/project/showfiles.php?group_id=4547
-
-The firmware is available in two versions. V3 firmware is used with
-the in-kernel bcm43xx driver that uses a software MAC layer called
-SoftMAC, and will have a microcode revision of 0x127 or smaller. The
-V4 firmware is used by an out-of-kernel driver employing a variation of
-the Devicescape MAC layer known as d80211. Once bcm43xx-d80211 reaches
-a satisfactory level of development, it will replace bcm43xx-softmac
-in the kernel as it is much more flexible and powerful.
-
-A source for the latest V3 firmware is
-
-http://downloads.openwrt.org/sources/wl_apsta-3.130.20.0.o
-
-Once this file is downloaded, the command
-'bcm43xx-fwcutter -w <dir> <filename>'
-will extract the microcode and write it to directory
-<dir>. The correct directory will depend on your distribution;
-however, most use '/lib/firmware'. Once this step is completed,
-the bcm3xx driver should load when the system is booted. To see
-any messages relating to the driver, issue the command 'dmesg |
-grep bcm43xx' from a terminal window. If there are any problems,
-please send that output to Bcm43xx-dev@lists.berlios.de.
-
-Although the driver has been in-kernel since 2.6.17, the earliest
-version is quite limited in its capability. Patches that include
-all features of later versions are available for the stable kernel
-versions from 2.6.18. These will be needed if you use a BCM4318,
-or a PCI Express version (BCM4311 and BCM4312). In addition, if you
-have an early BCM4306 and more than 1 GB RAM, your kernel will need
-to be patched.	These patches, which are being updated regularly,
-are available at ftp://lwfinger.dynalias.org/patches. Look for
-combined_2.6.YY.patch. Of course you will need kernel source downloaded
-from kernel.org, or the source from your distribution.
-
-If you build your own kernel, please enable CONFIG_BCM43XX_DEBUG
-and CONFIG_IEEE80211_SOFTMAC_DEBUG. The log information provided is
-essential for solving any problems.

+ 4 - 4
Documentation/networking/can.txt

@@ -281,10 +281,10 @@ solution for a couple of reasons:
             sa_family_t can_family;
             int         can_ifindex;
             union {
-                    struct { canid_t rx_id, tx_id; } tp16;
-                    struct { canid_t rx_id, tx_id; } tp20;
-                    struct { canid_t rx_id, tx_id; } mcnet;
-                    struct { canid_t rx_id, tx_id; } isotp;
+                    /* transport protocol class address info (e.g. ISOTP) */
+                    struct { canid_t rx_id, tx_id; } tp;
+
+                    /* reserved for future CAN protocols address information */
             } can_addr;
     };
 

+ 0 - 621
Documentation/networking/wan-router.txt

@@ -1,621 +0,0 @@
-------------------------------------------------------------------------------
-Linux WAN Router Utilities Package
-------------------------------------------------------------------------------
-Version 2.2.1 
-Mar 28, 2001
-Author: Nenad Corbic <ncorbic@sangoma.com>
-Copyright (c) 1995-2001 Sangoma Technologies Inc.
-------------------------------------------------------------------------------
-
-INTRODUCTION
-
-Wide Area Networks (WANs) are used to interconnect Local Area Networks (LANs)
-and/or stand-alone hosts over vast distances with data transfer rates
-significantly higher than those achievable with commonly used dial-up
-connections.
-
-Usually an external device called `WAN router' sitting on your local network
-or connected to your machine's serial port provides physical connection to
-WAN.  Although router's job may be as simple as taking your local network
-traffic, converting it to WAN format and piping it through the WAN link, these
-devices are notoriously expensive, with prices as much as 2 - 5 times higher
-then the price of a typical PC box.
-
-Alternatively, considering robustness and multitasking capabilities of Linux,
-an internal router can be built (most routers use some sort of stripped down
-Unix-like operating system anyway). With a number of relatively inexpensive WAN
-interface cards available on the market, a perfectly usable router can be
-built for less than half a price of an external router.  Yet a Linux box
-acting as a router can still be used for other purposes, such as fire-walling,
-running FTP, WWW or DNS server, etc.
-
-This kernel module introduces the notion of a WAN Link Driver (WLD) to Linux
-operating system and provides generic hardware-independent services for such
-drivers.  Why can existing Linux network device interface not be used for
-this purpose?  Well, it can.  However, there are a few key differences between
-a typical network interface (e.g. Ethernet) and a WAN link.
-
-Many WAN protocols, such as X.25 and frame relay, allow for multiple logical
-connections (known as `virtual circuits' in X.25 terminology) over a single
-physical link.  Each such virtual circuit may (and almost always does) lead
-to a different geographical location and, therefore, different network.  As a
-result, it is the virtual circuit, not the physical link, that represents a
-route and, therefore, a network interface in Linux terms.
-
-To further complicate things, virtual circuits are usually volatile in nature
-(excluding so called `permanent' virtual circuits or PVCs).  With almost no
-time required to set up and tear down a virtual circuit, it is highly desirable
-to implement on-demand connections in order to minimize network charges.  So
-unlike a typical network driver, the WAN driver must be able to handle multiple
-network interfaces and cope as multiple virtual circuits come into existence
-and go away dynamically.
- 
-Last, but not least, WAN configuration is much more complex than that of say
-Ethernet and may well amount to several dozens of parameters.  Some of them
-are "link-wide"  while others are virtual circuit-specific.  The same holds
-true for WAN statistics which is by far more extensive and extremely useful
-when troubleshooting WAN connections.  Extending the ifconfig utility to suit
-these needs may be possible, but does not seem quite reasonable.  Therefore, a
-WAN configuration utility and corresponding application programmer's interface
-is needed for this purpose.
-
-Most of these problems are taken care of by this module.  Its goal is to
-provide a user with more-or-less standard look and feel for all WAN devices and
-assist a WAN device driver writer by providing common services, such as:
-
- o User-level interface via /proc file system
- o Centralized configuration
- o Device management (setup, shutdown, etc.)
- o Network interface management (dynamic creation/destruction)
- o Protocol encapsulation/decapsulation
-
-To ba able to use the Linux WAN Router you will also need a WAN Tools package
-available from
-
-	ftp.sangoma.com/pub/linux/current_wanpipe/wanpipe-X.Y.Z.tgz
-
-where vX.Y.Z represent the wanpipe version number.
-
-For technical questions and/or comments please e-mail to ncorbic@sangoma.com.
-For general inquiries please contact Sangoma Technologies Inc. by
-
-	Hotline:	1-800-388-2475	(USA and Canada, toll free)
-	Phone:		(905) 474-1990  ext: 106
-	Fax:		(905) 474-9223
-	E-mail:		dm@sangoma.com	(David Mandelstam)
-	WWW:		http://www.sangoma.com
-
-
-INSTALLATION
-
-Please read the WanpipeForLinux.pdf manual on how to 
-install the WANPIPE tools and drivers properly. 
-
-
-After installing wanpipe package: /usr/local/wanrouter/doc. 
-On the ftp.sangoma.com : /linux/current_wanpipe/doc
-
-
-COPYRIGHT AND LICENSING INFORMATION
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; either version 2, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 675 Mass
-Ave, Cambridge, MA 02139, USA.
-
-
-
-ACKNOWLEDGEMENTS
-
-This product is based on the WANPIPE(tm) Multiprotocol WAN Router developed
-by Sangoma Technologies Inc. for Linux 2.0.x and 2.2.x.  Success of the WANPIPE
-together with the next major release of Linux kernel in summer 1996 commanded
-adequate changes to the WANPIPE code to take full advantage of new Linux
-features.
-
-Instead of continuing developing proprietary interface tied to Sangoma WAN
-cards, we decided to separate all hardware-independent code into a separate
-module and defined two levels of interfaces - one for user-level applications
-and another for kernel-level WAN drivers.  WANPIPE is now implemented as a
-WAN driver compliant with the WAN Link Driver interface.  Also a general
-purpose WAN configuration utility and a set of shell scripts was developed to 
-support WAN router at the user level.
-
-Many useful ideas concerning hardware-independent interface implementation
-were given by Mike McLagan <mike.mclagan@linux.org> and his implementation
-of the Frame Relay router and drivers for Sangoma cards (dlci/sdla).
-
-With the new implementation of the APIs being incorporated into the WANPIPE,
-a special thank goes to Alan Cox in providing insight into BSD sockets.
-
-Special thanks to all the WANPIPE users who performed field-testing, reported
-bugs and made valuable comments and suggestions that help us to improve this
-product.
-
-
-
-NEW IN THIS RELEASE
-
-	o Updated the WANCFG utility
-		Calls the pppconfig to configure the PPPD
-		for async connections.
-
-	o Added the PPPCONFIG utility
-		Used to configure the PPPD daemon for the
-		WANPIPE Async PPP and standard serial port.
-		The wancfg calls the pppconfig to configure
-		the pppd.
-
-	o Fixed the PCI autodetect feature.  
-		The SLOT 0 was used as an autodetect option
-		however, some high end PC's slot numbers start
-		from 0. 
-
-	o This release has been tested with the new backupd
-	  daemon release.
-	
-
-PRODUCT COMPONENTS AND RELATED FILES
-
-/etc: (or user defined)
-	wanpipe1.conf	default router configuration file
-
-/lib/modules/X.Y.Z/misc:
-	wanrouter.o	router kernel loadable module
-	af_wanpipe.o	wanpipe api socket module
-
-/lib/modules/X.Y.Z/net:
-	sdladrv.o	Sangoma SDLA support module
-	wanpipe.o	Sangoma WANPIPE(tm) driver module
-
-/proc/net/wanrouter
-	Config		reads current router configuration
-	Status		reads current router status
-	{name}		reads WAN driver statistics
-
-/usr/sbin:
-	wanrouter	wanrouter start-up script
-	wanconfig	wanrouter configuration utility
-	sdladump	WANPIPE adapter memory dump utility
-        fpipemon        Monitor for Frame Relay
-        cpipemon        Monitor for Cisco HDLC
-	ppipemon 	Monitor for PPP
-	xpipemon 	Monitor for X25
-	wpkbdmon        WANPIPE keyboard led monitor/debugger
-
-/usr/local/wanrouter:
-	README		this file
-	COPYING		GNU General Public License
-	Setup		installation script
-	Filelist	distribution definition file
-	wanrouter.rc	meta-configuration file 
-			(used by the Setup and wanrouter script)
-
-/usr/local/wanrouter/doc:
-	wanpipeForLinux.pdf 	WAN Router User's Manual
-
-/usr/local/wanrouter/patches:
-	wanrouter-v2213.gz  	patch for Linux kernels 2.2.11 up to 2.2.13.
-	wanrouter-v2214.gz	patch for Linux kernel 2.2.14. 
-	wanrouter-v2215.gz	patch for Linux kernels 2.2.15 to 2.2.17.
-	wanrouter-v2218.gz	patch for Linux kernels 2.2.18 and up.
-	wanrouter-v240.gz	patch for Linux kernel 2.4.0.  
-	wanrouter-v242.gz	patch for Linux kernel 2.4.2 and up.
-	wanrouter-v2034.gz	patch for Linux kernel 2.0.34
-	wanrouter-v2036.gz 	patch for Linux kernel 2.0.36 and up. 
-
-/usr/local/wanrouter/patches/kdrivers:
-	Sources of the latest WANPIPE device drivers.
-	These are used to UPGRADE the linux kernel to the newest
-	version if the kernel source has already been patched with
-	WANPIPE drivers.
-
-/usr/local/wanrouter/samples:
-	interface	sample interface configuration file
-	wanpipe1.cpri 	CHDLC primary port
-     	wanpipe2.csec 	CHDLC secondary port
-     	wanpipe1.fr   	Frame Relay protocol
-     	wanpipe1.ppp  	PPP protocol ) 
-	wanpipe1.asy	CHDLC ASYNC protocol
-	wanpipe1.x25	X25 protocol
-	wanpipe1.stty	Sync TTY driver (Used by Kernel PPPD daemon)
-	wanpipe1.atty	Async TTY driver (Used by Kernel PPPD daemon)
-	wanrouter.rc	sample meta-configuration file
-
-/usr/local/wanrouter/util:
-	*		wan-tools utilities source code
-
-/usr/local/wanrouter/api/x25:
-	*		x25 api sample programs.
-/usr/local/wanrouter/api/chdlc:
-	*		chdlc api sample programs.
-/usr/local/wanrouter/api/fr:
-	*		fr api sample programs.
-/usr/local/wanrouter/config/wancfg:
-	wancfg		WANPIPE GUI configuration program.
-                        Creates wanpipe#.conf files. 
-/usr/local/wanrouter/config/cfgft1:
-	cfgft1		GUI CSU/DSU configuration program.
-
-/usr/include/linux:
-	wanrouter.h	router API definitions
-	wanpipe.h	WANPIPE API definitions
-	sdladrv.h	SDLA support module API definitions
-	sdlasfm.h	SDLA firmware module definitions
-	if_wanpipe.h	WANPIPE Socket definitions
-	sdlapci.h	WANPIPE PCI definitions
-	
-
-/usr/src/linux/net/wanrouter:
-	*		wanrouter source code
-
-/var/log:
-	wanrouter	wanrouter start-up log (created by the Setup script)
-
-/var/lock:  (or /var/lock/subsys for RedHat)
-	wanrouter	wanrouter lock file (created by the Setup script)
-
-/usr/local/wanrouter/firmware:
-	fr514.sfm	Frame relay firmware for Sangoma S508/S514 card
-	cdual514.sfm	Dual Port Cisco HDLC firmware for Sangoma S508/S514 card
-	ppp514.sfm      PPP Firmware for Sangoma S508 and S514 cards
-	x25_508.sfm	X25 Firmware for Sangoma S508 card.
-
-
-REVISION HISTORY
-
-1.0.0	December 31, 1996	Initial version
-
-1.0.1	January 30, 1997	Status and statistics can be read via /proc
-				filesystem entries.
-
-1.0.2   April 30, 1997          Added UDP management via monitors.
-
-1.0.3	June 3, 1997		UDP management for multiple boards using Frame
-				Relay and PPP
-				Enabled continuous transmission of Configure 
-				Request Packet for PPP (for 508 only)
-				Connection Timeout for PPP changed from 900 to 0
-				Flow Control Problem fixed for Frame Relay
-
-1.0.4	July 10, 1997		S508/FT1 monitoring capability in fpipemon and
-				ppipemon utilities.
-				Configurable TTL for UDP packets.
-				Multicast and Broadcast IP source addresses are
-				silently discarded.
-
-1.0.5	July 28, 1997		Configurable T391,T392,N391,N392,N393 for Frame
-				Relay in router.conf.
-				Configurable Memory Address through router.conf 
-				for Frame Relay, PPP and X.25. (commenting this
- 				out enables auto-detection).
-				Fixed freeing up received buffers using kfree()
- 				for Frame Relay and X.25.
-				Protect sdla_peek() by calling save_flags(),
-				cli() and restore_flags().
-				Changed number of Trace elements from 32 to 20
-				Added DLCI specific data monitoring in FPIPEMON. 
-2.0.0	Nov 07, 1997		Implemented protection of RACE conditions by 
-				critical flags for FRAME RELAY and PPP.
-				DLCI List interrupt mode implemented.
-				IPX support in FRAME RELAY and PPP.
-				IPX Server Support (MARS)
-				More driver specific stats included in FPIPEMON
-				and PIPEMON.
-
-2.0.1	Nov 28, 1997		Bug Fixes for version 2.0.0.
-				Protection of "enable_irq()" while 
-				"disable_irq()" has been enabled from any other
-				routine (for Frame Relay, PPP and X25).
-				Added additional Stats for Fpipemon and Ppipemon
-				Improved Load Sharing for multiple boards
-
-2.0.2	Dec 09, 1997		Support for PAP and CHAP for ppp has been
-				implemented.
-
-2.0.3	Aug 15, 1998		New release supporting Cisco HDLC, CIR for Frame
-				relay, Dynamic IP assignment for PPP and Inverse
-				Arp support for Frame-relay.  Man Pages are 
-				included for better support and a new utility
-				for configuring FT1 cards.
-
-2.0.4	Dec 09, 1998	        Dual Port support for Cisco HDLC.
-				Support for HDLC (LAPB) API.
-				Supports BiSync Streaming code for S502E 
-				and S503 cards.
-				Support for Streaming HDLC API.
-				Provides a BSD socket interface for 
-				creating applications using BiSync
-   				streaming.        
-
-2.0.5   Aug 04, 1999 		CHDLC initialization bug fix.
-				PPP interrupt driven driver: 
-  				Fix to the PPP line hangup problem.
-				New PPP firmware
-				Added comments to the startup SYSTEM ERROR messages
-				Xpipemon debugging application for the X25 protocol
-				New USER_MANUAL.txt
-				Fixed the odd boundary 4byte writes to the board.
-				BiSync Streaming code has been taken out.  
-				 Available as a patch.
-				Streaming HDLC API has been taken out.  
-				 Available as a patch.                 
-
-2.0.6   Aug 17, 1999		Increased debugging in statup scripts
-				Fixed installation bugs from 2.0.5
-				Kernel patch works for both 2.2.10 and 2.2.11 kernels.
-				There is no functional difference between the two packages         
-
-2.0.7   Aug 26, 1999		o  Merged X25API code into WANPIPE.
-				o  Fixed a memory leak for X25API
-				o  Updated the X25API code for 2.2.X kernels.
-				o  Improved NEM handling.   
-
-2.1.0	Oct 25, 1999		o New code for S514 PCI Card
-				o New CHDLC and Frame Relay drivers
-				o PPP and X25 are not supported in this release    
-
-2.1.1	Nov 30, 1999		o PPP support for S514 PCI Cards
-
-2.1.3   Apr 06, 2000		o Socket based x25api 
-				o Socket based chdlc api
-				o Socket based fr api
-				o Dual Port Receive only CHDLC support.
-				o Asynchronous CHDLC support (Secondary Port)
-				o cfgft1 GUI csu/dsu configurator
-				o wancfg GUI configuration file 
-				  configurator.
-				o Architectural directory changes.
-
-beta-2.1.4 Jul 2000		o Dynamic interface configuration:
-					Network interfaces reflect the state
-					of protocol layer.  If the protocol becomes
-					disconnected, driver will bring down
-					the interface.  Once the protocol reconnects
-					the interface will be brought up. 
-					
-					Note: This option is turned off by default.
-
-				o Dynamic wanrouter setup using 'wanconfig':
-					wanconfig utility can be used to
-					shutdown,restart,start or reconfigure 
-					a virtual circuit dynamically.
-				     
-					Frame Relay:  Each DLCI can be: 
-						      created,stopped,restarted and reconfigured
-						      dynamically using wanconfig.
-					
-						      ex: wanconfig card wanpipe1 dev wp1_fr16 up
-				  
-				o Wanrouter startup via command line arguments:
-					wanconfig also supports wanrouter startup via command line
-					arguments.  Thus, there is no need to create a wanpipe#.conf
-					configuration file.  
-
-				o Socket based x25api update/bug fixes.
-					Added support for LCN numbers greater than 255.
-					Option to pass up modem messages.
-					Provided a PCI IRQ check, so a single S514
-					card is guaranteed to have a non-sharing interrupt.
-
-				o Fixes to the wancfg utility.
-				o New FT1 debugging support via *pipemon utilities.
-				o Frame Relay ARP support Enabled.
-
-beta3-2.1.4 Jul 2000		o X25 M_BIT Problem fix.
-				o Added the Multi-Port PPP
-				  Updated utilities for the Multi-Port PPP.
-
-2.1.4	Aut 2000
-				o In X25API:
-					Maximum packet an application can send
-					to the driver has been extended to 4096 bytes.
-
-					Fixed the x25 startup bug. Enable 
-					communications only after all interfaces
-					come up.  HIGH SVC/PVC is used to calculate
-					the number of channels.
-					Enable protocol only after all interfaces
-					are enabled.
-
-				o Added an extra state to the FT1 config, kernel module.
-				o Updated the pipemon debuggers.
-
-				o Blocked the Multi-Port PPP from running on kernels
-				  2.2.16 or greater, due to syncppp kernel module
-				  change. 
-	  
-beta1-2.1.5 	Nov 15 2000
-				o Fixed the MultiPort PPP Support for kernels 2.2.16 and above.
-				  2.2.X kernels only
-
-				o Secured the driver UDP debugging calls
-					- All illegal network debugging calls are reported to
-					  the log.
-					- Defined a set of allowed commands, all other denied.
-					
-				o Cpipemon
-					- Added set FT1 commands to the cpipemon. Thus CSU/DSU
-					  configuration can be performed using cpipemon.
-					  All systems that cannot run cfgft1 GUI utility should
-					  use cpipemon to configure the on board CSU/DSU.
-
-
-				o Keyboard Led Monitor/Debugger
-					- A new utility /usr/sbin/wpkbdmon uses keyboard leds
-					  to convey operational statistic information of the 
-					  Sangoma WANPIPE cards.
-					NUM_LOCK    = Line State  (On=connected,    Off=disconnected)
-					CAPS_LOCK   = Tx data     (On=transmitting, Off=no tx data)
-					SCROLL_LOCK = Rx data     (On=receiving,    Off=no rx data
-					
-				o Hardware probe on module load and dynamic device allocation
-					- During WANPIPE module load, all Sangoma cards are probed
-					  and found information is printed in the /var/log/messages.
-					- If no cards are found, the module load fails.
-					- Appropriate number of devices are dynamically loaded 
-					  based on the number of Sangoma cards found.
-
-					  Note: The kernel configuration option 
-						CONFIG_WANPIPE_CARDS has been taken out.
-					
-				o Fixed the Frame Relay and Chdlc network interfaces so they are
-				  compatible with libpcap libraries.  Meaning, tcpdump, snort,
-				  ethereal, and all other packet sniffers and debuggers work on
-				  all WANPIPE network interfaces.
-					- Set the network interface encoding type to ARPHRD_PPP.
-					  This tell the sniffers that data obtained from the
-					  network interface is in pure IP format.
-				  Fix for 2.2.X kernels only.
-				
-				o True interface encoding option for Frame Relay and CHDLC
-					- The above fix sets the network interface encoding
-					  type to ARPHRD_PPP, however some customers use
-					  the encoding interface type to determine the
-					  protocol running.  Therefore, the TURE ENCODING
-					  option will set the interface type back to the
-					  original value.  
-
-					  NOTE: If this option is used with Frame Relay and CHDLC
-						libpcap library support will be broken.  
-						i.e. tcpdump will not work.
-					Fix for 2.2.x Kernels only.
-						
-				o Ethernet Bridgind over Frame Relay
-					- The Frame Relay bridging has been developed by 
-					  Kristian Hoffmann and Mark Wells.  
-					- The Linux kernel bridge is used to send ethernet 
-					  data over the frame relay links.
-					For 2.2.X Kernels only.
-
-				o Added extensive 2.0.X support. Most new features of
-				  2.1.5 for protocols Frame Relay, PPP and CHDLC are
-				  supported under 2.0.X kernels. 
-
-beta1-2.2.0 	Dec 30 2000
-				o Updated drivers for 2.4.X kernels.
-				o Updated drivers for SMP support.
-				o X25API is now able to share PCI interrupts.
-				o Took out a general polling routine that was used
-				  only by X25API. 
-				o Added appropriate locks to the dynamic reconfiguration
-				  code.
-				o Fixed a bug in the keyboard debug monitor.
-
-beta2-2.2.0	Jan 8 2001
-				o Patches for 2.4.0 kernel
-				o Patches for 2.2.18 kernel
-				o Minor updates to PPP and CHLDC drivers.
-				  Note: No functional difference.
-
-beta3-2.2.9	Jan 10 2001
-				o I missed the 2.2.18 kernel patches in beta2-2.2.0
-				  release.  They are included in this release.
-
-Stable Release
-2.2.0		Feb 01 2001
-				o Bug fix in wancfg GUI configurator.
-					The edit function didn't work properly.
-
-
-bata1-2.2.1	Feb 09 2001
-			o WANPIPE TTY Driver emulation. 
-			  Two modes of operation Sync and Async.
-				Sync: Using the PPPD daemon, kernel SyncPPP layer
-				      and the Wanpipe sync TTY driver: a PPP protocol 
-				      connection can be established via Sangoma adapter, over
-				      a T1 leased line.
-			
-				      The 2.4.0 kernel PPP layer supports MULTILINK
-				      protocol, that can be used to bundle any number of Sangoma
-				      adapters (T1 lines) into one, under a single IP address.
-				      Thus, efficiently obtaining multiple T1 throughput. 
-
-				      NOTE: The remote side must also implement MULTILINK PPP
-					    protocol.
-
-				Async:Using the PPPD daemon, kernel AsyncPPP layer
-				      and the WANPIPE async TTY driver: a PPP protocol
-				      connection can be established via Sangoma adapter and
-				      a modem, over a telephone line.
-
-				      Thus, the WANPIPE async TTY driver simulates a serial
-				      TTY driver that would normally be used to interface the 
-				      MODEM to the linux kernel.
-				
-			o WANPIPE PPP Backup Utility
-				This utility will monitor the state of the PPP T1 line.
-				In case of failure, a dial up connection will be established
-				via pppd daemon, ether via a serial tty driver (serial port), 
-				or a WANPIPE async TTY driver (in case serial port is unavailable).
-				
-				Furthermore, while in dial up mode, the primary PPP T1 link
-				will be monitored for signs of life.  
-
-				If the PPP T1 link comes back to life, the dial up connection
-				will be shutdown and T1 line re-established.
-			
-
-			o New Setup installation script.
-				Option to UPGRADE device drivers if the kernel source has
-				already been patched with WANPIPE.
-
-				Option to COMPILE WANPIPE modules against the currently 
-				running kernel, thus no need for manual kernel and module
-				re-compilation.
-			
-			o Updates and Bug Fixes to wancfg utility.
-
-bata2-2.2.1	Feb 20 2001
-
-			o Bug fixes to the CHDLC device drivers.
-				The driver had compilation problems under kernels
-				2.2.14 or lower.
-
-			o Bug fixes to the Setup installation script.
-				The device drivers compilation options didn't work
-				properly.
-
-			o Update to the wpbackupd daemon.  
-				Optimized the cross-over times, between the primary
-				link and the backup dialup.
-
-beta3-2.2.1	Mar 02 2001
-			o Patches for 2.4.2 kernel.
-
-			o Bug fixes to util/ make files.
-			o Bug fixes to the Setup installation script.
-
-			o Took out the backupd support and made it into
-			  as separate package.
-			  
-beta4-2.2.1     Mar 12 2001
-
-		o Fix to the Frame Relay Device driver.
-			IPSAC sends a packet of zero length
-			header to the frame relay driver.  The
-			driver tries to push its own 2 byte header
-			into the packet, which causes the driver to
-			crash.
-
-		o Fix the WANPIPE re-configuration code.
-			Bug was found by trying to run  the cfgft1 while the
-			interface was already running.  
-
-		o Updates to cfgft1.
-			Writes a wanpipe#.cfgft1 configuration file
-			once the CSU/DSU is configured. This file can
-			holds the current CSU/DSU configuration.
-
-
-
->>>>>> END OF README <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-

+ 21 - 0
Documentation/s390/s390dbf.txt

@@ -115,6 +115,27 @@ Return Value: Handle for generated debug area
 Description:  Allocates memory for a debug log     
               Must not be called within an interrupt handler 
 
+----------------------------------------------------------------------------
+debug_info_t *debug_register_mode(char *name, int pages, int nr_areas,
+				  int buf_size, mode_t mode, uid_t uid,
+				  gid_t gid);
+
+Parameter:    name:	   Name of debug log (e.g. used for debugfs entry)
+	      pages:	   Number of pages, which will be allocated per area
+	      nr_areas:    Number of debug areas
+	      buf_size:    Size of data area in each debug entry
+	      mode:	   File mode for debugfs files. E.g. S_IRWXUGO
+	      uid:	   User ID for debugfs files. Currently only 0 is
+			   supported.
+	      gid:	   Group ID for debugfs files. Currently only 0 is
+			   supported.
+
+Return Value: Handle for generated debug area
+	      NULL if register failed
+
+Description:  Allocates memory for a debug log
+	      Must not be called within an interrupt handler
+
 ---------------------------------------------------------------------------
 void debug_unregister (debug_info_t * id);
 

+ 11 - 1
Documentation/scsi/st.txt

@@ -2,7 +2,7 @@ This file contains brief information about the SCSI tape driver.
 The driver is currently maintained by Kai Mäkisara (email
 Kai.Makisara@kolumbus.fi)
 
-Last modified: Mon Mar  7 21:14:44 2005 by kai.makisara
+Last modified: Sun Feb 24 21:59:07 2008 by kai.makisara
 
 
 BASICS
@@ -133,6 +133,11 @@ the defaults set by the user. The value -1 means the default is not set. The
 file 'dev' contains the device numbers corresponding to this device. The links
 'device' and 'driver' point to the SCSI device and driver entries.
 
+Each directory also contains the entry 'options' which shows the currently
+enabled driver and mode options. The value in the file is a bit mask where the
+bit definitions are the same as those used with MTSETDRVBUFFER in setting the
+options.
+
 A link named 'tape' is made from the SCSI device directory to the class
 directory corresponding to the mode 0 auto-rewind device (e.g., st0). 
 
@@ -372,6 +377,11 @@ MTSETDRVBUFFER
 	     MT_ST_SYSV sets the SYSV semantics (mode)
 	     MT_ST_NOWAIT enables immediate mode (i.e., don't wait for
 	        the command to finish) for some commands (e.g., rewind)
+	     MT_ST_SILI enables setting the SILI bit in SCSI commands when
+		reading in variable block mode to enhance performance when
+		reading blocks shorter than the byte count; set this only
+		if you are sure that the drive supports SILI and the HBA
+		correctly returns transfer residuals
 	     MT_ST_DEBUGGING debugging (global; debugging must be
 		compiled into the driver)
 	MT_ST_SETBOOLEANS

+ 0 - 0
Documentation/hrtimers/highres.txt → Documentation/timers/highres.txt


+ 0 - 0
Documentation/hrtimers/hrtimers.txt → Documentation/timers/hrtimers.txt


+ 0 - 0
Documentation/hrtimer/timer_stats.txt → Documentation/timers/timer_stats.txt


+ 3 - 4
Documentation/vm/hugetlbpage.txt

@@ -88,10 +88,9 @@ hugepages from the buddy allocator, if the normal pool is exhausted. As
 these surplus hugepages go out of use, they are freed back to the buddy
 allocator.
 
-Caveat: Shrinking the pool via nr_hugepages while a surplus is in effect
-will allow the number of surplus huge pages to exceed the overcommit
-value, as the pool hugepages (which must have been in use for a surplus
-hugepages to be allocated) will become surplus hugepages.  As long as
+Caveat: Shrinking the pool via nr_hugepages such that it becomes less
+than the number of hugepages in use will convert the balance to surplus
+huge pages even if it would exceed the overcommit value.  As long as
 this condition holds, however, no more surplus huge pages will be
 allowed on the system until one of the two sysctls are increased
 sufficiently, or the surplus huge pages go out of use and are freed.

+ 100 - 0
Documentation/x86/pat.txt

@@ -0,0 +1,100 @@
+
+PAT (Page Attribute Table)
+
+x86 Page Attribute Table (PAT) allows for setting the memory attribute at the
+page level granularity. PAT is complementary to the MTRR settings which allows
+for setting of memory types over physical address ranges. However, PAT is
+more flexible than MTRR due to its capability to set attributes at page level
+and also due to the fact that there are no hardware limitations on number of
+such attribute settings allowed. Added flexibility comes with guidelines for
+not having memory type aliasing for the same physical memory with multiple
+virtual addresses.
+
+PAT allows for different types of memory attributes. The most commonly used
+ones that will be supported at this time are Write-back, Uncached,
+Write-combined and Uncached Minus.
+
+There are many different APIs in the kernel that allows setting of memory
+attributes at the page level. In order to avoid aliasing, these interfaces
+should be used thoughtfully. Below is a table of interfaces available,
+their intended usage and their memory attribute relationships. Internally,
+these APIs use a reserve_memtype()/free_memtype() interface on the physical
+address range to avoid any aliasing.
+
+
+-------------------------------------------------------------------
+API                    |    RAM   |  ACPI,...  |  Reserved/Holes  |
+-----------------------|----------|------------|------------------|
+                       |          |            |                  |
+ioremap                |    --    |    UC      |       UC         |
+                       |          |            |                  |
+ioremap_cache          |    --    |    WB      |       WB         |
+                       |          |            |                  |
+ioremap_nocache        |    --    |    UC      |       UC         |
+                       |          |            |                  |
+ioremap_wc             |    --    |    --      |       WC         |
+                       |          |            |                  |
+set_memory_uc          |    UC    |    --      |       --         |
+ set_memory_wb         |          |            |                  |
+                       |          |            |                  |
+set_memory_wc          |    WC    |    --      |       --         |
+ set_memory_wb         |          |            |                  |
+                       |          |            |                  |
+pci sysfs resource     |    --    |    --      |       UC         |
+                       |          |            |                  |
+pci sysfs resource_wc  |    --    |    --      |       WC         |
+ is IORESOURCE_PREFETCH|          |            |                  |
+                       |          |            |                  |
+pci proc               |    --    |    --      |       UC         |
+ !PCIIOC_WRITE_COMBINE |          |            |                  |
+                       |          |            |                  |
+pci proc               |    --    |    --      |       WC         |
+ PCIIOC_WRITE_COMBINE  |          |            |                  |
+                       |          |            |                  |
+/dev/mem               |    --    |    UC      |       UC         |
+ read-write            |          |            |                  |
+                       |          |            |                  |
+/dev/mem               |    --    |    UC      |       UC         |
+ mmap SYNC flag        |          |            |                  |
+                       |          |            |                  |
+/dev/mem               |    --    |  WB/WC/UC  |    WB/WC/UC      |
+ mmap !SYNC flag       |          |(from exist-|  (from exist-    |
+ and                   |          |  ing alias)|    ing alias)    |
+ any alias to this area|          |            |                  |
+                       |          |            |                  |
+/dev/mem               |    --    |    WB      |       WB         |
+ mmap !SYNC flag       |          |            |                  |
+ no alias to this area |          |            |                  |
+ and                   |          |            |                  |
+ MTRR says WB          |          |            |                  |
+                       |          |            |                  |
+/dev/mem               |    --    |    --      |    UC_MINUS      |
+ mmap !SYNC flag       |          |            |                  |
+ no alias to this area |          |            |                  |
+ and                   |          |            |                  |
+ MTRR says !WB         |          |            |                  |
+                       |          |            |                  |
+-------------------------------------------------------------------
+
+Notes:
+
+-- in the above table mean "Not suggested usage for the API". Some of the --'s
+are strictly enforced by the kernel. Some others are not really enforced
+today, but may be enforced in future.
+
+For ioremap and pci access through /sys or /proc - The actual type returned
+can be more restrictive, in case of any existing aliasing for that address.
+For example: If there is an existing uncached mapping, a new ioremap_wc can
+return uncached mapping in place of write-combine requested.
+
+set_memory_[uc|wc] and set_memory_wb should be used in pairs, where driver will
+first make a region uc or wc and switch it back to wb after use.
+
+Over time writes to /proc/mtrr will be deprecated in favor of using PAT based
+interfaces. Users writing to /proc/mtrr are suggested to use above interfaces.
+
+Drivers should use ioremap_[uc|wc] to access PCI BARs with [uc|wc] access
+types.
+
+Drivers should use set_memory_[uc|wc] to set access type for RAM ranges.
+

+ 5 - 0
Documentation/x86_64/boot-options.txt

@@ -307,3 +307,8 @@ Debugging
 			stuck (default)
 
 Miscellaneous
+
+	nogbpages
+		Do not use GB pages for kernel direct mappings.
+	gbpages
+		Use GB pages for kernel direct mappings.

+ 19 - 19
MAINTAINERS

@@ -846,15 +846,6 @@ L:	linux-wireless@vger.kernel.org
 W:	http://linuxwireless.org/en/users/Drivers/b43
 S:	Maintained
 
-BCM43XX WIRELESS DRIVER (SOFTMAC BASED VERSION)
-P:	Larry Finger
-M:	Larry.Finger@lwfinger.net
-P:	Stefano Brivio
-M:	stefano.brivio@polimi.it
-L:	linux-wireless@vger.kernel.org
-W:	http://bcm43xx.berlios.de/
-S:	Obsolete
-
 BEFS FILE SYSTEM
 P:	Sergey S. Kostyliov
 M:	rathamahata@php4.ru
@@ -2122,7 +2113,7 @@ M:	reinette.chatre@intel.com
 L:	linux-wireless@vger.kernel.org
 L:	ipw3945-devel@lists.sourceforge.net
 W:	http://intellinuxwireless.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rchatre/iwlwifi-2.6.git
+T:	git kernel.org:/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-2.6.git
 S:	Supported
 
 IOC3 ETHERNET DRIVER
@@ -2203,7 +2194,7 @@ S:	Maintained
 ISDN SUBSYSTEM
 P:	Karsten Keil
 M:	kkeil@suse.de
-L:	isdn4linux@listserv.isdn4linux.de
+L:	isdn4linux@listserv.isdn4linux.de (subscribers-only)
 W:	http://www.isdn4linux.de
 T:	git kernel.org:/pub/scm/linux/kernel/kkeil/isdn-2.6.git
 S:	Maintained
@@ -2211,7 +2202,7 @@ S:	Maintained
 ISDN SUBSYSTEM (Eicon active card driver)
 P:	Armin Schindler
 M:	mac@melware.de
-L:	isdn4linux@listserv.isdn4linux.de
+L:	isdn4linux@listserv.isdn4linux.de (subscribers-only)
 W:	http://www.melware.de
 S:	Maintained
 
@@ -2325,6 +2316,12 @@ L:	linux-kernel@vger.kernel.org
 L:	kexec@lists.infradead.org
 S:	Maintained
 
+KGDB
+P:	Jason Wessel
+M:	jason.wessel@windriver.com
+L:	kgdb-bugreport@lists.sourceforge.net
+S:	Maintained
+
 KPROBES
 P:	Ananth N Mavinakayanahalli
 M:	ananth@in.ibm.com
@@ -2952,6 +2949,7 @@ P:	Joel Becker
 M:	joel.becker@oracle.com
 L:	ocfs2-devel@oss.oracle.com
 W:	http://oss.oracle.com/projects/ocfs2/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2.git
 S:	Supported
 
 OMNIKEY CARDMAN 4000 DRIVER
@@ -3286,6 +3284,7 @@ L:	linux-wireless@vger.kernel.org
 L:	rt2400-devel@lists.sourceforge.net
 W:	http://rt2x00.serialmonkey.com/
 S:	Maintained
+T:	git kernel.org:/pub/scm/linux/kernel/git/ivd/rt2x00.git
 F:	drivers/net/wireless/rt2x00/
 
 RAMDISK RAM BLOCK DEVICE DRIVER
@@ -3348,6 +3347,13 @@ L:	reiserfs-devel@vger.kernel.org
 W:	http://www.namesys.com
 S:	Supported
 
+RFKILL
+P:	Ivo van Doorn
+M:	IvDoorn@gmail.com
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	net/rfkill
+
 ROCKETPORT DRIVER
 P:	Comtrol Corp.
 W:	http://www.comtrol.com
@@ -3470,7 +3476,7 @@ P:	Vlad Yasevich
 M:	vladislav.yasevich@hp.com
 P:	Sridhar Samudrala
 M:	sri@us.ibm.com
-L:	lksctp-developers@lists.sourceforge.net
+L:	linux-sctp@vger.kernel.org
 W:	http://lksctp.sourceforge.net
 S:	Supported
 
@@ -3604,12 +3610,6 @@ M:	mhoffman@lightlink.com
 L:	lm-sensors@lm-sensors.org
 S:	Maintained
 
-SOFTMAC LAYER (IEEE 802.11)
-P:	Daniel Drake
-M:	dsd@gentoo.org
-L:	linux-wireless@vger.kernel.org
-S:	Obsolete
-
 SOFTWARE RAID (Multiple Disks) SUPPORT
 P:	Ingo Molnar
 M:	mingo@redhat.com

+ 1 - 1
Makefile

@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 25
-EXTRAVERSION = -rc9
+EXTRAVERSION =
 NAME = Funky Weasel is Jiggy wit it
 
 # *DOCUMENTATION*

+ 1 - 1
arch/alpha/kernel/Makefile

@@ -7,7 +7,7 @@ EXTRA_AFLAGS	:= $(KBUILD_CFLAGS)
 EXTRA_CFLAGS	:= -Werror -Wno-sign-compare
 
 obj-y    := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
-	    irq_alpha.o signal.o setup.o ptrace.o time.o semaphore.o \
+	    irq_alpha.o signal.o setup.o ptrace.o time.o \
 	    alpha_ksyms.o systbls.o err_common.o io.o
 
 obj-$(CONFIG_VGA_HOSE)	+= console.o

+ 0 - 9
arch/alpha/kernel/alpha_ksyms.c

@@ -77,15 +77,6 @@ EXPORT_SYMBOL(__do_clear_user);
 EXPORT_SYMBOL(__strncpy_from_user);
 EXPORT_SYMBOL(__strnlen_user);
 
-/* Semaphore helper functions.  */
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__up_wakeup);
-EXPORT_SYMBOL(down);
-EXPORT_SYMBOL(down_interruptible);
-EXPORT_SYMBOL(down_trylock);
-EXPORT_SYMBOL(up);
-
 /* 
  * SMP-specific symbols.
  */

+ 0 - 224
arch/alpha/kernel/semaphore.c

@@ -1,224 +0,0 @@
-/*
- * Alpha semaphore implementation.
- *
- * (C) Copyright 1996 Linus Torvalds
- * (C) Copyright 1999, 2000 Richard Henderson
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-
-/*
- * This is basically the PPC semaphore scheme ported to use
- * the Alpha ll/sc sequences, so see the PPC code for
- * credits.
- */
-
-/*
- * Atomically update sem->count.
- * This does the equivalent of the following:
- *
- *	old_count = sem->count;
- *	tmp = MAX(old_count, 0) + incr;
- *	sem->count = tmp;
- *	return old_count;
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
-	long old_count, tmp = 0;
-
-	__asm__ __volatile__(
-	"1:	ldl_l	%0,%2\n"
-	"	cmovgt	%0,%0,%1\n"
-	"	addl	%1,%3,%1\n"
-	"	stl_c	%1,%2\n"
-	"	beq	%1,2f\n"
-	"	mb\n"
-	".subsection 2\n"
-	"2:	br	1b\n"
-	".previous"
-	: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
-	: "Ir" (incr), "1" (tmp), "m" (sem->count));
-
-	return old_count;
-}
-
-/*
- * Perform the "down" function.  Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from down, the return is ignored and the wait loop is
- * not interruptible.  This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from down_interruptible, the return value gets checked
- * upon return.  If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- */
-
-void __sched
-__down_failed(struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): down failed(%p)\n",
-	       tsk->comm, task_pid_nr(tsk), sem);
-#endif
-
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	wmb();
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	/*
-	 * Try to get the semaphore.  If the count is > 0, then we've
-	 * got the semaphore; we decrement count and exit the loop.
-	 * If the count is 0 or negative, we set it to -1, indicating
-	 * that we are asleep, and then sleep.
-	 */
-	while (__sem_update_count(sem, -1) <= 0) {
-		schedule();
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	}
-	remove_wait_queue(&sem->wait, &wait);
-	tsk->state = TASK_RUNNING;
-
-	/*
-	 * If there are any more sleepers, wake one of them up so
-	 * that it can either get the semaphore, or set count to -1
-	 * indicating that there are still processes sleeping.
-	 */
-	wake_up(&sem->wait);
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): down acquired(%p)\n",
-	       tsk->comm, task_pid_nr(tsk), sem);
-#endif
-}
-
-int __sched
-__down_failed_interruptible(struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	long ret = 0;
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): down failed(%p)\n",
-	       tsk->comm, task_pid_nr(tsk), sem);
-#endif
-
-	tsk->state = TASK_INTERRUPTIBLE;
-	wmb();
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	while (__sem_update_count(sem, -1) <= 0) {
-		if (signal_pending(current)) {
-			/*
-			 * A signal is pending - give up trying.
-			 * Set sem->count to 0 if it is negative,
-			 * since we are no longer sleeping.
-			 */
-			__sem_update_count(sem, 0);
-			ret = -EINTR;
-			break;
-		}
-		schedule();
-		set_task_state(tsk, TASK_INTERRUPTIBLE);
-	}
-
-	remove_wait_queue(&sem->wait, &wait);
-	tsk->state = TASK_RUNNING;
-	wake_up(&sem->wait);
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): down %s(%p)\n",
-	       current->comm, task_pid_nr(current),
-	       (ret < 0 ? "interrupted" : "acquired"), sem);
-#endif
-	return ret;
-}
-
-void
-__up_wakeup(struct semaphore *sem)
-{
-	/*
-	 * Note that we incremented count in up() before we came here,
-	 * but that was ineffective since the result was <= 0, and
-	 * any negative value of count is equivalent to 0.
-	 * This ends up setting count to 1, unless count is now > 0
-	 * (i.e. because some other cpu has called up() in the meantime),
-	 * in which case we just increment count.
-	 */
-	__sem_update_count(sem, 1);
-	wake_up(&sem->wait);
-}
-
-void __sched
-down(struct semaphore *sem)
-{
-#ifdef WAITQUEUE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): down(%p) <count=%d> from %p\n",
-	       current->comm, task_pid_nr(current), sem,
-	       atomic_read(&sem->count), __builtin_return_address(0));
-#endif
-	__down(sem);
-}
-
-int __sched
-down_interruptible(struct semaphore *sem)
-{
-#ifdef WAITQUEUE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): down(%p) <count=%d> from %p\n",
-	       current->comm, task_pid_nr(current), sem,
-	       atomic_read(&sem->count), __builtin_return_address(0));
-#endif
-	return __down_interruptible(sem);
-}
-
-int
-down_trylock(struct semaphore *sem)
-{
-	int ret;
-
-#ifdef WAITQUEUE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
-
-	ret = __down_trylock(sem);
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): down_trylock %s from %p\n",
-	       current->comm, task_pid_nr(current),
-	       ret ? "failed" : "acquired",
-	       __builtin_return_address(0));
-#endif
-
-	return ret;
-}
-
-void
-up(struct semaphore *sem)
-{
-#ifdef WAITQUEUE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
-#ifdef CONFIG_DEBUG_SEMAPHORE
-	printk("%s(%d): up(%p) <count=%d> from %p\n",
-	       current->comm, task_pid_nr(current), sem,
-	       atomic_read(&sem->count), __builtin_return_address(0));
-#endif
-	__up(sem);
-}

+ 1 - 1
arch/arm/kernel/Makefile

@@ -7,7 +7,7 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
 # Object file lists.
 
 obj-y		:= compat.o entry-armv.o entry-common.o irq.o \
-		   process.o ptrace.o semaphore.o setup.o signal.o \
+		   process.o ptrace.o setup.o signal.o \
 		   sys_arm.o stacktrace.o time.o traps.o
 
 obj-$(CONFIG_ISA_DMA_API)	+= dma.o

+ 0 - 221
arch/arm/kernel/semaphore.c

@@ -1,221 +0,0 @@
-/*
- *  ARM semaphore implementation, taken from
- *
- *  i386 semaphore implementation.
- *
- *  (C) Copyright 1999 Linus Torvalds
- *
- *  Modified for ARM by Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-
-#include <asm/semaphore.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is
- * protected by the semaphore spinlock.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- *  - only on a boundary condition do we need to care. When we go
- *    from a negative count to a non-negative, we wake people up.
- *  - when we go from a non-negative count to a negative do we
- *    (a) synchronize with the "sleeper" count and (b) make sure
- *    that we're on the wakeup list before we synchronize so that
- *    we cannot lose wakeup events.
- */
-
-void __up(struct semaphore *sem)
-{
-	wake_up(&sem->wait);
-}
-
-static DEFINE_SPINLOCK(semaphore_lock);
-
-void __sched __down(struct semaphore * sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	spin_lock_irq(&semaphore_lock);
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irq(&semaphore_lock);
-
-		schedule();
-		tsk->state = TASK_UNINTERRUPTIBLE;
-		spin_lock_irq(&semaphore_lock);
-	}
-	spin_unlock_irq(&semaphore_lock);
-	remove_wait_queue(&sem->wait, &wait);
-	tsk->state = TASK_RUNNING;
-	wake_up(&sem->wait);
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	tsk->state = TASK_INTERRUPTIBLE;
-	add_wait_queue_exclusive(&sem->wait, &wait);
-
-	spin_lock_irq(&semaphore_lock);
-	sem->sleepers ++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * With signals pending, this turns into
-		 * the trylock failure case - we won't be
-		 * sleeping, and we* can't get the lock as
-		 * it has contention. Just correct the count
-		 * and exit.
-		 */
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			sem->sleepers = 0;
-			atomic_add(sleepers, &sem->count);
-			break;
-		}
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock. The
-		 * "-1" is because we're still hoping to get
-		 * the lock.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irq(&semaphore_lock);
-
-		schedule();
-		tsk->state = TASK_INTERRUPTIBLE;
-		spin_lock_irq(&semaphore_lock);
-	}
-	spin_unlock_irq(&semaphore_lock);
-	tsk->state = TASK_RUNNING;
-	remove_wait_queue(&sem->wait, &wait);
-	wake_up(&sem->wait);
-	return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- *
- * We could have done the trylock with a
- * single "cmpxchg" without failure cases,
- * but then it wouldn't work on a 386.
- */
-int __down_trylock(struct semaphore * sem)
-{
-	int sleepers;
-	unsigned long flags;
-
-	spin_lock_irqsave(&semaphore_lock, flags);
-	sleepers = sem->sleepers + 1;
-	sem->sleepers = 0;
-
-	/*
-	 * Add "everybody else" and us into it. They aren't
-	 * playing, because we own the spinlock.
-	 */
-	if (!atomic_add_negative(sleepers, &sem->count))
-		wake_up(&sem->wait);
-
-	spin_unlock_irqrestore(&semaphore_lock, flags);
-	return 1;
-}
-
-/*
- * The semaphore operations have a special calling sequence that
- * allow us to do a simpler in-line version of them. These routines
- * need to convert that sequence back into the C sequence when
- * there is contention on the semaphore.
- *
- * ip contains the semaphore pointer on entry. Save the C-clobbered
- * registers (r0 to r3 and lr), but not ip, as we use it as a return
- * value in some cases..
- * To remain AAPCS compliant (64-bit stack align) we save r4 as well.
- */
-asm("	.section .sched.text,\"ax\",%progbits	\n\
-	.align	5				\n\
-	.globl	__down_failed			\n\
-__down_failed:					\n\
-	stmfd	sp!, {r0 - r4, lr}		\n\
-	mov	r0, ip				\n\
-	bl	__down				\n\
-	ldmfd	sp!, {r0 - r4, pc}		\n\
-						\n\
-	.align	5				\n\
-	.globl	__down_interruptible_failed	\n\
-__down_interruptible_failed:			\n\
-	stmfd	sp!, {r0 - r4, lr}		\n\
-	mov	r0, ip				\n\
-	bl	__down_interruptible		\n\
-	mov	ip, r0				\n\
-	ldmfd	sp!, {r0 - r4, pc}		\n\
-						\n\
-	.align	5				\n\
-	.globl	__down_trylock_failed		\n\
-__down_trylock_failed:				\n\
-	stmfd	sp!, {r0 - r4, lr}		\n\
-	mov	r0, ip				\n\
-	bl	__down_trylock			\n\
-	mov	ip, r0				\n\
-	ldmfd	sp!, {r0 - r4, pc}		\n\
-						\n\
-	.align	5				\n\
-	.globl	__up_wakeup			\n\
-__up_wakeup:					\n\
-	stmfd	sp!, {r0 - r4, lr}		\n\
-	mov	r0, ip				\n\
-	bl	__up				\n\
-	ldmfd	sp!, {r0 - r4, pc}		\n\
-	");
-
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_interruptible_failed);
-EXPORT_SYMBOL(__down_trylock_failed);
-EXPORT_SYMBOL(__up_wakeup);

+ 0 - 1
arch/avr32/Kconfig

@@ -10,7 +10,6 @@ config AVR32
 	# With EMBEDDED=n, we get lots of stuff automatically selected
 	# that we usually don't need on AVR32.
 	select EMBEDDED
-	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	help

+ 1 - 1
arch/avr32/kernel/Makefile

@@ -6,7 +6,7 @@ extra-y				:= head.o vmlinux.lds
 
 obj-$(CONFIG_SUBARCH_AVR32B)	+= entry-avr32b.o
 obj-y				+= syscall_table.o syscall-stubs.o irq.o
-obj-y				+= setup.o traps.o semaphore.o ocd.o ptrace.o
+obj-y				+= setup.o traps.o ocd.o ptrace.o
 obj-y				+= signal.o sys_avr32.o process.o time.o
 obj-y				+= init_task.o switch_to.o cpu.o
 obj-$(CONFIG_MODULES)		+= module.o avr32_ksyms.o

+ 0 - 148
arch/avr32/kernel/semaphore.c

@@ -1,148 +0,0 @@
-/*
- * AVR32 sempahore implementation.
- *
- * Copyright (C) 2004-2006 Atmel Corporation
- *
- * Based on linux/arch/i386/kernel/semaphore.c
- *  Copyright (C) 1999 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-
-#include <asm/semaphore.h>
-#include <asm/atomic.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- *  - only on a boundary condition do we need to care. When we go
- *    from a negative count to a non-negative, we wake people up.
- *  - when we go from a non-negative count to a negative do we
- *    (a) synchronize with the "sleeper" count and (b) make sure
- *    that we're on the wakeup list before we synchronize so that
- *    we cannot lose wakeup events.
- */
-
-void __up(struct semaphore *sem)
-{
-	wake_up(&sem->wait);
-}
-EXPORT_SYMBOL(__up);
-
-void __sched __down(struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-        DECLARE_WAITQUEUE(wait, tsk);
-        unsigned long flags;
-
-        tsk->state = TASK_UNINTERRUPTIBLE;
-        spin_lock_irqsave(&sem->wait.lock, flags);
-        add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-        sem->sleepers++;
-        for (;;) {
-                int sleepers = sem->sleepers;
-
-                /*
-                 * Add "everybody else" into it. They aren't
-                 * playing, because we own the spinlock in
-                 * the wait_queue_head.
-                 */
-                if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
-                        sem->sleepers = 0;
-                        break;
-                }
-                sem->sleepers = 1;      /* us - see -1 above */
-                spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-                schedule();
-
-                spin_lock_irqsave(&sem->wait.lock, flags);
-                tsk->state = TASK_UNINTERRUPTIBLE;
-        }
-        remove_wait_queue_locked(&sem->wait, &wait);
-        wake_up_locked(&sem->wait);
-        spin_unlock_irqrestore(&sem->wait.lock, flags);
-        tsk->state = TASK_RUNNING;
-}
-EXPORT_SYMBOL(__down);
-
-int __sched __down_interruptible(struct semaphore *sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-        DECLARE_WAITQUEUE(wait, tsk);
-        unsigned long flags;
-
-        tsk->state = TASK_INTERRUPTIBLE;
-        spin_lock_irqsave(&sem->wait.lock, flags);
-        add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-        sem->sleepers++;
-        for (;;) {
-                int sleepers = sem->sleepers;
-
-		/*
-		 * With signals pending, this turns into the trylock
-		 * failure case - we won't be sleeping, and we can't
-		 * get the lock as it has contention. Just correct the
-		 * count and exit.
-		 */
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			sem->sleepers = 0;
-			atomic_add(sleepers, &sem->count);
-			break;
-		}
-
-                /*
-                 * Add "everybody else" into it. They aren't
-                 * playing, because we own the spinlock in
-                 * the wait_queue_head.
-                 */
-                if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
-                        sem->sleepers = 0;
-                        break;
-                }
-                sem->sleepers = 1;      /* us - see -1 above */
-                spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-                schedule();
-
-                spin_lock_irqsave(&sem->wait.lock, flags);
-                tsk->state = TASK_INTERRUPTIBLE;
-        }
-        remove_wait_queue_locked(&sem->wait, &wait);
-        wake_up_locked(&sem->wait);
-        spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-        tsk->state = TASK_RUNNING;
-	return retval;
-}
-EXPORT_SYMBOL(__down_interruptible);

+ 0 - 4
arch/blackfin/Kconfig

@@ -31,10 +31,6 @@ config ZONE_DMA
 	bool
 	default y
 
-config SEMAPHORE_SLEEPERS
-	bool
-	default y
-
 config GENERIC_FIND_NEXT_BIT
 	bool
 	default y

+ 0 - 5
arch/blackfin/kernel/bfin_ksyms.c

@@ -42,11 +42,6 @@ EXPORT_SYMBOL(ip_fast_csum);
 
 EXPORT_SYMBOL(kernel_thread);
 
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__down_interruptible);
-
 EXPORT_SYMBOL(is_in_rom);
 EXPORT_SYMBOL(bfin_return_from_exception);
 

+ 1 - 2
arch/cris/kernel/Makefile

@@ -5,8 +5,7 @@
 
 extra-y	:= vmlinux.lds
 
-obj-y   := process.o traps.o irq.o ptrace.o setup.o \
-	   time.o sys_cris.o semaphore.o
+obj-y   := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o
 
 obj-$(CONFIG_MODULES)    += crisksyms.o
 obj-$(CONFIG_MODULES)	 += module.o

+ 0 - 7
arch/cris/kernel/crisksyms.c

@@ -9,7 +9,6 @@
 #include <linux/string.h>
 #include <linux/tty.h>
 
-#include <asm/semaphore.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/checksum.h>
@@ -49,12 +48,6 @@ EXPORT_SYMBOL(__negdi2);
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(iounmap);
 
-/* Semaphore functions */
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down_trylock);
-
 /* Userspace access functions */
 EXPORT_SYMBOL(__copy_user_zeroing);
 EXPORT_SYMBOL(__copy_user);

+ 0 - 129
arch/cris/kernel/semaphore.c

@@ -1,129 +0,0 @@
-/*
- *  Generic semaphore code. Buyer beware. Do your own
- * specific changes in <asm/semaphore-helper.h>
- */
-
-#include <linux/sched.h>
-#include <asm/semaphore-helper.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit.  ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore.  The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
-	wake_one_more(sem);
-	wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function.  Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible.  This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return.  If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-#define DOWN_VAR				\
-	struct task_struct *tsk = current;	\
-	wait_queue_t wait;			\
-	init_waitqueue_entry(&wait, tsk);
-
-#define DOWN_HEAD(task_state)						\
-									\
-									\
-	tsk->state = (task_state);					\
-	add_wait_queue(&sem->wait, &wait);				\
-									\
-	/*								\
-	 * Ok, we're set up.  sem->count is known to be less than zero	\
-	 * so we must wait.						\
-	 *								\
-	 * We can let go the lock for purposes of waiting.		\
-	 * We re-acquire it after awaking so as to protect		\
-	 * all semaphore operations.					\
-	 *								\
-	 * If "up()" is called before we call waking_non_zero() then	\
-	 * we will catch it right away.  If it is called later then	\
-	 * we will have to go through a wakeup cycle to catch it.	\
-	 *								\
-	 * Multiple waiters contend for the semaphore lock to see	\
-	 * who gets to gate through and who has to wait some more.	\
-	 */								\
-	for (;;) {
-
-#define DOWN_TAIL(task_state)			\
-		tsk->state = (task_state);	\
-	}					\
-	tsk->state = TASK_RUNNING;		\
-	remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
-	DOWN_VAR
-	DOWN_HEAD(TASK_UNINTERRUPTIBLE)
-	if (waking_non_zero(sem))
-		break;
-	schedule();
-	DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	int ret = 0;
-	DOWN_VAR
-	DOWN_HEAD(TASK_INTERRUPTIBLE)
-
-	ret = waking_non_zero_interruptible(sem, tsk);
-	if (ret)
-	{
-		if (ret == 1)
-			/* ret != 0 only if we get interrupted -arca */
-			ret = 0;
-		break;
-	}
-	schedule();
-	DOWN_TAIL(TASK_INTERRUPTIBLE)
-	return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
-	return waking_non_zero_trylock(sem);
-}

+ 1 - 1
arch/frv/kernel/Makefile

@@ -9,7 +9,7 @@ extra-y:= head.o init_task.o vmlinux.lds
 
 obj-y := $(heads-y) entry.o entry-table.o break.o switch_to.o kernel_thread.o \
 	 kernel_execve.o process.o traps.o ptrace.o signal.o dma.o \
-	 sys_frv.o time.o semaphore.o setup.o frv_ksyms.o \
+	 sys_frv.o time.o setup.o frv_ksyms.o \
 	 debug-stub.o irq.o sleep.o uaccess.o
 
 obj-$(CONFIG_GDBSTUB)		+= gdb-stub.o gdb-io.o

+ 0 - 1
arch/frv/kernel/frv_ksyms.c

@@ -12,7 +12,6 @@
 #include <asm/pgalloc.h>
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/semaphore.h>
 #include <asm/checksum.h>
 #include <asm/hardirq.h>
 #include <asm/cacheflush.h>

+ 0 - 155
arch/frv/kernel/semaphore.c

@@ -1,155 +0,0 @@
-/* semaphore.c: FR-V semaphores
- *
- * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- * - Derived from lib/rwsem-spinlock.c
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <asm/semaphore.h>
-
-struct sem_waiter {
-	struct list_head	list;
-	struct task_struct	*task;
-};
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-void semtrace(struct semaphore *sem, const char *str)
-{
-	if (sem->debug)
-		printk("[%d] %s({%d,%d})\n",
-		       current->pid,
-		       str,
-		       sem->counter,
-		       list_empty(&sem->wait_list) ? 0 : 1);
-}
-#else
-#define semtrace(SEM,STR) do { } while(0)
-#endif
-
-/*
- * wait for a token to be granted from a semaphore
- * - entered with lock held and interrupts disabled
- */
-void __down(struct semaphore *sem, unsigned long flags)
-{
-	struct task_struct *tsk = current;
-	struct sem_waiter waiter;
-
-	semtrace(sem, "Entering __down");
-
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
-	get_task_struct(tsk);
-
-	list_add_tail(&waiter.list, &sem->wait_list);
-
-	/* we don't need to touch the semaphore struct anymore */
-	spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	/* wait to be given the semaphore */
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-
-	for (;;) {
-		if (list_empty(&waiter.list))
-			break;
-		schedule();
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	}
-
-	tsk->state = TASK_RUNNING;
-	semtrace(sem, "Leaving __down");
-}
-
-EXPORT_SYMBOL(__down);
-
-/*
- * interruptibly wait for a token to be granted from a semaphore
- * - entered with lock held and interrupts disabled
- */
-int __down_interruptible(struct semaphore *sem, unsigned long flags)
-{
-	struct task_struct *tsk = current;
-	struct sem_waiter waiter;
-	int ret;
-
-	semtrace(sem,"Entering __down_interruptible");
-
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
-	get_task_struct(tsk);
-
-	list_add_tail(&waiter.list, &sem->wait_list);
-
-	/* we don't need to touch the semaphore struct anymore */
-	set_task_state(tsk, TASK_INTERRUPTIBLE);
-
-	spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	/* wait to be given the semaphore */
-	ret = 0;
-	for (;;) {
-		if (list_empty(&waiter.list))
-			break;
-		if (unlikely(signal_pending(current)))
-			goto interrupted;
-		schedule();
-		set_task_state(tsk, TASK_INTERRUPTIBLE);
-	}
-
- out:
-	tsk->state = TASK_RUNNING;
-	semtrace(sem, "Leaving __down_interruptible");
-	return ret;
-
- interrupted:
-	spin_lock_irqsave(&sem->wait_lock, flags);
-
-	if (!list_empty(&waiter.list)) {
-		list_del(&waiter.list);
-		ret = -EINTR;
-	}
-
-	spin_unlock_irqrestore(&sem->wait_lock, flags);
-	if (ret == -EINTR)
-		put_task_struct(current);
-	goto out;
-}
-
-EXPORT_SYMBOL(__down_interruptible);
-
-/*
- * release a single token back to a semaphore
- * - entered with lock held and interrupts disabled
- */
-void __up(struct semaphore *sem)
-{
-	struct task_struct *tsk;
-	struct sem_waiter *waiter;
-
-	semtrace(sem,"Entering __up");
-
-	/* grant the token to the process at the front of the queue */
-	waiter = list_entry(sem->wait_list.next, struct sem_waiter, list);
-
-	/* We must be careful not to touch 'waiter' after we set ->task = NULL.
-	 * It is allocated on the waiter's stack and may become invalid at
-	 * any time after that point (due to a wakeup from another source).
-	 */
-	list_del_init(&waiter->list);
-	tsk = waiter->task;
-	mb();
-	waiter->task = NULL;
-	wake_up_process(tsk);
-	put_task_struct(tsk);
-
-	semtrace(sem,"Leaving __up");
-}
-
-EXPORT_SYMBOL(__up);

+ 1 - 1
arch/frv/kernel/traps.c

@@ -73,7 +73,7 @@ asmlinkage void illegal_instruction(unsigned long esfr1, unsigned long epcr0, un
 		      epcr0, esr0, esfr1);
 
 	info.si_errno	= 0;
-	info.si_addr	= (void *) ((epcr0 & EPCR0_PC) ? (epcr0 & EPCR0_PC) : __frame->pc);
+	info.si_addr	= (void *) ((epcr0 & EPCR0_V) ? (epcr0 & EPCR0_PC) : __frame->pc);
 
 	switch (__frame->tbr & TBR_TT) {
 	case TBR_TT_ILLEGAL_INSTR:

+ 1 - 1
arch/h8300/kernel/Makefile

@@ -5,7 +5,7 @@
 extra-y := vmlinux.lds
 
 obj-y := process.o traps.o ptrace.o irq.o \
-	 sys_h8300.o time.o semaphore.o signal.o \
+	 sys_h8300.o time.o signal.o \
          setup.o gpio.o init_task.o syscalls.o \
 	 entry.o
 

+ 0 - 1
arch/h8300/kernel/h8300_ksyms.c

@@ -12,7 +12,6 @@
 #include <asm/pgalloc.h>
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/semaphore.h>
 #include <asm/checksum.h>
 #include <asm/current.h>
 #include <asm/gpio.h>

+ 0 - 132
arch/h8300/kernel/semaphore.c

@@ -1,132 +0,0 @@
-/*
- *  Generic semaphore code. Buyer beware. Do your own
- * specific changes in <asm/semaphore-helper.h>
- */
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/semaphore-helper.h>
-
-#ifndef CONFIG_RMW_INSNS
-spinlock_t semaphore_wake_lock;
-#endif
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit.  ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore.  The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
-	wake_one_more(sem);
-	wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function.  Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible.  This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return.  If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-
-#define DOWN_HEAD(task_state)						\
-									\
-									\
-	current->state = (task_state);					\
-	add_wait_queue(&sem->wait, &wait);				\
-									\
-	/*								\
-	 * Ok, we're set up.  sem->count is known to be less than zero	\
-	 * so we must wait.						\
-	 *								\
-	 * We can let go the lock for purposes of waiting.		\
-	 * We re-acquire it after awaking so as to protect		\
-	 * all semaphore operations.					\
-	 *								\
-	 * If "up()" is called before we call waking_non_zero() then	\
-	 * we will catch it right away.  If it is called later then	\
-	 * we will have to go through a wakeup cycle to catch it.	\
-	 *								\
-	 * Multiple waiters contend for the semaphore lock to see	\
-	 * who gets to gate through and who has to wait some more.	\
-	 */								\
-	for (;;) {
-
-#define DOWN_TAIL(task_state)			\
-		current->state = (task_state);	\
-	}					\
-	current->state = TASK_RUNNING;		\
-	remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	DOWN_HEAD(TASK_UNINTERRUPTIBLE)
-	if (waking_non_zero(sem))
-		break;
-	schedule();
-	DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	int ret = 0;
-
-	DOWN_HEAD(TASK_INTERRUPTIBLE)
-
-	ret = waking_non_zero_interruptible(sem, current);
-	if (ret)
-	{
-		if (ret == 1)
-			/* ret != 0 only if we get interrupted -arca */
-			ret = 0;
-		break;
-	}
-	schedule();
-	DOWN_TAIL(TASK_INTERRUPTIBLE)
-	return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
-	return waking_non_zero_trylock(sem);
-}

+ 14 - 0
arch/ia64/Kconfig

@@ -283,6 +283,17 @@ config FORCE_MAX_ZONEORDER
 	default "17" if HUGETLB_PAGE
 	default "11"
 
+config VIRT_CPU_ACCOUNTING
+	bool "Deterministic task and CPU time accounting"
+	default n
+	help
+	  Select this option to enable more accurate task and CPU time
+	  accounting.  This is done by reading a CPU counter on each
+	  kernel entry and exit and on transitions within the kernel
+	  between system, softirq and hardirq state, so there is a
+	  small performance impact.
+	  If in doubt, say N here.
+
 config SMP
 	bool "Symmetric multi-processing support"
 	help
@@ -611,6 +622,9 @@ config IRQ_PER_CPU
 	bool
 	default y
 
+config IOMMU_HELPER
+	def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC)
+
 source "arch/ia64/hp/sim/Kconfig"
 
 source "arch/ia64/Kconfig.debug"

+ 39 - 17
arch/ia64/hp/common/sba_iommu.c

@@ -35,6 +35,7 @@
 #include <linux/nodemask.h>
 #include <linux/bitops.h>         /* hweight64() */
 #include <linux/crash_dump.h>
+#include <linux/iommu-helper.h>
 
 #include <asm/delay.h>		/* ia64_get_itc() */
 #include <asm/io.h>
@@ -460,6 +461,13 @@ get_iovp_order (unsigned long size)
 	return order;
 }
 
+static unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr,
+				 unsigned int bitshiftcnt)
+{
+	return (((unsigned long)res_ptr - (unsigned long)ioc->res_map) << 3)
+		+ bitshiftcnt;
+}
+
 /**
  * sba_search_bitmap - find free space in IO PDIR resource bitmap
  * @ioc: IO MMU structure which owns the pdir we are interested in.
@@ -471,15 +479,25 @@ get_iovp_order (unsigned long size)
  * Cool perf optimization: search for log2(size) bits at a time.
  */
 static SBA_INLINE unsigned long
-sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
+sba_search_bitmap(struct ioc *ioc, struct device *dev,
+		  unsigned long bits_wanted, int use_hint)
 {
 	unsigned long *res_ptr;
 	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
-	unsigned long flags, pide = ~0UL;
+	unsigned long flags, pide = ~0UL, tpide;
+	unsigned long boundary_size;
+	unsigned long shift;
+	int ret;
 
 	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
 	ASSERT(res_ptr < res_end);
 
+	boundary_size = (unsigned long long)dma_get_seg_boundary(dev) + 1;
+	boundary_size = ALIGN(boundary_size, 1ULL << iovp_shift) >> iovp_shift;
+
+	BUG_ON(ioc->ibase & ~iovp_mask);
+	shift = ioc->ibase >> iovp_shift;
+
 	spin_lock_irqsave(&ioc->res_lock, flags);
 
 	/* Allow caller to force a search through the entire resource space */
@@ -504,9 +522,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
 			if (likely(*res_ptr != ~0UL)) {
 				bitshiftcnt = ffz(*res_ptr);
 				*res_ptr |= (1UL << bitshiftcnt);
-				pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-				pide <<= 3;	/* convert to bit address */
-				pide += bitshiftcnt;
+				pide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
 				ioc->res_bitshift = bitshiftcnt + bits_wanted;
 				goto found_it;
 			}
@@ -535,11 +551,13 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
 			DBG_RES("    %p %lx %lx\n", res_ptr, mask, *res_ptr);
 			ASSERT(0 != mask);
 			for (; mask ; mask <<= o, bitshiftcnt += o) {
-				if(0 == ((*res_ptr) & mask)) {
+				tpide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
+				ret = iommu_is_span_boundary(tpide, bits_wanted,
+							     shift,
+							     boundary_size);
+				if ((0 == ((*res_ptr) & mask)) && !ret) {
 					*res_ptr |= mask;     /* mark resources busy! */
-					pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-					pide <<= 3;	/* convert to bit address */
-					pide += bitshiftcnt;
+					pide = tpide;
 					ioc->res_bitshift = bitshiftcnt + bits_wanted;
 					goto found_it;
 				}
@@ -560,6 +578,11 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
 		end = res_end - qwords;
 
 		for (; res_ptr < end; res_ptr++) {
+			tpide = ptr_to_pide(ioc, res_ptr, 0);
+			ret = iommu_is_span_boundary(tpide, bits_wanted,
+						     shift, boundary_size);
+			if (ret)
+				goto next_ptr;
 			for (i = 0 ; i < qwords ; i++) {
 				if (res_ptr[i] != 0)
 					goto next_ptr;
@@ -572,8 +595,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
 				res_ptr[i] = ~0UL;
 			res_ptr[i] |= RESMAP_MASK(bits);
 
-			pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
-			pide <<= 3;	/* convert to bit address */
+			pide = tpide;
 			res_ptr += qwords;
 			ioc->res_bitshift = bits;
 			goto found_it;
@@ -605,7 +627,7 @@ found_it:
  * resource bit map.
  */
 static int
-sba_alloc_range(struct ioc *ioc, size_t size)
+sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
 {
 	unsigned int pages_needed = size >> iovp_shift;
 #ifdef PDIR_SEARCH_TIMING
@@ -622,9 +644,9 @@ sba_alloc_range(struct ioc *ioc, size_t size)
 	/*
 	** "seek and ye shall find"...praying never hurts either...
 	*/
-	pide = sba_search_bitmap(ioc, pages_needed, 1);
+	pide = sba_search_bitmap(ioc, dev, pages_needed, 1);
 	if (unlikely(pide >= (ioc->res_size << 3))) {
-		pide = sba_search_bitmap(ioc, pages_needed, 0);
+		pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
 		if (unlikely(pide >= (ioc->res_size << 3))) {
 #if DELAYED_RESOURCE_CNT > 0
 			unsigned long flags;
@@ -653,7 +675,7 @@ sba_alloc_range(struct ioc *ioc, size_t size)
 			}
 			spin_unlock_irqrestore(&ioc->saved_lock, flags);
 
-			pide = sba_search_bitmap(ioc, pages_needed, 0);
+			pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
 			if (unlikely(pide >= (ioc->res_size << 3)))
 				panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
 				      ioc->ioc_hpa);
@@ -936,7 +958,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
 	spin_unlock_irqrestore(&ioc->res_lock, flags);
 #endif
 
-	pide = sba_alloc_range(ioc, size);
+	pide = sba_alloc_range(ioc, dev, size);
 
 	iovp = (dma_addr_t) pide << iovp_shift;
 
@@ -1373,7 +1395,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
 		dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
 		ASSERT(dma_len <= DMA_CHUNK_SIZE);
 		dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG
-			| (sba_alloc_range(ioc, dma_len) << iovp_shift)
+			| (sba_alloc_range(ioc, dev, dma_len) << iovp_shift)
 			| dma_offset);
 		n_mappings++;
 	}

+ 1 - 1
arch/ia64/hp/sim/simeth.c

@@ -294,7 +294,7 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr)
 		return NOTIFY_DONE;
 	}
 
-	if (dev->nd_net != &init_net)
+	if (dev_net(dev) != &init_net)
 		return NOTIFY_DONE;
 
 	if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE;

+ 4 - 19
arch/ia64/hp/sim/simscsi.c

@@ -201,22 +201,6 @@ simscsi_readwrite10 (struct scsi_cmnd *sc, int mode)
 	simscsi_sg_readwrite(sc, mode, offset);
 }
 
-static void simscsi_fillresult(struct scsi_cmnd *sc, char *buf, unsigned len)
-{
-
-	int i;
-	unsigned thislen;
-	struct scatterlist *slp;
-
-	scsi_for_each_sg(sc, slp, scsi_sg_count(sc), i) {
-		if (!len)
-			break;
-		thislen = min(len, slp->length);
-		memcpy(sg_virt(slp), buf, thislen);
-		len -= thislen;
-	}
-}
-
 static int
 simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 {
@@ -258,7 +242,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			buf[6] = 0;	/* reserved */
 			buf[7] = 0;	/* various flags */
 			memcpy(buf + 8, "HP      SIMULATED DISK  0.00",  28);
-			simscsi_fillresult(sc, buf, 36);
+			scsi_sg_copy_from_buffer(sc, buf, 36);
 			sc->result = GOOD;
 			break;
 
@@ -306,14 +290,15 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			buf[5] = 0;
 			buf[6] = 2;
 			buf[7] = 0;
-			simscsi_fillresult(sc, buf, 8);
+			scsi_sg_copy_from_buffer(sc, buf, 8);
 			sc->result = GOOD;
 			break;
 
 		      case MODE_SENSE:
 		      case MODE_SENSE_10:
 			/* sd.c uses this to determine whether disk does write-caching. */
-			simscsi_fillresult(sc, (char *)empty_zero_page, scsi_bufflen(sc));
+			scsi_sg_copy_from_buffer(sc, (char *)empty_zero_page,
+						 PAGE_SIZE);
 			sc->result = GOOD;
 			break;
 

+ 13 - 1
arch/ia64/ia32/elfcore32.h

@@ -30,7 +30,19 @@ struct elf_siginfo
 	int	si_errno;			/* errno */
 };
 
-#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/*
+ * Hacks are here since types between compat_timeval (= pair of s32) and
+ * ia64-native timeval (= pair of s64) are not compatible, at least a file
+ * arch/ia64/ia32/../../../fs/binfmt_elf.c will get warnings from compiler on
+ * use of cputime_to_timeval(), which usually an alias of jiffies_to_timeval().
+ */
+#define cputime_to_timeval(a,b) \
+	do { (b)->tv_usec = 0; (b)->tv_sec = (a)/NSEC_PER_SEC; } while(0)
+#else
+#define jiffies_to_timeval(a,b) \
+	do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; } while(0)
+#endif
 
 struct elf_prstatus
 {

+ 624 - 25
arch/ia64/ia32/sys_ia32.c

@@ -38,6 +38,7 @@
 #include <linux/eventpoll.h>
 #include <linux/personality.h>
 #include <linux/ptrace.h>
+#include <linux/regset.h>
 #include <linux/stat.h>
 #include <linux/ipc.h>
 #include <linux/capability.h>
@@ -2387,16 +2388,45 @@ get_free_idx (void)
 	return -ESRCH;
 }
 
+static void set_tls_desc(struct task_struct *p, int idx,
+		const struct ia32_user_desc *info, int n)
+{
+	struct thread_struct *t = &p->thread;
+	struct desc_struct *desc = &t->tls_array[idx - GDT_ENTRY_TLS_MIN];
+	int cpu;
+
+	/*
+	 * We must not get preempted while modifying the TLS.
+	 */
+	cpu = get_cpu();
+
+	while (n-- > 0) {
+		if (LDT_empty(info)) {
+			desc->a = 0;
+			desc->b = 0;
+		} else {
+			desc->a = LDT_entry_a(info);
+			desc->b = LDT_entry_b(info);
+		}
+
+		++info;
+		++desc;
+	}
+
+	if (t == &current->thread)
+		load_TLS(t, cpu);
+
+	put_cpu();
+}
+
 /*
  * Set a given TLS descriptor:
  */
 asmlinkage int
 sys32_set_thread_area (struct ia32_user_desc __user *u_info)
 {
-	struct thread_struct *t = &current->thread;
 	struct ia32_user_desc info;
-	struct desc_struct *desc;
-	int cpu, idx;
+	int idx;
 
 	if (copy_from_user(&info, u_info, sizeof(info)))
 		return -EFAULT;
@@ -2416,18 +2446,7 @@ sys32_set_thread_area (struct ia32_user_desc __user *u_info)
 	if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
 		return -EINVAL;
 
-	desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
-
-	cpu = smp_processor_id();
-
-	if (LDT_empty(&info)) {
-		desc->a = 0;
-		desc->b = 0;
-	} else {
-		desc->a = LDT_entry_a(&info);
-		desc->b = LDT_entry_b(&info);
-	}
-	load_TLS(t, cpu);
+	set_tls_desc(current, idx, &info, 1);
 	return 0;
 }
 
@@ -2451,6 +2470,20 @@ sys32_set_thread_area (struct ia32_user_desc __user *u_info)
 #define GET_PRESENT(desc)	(((desc)->b >> 15) & 1)
 #define GET_USEABLE(desc)	(((desc)->b >> 20) & 1)
 
+static void fill_user_desc(struct ia32_user_desc *info, int idx,
+		const struct desc_struct *desc)
+{
+	info->entry_number = idx;
+	info->base_addr = GET_BASE(desc);
+	info->limit = GET_LIMIT(desc);
+	info->seg_32bit = GET_32BIT(desc);
+	info->contents = GET_CONTENTS(desc);
+	info->read_exec_only = !GET_WRITABLE(desc);
+	info->limit_in_pages = GET_LIMIT_PAGES(desc);
+	info->seg_not_present = !GET_PRESENT(desc);
+	info->useable = GET_USEABLE(desc);
+}
+
 asmlinkage int
 sys32_get_thread_area (struct ia32_user_desc __user *u_info)
 {
@@ -2464,22 +2497,588 @@ sys32_get_thread_area (struct ia32_user_desc __user *u_info)
 		return -EINVAL;
 
 	desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
-
-	info.entry_number = idx;
-	info.base_addr = GET_BASE(desc);
-	info.limit = GET_LIMIT(desc);
-	info.seg_32bit = GET_32BIT(desc);
-	info.contents = GET_CONTENTS(desc);
-	info.read_exec_only = !GET_WRITABLE(desc);
-	info.limit_in_pages = GET_LIMIT_PAGES(desc);
-	info.seg_not_present = !GET_PRESENT(desc);
-	info.useable = GET_USEABLE(desc);
+	fill_user_desc(&info, idx, desc);
 
 	if (copy_to_user(u_info, &info, sizeof(info)))
 		return -EFAULT;
 	return 0;
 }
 
+struct regset_get {
+	void *kbuf;
+	void __user *ubuf;
+};
+
+struct regset_set {
+	const void *kbuf;
+	const void __user *ubuf;
+};
+
+struct regset_getset {
+	struct task_struct *target;
+	const struct user_regset *regset;
+	union {
+		struct regset_get get;
+		struct regset_set set;
+	} u;
+	unsigned int pos;
+	unsigned int count;
+	int ret;
+};
+
+static void getfpreg(struct task_struct *task, int regno, int *val)
+{
+	switch (regno / sizeof(int)) {
+	case 0:
+		*val = task->thread.fcr & 0xffff;
+		break;
+	case 1:
+		*val = task->thread.fsr & 0xffff;
+		break;
+	case 2:
+		*val = (task->thread.fsr>>16) & 0xffff;
+		break;
+	case 3:
+		*val = task->thread.fir;
+		break;
+	case 4:
+		*val = (task->thread.fir>>32) & 0xffff;
+		break;
+	case 5:
+		*val = task->thread.fdr;
+		break;
+	case 6:
+		*val = (task->thread.fdr >> 32) & 0xffff;
+		break;
+	}
+}
+
+static void setfpreg(struct task_struct *task, int regno, int val)
+{
+	switch (regno / sizeof(int)) {
+	case 0:
+		task->thread.fcr = (task->thread.fcr & (~0x1f3f))
+			| (val & 0x1f3f);
+		break;
+	case 1:
+		task->thread.fsr = (task->thread.fsr & (~0xffff)) | val;
+		break;
+	case 2:
+		task->thread.fsr = (task->thread.fsr & (~0xffff0000))
+			| (val << 16);
+		break;
+	case 3:
+		task->thread.fir = (task->thread.fir & (~0xffffffff)) | val;
+		break;
+	case 5:
+		task->thread.fdr = (task->thread.fdr & (~0xffffffff)) | val;
+		break;
+	}
+}
+
+static void access_fpreg_ia32(int regno, void *reg,
+		struct pt_regs *pt, struct switch_stack *sw,
+		int tos, int write)
+{
+	void *f;
+
+	if ((regno += tos) >= 8)
+		regno -= 8;
+	if (regno < 4)
+		f = &pt->f8 + regno;
+	else if (regno <= 7)
+		f = &sw->f12 + (regno - 4);
+	else {
+		printk(KERN_ERR "regno must be less than 7 \n");
+		 return;
+	}
+
+	if (write)
+		memcpy(f, reg, sizeof(struct _fpreg_ia32));
+	else
+		memcpy(reg, f, sizeof(struct _fpreg_ia32));
+}
+
+static void do_fpregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	struct pt_regs *pt;
+	int start, end, tos;
+	char buf[80];
+
+	if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+		return;
+	if (dst->pos < 7 * sizeof(int)) {
+		end = min((dst->pos + dst->count),
+			(unsigned int)(7 * sizeof(int)));
+		for (start = dst->pos; start < end; start += sizeof(int))
+			getfpreg(task, start, (int *)(buf + start));
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, buf,
+				0, 7 * sizeof(int));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+	if (dst->pos < sizeof(struct ia32_user_i387_struct)) {
+		pt = task_pt_regs(task);
+		tos = (task->thread.fsr >> 11) & 7;
+		end = min(dst->pos + dst->count,
+			(unsigned int)(sizeof(struct ia32_user_i387_struct)));
+		start = (dst->pos - 7 * sizeof(int)) /
+			sizeof(struct _fpreg_ia32);
+		end = (end - 7 * sizeof(int)) / sizeof(struct _fpreg_ia32);
+		for (; start < end; start++)
+			access_fpreg_ia32(start,
+				(struct _fpreg_ia32 *)buf + start,
+				pt, info->sw, tos, 0);
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				buf, 7 * sizeof(int),
+				sizeof(struct ia32_user_i387_struct));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+}
+
+static void do_fpregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	struct pt_regs *pt;
+	char buf[80];
+	int end, start, tos;
+
+	if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+		return;
+
+	if (dst->pos < 7 * sizeof(int)) {
+		start = dst->pos;
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, buf,
+				0, 7 * sizeof(int));
+		if (dst->ret)
+			return;
+		for (; start < dst->pos; start += sizeof(int))
+			setfpreg(task, start, *((int *)(buf + start)));
+		if (dst->count == 0)
+			return;
+	}
+	if (dst->pos < sizeof(struct ia32_user_i387_struct)) {
+		start = (dst->pos - 7 * sizeof(int)) /
+			sizeof(struct _fpreg_ia32);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf,
+				buf, 7 * sizeof(int),
+				sizeof(struct ia32_user_i387_struct));
+		if (dst->ret)
+			return;
+		pt = task_pt_regs(task);
+		tos = (task->thread.fsr >> 11) & 7;
+		end = (dst->pos - 7 * sizeof(int)) / sizeof(struct _fpreg_ia32);
+		for (; start < end; start++)
+			access_fpreg_ia32(start,
+				(struct _fpreg_ia32 *)buf + start,
+				pt, info->sw, tos, 1);
+		if (dst->count == 0)
+			return;
+	}
+}
+
+#define OFFSET(member) ((int)(offsetof(struct ia32_user_fxsr_struct, member)))
+static void getfpxreg(struct task_struct *task, int start, int end, char *buf)
+{
+	int min_val;
+
+	min_val = min(end, OFFSET(fop));
+	while (start < min_val) {
+		if (start == OFFSET(cwd))
+			*((short *)buf) = task->thread.fcr & 0xffff;
+		else if (start == OFFSET(swd))
+			*((short *)buf) = task->thread.fsr & 0xffff;
+		else if (start == OFFSET(twd))
+			*((short *)buf) = (task->thread.fsr>>16) & 0xffff;
+		buf += 2;
+		start += 2;
+	}
+	/* skip fop element */
+	if (start == OFFSET(fop)) {
+		start += 2;
+		buf += 2;
+	}
+	while (start < end) {
+		if (start == OFFSET(fip))
+			*((int *)buf) = task->thread.fir;
+		else if (start == OFFSET(fcs))
+			*((int *)buf) = (task->thread.fir>>32) & 0xffff;
+		else if (start == OFFSET(foo))
+			*((int *)buf) = task->thread.fdr;
+		else if (start == OFFSET(fos))
+			*((int *)buf) = (task->thread.fdr>>32) & 0xffff;
+		else if (start == OFFSET(mxcsr))
+			*((int *)buf) = ((task->thread.fcr>>32) & 0xff80)
+					 | ((task->thread.fsr>>32) & 0x3f);
+		buf += 4;
+		start += 4;
+	}
+}
+
+static void setfpxreg(struct task_struct *task, int start, int end, char *buf)
+{
+	int min_val, num32;
+	short num;
+	unsigned long num64;
+
+	min_val = min(end, OFFSET(fop));
+	while (start < min_val) {
+		num = *((short *)buf);
+		if (start == OFFSET(cwd)) {
+			task->thread.fcr = (task->thread.fcr & (~0x1f3f))
+						| (num & 0x1f3f);
+		} else if (start == OFFSET(swd)) {
+			task->thread.fsr = (task->thread.fsr & (~0xffff)) | num;
+		} else if (start == OFFSET(twd)) {
+			task->thread.fsr = (task->thread.fsr & (~0xffff0000))
+				| (((int)num) << 16);
+		}
+		buf += 2;
+		start += 2;
+	}
+	/* skip fop element */
+	if (start == OFFSET(fop)) {
+		start += 2;
+		buf += 2;
+	}
+	while (start < end) {
+		num32 = *((int *)buf);
+		if (start == OFFSET(fip))
+			task->thread.fir = (task->thread.fir & (~0xffffffff))
+						 | num32;
+		else if (start == OFFSET(foo))
+			task->thread.fdr = (task->thread.fdr & (~0xffffffff))
+						 | num32;
+		else if (start == OFFSET(mxcsr)) {
+			num64 = num32 & 0xff10;
+			task->thread.fcr = (task->thread.fcr &
+				(~0xff1000000000UL)) | (num64<<32);
+			num64 = num32 & 0x3f;
+			task->thread.fsr = (task->thread.fsr &
+				(~0x3f00000000UL)) | (num64<<32);
+		}
+		buf += 4;
+		start += 4;
+	}
+}
+
+static void do_fpxregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	struct pt_regs *pt;
+	char buf[128];
+	int start, end, tos;
+
+	if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+		return;
+	if (dst->pos < OFFSET(st_space[0])) {
+		end = min(dst->pos + dst->count, (unsigned int)32);
+		getfpxreg(task, dst->pos, end, buf);
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, buf,
+				0, OFFSET(st_space[0]));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+	if (dst->pos < OFFSET(xmm_space[0])) {
+		pt = task_pt_regs(task);
+		tos = (task->thread.fsr >> 11) & 7;
+		end = min(dst->pos + dst->count,
+				(unsigned int)OFFSET(xmm_space[0]));
+		start = (dst->pos - OFFSET(st_space[0])) / 16;
+		end = (end - OFFSET(st_space[0])) / 16;
+		for (; start < end; start++)
+			access_fpreg_ia32(start, buf + 16 * start, pt,
+						info->sw, tos, 0);
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				buf, OFFSET(st_space[0]), OFFSET(xmm_space[0]));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+	if (dst->pos < OFFSET(padding[0]))
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				&info->sw->f16, OFFSET(xmm_space[0]),
+				OFFSET(padding[0]));
+}
+
+static void do_fpxregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	char buf[128];
+	int start, end;
+
+	if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+		return;
+
+	if (dst->pos < OFFSET(st_space[0])) {
+		start = dst->pos;
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf,
+				buf, 0, OFFSET(st_space[0]));
+		if (dst->ret)
+			return;
+		setfpxreg(task, start, dst->pos, buf);
+		if (dst->count == 0)
+			return;
+	}
+	if (dst->pos < OFFSET(xmm_space[0])) {
+		struct pt_regs *pt;
+		int tos;
+		pt = task_pt_regs(task);
+		tos = (task->thread.fsr >> 11) & 7;
+		start = (dst->pos - OFFSET(st_space[0])) / 16;
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf,
+				buf, OFFSET(st_space[0]), OFFSET(xmm_space[0]));
+		if (dst->ret)
+			return;
+		end = (dst->pos - OFFSET(st_space[0])) / 16;
+		for (; start < end; start++)
+			access_fpreg_ia32(start, buf + 16 * start, pt, info->sw,
+						 tos, 1);
+		if (dst->count == 0)
+			return;
+	}
+	if (dst->pos < OFFSET(padding[0]))
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf,
+				&info->sw->f16, OFFSET(xmm_space[0]),
+				 OFFSET(padding[0]));
+}
+#undef OFFSET
+
+static int do_regset_call(void (*call)(struct unw_frame_info *, void *),
+		struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	struct regset_getset info = { .target = target, .regset = regset,
+		.pos = pos, .count = count,
+		.u.set = { .kbuf = kbuf, .ubuf = ubuf },
+		.ret = 0 };
+
+	if (target == current)
+		unw_init_running(call, &info);
+	else {
+		struct unw_frame_info ufi;
+		memset(&ufi, 0, sizeof(ufi));
+		unw_init_from_blocked_task(&ufi, target);
+		(*call)(&ufi, &info);
+	}
+
+	return info.ret;
+}
+
+static int ia32_fpregs_get(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int ia32_fpregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int ia32_fpxregs_get(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_fpxregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int ia32_fpxregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_fpxregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int ia32_genregs_get(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	if (kbuf) {
+		u32 *kp = kbuf;
+		while (count > 0) {
+			*kp++ = getreg(target, pos);
+			pos += 4;
+			count -= 4;
+		}
+	} else {
+		u32 __user *up = ubuf;
+		while (count > 0) {
+			if (__put_user(getreg(target, pos), up++))
+				return -EFAULT;
+			pos += 4;
+			count -= 4;
+		}
+	}
+	return 0;
+}
+
+static int ia32_genregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	int ret = 0;
+
+	if (kbuf) {
+		const u32 *kp = kbuf;
+		while (!ret && count > 0) {
+			putreg(target, pos, *kp++);
+			pos += 4;
+			count -= 4;
+		}
+	} else {
+		const u32 __user *up = ubuf;
+		u32 val;
+		while (!ret && count > 0) {
+			ret = __get_user(val, up++);
+			if (!ret)
+				putreg(target, pos, val);
+			pos += 4;
+			count -= 4;
+		}
+	}
+	return ret;
+}
+
+static int ia32_tls_active(struct task_struct *target,
+		const struct user_regset *regset)
+{
+	struct thread_struct *t = &target->thread;
+	int n = GDT_ENTRY_TLS_ENTRIES;
+	while (n > 0 && desc_empty(&t->tls_array[n -1]))
+		--n;
+	return n;
+}
+
+static int ia32_tls_get(struct task_struct *target,
+		const struct user_regset *regset, unsigned int pos,
+		unsigned int count, void *kbuf, void __user *ubuf)
+{
+	const struct desc_struct *tls;
+
+	if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct ia32_user_desc) ||
+			(pos % sizeof(struct ia32_user_desc)) != 0 ||
+			(count % sizeof(struct ia32_user_desc)) != 0)
+		return -EINVAL;
+
+	pos /= sizeof(struct ia32_user_desc);
+	count /= sizeof(struct ia32_user_desc);
+
+	tls = &target->thread.tls_array[pos];
+
+	if (kbuf) {
+		struct ia32_user_desc *info = kbuf;
+		while (count-- > 0)
+			fill_user_desc(info++, GDT_ENTRY_TLS_MIN + pos++,
+					tls++);
+	} else {
+		struct ia32_user_desc __user *u_info = ubuf;
+		while (count-- > 0) {
+			struct ia32_user_desc info;
+			fill_user_desc(&info, GDT_ENTRY_TLS_MIN + pos++, tls++);
+			if (__copy_to_user(u_info++, &info, sizeof(info)))
+				return -EFAULT;
+		}
+	}
+
+	return 0;
+}
+
+static int ia32_tls_set(struct task_struct *target,
+		const struct user_regset *regset, unsigned int pos,
+		unsigned int count, const void *kbuf, const void __user *ubuf)
+{
+	struct ia32_user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
+	const struct ia32_user_desc *info;
+
+	if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct ia32_user_desc) ||
+			(pos % sizeof(struct ia32_user_desc)) != 0 ||
+			(count % sizeof(struct ia32_user_desc)) != 0)
+		return -EINVAL;
+
+	if (kbuf)
+		info = kbuf;
+	else if (__copy_from_user(infobuf, ubuf, count))
+		return -EFAULT;
+	else
+		info = infobuf;
+
+	set_tls_desc(target,
+		GDT_ENTRY_TLS_MIN + (pos / sizeof(struct ia32_user_desc)),
+		info, count / sizeof(struct ia32_user_desc));
+
+	return 0;
+}
+
+/*
+ * This should match arch/i386/kernel/ptrace.c:native_regsets.
+ * XXX ioperm? vm86?
+ */
+static const struct user_regset ia32_regsets[] = {
+	{
+		.core_note_type = NT_PRSTATUS,
+		.n = sizeof(struct user_regs_struct32)/4,
+		.size = 4, .align = 4,
+		.get = ia32_genregs_get, .set = ia32_genregs_set
+	},
+	{
+		.core_note_type = NT_PRFPREG,
+		.n = sizeof(struct ia32_user_i387_struct) / 4,
+		.size = 4, .align = 4,
+		.get = ia32_fpregs_get, .set = ia32_fpregs_set
+	},
+	{
+		.core_note_type = NT_PRXFPREG,
+		.n = sizeof(struct ia32_user_fxsr_struct) / 4,
+		.size = 4, .align = 4,
+		.get = ia32_fpxregs_get, .set = ia32_fpxregs_set
+	},
+	{
+		.core_note_type = NT_386_TLS,
+		.n = GDT_ENTRY_TLS_ENTRIES,
+		.bias = GDT_ENTRY_TLS_MIN,
+		.size = sizeof(struct ia32_user_desc),
+		.align = sizeof(struct ia32_user_desc),
+		.active = ia32_tls_active,
+		.get = ia32_tls_get, .set = ia32_tls_set,
+	},
+};
+
+const struct user_regset_view user_ia32_view = {
+	.name = "i386", .e_machine = EM_386,
+	.regsets = ia32_regsets, .n = ARRAY_SIZE(ia32_regsets)
+};
+
 long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, 
 			__u32 len_low, __u32 len_high, int advice)
 { 

+ 1 - 1
arch/ia64/kernel/Makefile

@@ -6,7 +6,7 @@ extra-y	:= head.o init_task.o vmlinux.lds
 
 obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
 	 irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o		\
-	 salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
+	 salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
 	 unwind.o mca.o mca_asm.o topology.o
 
 obj-$(CONFIG_IA64_BRL_EMU)	+= brl_emu.o

+ 3 - 1
arch/ia64/kernel/acpi.c

@@ -423,6 +423,7 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
 #define pxm_bit_set(bit)	(set_bit(bit,(void *)pxm_flag))
 #define pxm_bit_test(bit)	(test_bit(bit,(void *)pxm_flag))
 static struct acpi_table_slit __initdata *slit_table;
+cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
 
 static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
 {
@@ -482,6 +483,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	    (pa->apic_id << 8) | (pa->local_sapic_eid);
 	/* nid should be overridden as logical node id later */
 	node_cpuid[srat_num_cpus].nid = pxm;
+	cpu_set(srat_num_cpus, early_cpu_possible_map);
 	srat_num_cpus++;
 }
 
@@ -559,7 +561,7 @@ void __init acpi_numa_arch_fixup(void)
 	}
 
 	/* set logical node id in cpu structure */
-	for (i = 0; i < srat_num_cpus; i++)
+	for_each_possible_early_cpu(i)
 		node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
 
 	printk(KERN_INFO "Number of logical nodes in system = %d\n",

+ 13 - 0
arch/ia64/kernel/asm-offsets.c

@@ -7,6 +7,7 @@
 #define ASM_OFFSETS_C 1
 
 #include <linux/sched.h>
+#include <linux/pid.h>
 #include <linux/clocksource.h>
 
 #include <asm-ia64/processor.h>
@@ -34,17 +35,29 @@ void foo(void)
 	DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
 	DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
 
+	BUILD_BUG_ON(sizeof(struct upid) != 32);
+	DEFINE(IA64_UPID_SHIFT, 5);
+
 	BLANK();
 
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
+	DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
+	DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
+	DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime));
+#endif
 
 	BLANK();
 
 	DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
 	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
 	DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
+	DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
+	DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
+	DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
 	DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
 	DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
 	DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));

+ 36 - 20
arch/ia64/kernel/crash.c

@@ -24,6 +24,7 @@ int kdump_status[NR_CPUS];
 static atomic_t kdump_cpu_frozen;
 atomic_t kdump_in_progress;
 static int kdump_on_init = 1;
+static int kdump_on_fatal_mca = 1;
 
 static inline Elf64_Word
 *append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
@@ -118,6 +119,7 @@ machine_crash_shutdown(struct pt_regs *pt)
 static void
 machine_kdump_on_init(void)
 {
+	crash_save_vmcoreinfo();
 	local_irq_disable();
 	kexec_disable_iosapic();
 	machine_kexec(ia64_kimage);
@@ -148,7 +150,7 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
 	struct ia64_mca_notify_die *nd;
 	struct die_args *args = data;
 
-	if (!kdump_on_init)
+	if (!kdump_on_init && !kdump_on_fatal_mca)
 		return NOTIFY_DONE;
 
 	if (!ia64_kimage) {
@@ -173,32 +175,38 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
 		return NOTIFY_DONE;
 
 	switch (val) {
-		case DIE_INIT_MONARCH_PROCESS:
+	case DIE_INIT_MONARCH_PROCESS:
+		if (kdump_on_init) {
 			atomic_set(&kdump_in_progress, 1);
 			*(nd->monarch_cpu) = -1;
-			break;
-		case DIE_INIT_MONARCH_LEAVE:
+		}
+		break;
+	case DIE_INIT_MONARCH_LEAVE:
+		if (kdump_on_init)
 			machine_kdump_on_init();
-			break;
-		case DIE_INIT_SLAVE_LEAVE:
-			if (atomic_read(&kdump_in_progress))
-				unw_init_running(kdump_cpu_freeze, NULL);
-			break;
-		case DIE_MCA_RENDZVOUS_LEAVE:
-			if (atomic_read(&kdump_in_progress))
-				unw_init_running(kdump_cpu_freeze, NULL);
-			break;
-		case DIE_MCA_MONARCH_LEAVE:
-		     /* die_register->signr indicate if MCA is recoverable */
-			if (!args->signr)
-				machine_kdump_on_init();
-			break;
+		break;
+	case DIE_INIT_SLAVE_LEAVE:
+		if (atomic_read(&kdump_in_progress))
+			unw_init_running(kdump_cpu_freeze, NULL);
+		break;
+	case DIE_MCA_RENDZVOUS_LEAVE:
+		if (atomic_read(&kdump_in_progress))
+			unw_init_running(kdump_cpu_freeze, NULL);
+		break;
+	case DIE_MCA_MONARCH_LEAVE:
+		/* die_register->signr indicate if MCA is recoverable */
+		if (kdump_on_fatal_mca && !args->signr) {
+			atomic_set(&kdump_in_progress, 1);
+			*(nd->monarch_cpu) = -1;
+			machine_kdump_on_init();
+		}
+		break;
 	}
 	return NOTIFY_DONE;
 }
 
 #ifdef CONFIG_SYSCTL
-static ctl_table kdump_on_init_table[] = {
+static ctl_table kdump_ctl_table[] = {
 	{
 		.ctl_name = CTL_UNNUMBERED,
 		.procname = "kdump_on_init",
@@ -207,6 +215,14 @@ static ctl_table kdump_on_init_table[] = {
 		.mode = 0644,
 		.proc_handler = &proc_dointvec,
 	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "kdump_on_fatal_mca",
+		.data = &kdump_on_fatal_mca,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+	},
 	{ .ctl_name = 0 }
 };
 
@@ -215,7 +231,7 @@ static ctl_table sys_table[] = {
 	  .ctl_name = CTL_KERN,
 	  .procname = "kernel",
 	  .mode = 0555,
-	  .child = kdump_on_init_table,
+	  .child = kdump_ctl_table,
 	},
 	{ .ctl_name = 0 }
 };

+ 46 - 0
arch/ia64/kernel/efi.c

@@ -37,6 +37,7 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/mca.h>
+#include <asm/tlbflush.h>
 
 #define EFI_DEBUG	0
 
@@ -403,6 +404,41 @@ efi_get_pal_addr (void)
 	return NULL;
 }
 
+
+static u8 __init palo_checksum(u8 *buffer, u32 length)
+{
+	u8 sum = 0;
+	u8 *end = buffer + length;
+
+	while (buffer < end)
+		sum = (u8) (sum + *(buffer++));
+
+	return sum;
+}
+
+/*
+ * Parse and handle PALO table which is published at:
+ * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
+ */
+static void __init handle_palo(unsigned long palo_phys)
+{
+	struct palo_table *palo = __va(palo_phys);
+	u8  checksum;
+
+	if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) {
+		printk(KERN_INFO "PALO signature incorrect.\n");
+		return;
+	}
+
+	checksum = palo_checksum((u8 *)palo, palo->length);
+	if (checksum) {
+		printk(KERN_INFO "PALO checksum incorrect.\n");
+		return;
+	}
+
+	setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO);
+}
+
 void
 efi_map_pal_code (void)
 {
@@ -432,6 +468,7 @@ efi_init (void)
 	u64 efi_desc_size;
 	char *cp, vendor[100] = "unknown";
 	int i;
+	unsigned long palo_phys;
 
 	/*
 	 * It's too early to be able to use the standard kernel command line
@@ -496,6 +533,8 @@ efi_init (void)
 	efi.hcdp       = EFI_INVALID_TABLE_ADDR;
 	efi.uga        = EFI_INVALID_TABLE_ADDR;
 
+	palo_phys      = EFI_INVALID_TABLE_ADDR;
+
 	for (i = 0; i < (int) efi.systab->nr_tables; i++) {
 		if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
 			efi.mps = config_tables[i].table;
@@ -515,10 +554,17 @@ efi_init (void)
 		} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
 			efi.hcdp = config_tables[i].table;
 			printk(" HCDP=0x%lx", config_tables[i].table);
+		} else if (efi_guidcmp(config_tables[i].guid,
+			 PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) {
+			palo_phys = config_tables[i].table;
+			printk(" PALO=0x%lx", config_tables[i].table);
 		}
 	}
 	printk("\n");
 
+	if (palo_phys != EFI_INVALID_TABLE_ADDR)
+		handle_palo(palo_phys);
+
 	runtime = __va(efi.systab->runtime);
 	efi.get_time = phys_get_time;
 	efi.set_time = phys_set_time;

+ 65 - 0
arch/ia64/kernel/entry.S

@@ -710,6 +710,16 @@ ENTRY(ia64_leave_syscall)
 (pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
 #endif
 .work_processed_syscall:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	adds r2=PT(LOADRS)+16,r12
+(pUStk)	mov.m r22=ar.itc			// fetch time at leave
+	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
+	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
+	adds r3=PT(AR_BSPSTORE)+16,r12		// deferred
+	;;
+#else
 	adds r2=PT(LOADRS)+16,r12
 	adds r3=PT(AR_BSPSTORE)+16,r12
 	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -718,6 +728,7 @@ ENTRY(ia64_leave_syscall)
 	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
 	nop.i 0
 	;;
+#endif
 	mov r16=ar.bsp				// M2  get existing backing store pointer
 	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
 (p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
@@ -737,12 +748,21 @@ ENTRY(ia64_leave_syscall)
 
 	ld8 r29=[r2],16		// M0|1 load cr.ipsr
 	ld8 r28=[r3],16		// M0|1 load cr.iip
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+	;;
+	ld8 r30=[r2],16		// M0|1 load cr.ifs
+	ld8 r25=[r3],16		// M0|1 load ar.unat
+(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+	;;
+#else
 	mov r22=r0		// A    clear r22
 	;;
 	ld8 r30=[r2],16		// M0|1 load cr.ifs
 	ld8 r25=[r3],16		// M0|1 load ar.unat
 (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
 	;;
+#endif
 	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
 (pKStk)	mov r22=psr			// M2   read PSR now that interrupts are disabled
 	nop 0
@@ -759,7 +779,11 @@ ENTRY(ia64_leave_syscall)
 	ld8.fill r1=[r3],16			// M0|1 load r1
 (pUStk) mov r17=1				// A
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) st1 [r15]=r17				// M2|3
+#else
 (pUStk) st1 [r14]=r17				// M2|3
+#endif
 	ld8.fill r13=[r3],16			// M0|1
 	mov f8=f0				// F    clear f8
 	;;
@@ -775,12 +799,22 @@ ENTRY(ia64_leave_syscall)
 	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
 	cover				// B    add current frame into dirty partition & set cr.ifs
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	mov r19=ar.bsp			// M2   get new backing store pointer
+	st8 [r14]=r22			// M	save time at leave
+	mov f10=f0			// F    clear f10
+
+	mov r22=r0			// A	clear r22
+	movl r14=__kernel_syscall_via_epc // X
+	;;
+#else
 	mov r19=ar.bsp			// M2   get new backing store pointer
 	mov f10=f0			// F    clear f10
 
 	nop.m 0
 	movl r14=__kernel_syscall_via_epc // X
 	;;
+#endif
 	mov.m ar.csd=r0			// M2   clear ar.csd
 	mov.m ar.ccv=r0			// M2   clear ar.ccv
 	mov b7=r14			// I0   clear b7 (hint with __kernel_syscall_via_epc)
@@ -913,10 +947,18 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	adds r16=PT(CR_IPSR)+16,r12
 	adds r17=PT(CR_IIP)+16,r12
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	.pred.rel.mutex pUStk,pKStk
+(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
+(pUStk)	mov.m r22=ar.itc	// M  fetch time at leave
+	nop.i 0
+	;;
+#else
 (pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
 	nop.i 0
 	nop.i 0
 	;;
+#endif
 	ld8 r29=[r16],16	// load cr.ipsr
 	ld8 r28=[r17],16	// load cr.iip
 	;;
@@ -938,15 +980,37 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	;;
 	ld8.fill r12=[r16],16
 	ld8.fill r13=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk)	adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
+#else
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
+#endif
 	;;
 	ld8 r20=[r16],16	// ar.fpsr
 	ld8.fill r15=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18	// deferred
+#endif
 	;;
 	ld8.fill r14=[r16],16
 	ld8.fill r2=[r17]
 (pUStk)	mov r17=1
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	//  mmi_ :  ld8 st1 shr;;         mmi_ : st8 st1 shr;;
+	//  mib  :  mov add br        ->  mib  : ld8 add br
+	//  bbb_ :  br  nop cover;;       mbb_ : mov br  cover;;
+	//
+	//  no one require bsp in r16 if (pKStk) branch is selected.
+(pUStk)	st8 [r3]=r22		// save time at leave
+(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
+	shr.u r18=r19,16	// get byte size of existing "dirty" partition
+	;;
+	ld8.fill r3=[r16]	// deferred
+	LOAD_PHYS_STACK_REG_SIZE(r17)
+(pKStk)	br.cond.dpnt skip_rbs_switch
+	mov r16=ar.bsp		// get existing backing store pointer
+#else
 	ld8.fill r3=[r16]
 (pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
 	shr.u r18=r19,16	// get byte size of existing "dirty" partition
@@ -954,6 +1018,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	mov r16=ar.bsp		// get existing backing store pointer
 	LOAD_PHYS_STACK_REG_SIZE(r17)
 (pKStk)	br.cond.dpnt skip_rbs_switch
+#endif
 
 	/*
 	 * Restore user backing store.

+ 68 - 20
arch/ia64/kernel/fsys.S

@@ -61,13 +61,29 @@ ENTRY(fsys_getpid)
 	.prologue
 	.altrp b6
 	.body
+	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
+	;;
+	ld8 r17=[r17]				// r17 = current->group_leader
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
 	;;
 	ld4 r9=[r9]
-	add r8=IA64_TASK_TGID_OFFSET,r16
+	add r17=IA64_TASK_TGIDLINK_OFFSET,r17
 	;;
 	and r9=TIF_ALLWORK_MASK,r9
-	ld4 r8=[r8]				// r8 = current->tgid
+	ld8 r17=[r17]				// r17 = current->group_leader->pids[PIDTYPE_PID].pid
+	;;
+	add r8=IA64_PID_LEVEL_OFFSET,r17
+	;;
+	ld4 r8=[r8]				// r8 = pid->level
+	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
+	;;
+	shl r8=r8,IA64_UPID_SHIFT
+	;;
+	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
+	;;
+	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
+	;;
+	mov r17=0
 	;;
 	cmp.ne p8,p0=0,r9
 (p8)	br.spnt.many fsys_fallback_syscall
@@ -126,15 +142,25 @@ ENTRY(fsys_set_tid_address)
 	.altrp b6
 	.body
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+	add r17=IA64_TASK_TGIDLINK_OFFSET,r16
 	;;
 	ld4 r9=[r9]
 	tnat.z p6,p7=r32		// check argument register for being NaT
+	ld8 r17=[r17]				// r17 = current->pids[PIDTYPE_PID].pid
 	;;
 	and r9=TIF_ALLWORK_MASK,r9
-	add r8=IA64_TASK_PID_OFFSET,r16
+	add r8=IA64_PID_LEVEL_OFFSET,r17
 	add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
 	;;
-	ld4 r8=[r8]
+	ld4 r8=[r8]				// r8 = pid->level
+	add r17=IA64_PID_UPID_OFFSET,r17	// r17 = &pid->numbers[0]
+	;;
+	shl r8=r8,IA64_UPID_SHIFT
+	;;
+	add r17=r17,r8				// r17 = &pid->numbers[pid->level]
+	;;
+	ld4 r8=[r17]				// r8 = pid->numbers[pid->level].nr
+	;;
 	cmp.ne p8,p0=0,r9
 	mov r17=-1
 	;;
@@ -210,27 +236,25 @@ ENTRY(fsys_gettimeofday)
 	// Note that instructions are optimized for McKinley. McKinley can
 	// process two bundles simultaneously and therefore we continuously
 	// try to feed the CPU two bundles and then a stop.
-	//
-	// Additional note that code has changed a lot. Optimization is TBD.
-	// Comments begin with "?" are maybe outdated.
-	tnat.nz p6,p0 = r31	// ? branch deferred to fit later bundle
-	mov pr = r30,0xc000	// Set predicates according to function
+
 	add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
+	tnat.nz p6,p0 = r31		// guard against Nat argument
+(p6)	br.cond.spnt.few .fail_einval
 	movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
 	;;
+	ld4 r2 = [r2]			// process work pending flags
 	movl r29 = itc_jitter_data	// itc_jitter
 	add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20	// wall_time
-	ld4 r2 = [r2]		// process work pending flags
-	;;
-(p15)	add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20	// monotonic_time
 	add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
-	add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+	mov pr = r30,0xc000	// Set predicates according to function
+	;;
 	and r2 = TIF_ALLWORK_MASK,r2
-(p6)    br.cond.spnt.few .fail_einval	// ? deferred branch
+	add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+(p15)	add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20	// monotonic_time
 	;;
-	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
+	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20	// clksrc_cycle_last
 	cmp.ne p6, p0 = 0, r2	// Fallback if work is scheduled
-(p6)    br.cond.spnt.many fsys_fallback_syscall
+(p6)	br.cond.spnt.many fsys_fallback_syscall
 	;;
 	// Begin critical section
 .time_redo:
@@ -258,7 +282,6 @@ ENTRY(fsys_gettimeofday)
 (p8)	mov r2 = ar.itc		// CPU_TIMER. 36 clocks latency!!!
 (p9)	ld8 r2 = [r30]		// MMIO_TIMER. Could also have latency issues..
 (p13)	ld8 r25 = [r19]		// get itc_lastcycle value
-	;;		// ? could be removed by moving the last add upward
 	ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_sec
 	;;
 	ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET	// tv_nsec
@@ -285,13 +308,12 @@ ENTRY(fsys_gettimeofday)
 EX(.fail_efault, probe.w.fault r31, 3)
 	xmpy.l f8 = f8,f7	// nsec_per_cyc*(counter-last_counter)
 	;;
-	// ? simulate tbit.nz.or p7,p0 = r28,0
 	getf.sig r2 = f8
 	mf
 	;;
 	ld4 r10 = [r20]		// gtod_lock.sequence
 	shr.u r2 = r2,r23	// shift by factor
-	;;		// ? overloaded 3 bundles!
+	;;
 	add r8 = r8,r2		// Add xtime.nsecs
 	cmp4.ne p7,p0 = r28,r10
 (p7)	br.cond.dpnt.few .time_redo	// sequence number changed, redo
@@ -319,9 +341,9 @@ EX(.fail_efault, probe.w.fault r31, 3)
 EX(.fail_efault, probe.w.fault r23, 3)	// This also costs 5 cycles
 (p14)	xmpy.hu f8 = f8, f7		// xmpy has 5 cycles latency so use it
 	;;
-	mov r8 = r0
 (p14)	getf.sig r2 = f8
 	;;
+	mov r8 = r0
 (p14)	shr.u r21 = r2, 4
 	;;
 EX(.fail_efault, st8 [r31] = r9)
@@ -660,7 +682,11 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	nop.i 0
 	;;
 	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	mov.m r30=ar.itc			// M    get cycle for accounting
+#else
 	nop.m 0
+#endif
 	nop.i 0
 	;;
 	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
@@ -682,6 +708,28 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	// mov.m r30=ar.itc is called in advance
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at leave kernel
+	;;
+	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
+	ld8 r21=[r17]				// cumulated utime
+	sub r22=r19,r18				// stime before leave kernel
+	;;
+	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// update stamp
+	sub r18=r30,r19				// elapsed time in user mode
+	;;
+	add r20=r20,r22				// sum stime
+	add r21=r21,r18				// sum utime
+	;;
+	st8 [r16]=r20				// update stime
+	st8 [r17]=r21				// update utime
+	;;
+#endif
 	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
 	mov rp=r14				// I0   set the real return addr
 	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A

+ 20 - 0
arch/ia64/kernel/head.S

@@ -1002,6 +1002,26 @@ GLOBAL_ENTRY(sched_clock)
 	br.ret.sptk.many rp
 END(sched_clock)
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+GLOBAL_ENTRY(cycle_to_cputime)
+	alloc r16=ar.pfs,1,0,0,0
+	addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+	;;
+	ldf8 f8=[r8]
+	;;
+	setf.sig f9=r32
+	;;
+	xmpy.lu f10=f9,f8	// calculate low 64 bits of 128-bit product	(4 cyc)
+	xmpy.hu f11=f9,f8	// calculate high 64 bits of 128-bit product
+	;;
+	getf.sig r8=f10		//						(5 cyc)
+	getf.sig r9=f11
+	;;
+	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
+	br.ret.sptk.many rp
+END(cycle_to_cputime)
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
 GLOBAL_ENTRY(start_kernel_thread)
 	.prologue
 	.save rp, r0				// this is the end of the call-chain

+ 0 - 6
arch/ia64/kernel/ia64_ksyms.c

@@ -19,12 +19,6 @@ EXPORT_SYMBOL_GPL(empty_zero_page);
 EXPORT_SYMBOL(ip_fast_csum);		/* hand-coded assembly */
 EXPORT_SYMBOL(csum_ipv6_magic);
 
-#include <asm/semaphore.h>
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__up);
-
 #include <asm/page.h>
 EXPORT_SYMBOL(clear_page);
 

+ 1 - 1
arch/ia64/kernel/irq_ia64.c

@@ -472,7 +472,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 			static unsigned char count;
 			static long last_time;
 
-			if (jiffies - last_time > 5*HZ)
+			if (time_after(jiffies, last_time + 5 * HZ))
 				count = 0;
 			if (++count < 5) {
 				last_time = jiffies;

+ 69 - 0
arch/ia64/kernel/ivt.S

@@ -805,8 +805,13 @@ ENTRY(break_fault)
 
 (p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
 (p9)	adds r8=1,r8				// A    increment ei to next slot
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	;;
+	mov b6=r30				// I0   setup syscall handler branch reg early
+#else
 	nop.i 0
 	;;
+#endif
 
 	mov.m r25=ar.unat			// M2 (5 cyc)
 	dep r29=r8,r29,41,2			// I0   insert new ei into cr.ipsr
@@ -817,7 +822,11 @@ ENTRY(break_fault)
 	//
 ///////////////////////////////////////////////////////////////////////
 	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	mov.m r30=ar.itc			// M    get cycle for accounting
+#else
 	mov b6=r30				// I0   setup syscall handler branch reg early
+#endif
 	cmp.eq pKStk,pUStk=r0,r17		// A    were we on kernel stacks already?
 
 	and r9=_TIF_SYSCALL_TRACEAUDIT,r9	// A    mask trace or audit
@@ -829,6 +838,30 @@ ENTRY(break_fault)
 	cmp.eq p14,p0=r9,r0			// A    are syscalls being traced/audited?
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 1:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	// mov.m r30=ar.itc is called in advance, and r13 is current
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13	// A
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13	// A
+(pKStk)	br.cond.spnt .skip_accounting		// B	unlikely skip
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// M  get last stamp
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// M  time at leave
+	;;
+	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// M  cumulated stime
+	ld8 r21=[r17]				// M  cumulated utime
+	sub r22=r19,r18				// A  stime before leave
+	;;
+	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// M  update stamp
+	sub r18=r30,r19				// A  elapsed time in user
+	;;
+	add r20=r20,r22				// A  sum stime
+	add r21=r21,r18				// A  sum utime
+	;;
+	st8 [r16]=r20				// M  update stime
+	st8 [r17]=r21				// M  update utime
+	;;
+.skip_accounting:
+#endif
 	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
 	nop 0
 	bsw.1					// B (6 cyc) regs are saved, switch to bank 1
@@ -928,6 +961,7 @@ END(interrupt)
 	 *	- r27: saved ar.rsc
 	 *	- r28: saved cr.iip
 	 *	- r29: saved cr.ipsr
+	 *	- r30: ar.itc for accounting (don't touch)
 	 *	- r31: saved pr
 	 *	-  b0: original contents (to be saved)
 	 * On exit:
@@ -1090,6 +1124,41 @@ END(dispatch_illegal_op_fault)
 	DBG_FAULT(16)
 	FAULT(16)
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	/*
+	 * There is no particular reason for this code to be here, other than
+	 * that there happens to be space here that would go unused otherwise.
+	 * If this fault ever gets "unreserved", simply moved the following
+	 * code to a more suitable spot...
+	 *
+	 * account_sys_enter is called from SAVE_MIN* macros if accounting is
+	 * enabled and if the macro is entered from user mode.
+	 */
+ENTRY(account_sys_enter)
+	// mov.m r20=ar.itc is called in advance, and r13 is current
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at left from kernel
+        ;;
+	ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
+	ld8 r21=[r17]				// cumulated utime
+	sub r22=r19,r18				// stime before leave kernel
+	;;
+	st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP	// update stamp
+	sub r18=r20,r19				// elapsed time in user mode
+	;;
+	add r23=r23,r22				// sum stime
+	add r21=r21,r18				// sum utime
+	;;
+	st8 [r16]=r23				// update stime
+	st8 [r17]=r21				// update utime
+	;;
+	br.ret.sptk.many rp
+END(account_sys_enter)
+#endif
+
 	.org ia64_ivt+0x4400
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x4400 Entry 17 (size 64 bundles) Reserved

+ 107 - 26
arch/ia64/kernel/kprobes.c

@@ -78,6 +78,20 @@ static enum instruction_type bundle_encoding[32][3] = {
   { u, u, u },  			/* 1F */
 };
 
+/* Insert a long branch code */
+static void __kprobes set_brl_inst(void *from, void *to)
+{
+	s64 rel = ((s64) to - (s64) from) >> 4;
+	bundle_t *brl;
+	brl = (bundle_t *) ((u64) from & ~0xf);
+	brl->quad0.template = 0x05;	/* [MLX](stop) */
+	brl->quad0.slot0 = NOP_M_INST;	/* nop.m 0x0 */
+	brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2;
+	brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46);
+	/* brl.cond.sptk.many.clr rel<<4 (qp=0) */
+	brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff);
+}
+
 /*
  * In this function we check to see if the instruction
  * is IP relative instruction and update the kprobe
@@ -496,6 +510,77 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 	regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
 }
 
+/* Check the instruction in the slot is break */
+static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot)
+{
+	unsigned int major_opcode;
+	unsigned int template = bundle->quad0.template;
+	unsigned long kprobe_inst;
+
+	/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
+	if (slot == 1 && bundle_encoding[template][1] == L)
+		slot++;
+
+	/* Get Kprobe probe instruction at given slot*/
+	get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
+
+	/* For break instruction,
+	 * Bits 37:40 Major opcode to be zero
+	 * Bits 27:32 X6 to be zero
+	 * Bits 32:35 X3 to be zero
+	 */
+	if (major_opcode || ((kprobe_inst >> 27) & 0x1FF)) {
+		/* Not a break instruction */
+		return 0;
+	}
+
+	/* Is a break instruction */
+	return 1;
+}
+
+/*
+ * In this function, we check whether the target bundle modifies IP or
+ * it triggers an exception. If so, it cannot be boostable.
+ */
+static int __kprobes can_boost(bundle_t *bundle, uint slot,
+			       unsigned long bundle_addr)
+{
+	unsigned int template = bundle->quad0.template;
+
+	do {
+		if (search_exception_tables(bundle_addr + slot) ||
+		    __is_ia64_break_inst(bundle, slot))
+			return 0;	/* exception may occur in this bundle*/
+	} while ((++slot) < 3);
+	template &= 0x1e;
+	if (template >= 0x10 /* including B unit */ ||
+	    template == 0x04 /* including X unit */ ||
+	    template == 0x06) /* undefined */
+		return 0;
+
+	return 1;
+}
+
+/* Prepare long jump bundle and disables other boosters if need */
+static void __kprobes prepare_booster(struct kprobe *p)
+{
+	unsigned long addr = (unsigned long)p->addr & ~0xFULL;
+	unsigned int slot = (unsigned long)p->addr & 0xf;
+	struct kprobe *other_kp;
+
+	if (can_boost(&p->ainsn.insn[0].bundle, slot, addr)) {
+		set_brl_inst(&p->ainsn.insn[1].bundle, (bundle_t *)addr + 1);
+		p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE;
+	}
+
+	/* disables boosters in previous slots */
+	for (; addr < (unsigned long)p->addr; addr++) {
+		other_kp = get_kprobe((void *)addr);
+		if (other_kp)
+			other_kp->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE;
+	}
+}
+
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
 	unsigned long addr = (unsigned long) p->addr;
@@ -530,6 +615,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 
 	prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp);
 
+	prepare_booster(p);
+
 	return 0;
 }
 
@@ -543,7 +630,9 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
 	src = &p->opcode.bundle;
 
 	flush_icache_range((unsigned long)p->ainsn.insn,
-			(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
+			   (unsigned long)p->ainsn.insn +
+			   sizeof(kprobe_opcode_t) * MAX_INSN_SIZE);
+
 	switch (p->ainsn.slot) {
 		case 0:
 			dest->quad0.slot0 = src->quad0.slot0;
@@ -584,13 +673,13 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn, 0);
+	free_insn_slot(p->ainsn.insn, p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
 	mutex_unlock(&kprobe_mutex);
 }
 /*
  * We are resuming execution after a single step fault, so the pt_regs
  * structure reflects the register state after we executed the instruction
- * located in the kprobe (p->ainsn.insn.bundle).  We still need to adjust
+ * located in the kprobe (p->ainsn.insn->bundle).  We still need to adjust
  * the ip to point back to the original stack address. To set the IP address
  * to original stack address, handle the case where we need to fixup the
  * relative IP address and/or fixup branch register.
@@ -607,7 +696,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
 	if (slot == 1 && bundle_encoding[template][1] == L)
 		slot = 2;
 
-	if (p->ainsn.inst_flag) {
+	if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) {
 
 		if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
 			/* Fix relative IP address */
@@ -686,33 +775,12 @@ static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
 static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
 {
 	unsigned int slot = ia64_psr(regs)->ri;
-	unsigned int template, major_opcode;
-	unsigned long kprobe_inst;
 	unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip;
 	bundle_t bundle;
 
 	memcpy(&bundle, kprobe_addr, sizeof(bundle_t));
-	template = bundle.quad0.template;
-
-	/* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
-	if (slot == 1 && bundle_encoding[template][1] == L)
-		slot++;
 
-	/* Get Kprobe probe instruction at given slot*/
-	get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode);
-
-	/* For break instruction,
-	 * Bits 37:40 Major opcode to be zero
-	 * Bits 27:32 X6 to be zero
-	 * Bits 32:35 X3 to be zero
-	 */
-	if (major_opcode || ((kprobe_inst >> 27) & 0x1FF) ) {
-		/* Not a break instruction */
-		return 0;
-	}
-
-	/* Is a break instruction */
-	return 1;
+	return __is_ia64_break_inst(&bundle, slot);
 }
 
 static int __kprobes pre_kprobes_handler(struct die_args *args)
@@ -802,6 +870,19 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 		return 1;
 
 ss_probe:
+#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
+	if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) {
+		/* Boost up -- we can execute copied instructions directly */
+		ia64_psr(regs)->ri = p->ainsn.slot;
+		regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL;
+		/* turn single stepping off */
+		ia64_psr(regs)->ss = 0;
+
+		reset_current_kprobe();
+		preempt_enable_no_resched();
+		return 1;
+	}
+#endif
 	prepare_ss(p, regs);
 	kcb->kprobe_status = KPROBE_HIT_SS;
 	return 1;

+ 54 - 6
arch/ia64/kernel/mca.c

@@ -69,6 +69,7 @@
  * 2007-04-27 Russ Anderson <rja@sgi.com>
  *	      Support multiple cpus going through OS_MCA in the same event.
  */
+#include <linux/jiffies.h>
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/sched.h>
@@ -97,6 +98,7 @@
 
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
+#include <asm/tlb.h>
 
 #include "mca_drv.h"
 #include "entry.h"
@@ -112,6 +114,7 @@ DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
 DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
 DEFINE_PER_CPU(u64, ia64_mca_pal_pte);	    /* PTE to map PAL code */
 DEFINE_PER_CPU(u64, ia64_mca_pal_base);    /* vaddr PAL code granule */
+DEFINE_PER_CPU(u64, ia64_mca_tr_reload);   /* Flag for TR reload */
 
 unsigned long __per_cpu_mca[NR_CPUS];
 
@@ -293,7 +296,8 @@ static void ia64_mlogbuf_dump_from_init(void)
 	if (mlogbuf_finished)
 		return;
 
-	if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) {
+	if (mlogbuf_timestamp &&
+			time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) {
 		printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
 			" and the system seems to be messed up.\n");
 		ia64_mlogbuf_finish(0);
@@ -1182,6 +1186,49 @@ all_in:
 	return;
 }
 
+/*  mca_insert_tr
+ *
+ *  Switch rid when TR reload and needed!
+ *  iord: 1: itr, 2: itr;
+ *
+*/
+static void mca_insert_tr(u64 iord)
+{
+
+	int i;
+	u64 old_rr;
+	struct ia64_tr_entry *p;
+	unsigned long psr;
+	int cpu = smp_processor_id();
+
+	psr = ia64_clear_ic();
+	for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) {
+		p = &__per_cpu_idtrs[cpu][iord-1][i];
+		if (p->pte & 0x1) {
+			old_rr = ia64_get_rr(p->ifa);
+			if (old_rr != p->rr) {
+				ia64_set_rr(p->ifa, p->rr);
+				ia64_srlz_d();
+			}
+			ia64_ptr(iord, p->ifa, p->itir >> 2);
+			ia64_srlz_i();
+			if (iord & 0x1) {
+				ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2);
+				ia64_srlz_i();
+			}
+			if (iord & 0x2) {
+				ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2);
+				ia64_srlz_i();
+			}
+			if (old_rr != p->rr) {
+				ia64_set_rr(p->ifa, old_rr);
+				ia64_srlz_d();
+			}
+		}
+	}
+	ia64_set_psr(psr);
+}
+
 /*
  * ia64_mca_handler
  *
@@ -1266,16 +1313,17 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 	} else {
 		/* Dump buffered message to console */
 		ia64_mlogbuf_finish(1);
-#ifdef CONFIG_KEXEC
-		atomic_set(&kdump_in_progress, 1);
-		monarch_cpu = -1;
-#endif
 	}
+
+	if (__get_cpu_var(ia64_mca_tr_reload)) {
+		mca_insert_tr(0x1); /*Reload dynamic itrs*/
+		mca_insert_tr(0x2); /*Reload dynamic itrs*/
+	}
+
 	if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
 			== NOTIFY_STOP)
 		ia64_mca_spin(__func__);
 
-
 	if (atomic_dec_return(&mca_count) > 0) {
 		int i;
 

+ 5 - 0
arch/ia64/kernel/mca_asm.S

@@ -219,8 +219,13 @@ ia64_reload_tr:
 	mov r20=IA64_TR_CURRENT_STACK
 	;;
 	itr.d dtr[r20]=r16
+	GET_THIS_PADDR(r2, ia64_mca_tr_reload)
+	mov r18 = 1
 	;;
 	srlz.d
+	;;
+	st8 [r2] =r18
+	;;
 
 done_tlb_purge_and_reload:
 

+ 14 - 0
arch/ia64/kernel/minstate.h

@@ -3,6 +3,18 @@
 
 #include "entry.h"
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/* read ar.itc in advance, and use it before leaving bank 0 */
+#define ACCOUNT_GET_STAMP				\
+(pUStk) mov.m r20=ar.itc;
+#define ACCOUNT_SYS_ENTER				\
+(pUStk) br.call.spnt rp=account_sys_enter		\
+	;;
+#else
+#define ACCOUNT_GET_STAMP
+#define ACCOUNT_SYS_ENTER
+#endif
+
 /*
  * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
  * the minimum state necessary that allows us to turn psr.ic back
@@ -122,11 +134,13 @@
 	;;											\
 .mem.offset 0,0; st8.spill [r16]=r2,16;								\
 .mem.offset 8,0; st8.spill [r17]=r3,16;								\
+	ACCOUNT_GET_STAMP									\
 	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
 	;;											\
 	EXTRA;											\
 	movl r1=__gp;		/* establish kernel global pointer */				\
 	;;											\
+	ACCOUNT_SYS_ENTER									\
 	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
 	;;
 

+ 1 - 1
arch/ia64/kernel/numa.c

@@ -73,7 +73,7 @@ void __init build_cpu_to_node_map(void)
 	for(node=0; node < MAX_NUMNODES; node++)
 		cpus_clear(node_to_cpu_mask[node]);
 
-	for(cpu = 0; cpu < NR_CPUS; ++cpu) {
+	for_each_possible_early_cpu(cpu) {
 		node = -1;
 		for (i = 0; i < NR_CPUS; ++i)
 			if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {

+ 4 - 4
arch/ia64/kernel/patch.c

@@ -135,10 +135,10 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
 
 	while (offp < (s32 *) end) {
 		wp = (u64 *) ia64_imva((char *) offp + *offp);
-		wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
-		wp[1] = 0x0004000000000200UL;
-		wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
-		wp[3] = 0x0084006880000200UL;
+		wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
+		wp[1] = 0x0084006880000200UL;
+		wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
+		wp[3] = 0x0004000000000200UL;
 		ia64_fc(wp); ia64_fc(wp + 2);
 		++offp;
 	}

+ 2 - 2
arch/ia64/kernel/perfmon.c

@@ -4204,10 +4204,10 @@ pfm_check_task_exist(pfm_context_t *ctx)
 	do_each_thread (g, t) {
 		if (t->thread.pfm_context == ctx) {
 			ret = 0;
-			break;
+			goto out;
 		}
 	} while_each_thread (g, t);
-
+out:
 	read_unlock(&tasklist_lock);
 
 	DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));

+ 0 - 30
arch/ia64/kernel/process.c

@@ -625,42 +625,12 @@ do_dump_fpu (struct unw_frame_info *info, void *arg)
 	do_dump_task_fpu(current, info, arg);
 }
 
-int
-dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
-{
-	struct unw_frame_info tcore_info;
-
-	if (current == task) {
-		unw_init_running(do_copy_regs, regs);
-	} else {
-		memset(&tcore_info, 0, sizeof(tcore_info));
-		unw_init_from_blocked_task(&tcore_info, task);
-		do_copy_task_regs(task, &tcore_info, regs);
-	}
-	return 1;
-}
-
 void
 ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
 {
 	unw_init_running(do_copy_regs, dst);
 }
 
-int
-dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
-{
-	struct unw_frame_info tcore_info;
-
-	if (current == task) {
-		unw_init_running(do_dump_fpu, dst);
-	} else {
-		memset(&tcore_info, 0, sizeof(tcore_info));
-		unw_init_from_blocked_task(&tcore_info, task);
-		do_dump_task_fpu(task, &tcore_info, dst);
-	}
-	return 1;
-}
-
 int
 dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
 {

+ 895 - 322
arch/ia64/kernel/ptrace.c

@@ -3,6 +3,9 @@
  *
  * Copyright (C) 1999-2005 Hewlett-Packard Co
  *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2006 Intel Co
+ *  2006-08-12	- IA64 Native Utrace implementation support added by
+ *	Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  *
  * Derived from the x86 and Alpha versions.
  */
@@ -17,6 +20,8 @@
 #include <linux/security.h>
 #include <linux/audit.h>
 #include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -740,25 +745,6 @@ ia64_sync_fph (struct task_struct *task)
 	psr->dfh = 1;
 }
 
-static int
-access_fr (struct unw_frame_info *info, int regnum, int hi,
-	   unsigned long *data, int write_access)
-{
-	struct ia64_fpreg fpval;
-	int ret;
-
-	ret = unw_get_fr(info, regnum, &fpval);
-	if (ret < 0)
-		return ret;
-
-	if (write_access) {
-		fpval.u.bits[hi] = *data;
-		ret = unw_set_fr(info, regnum, fpval);
-	} else
-		*data = fpval.u.bits[hi];
-	return ret;
-}
-
 /*
  * Change the machine-state of CHILD such that it will return via the normal
  * kernel exit-path, rather than the syscall-exit path.
@@ -860,309 +846,7 @@ access_nat_bits (struct task_struct *child, struct pt_regs *pt,
 
 static int
 access_uarea (struct task_struct *child, unsigned long addr,
-	      unsigned long *data, int write_access)
-{
-	unsigned long *ptr, regnum, urbs_end, cfm;
-	struct switch_stack *sw;
-	struct pt_regs *pt;
-#	define pt_reg_addr(pt, reg)	((void *)			    \
-					 ((unsigned long) (pt)		    \
-					  + offsetof(struct pt_regs, reg)))
-
-
-	pt = task_pt_regs(child);
-	sw = (struct switch_stack *) (child->thread.ksp + 16);
-
-	if ((addr & 0x7) != 0) {
-		dprintk("ptrace: unaligned register address 0x%lx\n", addr);
-		return -1;
-	}
-
-	if (addr < PT_F127 + 16) {
-		/* accessing fph */
-		if (write_access)
-			ia64_sync_fph(child);
-		else
-			ia64_flush_fph(child);
-		ptr = (unsigned long *)
-			((unsigned long) &child->thread.fph + addr);
-	} else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) {
-		/* scratch registers untouched by kernel (saved in pt_regs) */
-		ptr = pt_reg_addr(pt, f10) + (addr - PT_F10);
-	} else if (addr >= PT_F12 && addr < PT_F15 + 16) {
-		/*
-		 * Scratch registers untouched by kernel (saved in
-		 * switch_stack).
-		 */
-		ptr = (unsigned long *) ((long) sw
-					 + (addr - PT_NAT_BITS - 32));
-	} else if (addr < PT_AR_LC + 8) {
-		/* preserved state: */
-		struct unw_frame_info info;
-		char nat = 0;
-		int ret;
-
-		unw_init_from_blocked_task(&info, child);
-		if (unw_unwind_to_user(&info) < 0)
-			return -1;
-
-		switch (addr) {
-		      case PT_NAT_BITS:
-			return access_nat_bits(child, pt, &info,
-					       data, write_access);
-
-		      case PT_R4: case PT_R5: case PT_R6: case PT_R7:
-			if (write_access) {
-				/* read NaT bit first: */
-				unsigned long dummy;
-
-				ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4,
-						 &dummy, &nat);
-				if (ret < 0)
-					return ret;
-			}
-			return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data,
-					     &nat, write_access);
-
-		      case PT_B1: case PT_B2: case PT_B3:
-		      case PT_B4: case PT_B5:
-			return unw_access_br(&info, (addr - PT_B1)/8 + 1, data,
-					     write_access);
-
-		      case PT_AR_EC:
-			return unw_access_ar(&info, UNW_AR_EC, data,
-					     write_access);
-
-		      case PT_AR_LC:
-			return unw_access_ar(&info, UNW_AR_LC, data,
-					     write_access);
-
-		      default:
-			if (addr >= PT_F2 && addr < PT_F5 + 16)
-				return access_fr(&info, (addr - PT_F2)/16 + 2,
-						 (addr & 8) != 0, data,
-						 write_access);
-			else if (addr >= PT_F16 && addr < PT_F31 + 16)
-				return access_fr(&info,
-						 (addr - PT_F16)/16 + 16,
-						 (addr & 8) != 0,
-						 data, write_access);
-			else {
-				dprintk("ptrace: rejecting access to register "
-					"address 0x%lx\n", addr);
-				return -1;
-			}
-		}
-	} else if (addr < PT_F9+16) {
-		/* scratch state */
-		switch (addr) {
-		      case PT_AR_BSP:
-			/*
-			 * By convention, we use PT_AR_BSP to refer to
-			 * the end of the user-level backing store.
-			 * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
-			 * to get the real value of ar.bsp at the time
-			 * the kernel was entered.
-			 *
-			 * Furthermore, when changing the contents of
-			 * PT_AR_BSP (or PT_CFM) while the task is
-			 * blocked in a system call, convert the state
-			 * so that the non-system-call exit
-			 * path is used.  This ensures that the proper
-			 * state will be picked up when resuming
-			 * execution.  However, it *also* means that
-			 * once we write PT_AR_BSP/PT_CFM, it won't be
-			 * possible to modify the syscall arguments of
-			 * the pending system call any longer.  This
-			 * shouldn't be an issue because modifying
-			 * PT_AR_BSP/PT_CFM generally implies that
-			 * we're either abandoning the pending system
-			 * call or that we defer it's re-execution
-			 * (e.g., due to GDB doing an inferior
-			 * function call).
-			 */
-			urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
-			if (write_access) {
-				if (*data != urbs_end) {
-					if (in_syscall(pt))
-						convert_to_non_syscall(child,
-								       pt,
-								       cfm);
-					/*
-					 * Simulate user-level write
-					 * of ar.bsp:
-					 */
-					pt->loadrs = 0;
-					pt->ar_bspstore = *data;
-				}
-			} else
-				*data = urbs_end;
-			return 0;
-
-		      case PT_CFM:
-			urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
-			if (write_access) {
-				if (((cfm ^ *data) & PFM_MASK) != 0) {
-					if (in_syscall(pt))
-						convert_to_non_syscall(child,
-								       pt,
-								       cfm);
-					pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
-						      | (*data & PFM_MASK));
-				}
-			} else
-				*data = cfm;
-			return 0;
-
-		      case PT_CR_IPSR:
-			if (write_access) {
-				unsigned long tmp = *data;
-				/* psr.ri==3 is a reserved value: SDM 2:25 */
-				if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
-					tmp &= ~IA64_PSR_RI;
-				pt->cr_ipsr = ((tmp & IPSR_MASK)
-					       | (pt->cr_ipsr & ~IPSR_MASK));
-			} else
-				*data = (pt->cr_ipsr & IPSR_MASK);
-			return 0;
-
-		      case PT_AR_RSC:
-			if (write_access)
-				pt->ar_rsc = *data | (3 << 2); /* force PL3 */
-			else
-				*data = pt->ar_rsc;
-			return 0;
-
-		      case PT_AR_RNAT:
-			ptr = pt_reg_addr(pt, ar_rnat);
-			break;
-		      case PT_R1:
-			ptr = pt_reg_addr(pt, r1);
-			break;
-		      case PT_R2:  case PT_R3:
-			ptr = pt_reg_addr(pt, r2) + (addr - PT_R2);
-			break;
-		      case PT_R8:  case PT_R9:  case PT_R10: case PT_R11:
-			ptr = pt_reg_addr(pt, r8) + (addr - PT_R8);
-			break;
-		      case PT_R12: case PT_R13:
-			ptr = pt_reg_addr(pt, r12) + (addr - PT_R12);
-			break;
-		      case PT_R14:
-			ptr = pt_reg_addr(pt, r14);
-			break;
-		      case PT_R15:
-			ptr = pt_reg_addr(pt, r15);
-			break;
-		      case PT_R16: case PT_R17: case PT_R18: case PT_R19:
-		      case PT_R20: case PT_R21: case PT_R22: case PT_R23:
-		      case PT_R24: case PT_R25: case PT_R26: case PT_R27:
-		      case PT_R28: case PT_R29: case PT_R30: case PT_R31:
-			ptr = pt_reg_addr(pt, r16) + (addr - PT_R16);
-			break;
-		      case PT_B0:
-			ptr = pt_reg_addr(pt, b0);
-			break;
-		      case PT_B6:
-			ptr = pt_reg_addr(pt, b6);
-			break;
-		      case PT_B7:
-			ptr = pt_reg_addr(pt, b7);
-			break;
-		      case PT_F6:  case PT_F6+8: case PT_F7: case PT_F7+8:
-		      case PT_F8:  case PT_F8+8: case PT_F9: case PT_F9+8:
-			ptr = pt_reg_addr(pt, f6) + (addr - PT_F6);
-			break;
-		      case PT_AR_BSPSTORE:
-			ptr = pt_reg_addr(pt, ar_bspstore);
-			break;
-		      case PT_AR_UNAT:
-			ptr = pt_reg_addr(pt, ar_unat);
-			break;
-		      case PT_AR_PFS:
-			ptr = pt_reg_addr(pt, ar_pfs);
-			break;
-		      case PT_AR_CCV:
-			ptr = pt_reg_addr(pt, ar_ccv);
-			break;
-		      case PT_AR_FPSR:
-			ptr = pt_reg_addr(pt, ar_fpsr);
-			break;
-		      case PT_CR_IIP:
-			ptr = pt_reg_addr(pt, cr_iip);
-			break;
-		      case PT_PR:
-			ptr = pt_reg_addr(pt, pr);
-			break;
-			/* scratch register */
-
-		      default:
-			/* disallow accessing anything else... */
-			dprintk("ptrace: rejecting access to register "
-				"address 0x%lx\n", addr);
-			return -1;
-		}
-	} else if (addr <= PT_AR_SSD) {
-		ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD);
-	} else {
-		/* access debug registers */
-
-		if (addr >= PT_IBR) {
-			regnum = (addr - PT_IBR) >> 3;
-			ptr = &child->thread.ibr[0];
-		} else {
-			regnum = (addr - PT_DBR) >> 3;
-			ptr = &child->thread.dbr[0];
-		}
-
-		if (regnum >= 8) {
-			dprintk("ptrace: rejecting access to register "
-				"address 0x%lx\n", addr);
-			return -1;
-		}
-#ifdef CONFIG_PERFMON
-		/*
-		 * Check if debug registers are used by perfmon. This
-		 * test must be done once we know that we can do the
-		 * operation, i.e. the arguments are all valid, but
-		 * before we start modifying the state.
-		 *
-		 * Perfmon needs to keep a count of how many processes
-		 * are trying to modify the debug registers for system
-		 * wide monitoring sessions.
-		 *
-		 * We also include read access here, because they may
-		 * cause the PMU-installed debug register state
-		 * (dbr[], ibr[]) to be reset. The two arrays are also
-		 * used by perfmon, but we do not use
-		 * IA64_THREAD_DBG_VALID. The registers are restored
-		 * by the PMU context switch code.
-		 */
-		if (pfm_use_debug_registers(child)) return -1;
-#endif
-
-		if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
-			child->thread.flags |= IA64_THREAD_DBG_VALID;
-			memset(child->thread.dbr, 0,
-			       sizeof(child->thread.dbr));
-			memset(child->thread.ibr, 0,
-			       sizeof(child->thread.ibr));
-		}
-
-		ptr += regnum;
-
-		if ((regnum & 1) && write_access) {
-			/* don't let the user set kernel-level breakpoints: */
-			*ptr = *data & ~(7UL << 56);
-			return 0;
-		}
-	}
-	if (write_access)
-		*ptr = *data;
-	else
-		*data = *ptr;
-	return 0;
-}
+	      unsigned long *data, int write_access);
 
 static long
 ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
@@ -1626,3 +1310,892 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
 	if (test_thread_flag(TIF_RESTORE_RSE))
 		ia64_sync_krbs();
 }
+
+/* Utrace implementation starts here */
+struct regset_get {
+	void *kbuf;
+	void __user *ubuf;
+};
+
+struct regset_set {
+	const void *kbuf;
+	const void __user *ubuf;
+};
+
+struct regset_getset {
+	struct task_struct *target;
+	const struct user_regset *regset;
+	union {
+		struct regset_get get;
+		struct regset_set set;
+	} u;
+	unsigned int pos;
+	unsigned int count;
+	int ret;
+};
+
+static int
+access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	struct pt_regs *pt;
+	unsigned long *ptr = NULL;
+	int ret;
+	char nat = 0;
+
+	pt = task_pt_regs(target);
+	switch (addr) {
+	case ELF_GR_OFFSET(1):
+		ptr = &pt->r1;
+		break;
+	case ELF_GR_OFFSET(2):
+	case ELF_GR_OFFSET(3):
+		ptr = (void *)&pt->r2 + (addr - ELF_GR_OFFSET(2));
+		break;
+	case ELF_GR_OFFSET(4) ... ELF_GR_OFFSET(7):
+		if (write_access) {
+			/* read NaT bit first: */
+			unsigned long dummy;
+
+			ret = unw_get_gr(info, addr/8, &dummy, &nat);
+			if (ret < 0)
+				return ret;
+		}
+		return unw_access_gr(info, addr/8, data, &nat, write_access);
+	case ELF_GR_OFFSET(8) ... ELF_GR_OFFSET(11):
+		ptr = (void *)&pt->r8 + addr - ELF_GR_OFFSET(8);
+		break;
+	case ELF_GR_OFFSET(12):
+	case ELF_GR_OFFSET(13):
+		ptr = (void *)&pt->r12 + addr - ELF_GR_OFFSET(12);
+		break;
+	case ELF_GR_OFFSET(14):
+		ptr = &pt->r14;
+		break;
+	case ELF_GR_OFFSET(15):
+		ptr = &pt->r15;
+	}
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
+}
+
+static int
+access_elf_breg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	struct pt_regs *pt;
+	unsigned long *ptr = NULL;
+
+	pt = task_pt_regs(target);
+	switch (addr) {
+	case ELF_BR_OFFSET(0):
+		ptr = &pt->b0;
+		break;
+	case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5):
+		return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8,
+				     data, write_access);
+	case ELF_BR_OFFSET(6):
+		ptr = &pt->b6;
+		break;
+	case ELF_BR_OFFSET(7):
+		ptr = &pt->b7;
+	}
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
+}
+
+static int
+access_elf_areg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	struct pt_regs *pt;
+	unsigned long cfm, urbs_end;
+	unsigned long *ptr = NULL;
+
+	pt = task_pt_regs(target);
+	if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) {
+		switch (addr) {
+		case ELF_AR_RSC_OFFSET:
+			/* force PL3 */
+			if (write_access)
+				pt->ar_rsc = *data | (3 << 2);
+			else
+				*data = pt->ar_rsc;
+			return 0;
+		case ELF_AR_BSP_OFFSET:
+			/*
+			 * By convention, we use PT_AR_BSP to refer to
+			 * the end of the user-level backing store.
+			 * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
+			 * to get the real value of ar.bsp at the time
+			 * the kernel was entered.
+			 *
+			 * Furthermore, when changing the contents of
+			 * PT_AR_BSP (or PT_CFM) while the task is
+			 * blocked in a system call, convert the state
+			 * so that the non-system-call exit
+			 * path is used.  This ensures that the proper
+			 * state will be picked up when resuming
+			 * execution.  However, it *also* means that
+			 * once we write PT_AR_BSP/PT_CFM, it won't be
+			 * possible to modify the syscall arguments of
+			 * the pending system call any longer.  This
+			 * shouldn't be an issue because modifying
+			 * PT_AR_BSP/PT_CFM generally implies that
+			 * we're either abandoning the pending system
+			 * call or that we defer it's re-execution
+			 * (e.g., due to GDB doing an inferior
+			 * function call).
+			 */
+			urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+			if (write_access) {
+				if (*data != urbs_end) {
+					if (in_syscall(pt))
+						convert_to_non_syscall(target,
+								       pt,
+								       cfm);
+					/*
+					 * Simulate user-level write
+					 * of ar.bsp:
+					 */
+					pt->loadrs = 0;
+					pt->ar_bspstore = *data;
+				}
+			} else
+				*data = urbs_end;
+			return 0;
+		case ELF_AR_BSPSTORE_OFFSET:
+			ptr = &pt->ar_bspstore;
+			break;
+		case ELF_AR_RNAT_OFFSET:
+			ptr = &pt->ar_rnat;
+			break;
+		case ELF_AR_CCV_OFFSET:
+			ptr = &pt->ar_ccv;
+			break;
+		case ELF_AR_UNAT_OFFSET:
+			ptr = &pt->ar_unat;
+			break;
+		case ELF_AR_FPSR_OFFSET:
+			ptr = &pt->ar_fpsr;
+			break;
+		case ELF_AR_PFS_OFFSET:
+			ptr = &pt->ar_pfs;
+			break;
+		case ELF_AR_LC_OFFSET:
+			return unw_access_ar(info, UNW_AR_LC, data,
+					     write_access);
+		case ELF_AR_EC_OFFSET:
+			return unw_access_ar(info, UNW_AR_EC, data,
+					     write_access);
+		case ELF_AR_CSD_OFFSET:
+			ptr = &pt->ar_csd;
+			break;
+		case ELF_AR_SSD_OFFSET:
+			ptr = &pt->ar_ssd;
+		}
+	} else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) {
+		switch (addr) {
+		case ELF_CR_IIP_OFFSET:
+			ptr = &pt->cr_iip;
+			break;
+		case ELF_CFM_OFFSET:
+			urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+			if (write_access) {
+				if (((cfm ^ *data) & PFM_MASK) != 0) {
+					if (in_syscall(pt))
+						convert_to_non_syscall(target,
+								       pt,
+								       cfm);
+					pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
+						      | (*data & PFM_MASK));
+				}
+			} else
+				*data = cfm;
+			return 0;
+		case ELF_CR_IPSR_OFFSET:
+			if (write_access) {
+				unsigned long tmp = *data;
+				/* psr.ri==3 is a reserved value: SDM 2:25 */
+				if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
+					tmp &= ~IA64_PSR_RI;
+				pt->cr_ipsr = ((tmp & IPSR_MASK)
+					       | (pt->cr_ipsr & ~IPSR_MASK));
+			} else
+				*data = (pt->cr_ipsr & IPSR_MASK);
+			return 0;
+		}
+	} else if (addr == ELF_NAT_OFFSET)
+		return access_nat_bits(target, pt, info,
+				       data, write_access);
+	else if (addr == ELF_PR_OFFSET)
+		ptr = &pt->pr;
+	else
+		return -1;
+
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+
+	return 0;
+}
+
+static int
+access_elf_reg(struct task_struct *target, struct unw_frame_info *info,
+		unsigned long addr, unsigned long *data, int write_access)
+{
+	if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(15))
+		return access_elf_gpreg(target, info, addr, data, write_access);
+	else if (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7))
+		return access_elf_breg(target, info, addr, data, write_access);
+	else
+		return access_elf_areg(target, info, addr, data, write_access);
+}
+
+void do_gpregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct pt_regs *pt;
+	struct regset_getset *dst = arg;
+	elf_greg_t tmp[16];
+	unsigned int i, index, min_copy;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/*
+	 * coredump format:
+	 *      r0-r31
+	 *      NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+	 *      predicate registers (p0-p63)
+	 *      b0-b7
+	 *      ip cfm user-mask
+	 *      ar.rsc ar.bsp ar.bspstore ar.rnat
+	 *      ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
+	 */
+
+
+	/* Skip r0 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+		dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+						      &dst->u.get.kbuf,
+						      &dst->u.get.ubuf,
+						      0, ELF_GR_OFFSET(1));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* gr1 - gr15 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+		index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+		min_copy = ELF_GR_OFFSET(16) > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_GR_OFFSET(16);
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* r16-r31 */
+	if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+		pt = task_pt_regs(dst->target);
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, &pt->r16,
+				ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* nat, pr, b0 - b7 */
+	if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+		index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+		min_copy = ELF_CR_IIP_OFFSET > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_CR_IIP_OFFSET;
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+	 * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+	 */
+	if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+		index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+		min_copy = ELF_AR_END_OFFSET > (dst->pos + dst->count) ?
+			 (dst->pos + dst->count) : ELF_AR_END_OFFSET;
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+				index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 0) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
+	}
+}
+
+void do_gpregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct pt_regs *pt;
+	struct regset_getset *dst = arg;
+	elf_greg_t tmp[16];
+	unsigned int i, index;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip r0 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+		dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+						       &dst->u.set.kbuf,
+						       &dst->u.set.ubuf,
+						       0, ELF_GR_OFFSET(1));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* gr1-gr15 */
+	if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+		i = dst->pos;
+		index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+		if (dst->ret)
+			return;
+		for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		if (dst->count == 0)
+			return;
+	}
+
+	/* gr16-gr31 */
+	if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+		pt = task_pt_regs(dst->target);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, &pt->r16,
+				ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* nat, pr, b0 - b7 */
+	if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+		i = dst->pos;
+		index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+		if (dst->ret)
+			return;
+		for (; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+		if (dst->count == 0)
+			return;
+	}
+
+	/* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+	 * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+	 */
+	if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+		i = dst->pos;
+		index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
+		if (dst->ret)
+			return;
+		for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+			if (access_elf_reg(dst->target, info, i,
+						&tmp[index], 1) < 0) {
+				dst->ret = -EIO;
+				return;
+			}
+	}
+}
+
+#define ELF_FP_OFFSET(i)	(i * sizeof(elf_fpreg_t))
+
+void do_fpregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	elf_fpreg_t tmp[30];
+	int index, min_copy, i;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip pos 0 and 1 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+		dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+						      &dst->u.get.kbuf,
+						      &dst->u.get.ubuf,
+						      0, ELF_FP_OFFSET(2));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fr2-fr31 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+		index = (dst->pos - ELF_FP_OFFSET(2)) / sizeof(elf_fpreg_t);
+
+		min_copy = min(((unsigned int)ELF_FP_OFFSET(32)),
+				dst->pos + dst->count);
+		for (i = dst->pos; i < min_copy; i += sizeof(elf_fpreg_t),
+				index++)
+			if (unw_get_fr(info, i / sizeof(elf_fpreg_t),
+					 &tmp[index])) {
+				dst->ret = -EIO;
+				return;
+			}
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+				ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fph */
+	if (dst->count > 0) {
+		ia64_flush_fph(dst->target);
+		if (task->thread.flags & IA64_THREAD_FPH_VALID)
+			dst->ret = user_regset_copyout(
+				&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				&dst->target->thread.fph,
+				ELF_FP_OFFSET(32), -1);
+		else
+			/* Zero fill instead.  */
+			dst->ret = user_regset_copyout_zero(
+				&dst->pos, &dst->count,
+				&dst->u.get.kbuf, &dst->u.get.ubuf,
+				ELF_FP_OFFSET(32), -1);
+	}
+}
+
+void do_fpregs_set(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	elf_fpreg_t fpreg, tmp[30];
+	int index, start, end;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* Skip pos 0 and 1 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+		dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+						       &dst->u.set.kbuf,
+						       &dst->u.set.ubuf,
+						       0, ELF_FP_OFFSET(2));
+		if (dst->count == 0 || dst->ret)
+			return;
+	}
+
+	/* fr2-fr31 */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+		start = dst->pos;
+		end = min(((unsigned int)ELF_FP_OFFSET(32)),
+			 dst->pos + dst->count);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+				&dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+				ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+		if (dst->ret)
+			return;
+
+		if (start & 0xF) { /* only write high part */
+			if (unw_get_fr(info, start / sizeof(elf_fpreg_t),
+					 &fpreg)) {
+				dst->ret = -EIO;
+				return;
+			}
+			tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0]
+				= fpreg.u.bits[0];
+			start &= ~0xFUL;
+		}
+		if (end & 0xF) { /* only write low part */
+			if (unw_get_fr(info, end / sizeof(elf_fpreg_t),
+					&fpreg)) {
+				dst->ret = -EIO;
+				return;
+			}
+			tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1]
+				= fpreg.u.bits[1];
+			end = (end + 0xF) & ~0xFUL;
+		}
+
+		for ( ;	start < end ; start += sizeof(elf_fpreg_t)) {
+			index = start / sizeof(elf_fpreg_t);
+			if (unw_set_fr(info, index, tmp[index - 2])) {
+				dst->ret = -EIO;
+				return;
+			}
+		}
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* fph */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) {
+		ia64_sync_fph(dst->target);
+		dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+						&dst->u.set.kbuf,
+						&dst->u.set.ubuf,
+						&dst->target->thread.fph,
+						ELF_FP_OFFSET(32), -1);
+	}
+}
+
+static int
+do_regset_call(void (*call)(struct unw_frame_info *, void *),
+	       struct task_struct *target,
+	       const struct user_regset *regset,
+	       unsigned int pos, unsigned int count,
+	       const void *kbuf, const void __user *ubuf)
+{
+	struct regset_getset info = { .target = target, .regset = regset,
+				 .pos = pos, .count = count,
+				 .u.set = { .kbuf = kbuf, .ubuf = ubuf },
+				 .ret = 0 };
+
+	if (target == current)
+		unw_init_running(call, &info);
+	else {
+		struct unw_frame_info ufi;
+		memset(&ufi, 0, sizeof(ufi));
+		unw_init_from_blocked_task(&ufi, target);
+		(*call)(&ufi, &info);
+	}
+
+	return info.ret;
+}
+
+static int
+gpregs_get(struct task_struct *target,
+	   const struct user_regset *regset,
+	   unsigned int pos, unsigned int count,
+	   void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_gpregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int gpregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_gpregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static void do_gpregs_writeback(struct unw_frame_info *info, void *arg)
+{
+	do_sync_rbs(info, ia64_sync_user_rbs);
+}
+
+/*
+ * This is called to write back the register backing store.
+ * ptrace does this before it stops, so that a tracer reading the user
+ * memory after the thread stops will get the current register data.
+ */
+static int
+gpregs_writeback(struct task_struct *target,
+		 const struct user_regset *regset,
+		 int now)
+{
+	if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE))
+		return 0;
+	tsk_set_notify_resume(target);
+	return do_regset_call(do_gpregs_writeback, target, regset, 0, 0,
+		NULL, NULL);
+}
+
+static int
+fpregs_active(struct task_struct *target, const struct user_regset *regset)
+{
+	return (target->thread.flags & IA64_THREAD_FPH_VALID) ? 128 : 32;
+}
+
+static int fpregs_get(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_get, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int fpregs_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	return do_regset_call(do_fpregs_set, target, regset, pos, count,
+		kbuf, ubuf);
+}
+
+static int
+access_uarea(struct task_struct *child, unsigned long addr,
+	      unsigned long *data, int write_access)
+{
+	unsigned int pos = -1; /* an invalid value */
+	int ret;
+	unsigned long *ptr, regnum;
+
+	if ((addr & 0x7) != 0) {
+		dprintk("ptrace: unaligned register address 0x%lx\n", addr);
+		return -1;
+	}
+	if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) ||
+		(addr >= PT_R7 + 8 && addr < PT_B1) ||
+		(addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) ||
+		(addr >= PT_AR_SSD + 8 && addr < PT_DBR)) {
+		dprintk("ptrace: rejecting access to register "
+					"address 0x%lx\n", addr);
+		return -1;
+	}
+
+	switch (addr) {
+	case PT_F32 ... (PT_F127 + 15):
+		pos = addr - PT_F32 + ELF_FP_OFFSET(32);
+		break;
+	case PT_F2 ... (PT_F5 + 15):
+		pos = addr - PT_F2 + ELF_FP_OFFSET(2);
+		break;
+	case PT_F10 ... (PT_F31 + 15):
+		pos = addr - PT_F10 + ELF_FP_OFFSET(10);
+		break;
+	case PT_F6 ... (PT_F9 + 15):
+		pos = addr - PT_F6 + ELF_FP_OFFSET(6);
+		break;
+	}
+
+	if (pos != -1) {
+		if (write_access)
+			ret = fpregs_set(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		else
+			ret = fpregs_get(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		if (ret != 0)
+			return -1;
+		return 0;
+	}
+
+	switch (addr) {
+	case PT_NAT_BITS:
+		pos = ELF_NAT_OFFSET;
+		break;
+	case PT_R4 ... PT_R7:
+		pos = addr - PT_R4 + ELF_GR_OFFSET(4);
+		break;
+	case PT_B1 ... PT_B5:
+		pos = addr - PT_B1 + ELF_BR_OFFSET(1);
+		break;
+	case PT_AR_EC:
+		pos = ELF_AR_EC_OFFSET;
+		break;
+	case PT_AR_LC:
+		pos = ELF_AR_LC_OFFSET;
+		break;
+	case PT_CR_IPSR:
+		pos = ELF_CR_IPSR_OFFSET;
+		break;
+	case PT_CR_IIP:
+		pos = ELF_CR_IIP_OFFSET;
+		break;
+	case PT_CFM:
+		pos = ELF_CFM_OFFSET;
+		break;
+	case PT_AR_UNAT:
+		pos = ELF_AR_UNAT_OFFSET;
+		break;
+	case PT_AR_PFS:
+		pos = ELF_AR_PFS_OFFSET;
+		break;
+	case PT_AR_RSC:
+		pos = ELF_AR_RSC_OFFSET;
+		break;
+	case PT_AR_RNAT:
+		pos = ELF_AR_RNAT_OFFSET;
+		break;
+	case PT_AR_BSPSTORE:
+		pos = ELF_AR_BSPSTORE_OFFSET;
+		break;
+	case PT_PR:
+		pos = ELF_PR_OFFSET;
+		break;
+	case PT_B6:
+		pos = ELF_BR_OFFSET(6);
+		break;
+	case PT_AR_BSP:
+		pos = ELF_AR_BSP_OFFSET;
+		break;
+	case PT_R1 ... PT_R3:
+		pos = addr - PT_R1 + ELF_GR_OFFSET(1);
+		break;
+	case PT_R12 ... PT_R15:
+		pos = addr - PT_R12 + ELF_GR_OFFSET(12);
+		break;
+	case PT_R8 ... PT_R11:
+		pos = addr - PT_R8 + ELF_GR_OFFSET(8);
+		break;
+	case PT_R16 ... PT_R31:
+		pos = addr - PT_R16 + ELF_GR_OFFSET(16);
+		break;
+	case PT_AR_CCV:
+		pos = ELF_AR_CCV_OFFSET;
+		break;
+	case PT_AR_FPSR:
+		pos = ELF_AR_FPSR_OFFSET;
+		break;
+	case PT_B0:
+		pos = ELF_BR_OFFSET(0);
+		break;
+	case PT_B7:
+		pos = ELF_BR_OFFSET(7);
+		break;
+	case PT_AR_CSD:
+		pos = ELF_AR_CSD_OFFSET;
+		break;
+	case PT_AR_SSD:
+		pos = ELF_AR_SSD_OFFSET;
+		break;
+	}
+
+	if (pos != -1) {
+		if (write_access)
+			ret = gpregs_set(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		else
+			ret = gpregs_get(child, NULL, pos,
+				sizeof(unsigned long), data, NULL);
+		if (ret != 0)
+			return -1;
+		return 0;
+	}
+
+	/* access debug registers */
+	if (addr >= PT_IBR) {
+		regnum = (addr - PT_IBR) >> 3;
+		ptr = &child->thread.ibr[0];
+	} else {
+		regnum = (addr - PT_DBR) >> 3;
+		ptr = &child->thread.dbr[0];
+	}
+
+	if (regnum >= 8) {
+		dprintk("ptrace: rejecting access to register "
+				"address 0x%lx\n", addr);
+		return -1;
+	}
+#ifdef CONFIG_PERFMON
+	/*
+	 * Check if debug registers are used by perfmon. This
+	 * test must be done once we know that we can do the
+	 * operation, i.e. the arguments are all valid, but
+	 * before we start modifying the state.
+	 *
+	 * Perfmon needs to keep a count of how many processes
+	 * are trying to modify the debug registers for system
+	 * wide monitoring sessions.
+	 *
+	 * We also include read access here, because they may
+	 * cause the PMU-installed debug register state
+	 * (dbr[], ibr[]) to be reset. The two arrays are also
+	 * used by perfmon, but we do not use
+	 * IA64_THREAD_DBG_VALID. The registers are restored
+	 * by the PMU context switch code.
+	 */
+	if (pfm_use_debug_registers(child))
+		return -1;
+#endif
+
+	if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
+		child->thread.flags |= IA64_THREAD_DBG_VALID;
+		memset(child->thread.dbr, 0,
+				sizeof(child->thread.dbr));
+		memset(child->thread.ibr, 0,
+				sizeof(child->thread.ibr));
+	}
+
+	ptr += regnum;
+
+	if ((regnum & 1) && write_access) {
+		/* don't let the user set kernel-level breakpoints: */
+		*ptr = *data & ~(7UL << 56);
+		return 0;
+	}
+	if (write_access)
+		*ptr = *data;
+	else
+		*data = *ptr;
+	return 0;
+}
+
+static const struct user_regset native_regsets[] = {
+	{
+		.core_note_type = NT_PRSTATUS,
+		.n = ELF_NGREG,
+		.size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t),
+		.get = gpregs_get, .set = gpregs_set,
+		.writeback = gpregs_writeback
+	},
+	{
+		.core_note_type = NT_PRFPREG,
+		.n = ELF_NFPREG,
+		.size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t),
+		.get = fpregs_get, .set = fpregs_set, .active = fpregs_active
+	},
+};
+
+static const struct user_regset_view user_ia64_view = {
+	.name = "ia64",
+	.e_machine = EM_IA_64,
+	.regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *tsk)
+{
+#ifdef CONFIG_IA32_SUPPORT
+	extern const struct user_regset_view user_ia32_view;
+	if (IS_IA32_PROCESS(task_pt_regs(tsk)))
+		return &user_ia32_view;
+#endif
+	return &user_ia64_view;
+}

+ 0 - 165
arch/ia64/kernel/semaphore.c

@@ -1,165 +0,0 @@
-/*
- * IA-64 semaphore implementation (derived from x86 version).
- *
- * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- */
-
-/*
- * Semaphores are implemented using a two-way counter: The "count"
- * variable is decremented for each process that tries to acquire the
- * semaphore, while the "sleepers" variable is a count of such
- * acquires.
- *
- * Notably, the inline "up()" and "down()" functions can efficiently
- * test if they need to do any extra work (up needs to do something
- * only if count was negative before the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is contention
- * on the lock, and as such all this is the "non-critical" part of the
- * whole semaphore business. The critical part is the inline stuff in
- * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
- */
-#include <linux/sched.h>
-#include <linux/init.h>
-
-#include <asm/errno.h>
-#include <asm/semaphore.h>
-
-/*
- * Logic:
- *  - Only on a boundary condition do we need to care. When we go
- *    from a negative count to a non-negative, we wake people up.
- *  - When we go from a non-negative count to a negative do we
- *    (a) synchronize with the "sleepers" count and (b) make sure
- *    that we're on the wakeup list before we synchronize so that
- *    we cannot lose wakeup events.
- */
-
-void
-__up (struct semaphore *sem)
-{
-	wake_up(&sem->wait);
-}
-
-void __sched __down (struct semaphore *sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * the wait_queue_head.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_UNINTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	tsk->state = TASK_RUNNING;
-}
-
-int __sched __down_interruptible (struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_INTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers ++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * With signals pending, this turns into
-		 * the trylock failure case - we won't be
-		 * sleeping, and we* can't get the lock as
-		 * it has contention. Just correct the count
-		 * and exit.
-		 */
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			sem->sleepers = 0;
-			atomic_add(sleepers, &sem->count);
-			break;
-		}
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * wait_queue_head. The "-1" is because we're
-		 * still hoping to get the semaphore.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_INTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-	tsk->state = TASK_RUNNING;
-	return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for having decremented the
- * count.
- */
-int
-__down_trylock (struct semaphore *sem)
-{
-	unsigned long flags;
-	int sleepers;
-
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	sleepers = sem->sleepers + 1;
-	sem->sleepers = 0;
-
-	/*
-	 * Add "everybody else" and us into it. They aren't
-	 * playing, because we own the spinlock in the
-	 * wait_queue_head.
-	 */
-	if (!atomic_add_negative(sleepers, &sem->count)) {
-		wake_up_locked(&sem->wait);
-	}
-
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	return 1;
-}

+ 29 - 2
arch/ia64/kernel/setup.c

@@ -59,6 +59,7 @@
 #include <asm/setup.h>
 #include <asm/smp.h>
 #include <asm/system.h>
+#include <asm/tlbflush.h>
 #include <asm/unistd.h>
 #include <asm/hpsim.h>
 
@@ -176,6 +177,29 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
 	return 0;
 }
 
+/*
+ * Similar to "filter_rsvd_memory()", but the reserved memory ranges
+ * are not filtered out.
+ */
+int __init
+filter_memory(unsigned long start, unsigned long end, void *arg)
+{
+	void (*func)(unsigned long, unsigned long, int);
+
+#if IGNORE_PFN0
+	if (start == PAGE_OFFSET) {
+		printk(KERN_WARNING "warning: skipping physical page 0\n");
+		start += PAGE_SIZE;
+		if (start >= end)
+			return 0;
+	}
+#endif
+	func = arg;
+	if (start < end)
+		call_pernode_memory(__pa(start), end - start, func);
+	return 0;
+}
+
 static void __init
 sort_regions (struct rsvd_region *rsvd_region, int max)
 {
@@ -493,6 +517,8 @@ setup_arch (char **cmdline_p)
 	acpi_table_init();
 # ifdef CONFIG_ACPI_NUMA
 	acpi_numa_init();
+	per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
+		32 : cpus_weight(early_cpu_possible_map)), additional_cpus);
 # endif
 #else
 # ifdef CONFIG_SMP
@@ -946,9 +972,10 @@ cpu_init (void)
 #endif
 
 	/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
-	if (ia64_pal_vm_summary(NULL, &vmi) == 0)
+	if (ia64_pal_vm_summary(NULL, &vmi) == 0) {
 		max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
-	else {
+		setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL);
+	} else {
 		printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
 		max_ctx = (1U << 15) - 1;	/* use architected minimum */
 	}

+ 82 - 0
arch/ia64/kernel/smp.c

@@ -209,6 +209,19 @@ send_IPI_allbutself (int op)
 	}
 }
 
+/*
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_mask(cpumask_t mask, int op)
+{
+	unsigned int cpu;
+
+	for_each_cpu_mask(cpu, mask) {
+			send_IPI_single(cpu, op);
+	}
+}
+
 /*
  * Called with preemption disabled.
  */
@@ -401,6 +414,75 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int
 }
 EXPORT_SYMBOL(smp_call_function_single);
 
+/**
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * <mask>	The set of cpus to run on.  Must not include the current cpu.
+ * <func> 	The function to run. This must be fast and non-blocking.
+ * <info>	An arbitrary pointer to pass to the function.
+ * <wait>	If true, wait (atomically) until function
+ *		has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int smp_call_function_mask(cpumask_t mask,
+			   void (*func)(void *), void *info,
+			   int wait)
+{
+	struct call_data_struct data;
+	cpumask_t allbutself;
+	int cpus;
+
+	spin_lock(&call_lock);
+	allbutself = cpu_online_map;
+	cpu_clear(smp_processor_id(), allbutself);
+
+	cpus_and(mask, mask, allbutself);
+	cpus = cpus_weight(mask);
+	if (!cpus) {
+		spin_unlock(&call_lock);
+		return 0;
+	}
+
+	/* Can deadlock when called with interrupts disabled */
+	WARN_ON(irqs_disabled());
+
+	data.func = func;
+	data.info = info;
+	atomic_set(&data.started, 0);
+	data.wait = wait;
+	if (wait)
+		atomic_set(&data.finished, 0);
+
+	call_data = &data;
+	mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC*/
+
+	/* Send a message to other CPUs */
+	if (cpus_equal(mask, allbutself))
+		send_IPI_allbutself(IPI_CALL_FUNC);
+	else
+		send_IPI_mask(mask, IPI_CALL_FUNC);
+
+	/* Wait for response */
+	while (atomic_read(&data.started) != cpus)
+		cpu_relax();
+
+	if (wait)
+		while (atomic_read(&data.finished) != cpus)
+			cpu_relax();
+	call_data = NULL;
+
+	spin_unlock(&call_lock);
+	return 0;
+
+}
+EXPORT_SYMBOL(smp_call_function_mask);
+
 /*
  * this function sends a 'generic call function' IPI to all other CPUs
  * in the system.

+ 1 - 1
arch/ia64/kernel/smpboot.c

@@ -400,9 +400,9 @@ smp_callin (void)
 	/* Setup the per cpu irq handling data structures */
 	__setup_vector_irq(cpuid);
 	cpu_set(cpuid, cpu_online_map);
-	unlock_ipi_calllock();
 	per_cpu(cpu_state, cpuid) = CPU_ONLINE;
 	spin_unlock(&vector_lock);
+	unlock_ipi_calllock();
 
 	smp_setup_percpu_timer();
 

+ 78 - 0
arch/ia64/kernel/time.c

@@ -59,6 +59,84 @@ static struct clocksource clocksource_itc = {
 };
 static struct clocksource *itc_clocksource;
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+
+#include <linux/kernel_stat.h>
+
+extern cputime_t cycle_to_cputime(u64 cyc);
+
+/*
+ * Called from the context switch with interrupts disabled, to charge all
+ * accumulated times to the current process, and to prepare accounting on
+ * the next process.
+ */
+void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
+{
+	struct thread_info *pi = task_thread_info(prev);
+	struct thread_info *ni = task_thread_info(next);
+	cputime_t delta_stime, delta_utime;
+	__u64 now;
+
+	now = ia64_get_itc();
+
+	delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
+	account_system_time(prev, 0, delta_stime);
+	account_system_time_scaled(prev, delta_stime);
+
+	if (pi->ac_utime) {
+		delta_utime = cycle_to_cputime(pi->ac_utime);
+		account_user_time(prev, delta_utime);
+		account_user_time_scaled(prev, delta_utime);
+	}
+
+	pi->ac_stamp = ni->ac_stamp = now;
+	ni->ac_stime = ni->ac_utime = 0;
+}
+
+/*
+ * Account time for a transition between system, hard irq or soft irq state.
+ * Note that this function is called with interrupts enabled.
+ */
+void account_system_vtime(struct task_struct *tsk)
+{
+	struct thread_info *ti = task_thread_info(tsk);
+	unsigned long flags;
+	cputime_t delta_stime;
+	__u64 now;
+
+	local_irq_save(flags);
+
+	now = ia64_get_itc();
+
+	delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
+	account_system_time(tsk, 0, delta_stime);
+	account_system_time_scaled(tsk, delta_stime);
+	ti->ac_stime = 0;
+
+	ti->ac_stamp = now;
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Called from the timer interrupt handler to charge accumulated user time
+ * to the current process.  Must be called with interrupts disabled.
+ */
+void account_process_tick(struct task_struct *p, int user_tick)
+{
+	struct thread_info *ti = task_thread_info(p);
+	cputime_t delta_utime;
+
+	if (ti->ac_utime) {
+		delta_utime = cycle_to_cputime(ti->ac_utime);
+		account_user_time(p, delta_utime);
+		account_user_time_scaled(p, delta_utime);
+		ti->ac_utime = 0;
+	}
+}
+
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
 static irqreturn_t
 timer_interrupt (int irq, void *dev_id)
 {

+ 2 - 1
arch/ia64/kernel/unaligned.c

@@ -13,6 +13,7 @@
  * 2001/08/13	Correct size of extended floats (float_fsz) from 16 to 10 bytes.
  * 2001/01/17	Add support emulation of unaligned kernel accesses.
  */
+#include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/tty.h>
@@ -1290,7 +1291,7 @@ within_logging_rate_limit (void)
 {
 	static unsigned long count, last_time;
 
-	if (jiffies - last_time > 5*HZ)
+	if (time_after(jiffies, last_time + 5 * HZ))
 		count = 0;
 	if (count < 5) {
 		last_time = jiffies;

+ 1 - 3
arch/ia64/mm/contig.c

@@ -45,8 +45,6 @@ void show_mem(void)
 
 	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
-	printk(KERN_INFO "Free swap:       %6ldkB\n",
-	       nr_swap_pages<<(PAGE_SHIFT-10));
 	printk(KERN_INFO "Node memory in pages:\n");
 	for_each_online_pgdat(pgdat) {
 		unsigned long present;
@@ -255,7 +253,7 @@ paging_init (void)
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
-	efi_memmap_walk(register_active_ranges, NULL);
+	efi_memmap_walk(filter_memory, register_active_ranges);
 	efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
 	if (max_gap < LARGE_GAP) {
 		vmem_map = (struct page *) 0;

+ 6 - 11
arch/ia64/mm/discontig.c

@@ -104,7 +104,7 @@ static int __meminit early_nr_cpus_node(int node)
 {
 	int cpu, n = 0;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
+	for_each_possible_early_cpu(cpu)
 		if (node == node_cpuid[cpu].nid)
 			n++;
 
@@ -124,6 +124,7 @@ static unsigned long __meminit compute_pernodesize(int node)
 	pernodesize += node * L1_CACHE_BYTES;
 	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
 	pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
+	pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
 	pernodesize = PAGE_ALIGN(pernodesize);
 	return pernodesize;
 }
@@ -142,7 +143,7 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
 #ifdef CONFIG_SMP
 	int cpu;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_possible_early_cpu(cpu) {
 		if (node == node_cpuid[cpu].nid) {
 			memcpy(__va(cpu_data), __phys_per_cpu_start,
 			       __per_cpu_end - __per_cpu_start);
@@ -345,7 +346,7 @@ static void __init initialize_pernode_data(void)
 
 #ifdef CONFIG_SMP
 	/* Set the node_data pointer for each per-cpu struct */
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+	for_each_possible_early_cpu(cpu) {
 		node = node_cpuid[cpu].nid;
 		per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
 	}
@@ -444,7 +445,7 @@ void __init find_memory(void)
 			mem_data[node].min_pfn = ~0UL;
 		}
 
-	efi_memmap_walk(register_active_ranges, NULL);
+	efi_memmap_walk(filter_memory, register_active_ranges);
 
 	/*
 	 * Initialize the boot memory maps in reverse order since that's
@@ -493,13 +494,9 @@ void __cpuinit *per_cpu_init(void)
 	int cpu;
 	static int first_time = 1;
 
-
-	if (smp_processor_id() != 0)
-		return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
-
 	if (first_time) {
 		first_time = 0;
-		for (cpu = 0; cpu < NR_CPUS; cpu++)
+		for_each_possible_early_cpu(cpu)
 			per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
 	}
 
@@ -522,8 +519,6 @@ void show_mem(void)
 
 	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
-	printk(KERN_INFO "Free swap:       %6ldkB\n",
-	       nr_swap_pages<<(PAGE_SHIFT-10));
 	printk(KERN_INFO "Node memory in pages:\n");
 	for_each_online_pgdat(pgdat) {
 		unsigned long present;

+ 3 - 11
arch/ia64/mm/init.c

@@ -58,7 +58,6 @@ __ia64_sync_icache_dcache (pte_t pte)
 {
 	unsigned long addr;
 	struct page *page;
-	unsigned long order;
 
 	page = pte_page(pte);
 	addr = (unsigned long) page_address(page);
@@ -66,12 +65,7 @@ __ia64_sync_icache_dcache (pte_t pte)
 	if (test_bit(PG_arch_1, &page->flags))
 		return;				/* i-cache is already coherent with d-cache */
 
-	if (PageCompound(page)) {
-		order = compound_order(page);
-		flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT));
-	}
-	else
-		flush_icache_range(addr, addr + PAGE_SIZE);
+	flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page)));
 	set_bit(PG_arch_1, &page->flags);	/* mark page as clean */
 }
 
@@ -553,12 +547,10 @@ find_largest_hole (u64 start, u64 end, void *arg)
 #endif /* CONFIG_VIRTUAL_MEM_MAP */
 
 int __init
-register_active_ranges(u64 start, u64 end, void *arg)
+register_active_ranges(u64 start, u64 len, int nid)
 {
-	int nid = paddr_to_nid(__pa(start));
+	u64 end = start + len;
 
-	if (nid < 0)
-		nid = 0;
 #ifdef CONFIG_KEXEC
 	if (start > crashk_res.start && start < crashk_res.end)
 		start = crashk_res.end;

+ 3 - 1
arch/ia64/mm/numa.c

@@ -27,7 +27,9 @@
  */
 int num_node_memblks;
 struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
-struct node_cpuid_s node_cpuid[NR_CPUS];
+struct node_cpuid_s node_cpuid[NR_CPUS] =
+	{ [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } };
+
 /*
  * This is a matrix with "distances" between nodes, they should be
  * proportional to the memory access latency ratios.

+ 342 - 15
arch/ia64/mm/tlb.c

@@ -11,6 +11,9 @@
  * Rohit Seth <rohit.seth@intel.com>
  * Ken Chen <kenneth.w.chen@intel.com>
  * Christophe de Dinechin <ddd@hp.com>: Avoid ptc.e on memory allocation
+ * Copyright (C) 2007 Intel Corp
+ *	Fenghua Yu <fenghua.yu@intel.com>
+ *	Add multiple ptc.g/ptc.ga instruction support in global tlb purge.
  */
 #include <linux/module.h>
 #include <linux/init.h>
@@ -26,6 +29,9 @@
 #include <asm/pal.h>
 #include <asm/tlbflush.h>
 #include <asm/dma.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/tlb.h>
 
 static struct {
 	unsigned long mask;	/* mask of supported purge page-sizes */
@@ -39,6 +45,10 @@ struct ia64_ctx ia64_ctx = {
 };
 
 DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
+DEFINE_PER_CPU(u8, ia64_tr_num);  /*Number of TR slots in current processor*/
+DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/
+
+struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX];
 
 /*
  * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
@@ -84,14 +94,140 @@ wrap_mmu_context (struct mm_struct *mm)
 	local_flush_tlb_all();
 }
 
+/*
+ * Implement "spinaphores" ... like counting semaphores, but they
+ * spin instead of sleeping.  If there are ever any other users for
+ * this primitive it can be moved up to a spinaphore.h header.
+ */
+struct spinaphore {
+	atomic_t	cur;
+};
+
+static inline void spinaphore_init(struct spinaphore *ss, int val)
+{
+	atomic_set(&ss->cur, val);
+}
+
+static inline void down_spin(struct spinaphore *ss)
+{
+	while (unlikely(!atomic_add_unless(&ss->cur, -1, 0)))
+		while (atomic_read(&ss->cur) == 0)
+			cpu_relax();
+}
+
+static inline void up_spin(struct spinaphore *ss)
+{
+	atomic_add(1, &ss->cur);
+}
+
+static struct spinaphore ptcg_sem;
+static u16 nptcg = 1;
+static int need_ptcg_sem = 1;
+static int toolatetochangeptcgsem = 0;
+
+/*
+ * Kernel parameter "nptcg=" overrides max number of concurrent global TLB
+ * purges which is reported from either PAL or SAL PALO.
+ *
+ * We don't have sanity checking for nptcg value. It's the user's responsibility
+ * for valid nptcg value on the platform. Otherwise, kernel may hang in some
+ * cases.
+ */
+static int __init
+set_nptcg(char *str)
+{
+	int value = 0;
+
+	get_option(&str, &value);
+	setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER);
+
+	return 1;
+}
+
+__setup("nptcg=", set_nptcg);
+
+/*
+ * Maximum number of simultaneous ptc.g purges in the system can
+ * be defined by PAL_VM_SUMMARY (in which case we should take
+ * the smallest value for any cpu in the system) or by the PAL
+ * override table (in which case we should ignore the value from
+ * PAL_VM_SUMMARY).
+ *
+ * Kernel parameter "nptcg=" overrides maximum number of simultanesous ptc.g
+ * purges defined in either PAL_VM_SUMMARY or PAL override table. In this case,
+ * we should ignore the value from either PAL_VM_SUMMARY or PAL override table.
+ *
+ * Complicating the logic here is the fact that num_possible_cpus()
+ * isn't fully setup until we start bringing cpus online.
+ */
+void
+setup_ptcg_sem(int max_purges, int nptcg_from)
+{
+	static int kp_override;
+	static int palo_override;
+	static int firstcpu = 1;
+
+	if (toolatetochangeptcgsem) {
+		BUG_ON(max_purges < nptcg);
+		return;
+	}
+
+	if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) {
+		kp_override = 1;
+		nptcg = max_purges;
+		goto resetsema;
+	}
+	if (kp_override) {
+		need_ptcg_sem = num_possible_cpus() > nptcg;
+		return;
+	}
+
+	if (nptcg_from == NPTCG_FROM_PALO) {
+		palo_override = 1;
+
+		/* In PALO max_purges == 0 really means it! */
+		if (max_purges == 0)
+			panic("Whoa! Platform does not support global TLB purges.\n");
+		nptcg = max_purges;
+		if (nptcg == PALO_MAX_TLB_PURGES) {
+			need_ptcg_sem = 0;
+			return;
+		}
+		goto resetsema;
+	}
+	if (palo_override) {
+		if (nptcg != PALO_MAX_TLB_PURGES)
+			need_ptcg_sem = (num_possible_cpus() > nptcg);
+		return;
+	}
+
+	/* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */
+	if (max_purges == 0) max_purges = 1;
+
+	if (firstcpu) {
+		nptcg = max_purges;
+		firstcpu = 0;
+	}
+	if (max_purges < nptcg)
+		nptcg = max_purges;
+	if (nptcg == PAL_MAX_PURGES) {
+		need_ptcg_sem = 0;
+		return;
+	} else
+		need_ptcg_sem = (num_possible_cpus() > nptcg);
+
+resetsema:
+	spinaphore_init(&ptcg_sem, max_purges);
+}
+
 void
 ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
 		       unsigned long end, unsigned long nbits)
 {
-	static DEFINE_SPINLOCK(ptcg_lock);
-
 	struct mm_struct *active_mm = current->active_mm;
 
+	toolatetochangeptcgsem = 1;
+
 	if (mm != active_mm) {
 		/* Restore region IDs for mm */
 		if (mm && active_mm) {
@@ -102,19 +238,20 @@ ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
 		}
 	}
 
-	/* HW requires global serialization of ptc.ga.  */
-	spin_lock(&ptcg_lock);
-	{
-		do {
-			/*
-			 * Flush ALAT entries also.
-			 */
-			ia64_ptcga(start, (nbits<<2));
-			ia64_srlz_i();
-			start += (1UL << nbits);
-		} while (start < end);
-	}
-	spin_unlock(&ptcg_lock);
+	if (need_ptcg_sem)
+		down_spin(&ptcg_sem);
+
+	do {
+		/*
+		 * Flush ALAT entries also.
+		 */
+		ia64_ptcga(start, (nbits << 2));
+		ia64_srlz_i();
+		start += (1UL << nbits);
+	} while (start < end);
+
+	if (need_ptcg_sem)
+		up_spin(&ptcg_sem);
 
         if (mm != active_mm) {
                 activate_context(active_mm);
@@ -190,6 +327,9 @@ ia64_tlb_init (void)
 	ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */
 	unsigned long tr_pgbits;
 	long status;
+	pal_vm_info_1_u_t vm_info_1;
+	pal_vm_info_2_u_t vm_info_2;
+	int cpu = smp_processor_id();
 
 	if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
 		printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; "
@@ -206,4 +346,191 @@ ia64_tlb_init (void)
 	local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
 
 	local_flush_tlb_all();	/* nuke left overs from bootstrapping... */
+	status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2);
+
+	if (status) {
+		printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
+		per_cpu(ia64_tr_num, cpu) = 8;
+		return;
+	}
+	per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
+	if (per_cpu(ia64_tr_num, cpu) >
+				(vm_info_1.pal_vm_info_1_s.max_dtr_entry+1))
+		per_cpu(ia64_tr_num, cpu) =
+				vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
+	if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) {
+		per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX;
+		printk(KERN_DEBUG "TR register number exceeds IA64_TR_ALLOC_MAX!"
+			"IA64_TR_ALLOC_MAX should be extended\n");
+	}
+}
+
+/*
+ * is_tr_overlap
+ *
+ * Check overlap with inserted TRs.
+ */
+static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size)
+{
+	u64 tr_log_size;
+	u64 tr_end;
+	u64 va_rr = ia64_get_rr(va);
+	u64 va_rid = RR_TO_RID(va_rr);
+	u64 va_end = va + (1<<log_size) - 1;
+
+	if (va_rid != RR_TO_RID(p->rr))
+		return 0;
+	tr_log_size = (p->itir & 0xff) >> 2;
+	tr_end = p->ifa + (1<<tr_log_size) - 1;
+
+	if (va > tr_end || p->ifa > va_end)
+		return 0;
+	return 1;
+
+}
+
+/*
+ * ia64_insert_tr in virtual mode. Allocate a TR slot
+ *
+ * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr
+ *
+ * va 	: virtual address.
+ * pte 	: pte entries inserted.
+ * log_size: range to be covered.
+ *
+ * Return value:  <0 :  error No.
+ *
+ *		  >=0 : slot number allocated for TR.
+ * Must be called with preemption disabled.
+ */
+int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
+{
+	int i, r;
+	unsigned long psr;
+	struct ia64_tr_entry *p;
+	int cpu = smp_processor_id();
+
+	r = -EINVAL;
+	/*Check overlap with existing TR entries*/
+	if (target_mask & 0x1) {
+		p = &__per_cpu_idtrs[cpu][0][0];
+		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
+								i++, p++) {
+			if (p->pte & 0x1)
+				if (is_tr_overlap(p, va, log_size)) {
+					printk(KERN_DEBUG "Overlapped Entry"
+						"Inserted for TR Reigster!!\n");
+					goto out;
+			}
+		}
+	}
+	if (target_mask & 0x2) {
+		p = &__per_cpu_idtrs[cpu][1][0];
+		for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
+								i++, p++) {
+			if (p->pte & 0x1)
+				if (is_tr_overlap(p, va, log_size)) {
+					printk(KERN_DEBUG "Overlapped Entry"
+						"Inserted for TR Reigster!!\n");
+					goto out;
+				}
+		}
+	}
+
+	for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) {
+		switch (target_mask & 0x3) {
+		case 1:
+			if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1))
+				goto found;
+			continue;
+		case 2:
+			if (!(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+				goto found;
+			continue;
+		case 3:
+			if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1) &&
+				!(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+				goto found;
+			continue;
+		default:
+			r = -EINVAL;
+			goto out;
+		}
+	}
+found:
+	if (i >= per_cpu(ia64_tr_num, cpu))
+		return -EBUSY;
+
+	/*Record tr info for mca hander use!*/
+	if (i > per_cpu(ia64_tr_used, cpu))
+		per_cpu(ia64_tr_used, cpu) = i;
+
+	psr = ia64_clear_ic();
+	if (target_mask & 0x1) {
+		ia64_itr(0x1, i, va, pte, log_size);
+		ia64_srlz_i();
+		p = &__per_cpu_idtrs[cpu][0][i];
+		p->ifa = va;
+		p->pte = pte;
+		p->itir = log_size << 2;
+		p->rr = ia64_get_rr(va);
+	}
+	if (target_mask & 0x2) {
+		ia64_itr(0x2, i, va, pte, log_size);
+		ia64_srlz_i();
+		p = &__per_cpu_idtrs[cpu][1][i];
+		p->ifa = va;
+		p->pte = pte;
+		p->itir = log_size << 2;
+		p->rr = ia64_get_rr(va);
+	}
+	ia64_set_psr(psr);
+	r = i;
+out:
+	return r;
+}
+EXPORT_SYMBOL_GPL(ia64_itr_entry);
+
+/*
+ * ia64_purge_tr
+ *
+ * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr.
+ * slot: slot number to be freed.
+ *
+ * Must be called with preemption disabled.
+ */
+void ia64_ptr_entry(u64 target_mask, int slot)
+{
+	int cpu = smp_processor_id();
+	int i;
+	struct ia64_tr_entry *p;
+
+	if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu))
+		return;
+
+	if (target_mask & 0x1) {
+		p = &__per_cpu_idtrs[cpu][0][slot];
+		if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
+			p->pte = 0;
+			ia64_ptr(0x1, p->ifa, p->itir>>2);
+			ia64_srlz_i();
+		}
+	}
+
+	if (target_mask & 0x2) {
+		p = &__per_cpu_idtrs[cpu][1][slot];
+		if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
+			p->pte = 0;
+			ia64_ptr(0x2, p->ifa, p->itir>>2);
+			ia64_srlz_i();
+		}
+	}
+
+	for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) {
+		if ((__per_cpu_idtrs[cpu][0][i].pte & 0x1) ||
+				(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+			break;
+	}
+	per_cpu(ia64_tr_used, cpu) = i;
 }
+EXPORT_SYMBOL_GPL(ia64_ptr_entry);

+ 6 - 1
arch/ia64/pci/pci.c

@@ -362,7 +362,12 @@ pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
 	info.name = name;
 	acpi_walk_resources(device->handle, METHOD_NAME__CRS, add_window,
 			&info);
-
+	/*
+	 * See arch/x86/pci/acpi.c.
+	 * The desired pci bus might already be scanned in a quirk. We
+	 * should handle the case here, but it appears that IA64 hasn't
+	 * such quirk. So we just ignore the case now.
+	 */
 	pbus = pci_scan_bus_parented(NULL, bus, &pci_root_ops, controller);
 	if (pbus)
 		pcibios_setup_root_windows(pbus, controller);

+ 4 - 4
arch/ia64/sn/kernel/xpc_main.c

@@ -199,7 +199,7 @@ xpc_timeout_partition_disengage_request(unsigned long data)
 	struct xpc_partition *part = (struct xpc_partition *) data;
 
 
-	DBUG_ON(jiffies < part->disengage_request_timeout);
+	DBUG_ON(time_before(jiffies, part->disengage_request_timeout));
 
 	(void) xpc_partition_disengaged(part);
 
@@ -230,7 +230,7 @@ xpc_hb_beater(unsigned long dummy)
 {
 	xpc_vars->heartbeat++;
 
-	if (jiffies >= xpc_hb_check_timeout) {
+	if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
 		wake_up_interruptible(&xpc_act_IRQ_wq);
 	}
 
@@ -270,7 +270,7 @@ xpc_hb_checker(void *ignore)
 
 
 		/* checking of remote heartbeats is skewed by IRQ handling */
-		if (jiffies >= xpc_hb_check_timeout) {
+		if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
 			dev_dbg(xpc_part, "checking remote heartbeats\n");
 			xpc_check_remote_hb();
 
@@ -305,7 +305,7 @@ xpc_hb_checker(void *ignore)
 		/* wait for IRQ or timeout */
 		(void) wait_event_interruptible(xpc_act_IRQ_wq,
 			    (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
-					jiffies >= xpc_hb_check_timeout ||
+					time_after_eq(jiffies, xpc_hb_check_timeout) ||
 						(volatile int) xpc_exiting));
 	}
 

+ 1 - 1
arch/ia64/sn/kernel/xpc_partition.c

@@ -877,7 +877,7 @@ xpc_partition_disengaged(struct xpc_partition *part)
 	disengaged = (xpc_partition_engaged(1UL << partid) == 0);
 	if (part->disengage_request_timeout) {
 		if (!disengaged) {
-			if (jiffies < part->disengage_request_timeout) {
+			if (time_before(jiffies, part->disengage_request_timeout)) {
 				/* timelimit hasn't been reached yet */
 				return 0;
 			}

+ 1 - 1
arch/m32r/kernel/Makefile

@@ -5,7 +5,7 @@
 extra-y	:= head.o init_task.o vmlinux.lds
 
 obj-y	:= process.o entry.o traps.o align.o irq.o setup.o time.o \
-	m32r_ksyms.o sys_m32r.o semaphore.o signal.o ptrace.o
+	m32r_ksyms.o sys_m32r.o signal.o ptrace.o
 
 obj-$(CONFIG_SMP)		+= smp.o smpboot.o
 obj-$(CONFIG_MODULES)		+= module.o

+ 0 - 5
arch/m32r/kernel/m32r_ksyms.c

@@ -7,7 +7,6 @@
 #include <linux/interrupt.h>
 #include <linux/string.h>
 
-#include <asm/semaphore.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/checksum.h>
@@ -22,10 +21,6 @@ EXPORT_SYMBOL(dump_fpu);
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(iounmap);
 EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down_trylock);
 
 /* Networking helper routines. */
 /* Delay loops */

+ 0 - 185
arch/m32r/kernel/semaphore.c

@@ -1,185 +0,0 @@
-/*
- *  linux/arch/m32r/semaphore.c
- *    orig : i386 2.6.4
- *
- *  M32R semaphore implementation.
- *
- *	Copyright (c) 2002 - 2004 Hitoshi Yamamoto
- */
-
-/*
- * i386 semaphore implementation.
- *
- * (C) Copyright 1999 Linus Torvalds
- *
- * Portions Copyright 1999 Red Hat, Inc.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
- */
-#include <linux/sched.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <asm/semaphore.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- *  - only on a boundary condition do we need to care. When we go
- *    from a negative count to a non-negative, we wake people up.
- *  - when we go from a non-negative count to a negative do we
- *    (a) synchronize with the "sleeper" count and (b) make sure
- *    that we're on the wakeup list before we synchronize so that
- *    we cannot lose wakeup events.
- */
-
-asmlinkage void __up(struct semaphore *sem)
-{
-	wake_up(&sem->wait);
-}
-
-asmlinkage void __sched __down(struct semaphore * sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_UNINTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * the wait_queue_head.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_UNINTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	tsk->state = TASK_RUNNING;
-}
-
-asmlinkage int __sched __down_interruptible(struct semaphore * sem)
-{
-	int retval = 0;
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-	unsigned long flags;
-
-	tsk->state = TASK_INTERRUPTIBLE;
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
-	sem->sleepers++;
-	for (;;) {
-		int sleepers = sem->sleepers;
-
-		/*
-		 * With signals pending, this turns into
-		 * the trylock failure case - we won't be
-		 * sleeping, and we* can't get the lock as
-		 * it has contention. Just correct the count
-		 * and exit.
-		 */
-		if (signal_pending(current)) {
-			retval = -EINTR;
-			sem->sleepers = 0;
-			atomic_add(sleepers, &sem->count);
-			break;
-		}
-
-		/*
-		 * Add "everybody else" into it. They aren't
-		 * playing, because we own the spinlock in
-		 * wait_queue_head. The "-1" is because we're
-		 * still hoping to get the semaphore.
-		 */
-		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
-			sem->sleepers = 0;
-			break;
-		}
-		sem->sleepers = 1;	/* us - see -1 above */
-		spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-		schedule();
-
-		spin_lock_irqsave(&sem->wait.lock, flags);
-		tsk->state = TASK_INTERRUPTIBLE;
-	}
-	remove_wait_queue_locked(&sem->wait, &wait);
-	wake_up_locked(&sem->wait);
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-
-	tsk->state = TASK_RUNNING;
-	return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- *
- * We could have done the trylock with a
- * single "cmpxchg" without failure cases,
- * but then it wouldn't work on a 386.
- */
-asmlinkage int __down_trylock(struct semaphore * sem)
-{
-	int sleepers;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sem->wait.lock, flags);
-	sleepers = sem->sleepers + 1;
-	sem->sleepers = 0;
-
-	/*
-	 * Add "everybody else" and us into it. They aren't
-	 * playing, because we own the spinlock in the
-	 * wait_queue_head.
-	 */
-	if (!atomic_add_negative(sleepers, &sem->count)) {
-		wake_up_locked(&sem->wait);
-	}
-
-	spin_unlock_irqrestore(&sem->wait.lock, flags);
-	return 1;
-}

+ 1 - 1
arch/m68k/kernel/Makefile

@@ -10,7 +10,7 @@ endif
 extra-y	+= vmlinux.lds
 
 obj-y	:= entry.o process.o traps.o ints.o signal.o ptrace.o module.o \
-	   sys_m68k.o time.o semaphore.o setup.o m68k_ksyms.o devres.o
+	   sys_m68k.o time.o setup.o m68k_ksyms.o devres.o
 
 devres-y = ../../../kernel/irq/devres.o
 

+ 0 - 6
arch/m68k/kernel/m68k_ksyms.c

@@ -1,5 +1,4 @@
 #include <linux/module.h>
-#include <asm/semaphore.h>
 
 asmlinkage long long __ashldi3 (long long, int);
 asmlinkage long long __ashrdi3 (long long, int);
@@ -15,8 +14,3 @@ EXPORT_SYMBOL(__ashrdi3);
 EXPORT_SYMBOL(__lshrdi3);
 EXPORT_SYMBOL(__muldi3);
 
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
-

+ 0 - 132
arch/m68k/kernel/semaphore.c

@@ -1,132 +0,0 @@
-/*
- *  Generic semaphore code. Buyer beware. Do your own
- * specific changes in <asm/semaphore-helper.h>
- */
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/semaphore-helper.h>
-
-#ifndef CONFIG_RMW_INSNS
-spinlock_t semaphore_wake_lock;
-#endif
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit.  ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore.  The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
-	wake_one_more(sem);
-	wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function.  Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible.  This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return.  If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-
-#define DOWN_HEAD(task_state)						\
-									\
-									\
-	current->state = (task_state);					\
-	add_wait_queue(&sem->wait, &wait);				\
-									\
-	/*								\
-	 * Ok, we're set up.  sem->count is known to be less than zero	\
-	 * so we must wait.						\
-	 *								\
-	 * We can let go the lock for purposes of waiting.		\
-	 * We re-acquire it after awaking so as to protect		\
-	 * all semaphore operations.					\
-	 *								\
-	 * If "up()" is called before we call waking_non_zero() then	\
-	 * we will catch it right away.  If it is called later then	\
-	 * we will have to go through a wakeup cycle to catch it.	\
-	 *								\
-	 * Multiple waiters contend for the semaphore lock to see	\
-	 * who gets to gate through and who has to wait some more.	\
-	 */								\
-	for (;;) {
-
-#define DOWN_TAIL(task_state)			\
-		current->state = (task_state);	\
-	}					\
-	current->state = TASK_RUNNING;		\
-	remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	DOWN_HEAD(TASK_UNINTERRUPTIBLE)
-	if (waking_non_zero(sem))
-		break;
-	schedule();
-	DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	int ret = 0;
-
-	DOWN_HEAD(TASK_INTERRUPTIBLE)
-
-	ret = waking_non_zero_interruptible(sem, current);
-	if (ret)
-	{
-		if (ret == 1)
-			/* ret != 0 only if we get interrupted -arca */
-			ret = 0;
-		break;
-	}
-	schedule();
-	DOWN_TAIL(TASK_INTERRUPTIBLE)
-	return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
-	return waking_non_zero_trylock(sem);
-}

+ 1 - 1
arch/m68k/lib/Makefile

@@ -5,4 +5,4 @@
 EXTRA_AFLAGS := -traditional
 
 lib-y	:= ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
-	   checksum.o string.o semaphore.o uaccess.o
+	   checksum.o string.o uaccess.o

+ 0 - 53
arch/m68k/lib/semaphore.S

@@ -1,53 +0,0 @@
-/*
- *  linux/arch/m68k/lib/semaphore.S
- *
- *  Copyright (C) 1996  Linus Torvalds
- *
- *  m68k version by Andreas Schwab
- */
-
-#include <linux/linkage.h>
-#include <asm/semaphore.h>
-
-/*
- * The semaphore operations have a special calling sequence that
- * allow us to do a simpler in-line version of them. These routines
- * need to convert that sequence back into the C sequence when
- * there is contention on the semaphore.
- */
-ENTRY(__down_failed)
-	moveml %a0/%d0/%d1,-(%sp)
-	movel %a1,-(%sp)
-	jbsr __down
-	movel (%sp)+,%a1
-	moveml (%sp)+,%a0/%d0/%d1
-	rts
-
-ENTRY(__down_failed_interruptible)
-	movel %a0,-(%sp)
-	movel %d1,-(%sp)
-	movel %a1,-(%sp)
-	jbsr __down_interruptible
-	movel (%sp)+,%a1
-	movel (%sp)+,%d1
-	movel (%sp)+,%a0
-	rts
-
-ENTRY(__down_failed_trylock)
-	movel %a0,-(%sp)
-	movel %d1,-(%sp)
-	movel %a1,-(%sp)
-	jbsr __down_trylock
-	movel (%sp)+,%a1
-	movel (%sp)+,%d1
-	movel (%sp)+,%a0
-	rts
-
-ENTRY(__up_wakeup)
-	moveml %a0/%d0/%d1,-(%sp)
-	movel %a1,-(%sp)
-	jbsr __up
-	movel (%sp)+,%a1
-	moveml (%sp)+,%a0/%d0/%d1
-	rts
-

+ 1 - 1
arch/m68knommu/kernel/Makefile

@@ -5,7 +5,7 @@
 extra-y := vmlinux.lds
 
 obj-y += dma.o entry.o init_task.o irq.o m68k_ksyms.o process.o ptrace.o \
-	 semaphore.o setup.o signal.o syscalltable.o sys_m68k.o time.o traps.o
+	 setup.o signal.o syscalltable.o sys_m68k.o time.o traps.o
 
 obj-$(CONFIG_MODULES)	+= module.o
 obj-$(CONFIG_COMEMPCI)	+= comempci.o

+ 0 - 6
arch/m68knommu/kernel/m68k_ksyms.c

@@ -13,7 +13,6 @@
 #include <asm/pgalloc.h>
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/semaphore.h>
 #include <asm/checksum.h>
 #include <asm/current.h>
 
@@ -39,11 +38,6 @@ EXPORT_SYMBOL(csum_partial_copy_nocheck);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
 
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
-
 /*
  * libgcc functions - functions that are used internally by the
  * compiler...  (prototypes are not correct though, but that

部分文件因为文件数量过多而无法显示