
Merge ../linux-2.6

Paul Mackerras 19 years ago
parent
commit
24bfb00123
100 changed files with 1535 additions and 1362 deletions
  1. 2 4
      CREDITS
  2. 8 3
      Documentation/Changes
  3. 1 1
      Documentation/DocBook/Makefile
  4. 1 1
      Documentation/DocBook/journal-api.tmpl
  5. 3 4
      Documentation/DocBook/kernel-api.tmpl
  6. 160 0
      Documentation/DocBook/rapidio.tmpl
  7. 91 83
      Documentation/MSI-HOWTO.txt
  8. 0 2
      Documentation/RCU/whatisRCU.txt
  9. 3 2
      Documentation/device-mapper/snapshot.txt
  10. 2 2
      Documentation/fb/vesafb.txt
  11. 16 0
      Documentation/feature-removal-schedule.txt
  12. 173 0
      Documentation/filesystems/dentry-locking.txt
  13. 0 5
      Documentation/filesystems/devfs/README
  14. 195 0
      Documentation/filesystems/ramfs-rootfs-initramfs.txt
  15. 148 286
      Documentation/filesystems/vfs.txt
  16. 18 16
      Documentation/hpet.txt
  17. 1 1
      Documentation/magic-number.txt
  18. 0 2
      Documentation/networking/decnet.txt
  19. 1 1
      Documentation/oops-tracing.txt
  20. 12 5
      Documentation/power/video.txt
  21. 12 9
      Documentation/s390/driver-model.txt
  22. 20 9
      Documentation/sound/alsa/ALSA-Configuration.txt
  23. 15 41
      Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
  24. 2 2
      Documentation/sparse.txt
  25. 3 3
      Documentation/video4linux/bttv/README.freeze
  26. 13 12
      Documentation/vm/hugetlbpage.txt
  27. 29 3
      MAINTAINERS
  28. 1 1
      Makefile
  29. 1 48
      arch/arm/kernel/ptrace.c
  30. 1 0
      arch/arm/mach-aaec2000/clock.c
  31. 1 0
      arch/arm/mach-epxa10db/mm.c
  32. 1 2
      arch/arm/mach-integrator/impd1.c
  33. 1 0
      arch/arm/mach-pxa/corgi_lcd.c
  34. 1 48
      arch/arm26/kernel/ptrace.c
  35. 3 3
      arch/cris/arch-v10/README.mm
  36. 2 49
      arch/cris/arch-v10/kernel/ptrace.c
  37. 1 1
      arch/cris/arch-v10/kernel/signal.c
  38. 7 7
      arch/cris/arch-v32/drivers/cryptocop.c
  39. 2 49
      arch/cris/arch-v32/kernel/ptrace.c
  40. 1 1
      arch/cris/arch-v32/kernel/signal.c
  41. 1 1
      arch/cris/mm/ioremap.c
  42. 1 42
      arch/frv/kernel/ptrace.c
  43. 1 38
      arch/h8300/kernel/ptrace.c
  44. 13 0
      arch/i386/Kconfig
  45. 0 10
      arch/i386/Kconfig.debug
  46. 8 2
      arch/i386/kernel/apic.c
  47. 2 3
      arch/i386/kernel/apm.c
  48. 1 4
      arch/i386/kernel/cpu/common.c
  49. 1 1
      arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
  50. 1 1
      arch/i386/kernel/cpu/mcheck/k7.c
  51. 2 2
      arch/i386/kernel/cpu/mcheck/mce.c
  52. 2 2
      arch/i386/kernel/cpu/mcheck/p4.c
  53. 1 1
      arch/i386/kernel/cpu/mcheck/p5.c
  54. 1 1
      arch/i386/kernel/cpu/mcheck/p6.c
  55. 1 1
      arch/i386/kernel/cpu/mcheck/winchip.c
  56. 95 85
      arch/i386/kernel/kprobes.c
  57. 1 0
      arch/i386/kernel/ldt.c
  58. 1 1
      arch/i386/kernel/mca.c
  59. 2 42
      arch/i386/kernel/ptrace.c
  60. 2 1
      arch/i386/kernel/reboot_fixups.c
  61. 1 0
      arch/i386/kernel/scx200.c
  62. 1 3
      arch/i386/kernel/smpboot.c
  63. 0 6
      arch/i386/oprofile/Kconfig
  64. 1 0
      arch/i386/power/cpu.c
  65. 13 0
      arch/ia64/Kconfig
  66. 0 11
      arch/ia64/Kconfig.debug
  67. 2 4
      arch/ia64/hp/sim/simserial.c
  68. 69 57
      arch/ia64/kernel/kprobes.c
  69. 1 1
      arch/ia64/kernel/perfmon.c
  70. 7 0
      arch/ia64/kernel/setup.c
  71. 0 6
      arch/ia64/oprofile/Kconfig
  72. 2 4
      arch/m68k/atari/time.c
  73. 3 44
      arch/m68k/kernel/ptrace.c
  74. 14 3
      arch/m68knommu/Kconfig
  75. 5 2
      arch/m68knommu/Makefile
  76. 1 0
      arch/m68knommu/kernel/asm-offsets.c
  77. 1 38
      arch/m68knommu/kernel/ptrace.c
  78. 4 1
      arch/m68knommu/kernel/setup.c
  79. 9 0
      arch/m68knommu/kernel/vmlinux.lds.S
  80. 19 0
      arch/m68knommu/platform/520x/Makefile
  81. 65 0
      arch/m68knommu/platform/520x/config.c
  82. 1 0
      arch/m68knommu/platform/5307/Makefile
  83. 3 0
      arch/m68knommu/platform/5307/head.S
  84. 1 0
      arch/m68knommu/platform/5307/ints.c
  85. 6 6
      arch/m68knommu/platform/5307/pit.c
  86. 1 1
      arch/mips/Kconfig
  87. 4 0
      arch/mips/boot/.gitignore
  88. 6 5
      arch/mips/configs/pnx8550-jbs_defconfig
  89. 6 4
      arch/mips/configs/pnx8550-v2pci_defconfig
  90. 6 0
      arch/mips/ddb5xxx/common/rtc_ds1386.c
  91. 22 2
      arch/mips/dec/time.c
  92. 6 0
      arch/mips/jmr3927/common/rtc_ds1742.c
  93. 1 2
      arch/mips/kernel/irixsig.c
  94. 5 50
      arch/mips/kernel/ptrace.c
  95. 93 104
      arch/mips/kernel/rtlx.c
  96. 0 3
      arch/mips/kernel/signal.c
  97. 8 5
      arch/mips/kernel/signal32.c
  98. 44 56
      arch/mips/kernel/vpe.c
  99. 9 0
      arch/mips/lasat/ds1603.c
  100. 6 0
      arch/mips/momentum/jaguar_atx/setup.c

+ 2 - 4
CREDITS

@@ -3642,11 +3642,9 @@ S: Beaverton, OR 97005
 S: USA
 
 N: Michal Wronski
-E: wrona@mat.uni.torun.pl
-W: http://www.mat.uni.torun.pl/~wrona
+E: Michal.Wronski@motorola.com
 D: POSIX message queues fs (with K. Benedyczak)
-S: ul. Teczowa 23/12
-S: 80-680 Gdansk-Sobieszewo
+S: Krakow
 S: Poland
 
 N: Frank Xia

+ 8 - 3
Documentation/Changes

@@ -139,9 +139,14 @@ You'll probably want to upgrade.
 Ksymoops
 --------
 
-If the unthinkable happens and your kernel oopses, you'll need a 2.4
-version of ksymoops to decode the report; see REPORTING-BUGS in the
-root of the Linux source for more information.
+If the unthinkable happens and your kernel oopses, you may need the
+ksymoops tool to decode it, but in most cases you don't.
+In the 2.6 kernel it is generally preferred to build the kernel with
+CONFIG_KALLSYMS so that it produces readable dumps that can be used as-is
+(this also produces better output than ksymoops).
+If for some reason your kernel is not build with CONFIG_KALLSYMS and
+you have no way to rebuild and reproduce the Oops with that option, then
+you can still decode that Oops with ksymoops.
 
 Module-Init-Tools
 -----------------

+ 1 - 1
Documentation/DocBook/Makefile

@@ -10,7 +10,7 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
 	    kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
 	    procfs-guide.xml writing_usb_driver.xml \
 	    sis900.xml kernel-api.xml journal-api.xml lsm.xml usb.xml \
-	    gadget.xml libata.xml mtdnand.xml librs.xml
+	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml
 
 ###
 # The build process is as follows (targets):

+ 1 - 1
Documentation/DocBook/journal-api.tmpl

@@ -306,7 +306,7 @@ an example.
 </para>
 	<sect1><title>Journal Level</title>
 !Efs/jbd/journal.c
-!Efs/jbd/recovery.c
+!Ifs/jbd/recovery.c
 	</sect1>
 	<sect1><title>Transasction Level</title>
 !Efs/jbd/transaction.c	

+ 3 - 4
Documentation/DocBook/kernel-api.tmpl

@@ -118,7 +118,7 @@ X!Ilib/string.c
      </sect1>
      <sect1><title>User Space Memory Access</title>
 !Iinclude/asm-i386/uaccess.h
-!Iarch/i386/lib/usercopy.c
+!Earch/i386/lib/usercopy.c
      </sect1>
      <sect1><title>More Memory Management Functions</title>
 !Iinclude/linux/rmap.h
@@ -174,7 +174,6 @@ X!Ilib/string.c
      <title>The Linux VFS</title>
      <sect1><title>The Filesystem types</title>
 !Iinclude/linux/fs.h
-!Einclude/linux/fs.h
      </sect1>
      <sect1><title>The Directory Cache</title>
 !Efs/dcache.c
@@ -266,7 +265,7 @@ X!Ekernel/module.c
   <chapter id="hardware">
      <title>Hardware Interfaces</title>
      <sect1><title>Interrupt Handling</title>
-!Ikernel/irq/manage.c
+!Ekernel/irq/manage.c
      </sect1>
 
      <sect1><title>Resources Management</title>
@@ -501,7 +500,7 @@ KAO -->
 !Edrivers/video/modedb.c
      </sect1>
      <sect1><title>Frame Buffer Macintosh Video Mode Database</title>
-!Idrivers/video/macmodes.c
+!Edrivers/video/macmodes.c
      </sect1>
      <sect1><title>Frame Buffer Fonts</title>
         <para>

+ 160 - 0
Documentation/DocBook/rapidio.tmpl

@@ -0,0 +1,160 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+        "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [
+	<!ENTITY rapidio SYSTEM "rapidio.xml">
+	]>
+
+<book id="RapidIO-Guide">
+ <bookinfo>
+  <title>RapidIO Subsystem Guide</title>
+
+  <authorgroup>
+   <author>
+    <firstname>Matt</firstname>
+    <surname>Porter</surname>
+    <affiliation>
+     <address>
+      <email>mporter@kernel.crashing.org</email>
+      <email>mporter@mvista.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <copyright>
+   <year>2005</year>
+   <holder>MontaVista Software, Inc.</holder>
+  </copyright>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License version 2 as published by the Free Software Foundation.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="intro">
+      <title>Introduction</title>
+  <para>
+	RapidIO is a high speed switched fabric interconnect with
+	features aimed at the embedded market.  RapidIO provides
+	support for memory-mapped I/O as well as message-based
+	transactions over the switched fabric network. RapidIO has
+	a standardized discovery mechanism not unlike the PCI bus
+	standard that allows simple detection of devices in a
+	network.
+  </para>
+  <para>
+  	This documentation is provided for developers intending
+	to support RapidIO on new architectures, write new drivers,
+	or to understand the subsystem internals.
+  </para>
+  </chapter>
+
+  <chapter id="bugs">
+     <title>Known Bugs and Limitations</title>
+
+     <sect1>
+     	<title>Bugs</title>
+	  <para>None. ;)</para>
+     </sect1>
+     <sect1>
+     	<title>Limitations</title>
+	  <para>
+	    <orderedlist>
+	      <listitem><para>Access/management of RapidIO memory regions is not supported</para></listitem>
+	      <listitem><para>Multiple host enumeration is not supported</para></listitem>
+	    </orderedlist>
+	 </para>
+     </sect1>
+  </chapter>
+
+  <chapter id="drivers">
+     	<title>RapidIO driver interface</title>
+	<para>
+		Drivers are provided a set of calls in order
+		to interface with the subsystem to gather info
+		on devices, request/map memory region resources,
+		and manage mailboxes/doorbells.
+	</para>
+	<sect1>
+		<title>Functions</title>
+!Iinclude/linux/rio_drv.h
+!Edrivers/rapidio/rio-driver.c
+!Edrivers/rapidio/rio.c
+	</sect1>
+  </chapter>
+
+  <chapter id="internals">
+     <title>Internals</title>
+
+     <para>
+     This chapter contains the autogenerated documentation of the RapidIO
+     subsystem.
+     </para>
+
+     <sect1><title>Structures</title>
+!Iinclude/linux/rio.h
+     </sect1>
+     <sect1><title>Enumeration and Discovery</title>
+!Idrivers/rapidio/rio-scan.c
+     </sect1>
+     <sect1><title>Driver functionality</title>
+!Idrivers/rapidio/rio.c
+!Idrivers/rapidio/rio-access.c
+     </sect1>
+     <sect1><title>Device model support</title>
+!Idrivers/rapidio/rio-driver.c
+     </sect1>
+     <sect1><title>Sysfs support</title>
+!Idrivers/rapidio/rio-sysfs.c
+     </sect1>
+     <sect1><title>PPC32 support</title>
+!Iarch/ppc/kernel/rio.c
+!Earch/ppc/syslib/ppc85xx_rio.c
+!Iarch/ppc/syslib/ppc85xx_rio.c
+     </sect1>
+  </chapter>
+
+  <chapter id="credits">
+     <title>Credits</title>
+	<para>
+		The following people have contributed to the RapidIO
+		subsystem directly or indirectly:
+		<orderedlist>
+			<listitem><para>Matt Porter<email>mporter@kernel.crashing.org</email></para></listitem>
+			<listitem><para>Randy Vinson<email>rvinson@mvista.com</email></para></listitem>
+			<listitem><para>Dan Malek<email>dan@embeddedalley.com</email></para></listitem>
+		</orderedlist>
+	</para>
+	<para>
+		The following people have contributed to this document:
+		<orderedlist>
+			<listitem><para>Matt Porter<email>mporter@kernel.crashing.org</email></para></listitem>
+		</orderedlist>
+	</para>
+  </chapter>
+</book>

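The driver-interface chapter above only points at the kerneldoc pulled from include/linux/rio_drv.h and drivers/rapidio/rio-driver.c. As a rough sketch of how those pieces fit together, a minimal registration skeleton follows; it assumes the rio_register_driver()/rio_unregister_driver() entry points and the probe/remove/id_table layout of struct rio_driver from those files, and the device IDs and RIO_ANY_ID wildcards are purely illustrative, not taken from real hardware.

#include <linux/module.h>
#include <linux/init.h>
#include <linux/rio.h>
#include <linux/rio_drv.h>

/* Illustrative IDs only; a real driver would match its own hardware. */
#define EXAMPLE_RIO_DID	0x1234
#define EXAMPLE_RIO_VID	0x5678

static int example_rio_probe(struct rio_dev *rdev,
			     const struct rio_device_id *id)
{
	/* Claim doorbells, mailboxes or memory regions here. */
	printk(KERN_INFO "example_rio: device matched\n");
	return 0;
}

static void example_rio_remove(struct rio_dev *rdev)
{
	/* Release whatever probe() claimed. */
}

static const struct rio_device_id example_rio_ids[] = {
	{ EXAMPLE_RIO_DID, EXAMPLE_RIO_VID, RIO_ANY_ID, RIO_ANY_ID },
	{ 0, }				/* terminating entry */
};

static struct rio_driver example_rio_driver = {
	.name		= "example_rio",
	.id_table	= example_rio_ids,
	.probe		= example_rio_probe,
	.remove		= example_rio_remove,
};

static int __init example_rio_init(void)
{
	return rio_register_driver(&example_rio_driver);
}

static void __exit example_rio_exit(void)
{
	rio_unregister_driver(&example_rio_driver);
}

module_init(example_rio_init);
module_exit(example_rio_exit);
MODULE_LICENSE("GPL");
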
+ 91 - 83
Documentation/MSI-HOWTO.txt

@@ -10,14 +10,22 @@
 This guide describes the basics of Message Signaled Interrupts (MSI),
 the advantages of using MSI over traditional interrupt mechanisms,
 and how to enable your driver to use MSI or MSI-X. Also included is
-a Frequently Asked Questions.
+a Frequently Asked Questions (FAQ) section.
+
+1.1 Terminology
+
+PCI devices can be single-function or multi-function.  In either case,
+when this text talks about enabling or disabling MSI on a "device
+function," it is referring to one specific PCI device and function and
+not to all functions on a PCI device (unless the PCI device has only
+one function).
 
 2. Copyright 2003 Intel Corporation
 
 3. What is MSI/MSI-X?
 
 Message Signaled Interrupt (MSI), as described in the PCI Local Bus
-Specification Revision 2.3 or latest, is an optional feature, and a
+Specification Revision 2.3 or later, is an optional feature, and a
 required feature for PCI Express devices. MSI enables a device function
 to request service by sending an Inbound Memory Write on its PCI bus to
 the FSB as a Message Signal Interrupt transaction. Because MSI is
@@ -27,7 +35,7 @@ supported.
 
 A PCI device that supports MSI must also support pin IRQ assertion
 interrupt mechanism to provide backward compatibility for systems that
-do not support MSI. In Systems, which support MSI, the bus driver is
+do not support MSI. In systems which support MSI, the bus driver is
 responsible for initializing the message address and message data of
 the device function's MSI/MSI-X capability structure during device
 initial configuration.
@@ -61,17 +69,17 @@ over the MSI capability structure as described below.
 
         - MSI and MSI-X both support per-vector masking. Per-vector
 	masking is an optional extension of MSI but a required
-	feature for MSI-X. Per-vector masking provides the kernel
-	the ability to mask/unmask MSI when servicing its software
-	interrupt service routing handler. If per-vector masking is
+	feature for MSI-X. Per-vector masking provides the kernel the
+	ability to mask/unmask a single MSI while running its
+	interrupt service routine. If per-vector masking is
 	not supported, then the device driver should provide the
 	hardware/software synchronization to ensure that the device
 	generates MSI when the driver wants it to do so.
 
 4. Why use MSI?
 
-As a benefit the simplification of board design, MSI allows board
-designers to remove out of band interrupt routing. MSI is another
+As a benefit to the simplification of board design, MSI allows board
+designers to remove out-of-band interrupt routing. MSI is another
 step towards a legacy-free environment.
 
 Due to increasing pressure on chipset and processor packages to
@@ -87,7 +95,7 @@ support. As a result, the PCI Express technology requires MSI
 support for better interrupt performance.
 
 Using MSI enables the device functions to support two or more
-vectors, which can be configured to target different CPU's to
+vectors, which can be configured to target different CPUs to
 increase scalability.
 
 5. Configuring a driver to use MSI/MSI-X
@@ -119,13 +127,13 @@ pci_enable_msi() explicitly.
 
 int pci_enable_msi(struct pci_dev *dev)
 
-With this new API, any existing device driver, which like to have
-MSI enabled on its device function, must call this API to enable MSI
+With this new API, a device driver that wants to have MSI
+enabled on its device function must call this API to enable MSI.
 A successful call will initialize the MSI capability structure
 with ONE vector, regardless of whether a device function is
 capable of supporting multiple messages. This vector replaces the
-pre-assigned dev->irq with a new MSI vector. To avoid the conflict
-of new assigned vector with existing pre-assigned vector requires
+pre-assigned dev->irq with a new MSI vector. To avoid a conflict
+of the new assigned vector with existing pre-assigned vector requires
 a device driver to call this API before calling request_irq().
 
 5.2.2 API pci_disable_msi
@@ -137,14 +145,14 @@ when a device driver is unloading. This API restores dev->irq with
 the pre-assigned IOAPIC vector and switches a device's interrupt
 mode to PCI pin-irq assertion/INTx emulation mode.
 
-Note that a device driver should always call free_irq() on MSI vector
-it has done request_irq() on before calling this API. Failure to do
-so results a BUG_ON() and a device will be left with MSI enabled and
+Note that a device driver should always call free_irq() on the MSI vector
+that it has done request_irq() on before calling this API. Failure to do
+so results in a BUG_ON() and a device will be left with MSI enabled and
 leaks its vector.
 
 5.2.3 MSI mode vs. legacy mode diagram
 
-The below diagram shows the events, which switches the interrupt
+The below diagram shows the events which switch the interrupt
 mode on the MSI-capable device function between MSI mode and
 PIN-IRQ assertion mode.
 
@@ -155,9 +163,9 @@ PIN-IRQ assertion mode.
  	 ------------	pci_disable_msi  ------------------------
 
 
-Figure 1.0 MSI Mode vs. Legacy Mode
+Figure 1. MSI Mode vs. Legacy Mode
 
-In Figure 1.0, a device operates by default in legacy mode. Legacy
+In Figure 1, a device operates by default in legacy mode. Legacy
 in this context means PCI pin-irq assertion or PCI-Express INTx
 emulation. A successful MSI request (using pci_enable_msi()) switches
 a device's interrupt mode to MSI mode. A pre-assigned IOAPIC vector
@@ -166,11 +174,11 @@ assigned MSI vector will replace dev->irq.
 
 To return back to its default mode, a device driver should always call
 pci_disable_msi() to undo the effect of pci_enable_msi(). Note that a
-device driver should always call free_irq() on MSI vector it has done
-request_irq() on before calling pci_disable_msi(). Failure to do so
-results a BUG_ON() and a device will be left with MSI enabled and
+device driver should always call free_irq() on the MSI vector it has
+done request_irq() on before calling pci_disable_msi(). Failure to do
+so results in a BUG_ON() and a device will be left with MSI enabled and
 leaks its vector. Otherwise, the PCI subsystem restores a device's
-dev->irq with a pre-assigned IOAPIC vector and marks released
+dev->irq with a pre-assigned IOAPIC vector and marks the released
 MSI vector as unused.
 
 Once being marked as unused, there is no guarantee that the PCI
@@ -178,8 +186,8 @@ subsystem will reserve this MSI vector for a device. Depending on
 the availability of current PCI vector resources and the number of
 MSI/MSI-X requests from other drivers, this MSI may be re-assigned.
 
-For the case where the PCI subsystem re-assigned this MSI vector
-another driver, a request to switching back to MSI mode may result
+For the case where the PCI subsystem re-assigns this MSI vector to
+another driver, a request to switch back to MSI mode may result
 in being assigned a different MSI vector or a failure if no more
 vectors are available.
 
@@ -208,12 +216,12 @@ Unlike the function pci_enable_msi(), the function pci_enable_msix()
 does not replace the pre-assigned IOAPIC dev->irq with a new MSI
 vector because the PCI subsystem writes the 1:1 vector-to-entry mapping
 into the field vector of each element contained in a second argument.
-Note that the pre-assigned IO-APIC dev->irq is valid only if the device
-operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt of
+Note that the pre-assigned IOAPIC dev->irq is valid only if the device
+operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt at
 using dev->irq by the device driver to request for interrupt service
 may result unpredictabe behavior.
 
-For each MSI-X vector granted, a device driver is responsible to call
+For each MSI-X vector granted, a device driver is responsible for calling
 other functions like request_irq(), enable_irq(), etc. to enable
 this vector with its corresponding interrupt service handler. It is
 a device driver's choice to assign all vectors with the same
@@ -224,13 +232,13 @@ service handler.
 
 The PCI 3.0 specification has implementation notes that MMIO address
 space for a device's MSI-X structure should be isolated so that the
-software system can set different page for controlling accesses to
-the MSI-X structure. The implementation of MSI patch requires the PCI
+software system can set different pages for controlling accesses to the
+MSI-X structure. The implementation of MSI support requires the PCI
 subsystem, not a device driver, to maintain full control of the MSI-X
-table/MSI-X PBA and MMIO address space of the MSI-X table/MSI-X PBA.
-A device driver is prohibited from requesting the MMIO address space
-of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem will fail
-enabling MSI-X on its hardware device when it calls the function
+table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
+table/MSI-X PBA.  A device driver is prohibited from requesting the MMIO
+address space of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem
+will fail enabling MSI-X on its hardware device when it calls the function
 pci_enable_msix().
 
 5.3.2 Handling MSI-X allocation
@@ -274,9 +282,9 @@ For the case where fewer MSI-X vectors are allocated to a function
 than requested, the function pci_enable_msix() will return the
 maximum number of MSI-X vectors available to the caller. A device
 driver may re-send its request with fewer or equal vectors indicated
-in a return. For example, if a device driver requests 5 vectors, but
-the number of available vectors is 3 vectors, a value of 3 will be a
-return as a result of pci_enable_msix() call. A function could be
+in the return. For example, if a device driver requests 5 vectors, but
+the number of available vectors is 3 vectors, a value of 3 will be
+returned as a result of pci_enable_msix() call. A function could be
 designed for its driver to use only 3 MSI-X table entries as
 different combinations as ABC--, A-B-C, A--CB, etc. Note that this
 patch does not support multiple entries with the same vector. Such
@@ -285,49 +293,46 @@ as ABBCC, AABCC, BCCBA, etc will result as a failure by the function
 pci_enable_msix(). Below are the reasons why supporting multiple
 entries with the same vector is an undesirable solution.
 
-	- The PCI subsystem can not determine which entry, which
-	  generated the message, to mask/unmask MSI while handling
+	- The PCI subsystem cannot determine the entry that
+	  generated the message to mask/unmask MSI while handling
 	  software driver ISR. Attempting to walk through all MSI-X
 	  table entries (2048 max) to mask/unmask any match vector
 	  is an undesirable solution.
 
-	- Walk through all MSI-X table entries (2048 max) to handle
+	- Walking through all MSI-X table entries (2048 max) to handle
 	  SMP affinity of any match vector is an undesirable solution.
 
 5.3.4 API pci_enable_msix
 
-int pci_enable_msix(struct pci_dev *dev, u32 *entries, int nvec)
+int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
 
 This API enables a device driver to request the PCI subsystem
-for enabling MSI-X messages on its hardware device. Depending on
+to enable MSI-X messages on its hardware device. Depending on
 the availability of PCI vectors resources, the PCI subsystem enables
-either all or nothing.
+either all or none of the requested vectors.
 
-Argument dev points to the device (pci_dev) structure.
+Argument 'dev' points to the device (pci_dev) structure.
 
-Argument entries is a pointer of unsigned integer type. The number of
-elements is indicated in argument nvec. The content of each element
-will be mapped to the following struct defined in /driver/pci/msi.h.
+Argument 'entries' is a pointer to an array of msix_entry structs.
+The number of entries is indicated in argument 'nvec'.
+struct msix_entry is defined in /driver/pci/msi.h:
 
 struct msix_entry {
 	u16 	vector; /* kernel uses to write alloc vector */
 	u16	entry; /* driver uses to specify entry */
 };
 
-A device driver is responsible for initializing the field entry of
-each element with unique entry supported by MSI-X table. Otherwise,
+A device driver is responsible for initializing the field 'entry' of
+each element with a unique entry supported by MSI-X table. Otherwise,
 -EINVAL will be returned as a result. A successful return of zero
-indicates the PCI subsystem completes initializing each of requested
+indicates the PCI subsystem completed initializing each of the requested
 entries of the MSI-X table with message address and message data.
 Last but not least, the PCI subsystem will write the 1:1
-vector-to-entry mapping into the field vector of each element. A
-device driver is responsible of keeping track of allocated MSI-X
+vector-to-entry mapping into the field 'vector' of each element. A
+device driver is responsible for keeping track of allocated MSI-X
 vectors in its internal data structure.
 
-Argument nvec is an integer indicating the number of messages
-requested.
-
-A return of zero indicates that the number of MSI-X vectors is
+A return of zero indicates that the number of MSI-X vectors was
 successfully allocated. A return of greater than zero indicates
 MSI-X vector shortage. Or a return of less than zero indicates
 a failure. This failure may be a result of duplicate entries
@@ -341,12 +346,12 @@ void pci_disable_msix(struct pci_dev *dev)
 This API should always be used to undo the effect of pci_enable_msix()
 when a device driver is unloading. Note that a device driver should
 always call free_irq() on all MSI-X vectors it has done request_irq()
-on before calling this API. Failure to do so results a BUG_ON() and
+on before calling this API. Failure to do so results in a BUG_ON() and
 a device will be left with MSI-X enabled and leaks its vectors.
 
 5.3.6 MSI-X mode vs. legacy mode diagram
 
-The below diagram shows the events, which switches the interrupt
+The below diagram shows the events which switch the interrupt
 mode on the MSI-X capable device function between MSI-X mode and
 PIN-IRQ assertion mode (legacy).
 
@@ -356,22 +361,22 @@ PIN-IRQ assertion mode (legacy).
 	| 	     | ===============>	    |			     |
  	 ------------	pci_disable_msix     ------------------------
 
-Figure 2.0 MSI-X Mode vs. Legacy Mode
+Figure 2. MSI-X Mode vs. Legacy Mode
 
-In Figure 2.0, a device operates by default in legacy mode. A
+In Figure 2, a device operates by default in legacy mode. A
 successful MSI-X request (using pci_enable_msix()) switches a
 device's interrupt mode to MSI-X mode. A pre-assigned IOAPIC vector
 stored in dev->irq will be saved by the PCI subsystem; however,
 unlike MSI mode, the PCI subsystem will not replace dev->irq with
 assigned MSI-X vector because the PCI subsystem already writes the 1:1
-vector-to-entry mapping into the field vector of each element
+vector-to-entry mapping into the field 'vector' of each element
 specified in second argument.
 
 To return back to its default mode, a device driver should always call
 pci_disable_msix() to undo the effect of pci_enable_msix(). Note that
 a device driver should always call free_irq() on all MSI-X vectors it
 has done request_irq() on before calling pci_disable_msix(). Failure
-to do so results a BUG_ON() and a device will be left with MSI-X
+to do so results in a BUG_ON() and a device will be left with MSI-X
 enabled and leaks its vectors. Otherwise, the PCI subsystem switches a
 device function's interrupt mode from MSI-X mode to legacy mode and
 marks all allocated MSI-X vectors as unused.
@@ -383,53 +388,56 @@ MSI/MSI-X requests from other drivers, these MSI-X vectors may be
 re-assigned.
 
 For the case where the PCI subsystem re-assigned these MSI-X vectors
-to other driver, a request to switching back to MSI-X mode may result
+to other drivers, a request to switch back to MSI-X mode may result
 being assigned with another set of MSI-X vectors or a failure if no
 more vectors are available.
 
-5.4 Handling function implementng both MSI and MSI-X capabilities
+5.4 Handling function implementing both MSI and MSI-X capabilities
 
 For the case where a function implements both MSI and MSI-X
 capabilities, the PCI subsystem enables a device to run either in MSI
 mode or MSI-X mode but not both. A device driver determines whether it
 wants MSI or MSI-X enabled on its hardware device. Once a device
-driver requests for MSI, for example, it is prohibited to request for
+driver requests for MSI, for example, it is prohibited from requesting
 MSI-X; in other words, a device driver is not permitted to ping-pong
 between MSI mod MSI-X mode during a run-time.
 
 5.5 Hardware requirements for MSI/MSI-X support
+
 MSI/MSI-X support requires support from both system hardware and
 individual hardware device functions.
 
 5.5.1 System hardware support
+
 Since the target of MSI address is the local APIC CPU, enabling
-MSI/MSI-X support in Linux kernel is dependent on whether existing
-system hardware supports local APIC. Users should verify their
-system whether it runs when CONFIG_X86_LOCAL_APIC=y.
+MSI/MSI-X support in the Linux kernel is dependent on whether existing
+system hardware supports local APIC. Users should verify that their
+system supports local APIC operation by testing that it runs when
+CONFIG_X86_LOCAL_APIC=y.
 
 In SMP environment, CONFIG_X86_LOCAL_APIC is automatically set;
 however, in UP environment, users must manually set
 CONFIG_X86_LOCAL_APIC. Once CONFIG_X86_LOCAL_APIC=y, setting
-CONFIG_PCI_MSI enables the VECTOR based scheme and
-the option for MSI-capable device drivers to selectively enable
-MSI/MSI-X.
+CONFIG_PCI_MSI enables the VECTOR based scheme and the option for
+MSI-capable device drivers to selectively enable MSI/MSI-X.
 
 Note that CONFIG_X86_IO_APIC setting is irrelevant because MSI/MSI-X
 vector is allocated new during runtime and MSI/MSI-X support does not
 depend on BIOS support. This key independency enables MSI/MSI-X
-support on future IOxAPIC free platform.
+support on future IOxAPIC free platforms.
 
 5.5.2 Device hardware support
+
 The hardware device function supports MSI by indicating the
 MSI/MSI-X capability structure on its PCI capability list. By
 default, this capability structure will not be initialized by
 the kernel to enable MSI during the system boot. In other words,
 the device function is running on its default pin assertion mode.
 Note that in many cases the hardware supporting MSI have bugs,
-which may result in system hang. The software driver of specific
-MSI-capable hardware is responsible for whether calling
+which may result in system hangs. The software driver of specific
+MSI-capable hardware is responsible for deciding whether to call
 pci_enable_msi or not. A return of zero indicates the kernel
-successfully initializes the MSI/MSI-X capability structure of the
+successfully initialized the MSI/MSI-X capability structure of the
 device function. The device function is now running on MSI/MSI-X mode.
 
 5.6 How to tell whether MSI/MSI-X is enabled on device function
@@ -439,10 +447,10 @@ pci_enable_msi()/pci_enable_msix() indicates to a device driver that
 its device function is initialized successfully and ready to run in
 MSI/MSI-X mode.
 
-At the user level, users can use command 'cat /proc/interrupts'
-to display the vector allocated for a device and its interrupt
-MSI/MSI-X mode ("PCI MSI"/"PCI MSIX"). Below shows below MSI mode is
-enabled on a SCSI Adaptec 39320D Ultra320.
+At the user level, users can use the command 'cat /proc/interrupts'
+to display the vectors allocated for devices and their interrupt
+MSI/MSI-X modes ("PCI-MSI"/"PCI-MSI-X"). Below shows MSI mode is
+enabled on a SCSI Adaptec 39320D Ultra320 controller.
 
            CPU0       CPU1
   0:     324639          0    IO-APIC-edge  timer
@@ -453,8 +461,8 @@ enabled on a SCSI Adaptec 39320D Ultra320.
  15:          1          0    IO-APIC-edge  ide1
 169:          0          0   IO-APIC-level  uhci-hcd
 185:          0          0   IO-APIC-level  uhci-hcd
-193:        138         10         PCI MSI  aic79xx
-201:         30          0         PCI MSI  aic79xx
+193:        138         10         PCI-MSI  aic79xx
+201:         30          0         PCI-MSI  aic79xx
 225:         30          0   IO-APIC-level  aic7xxx
 233:         30          0   IO-APIC-level  aic7xxx
 NMI:          0          0
@@ -490,8 +498,8 @@ target address set as 0xfeexxxxx, as conformed to PCI
 specification 2.3 or latest, then it should work.
 
 Q4. From the driver point of view, if the MSI is lost because
-of the errors occur during inbound memory write, then it may
-wait for ever. Is there a mechanism for it to recover?
+of errors occurring during inbound memory write, then it may
+wait forever. Is there a mechanism for it to recover?
 
 A4. Since the target of the transaction is an inbound memory
 write, all transaction termination conditions (Retry,

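As a concrete illustration of the ordering rules spelled out in sections 5.2 and 5.3 above (enable MSI before request_irq(), free_irq() before disabling), here is a minimal sketch. The foo_* names are placeholders, the handler prototype is the 2.6-era one current for this document (it still takes a struct pt_regs argument), and a real driver would choose its own request_irq() flags; pci_enable_msix()/pci_disable_msix() follow the same per-vector request_irq()/free_irq() discipline.

#include <linux/pci.h>
#include <linux/interrupt.h>

static irqreturn_t foo_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
	/* Acknowledge the device and do the real work here. */
	return IRQ_HANDLED;
}

static int foo_setup_irq(struct pci_dev *pdev, void *foo_priv)
{
	int have_msi, err;

	/* Switch the function to MSI mode if possible.  On failure
	 * dev->irq still holds the pre-assigned IOAPIC vector, so the
	 * driver falls back to INTx transparently. */
	have_msi = !pci_enable_msi(pdev);
	if (!have_msi)
		printk(KERN_INFO "foo: MSI not available, using INTx\n");

	/* Either way dev->irq now names the vector to request. */
	err = request_irq(pdev->irq, foo_interrupt, 0, "foo", foo_priv);
	if (err && have_msi)
		pci_disable_msi(pdev);
	return err;
}

static void foo_teardown_irq(struct pci_dev *pdev, void *foo_priv)
{
	/* free_irq() must come before pci_disable_msi(); the reverse
	 * order hits the BUG_ON() described above and leaks the vector. */
	free_irq(pdev->irq, foo_priv);
	pci_disable_msi(pdev);
}
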
+ 0 - 2
Documentation/RCU/whatisRCU.txt

@@ -772,8 +772,6 @@ RCU pointer/list traversal:
 	list_for_each_entry_rcu
 	list_for_each_continue_rcu	(to be deprecated in favor of new
 					 list_for_each_entry_continue_rcu)
-	hlist_for_each_rcu		(to be deprecated in favor of
-					 hlist_for_each_entry_rcu)
 	hlist_for_each_entry_rcu
 
 RCU pointer update:

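For readers new to the primitives listed above, a short sketch of how an RCU list-traversal primitive pairs with an update primitive; struct foo, foo_list and foo_lock are illustrative names, not part of the RCU API.

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct foo {
	struct list_head list;
	int key;
	int data;
};

static LIST_HEAD(foo_list);
static DEFINE_SPINLOCK(foo_lock);

/* Reader: the traversal is protected only by rcu_read_lock(); the
 * _rcu list primitive makes the pointer fetches safe against
 * concurrent updaters. */
static int foo_lookup(int key)
{
	struct foo *p;
	int data = -1;

	rcu_read_lock();
	list_for_each_entry_rcu(p, &foo_list, list) {
		if (p->key == key) {
			data = p->data;
			break;
		}
	}
	rcu_read_unlock();
	return data;
}

/* Updater: additions still need an ordinary lock to serialize
 * against other updaters. */
static void foo_add(struct foo *new)
{
	spin_lock(&foo_lock);
	list_add_rcu(&new->list, &foo_list);
	spin_unlock(&foo_lock);
}
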
+ 3 - 2
Documentation/device-mapper/snapshot.txt

@@ -19,7 +19,6 @@ There are two dm targets available: snapshot and snapshot-origin.
 *) snapshot-origin <origin>
 
 which will normally have one or more snapshots based on it.
-You must create the snapshot-origin device before you can create snapshots.
 Reads will be mapped directly to the backing device. For each write, the
 original data will be saved in the <COW device> of each snapshot to keep
 its visible content unchanged, at least until the <COW device> fills up.
@@ -27,7 +26,7 @@ its visible content unchanged, at least until the <COW device> fills up.
 
 *) snapshot <origin> <COW device> <persistent?> <chunksize>
 
-A snapshot is created of the <origin> block device. Changed chunks of
+A snapshot of the <origin> block device is created. Changed chunks of
 <chunksize> sectors will be stored on the <COW device>.  Writes will
 only go to the <COW device>.  Reads will come from the <COW device> or
 from <origin> for unchanged data.  <COW device> will often be
@@ -37,6 +36,8 @@ the amount of free space and expand the <COW device> before it fills up.
 
 <persistent?> is P (Persistent) or N (Not persistent - will not survive
 after reboot).
+The difference is that for transient snapshots less metadata must be
+saved on disk - they can be kept in memory by the kernel.
 
 
 How this is used by LVM2

+ 2 - 2
Documentation/fb/vesafb.txt

@@ -146,10 +146,10 @@ pmipal	Use the protected mode interface for palette changes.
 
 mtrr:n	setup memory type range registers for the vesafb framebuffer
 	where n:
-	      0 - disabled (equivalent to nomtrr)
+	      0 - disabled (equivalent to nomtrr) (default)
 	      1 - uncachable
 	      2 - write-back
-	      3 - write-combining (default)
+	      3 - write-combining
 	      4 - write-through
 
 	If you see the following in dmesg, choose the type that matches the

+ 16 - 0
Documentation/feature-removal-schedule.txt

@@ -69,6 +69,22 @@ Who:	Grant Coady <gcoady@gmail.com>
 
 ---------------------------
 
+What:	remove EXPORT_SYMBOL(panic_timeout)
+When:	April 2006
+Files:	kernel/panic.c
+Why:	No modular usage in the kernel.
+Who:	Adrian Bunk <bunk@stusta.de>
+
+---------------------------
+
+What:	remove EXPORT_SYMBOL(insert_resource)
+When:	April 2006
+Files:	kernel/resource.c
+Why:	No modular usage in the kernel.
+Who:	Adrian Bunk <bunk@stusta.de>
+
+---------------------------
+
 What:	PCMCIA control ioctl (needed for pcmcia-cs [cardmgr, cardctl])
 When:	November 2005
 Files:	drivers/pcmcia/: pcmcia_ioctl.c

+ 173 - 0
Documentation/filesystems/dentry-locking.txt

@@ -0,0 +1,173 @@
+RCU-based dcache locking model
+==============================
+
+On many workloads, the most common operation on dcache is to look up a
+dentry, given a parent dentry and the name of the child. Typically,
+for every open(), stat() etc., the dentry corresponding to the
+pathname will be looked up by walking the tree starting with the first
+component of the pathname and using that dentry along with the next
+component to look up the next level and so on. Since it is a frequent
+operation for workloads like multiuser environments and web servers,
+it is important to optimize this path.
+
+Prior to 2.5.10, dcache_lock was acquired in d_lookup and thus in
+every component during path look-up. Since 2.5.10 onwards, fast-walk
+algorithm changed this by holding the dcache_lock at the beginning and
+walking as many cached path component dentries as possible. This
+significantly decreases the number of acquisition of
+dcache_lock. However it also increases the lock hold time
+significantly and affects performance in large SMP machines. Since
+2.5.62 kernel, dcache has been using a new locking model that uses RCU
+to make dcache look-up lock-free.
+
+The current dcache locking model is not very different from the
+existing dcache locking model. Prior to 2.5.62 kernel, dcache_lock
+protected the hash chain, d_child, d_alias, d_lru lists as well as
+d_inode and several other things like mount look-up. RCU-based changes
+affect only the way the hash chain is protected. For everything else
+the dcache_lock must be taken for both traversing as well as
+updating. The hash chain updates too take the dcache_lock.  The
+significant change is the way d_lookup traverses the hash chain, it
+doesn't acquire the dcache_lock for this and rely on RCU to ensure
+that the dentry has not been *freed*.
+
+
+Dcache locking details
+======================
+
+For many multi-user workloads, open() and stat() on files are very
+frequently occurring operations. Both involve walking of path names to
+find the dentry corresponding to the concerned file. In 2.4 kernel,
+dcache_lock was held during look-up of each path component. Contention
+and cache-line bouncing of this global lock caused significant
+scalability problems. With the introduction of RCU in Linux kernel,
+this was worked around by making the look-up of path components during
+path walking lock-free.
+
+
+Safe lock-free look-up of dcache hash table
+===========================================
+
+Dcache is a complex data structure with the hash table entries also
+linked together in other lists. In 2.4 kernel, dcache_lock protected
+all the lists. We applied RCU only on hash chain walking. The rest of
+the lists are still protected by dcache_lock.  Some of the important
+changes are :
+
+1. The deletion from hash chain is done using hlist_del_rcu() macro
+   which doesn't initialize next pointer of the deleted dentry and
+   this allows us to walk safely lock-free while a deletion is
+   happening.
+
+2. Insertion of a dentry into the hash table is done using
+   hlist_add_head_rcu() which take care of ordering the writes - the
+   writes to the dentry must be visible before the dentry is
+   inserted. This works in conjunction with hlist_for_each_rcu() while
+   walking the hash chain. The only requirement is that all
+   initialization to the dentry must be done before
+   hlist_add_head_rcu() since we don't have dcache_lock protection
+   while traversing the hash chain. This isn't different from the
+   existing code.
+
+3. The dentry looked up without holding dcache_lock by cannot be
+   returned for walking if it is unhashed. It then may have a NULL
+   d_inode or other bogosity since RCU doesn't protect the other
+   fields in the dentry. We therefore use a flag DCACHE_UNHASHED to
+   indicate unhashed dentries and use this in conjunction with a
+   per-dentry lock (d_lock). Once looked up without the dcache_lock,
+   we acquire the per-dentry lock (d_lock) and check if the dentry is
+   unhashed. If so, the look-up is failed. If not, the reference count
+   of the dentry is increased and the dentry is returned.
+
+4. Once a dentry is looked up, it must be ensured during the path walk
+   for that component it doesn't go away. In pre-2.5.10 code, this was
+   done holding a reference to the dentry. dcache_rcu does the same.
+   In some sense, dcache_rcu path walking looks like the pre-2.5.10
+   version.
+
+5. All dentry hash chain updates must take the dcache_lock as well as
+   the per-dentry lock in that order. dput() does this to ensure that
+   a dentry that has just been looked up in another CPU doesn't get
+   deleted before dget() can be done on it.
+
+6. There are several ways to do reference counting of RCU protected
+   objects. One such example is in ipv4 route cache where deferred
+   freeing (using call_rcu()) is done as soon as the reference count
+   goes to zero. This cannot be done in the case of dentries because
+   tearing down of dentries require blocking (dentry_iput()) which
+   isn't supported from RCU callbacks. Instead, tearing down of
+   dentries happen synchronously in dput(), but actual freeing happens
+   later when RCU grace period is over. This allows safe lock-free
+   walking of the hash chains, but a matched dentry may have been
+   partially torn down. The checking of DCACHE_UNHASHED flag with
+   d_lock held detects such dentries and prevents them from being
+   returned from look-up.
+
+
+Maintaining POSIX rename semantics
+==================================
+
+Since look-up of dentries is lock-free, it can race against a
+concurrent rename operation. For example, during rename of file A to
+B, look-up of either A or B must succeed.  So, if look-up of B happens
+after A has been removed from the hash chain but not added to the new
+hash chain, it may fail.  Also, a comparison while the name is being
+written concurrently by a rename may result in false positive matches
+violating rename semantics.  Issues related to race with rename are
+handled as described below :
+
+1. Look-up can be done in two ways - d_lookup() which is safe from
+   simultaneous renames and __d_lookup() which is not.  If
+   __d_lookup() fails, it must be followed up by a d_lookup() to
+   correctly determine whether a dentry is in the hash table or
+   not. d_lookup() protects look-ups using a sequence lock
+   (rename_lock).
+
+2. The name associated with a dentry (d_name) may be changed if a
+   rename is allowed to happen simultaneously. To avoid memcmp() in
+   __d_lookup() go out of bounds due to a rename and false positive
+   comparison, the name comparison is done while holding the
+   per-dentry lock. This prevents concurrent renames during this
+   operation.
+
+3. Hash table walking during look-up may move to a different bucket as
+   the current dentry is moved to a different bucket due to rename.
+   But we use hlists in dcache hash table and they are
+   null-terminated.  So, even if a dentry moves to a different bucket,
+   hash chain walk will terminate. [with a list_head list, it may not
+   since termination is when the list_head in the original bucket is
+   reached].  Since we redo the d_parent check and compare name while
+   holding d_lock, lock-free look-up will not race against d_move().
+
+4. There can be a theoretical race when a dentry keeps coming back to
+   original bucket due to double moves. Due to this look-up may
+   consider that it has never moved and can end up in a infinite loop.
+   But this is not any worse that theoretical livelocks we already
+   have in the kernel.
+
+
+Important guidelines for filesystem developers related to dcache_rcu
+====================================================================
+
+1. Existing dcache interfaces (pre-2.5.62) exported to filesystem
+   don't change. Only dcache internal implementation changes. However
+   filesystems *must not* delete from the dentry hash chains directly
+   using the list macros like allowed earlier. They must use dcache
+   APIs like d_drop() or __d_drop() depending on the situation.
+
+2. d_flags is now protected by a per-dentry lock (d_lock). All access
+   to d_flags must be protected by it.
+
+3. For a hashed dentry, checking of d_count needs to be protected by
+   d_lock.
+
+
+Papers and other documentation on dcache locking
+================================================
+
+1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
+
+2. http://lse.sourceforge.net/locking/dcache/dcache.html
+
+
+

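To make the look-up validation described in points 3 and 6 above concrete, here is a condensed sketch of that step. validate_candidate() is a hypothetical helper: the lock-free hash-chain walk (done under rcu_read_lock()) and the name/parent comparison are elided, and the authoritative code is d_lookup()/__d_lookup() in fs/dcache.c.

#include <linux/dcache.h>
#include <linux/spinlock.h>

static struct dentry *validate_candidate(struct dentry *dentry)
{
	struct dentry *found = NULL;

	spin_lock(&dentry->d_lock);
	if (!d_unhashed(dentry)) {		/* DCACHE_UNHASHED not set */
		atomic_inc(&dentry->d_count);	/* pin before unlocking */
		found = dentry;
	}
	spin_unlock(&dentry->d_lock);
	return found;		/* NULL means retry with d_lookup() */
}
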
+ 0 - 5
Documentation/filesystems/devfs/README

@@ -1812,11 +1812,6 @@ it may overflow the messages buffer, but try to get as much of it as
 you can
 
 
-if you get an Oops, run ksymoops to decode it so that the
-names of the offending functions are provided. A non-decoded Oops is
-pretty useless
-
-
 send a copy of your devfsd configuration file(s)
 
 send the bug report to me first.

+ 195 - 0
Documentation/filesystems/ramfs-rootfs-initramfs.txt

@@ -0,0 +1,195 @@
+ramfs, rootfs and initramfs
+October 17, 2005
+Rob Landley <rob@landley.net>
+=============================
+
+What is ramfs?
+--------------
+
+Ramfs is a very simple filesystem that exports Linux's disk caching
+mechanisms (the page cache and dentry cache) as a dynamically resizable
+ram-based filesystem.
+
+Normally all files are cached in memory by Linux.  Pages of data read from
+backing store (usually the block device the filesystem is mounted on) are kept
+around in case it's needed again, but marked as clean (freeable) in case the
+Virtual Memory system needs the memory for something else.  Similarly, data
+written to files is marked clean as soon as it has been written to backing
+store, but kept around for caching purposes until the VM reallocates the
+memory.  A similar mechanism (the dentry cache) greatly speeds up access to
+directories.
+
+With ramfs, there is no backing store.  Files written into ramfs allocate
+dentries and page cache as usual, but there's nowhere to write them to.
+This means the pages are never marked clean, so they can't be freed by the
+VM when it's looking to recycle memory.
+
+The amount of code required to implement ramfs is tiny, because all the
+work is done by the existing Linux caching infrastructure.  Basically,
+you're mounting the disk cache as a filesystem.  Because of this, ramfs is not
+an optional component removable via menuconfig, since there would be negligible
+space savings.
+
+ramfs and ramdisk:
+------------------
+
+The older "ram disk" mechanism created a synthetic block device out of
+an area of ram and used it as backing store for a filesystem.  This block
+device was of fixed size, so the filesystem mounted on it was of fixed
+size.  Using a ram disk also required unnecessarily copying memory from the
+fake block device into the page cache (and copying changes back out), as well
+as creating and destroying dentries.  Plus it needed a filesystem driver
+(such as ext2) to format and interpret this data.
+
+Compared to ramfs, this wastes memory (and memory bus bandwidth), creates
+unnecessary work for the CPU, and pollutes the CPU caches.  (There are tricks
+to avoid this copying by playing with the page tables, but they're unpleasantly
+complicated and turn out to be about as expensive as the copying anyway.)
+More to the point, all the work ramfs is doing has to happen _anyway_,
+since all file access goes through the page and dentry caches.  The ram
+disk is simply unnecessary, ramfs is internally much simpler.
+
+Another reason ramdisks are semi-obsolete is that the introduction of
+loopback devices offered a more flexible and convenient way to create
+synthetic block devices, now from files instead of from chunks of memory.
+See losetup (8) for details.
+
+ramfs and tmpfs:
+----------------
+
+One downside of ramfs is you can keep writing data into it until you fill
+up all memory, and the VM can't free it because the VM thinks that files
+should get written to backing store (rather than swap space), but ramfs hasn't
+got any backing store.  Because of this, only root (or a trusted user) should
+be allowed write access to a ramfs mount.
+
+A ramfs derivative called tmpfs was created to add size limits, and the ability
+to write the data to swap space.  Normal users can be allowed write access to
+tmpfs mounts.  See Documentation/filesystems/tmpfs.txt for more information.
+
+What is rootfs?
+---------------
+
+Rootfs is a special instance of ramfs, which is always present in 2.6 systems.
+(It's used internally as the starting and stopping point for searches of the
+kernel's doubly-linked list of mount points.)
+
+Most systems just mount another filesystem over it and ignore it.  The
+amount of space an empty instance of ramfs takes up is tiny.
+
+What is initramfs?
+------------------
+
+All 2.6 Linux kernels contain a gzipped "cpio" format archive, which is
+extracted into rootfs when the kernel boots up.  After extracting, the kernel
+checks to see if rootfs contains a file "init", and if so it executes it as PID
+1.  If found, this init process is responsible for bringing the system the
+rest of the way up, including locating and mounting the real root device (if
+any).  If rootfs does not contain an init program after the embedded cpio
+archive is extracted into it, the kernel will fall through to the older code
+to locate and mount a root partition, then exec some variant of /sbin/init
+out of that.
+
+All this differs from the old initrd in several ways:
+
+  - The old initrd was a separate file, while the initramfs archive is linked
+    into the linux kernel image.  (The directory linux-*/usr is devoted to
+    generating this archive during the build.)
+
+  - The old initrd file was a gzipped filesystem image (in some file format,
+    such as ext2, that had to be built into the kernel), while the new
+    initramfs archive is a gzipped cpio archive (like tar only simpler,
+    see cpio(1) and Documentation/early-userspace/buffer-format.txt).
+
+  - The program run by the old initrd (which was called /initrd, not /init) did
+    some setup and then returned to the kernel, while the init program from
+    initramfs is not expected to return to the kernel.  (If /init needs to hand
+    off control it can overmount / with a new root device and exec another init
+    program.  See the switch_root utility, below.)
+
+  - When switching another root device, initrd would pivot_root and then
+    umount the ramdisk.  But initramfs is rootfs: you can neither pivot_root
+    rootfs, nor unmount it.  Instead delete everything out of rootfs to
+    free up the space (find -xdev / -exec rm '{}' ';'), overmount rootfs
+    with the new root (cd /newmount; mount --move . /; chroot .), attach
+    stdin/stdout/stderr to the new /dev/console, and exec the new init.
+
+    Since this is a remarkably persnickity process (and involves deleting
+    commands before you can run them), the klibc package introduced a helper
+    program (utils/run_init.c) to do all this for you.  Most other packages
+    (such as busybox) have named this command "switch_root".
+
+Populating initramfs:
+---------------------
+
+The 2.6 kernel build process always creates a gzipped cpio format initramfs
+archive and links it into the resulting kernel binary.  By default, this
+archive is empty (consuming 134 bytes on x86).  The config option
+CONFIG_INITRAMFS_SOURCE (for some reason buried under devices->block devices
+in menuconfig, and living in usr/Kconfig) can be used to specify a source for
+the initramfs archive, which will automatically be incorporated into the
+resulting binary.  This option can point to an existing gzipped cpio archive, a
+directory containing files to be archived, or a text file specification such
+as the following example:
+
+  dir /dev 755 0 0
+  nod /dev/console 644 0 0 c 5 1
+  nod /dev/loop0 644 0 0 b 7 0
+  dir /bin 755 1000 1000
+  slink /bin/sh busybox 777 0 0
+  file /bin/busybox initramfs/busybox 755 0 0
+  dir /proc 755 0 0
+  dir /sys 755 0 0
+  dir /mnt 755 0 0
+  file /init initramfs/init.sh 755 0 0
+
+One advantage of the text file is that root access is not required to
+set permissions or create device nodes in the new archive.  (Note that those
+two example "file" entries expect to find files named "init.sh" and "busybox" in
+a directory called "initramfs", under the linux-2.6.* directory.  See
+Documentation/early-userspace/README for more details.)
+
+If you don't already understand what shared libraries, devices, and paths
+you need to get a minimal root filesystem up and running, here are some
+references:
+http://www.tldp.org/HOWTO/Bootdisk-HOWTO/
+http://www.tldp.org/HOWTO/From-PowerUp-To-Bash-Prompt-HOWTO.html
+http://www.linuxfromscratch.org/lfs/view/stable/
+
+The "klibc" package (http://www.kernel.org/pub/linux/libs/klibc) is
+designed to be a tiny C library to statically link early userspace
+code against, along with some related utilities.  It is BSD licensed.
+
+I use uClibc (http://www.uclibc.org) and busybox (http://www.busybox.net)
+myself.  These are LGPL and GPL, respectively.
+
+In theory you could use glibc, but that's not well suited for small embedded
+uses like this.  (A "hello world" program statically linked against glibc is
+over 400k.  With uClibc it's 7k.  Also note that glibc dlopens libnss to do
+name lookups, even when otherwise statically linked.)
+
+Future directions:
+------------------
+
+Today (2.6.14), initramfs is always compiled in, but not always used.  The
+kernel falls back to legacy boot code that is reached only if initramfs does
+not contain an /init program.  The fallback is legacy code, there to ensure a
+smooth transition and allowing early boot functionality to gradually move to
+"early userspace" (I.E. initramfs).
+
+The move to early userspace is necessary because finding and mounting the real
+root device is complex.  Root partitions can span multiple devices (raid or
+separate journal).  They can be out on the network (requiring dhcp, setting a
+specific mac address, logging into a server, etc).  They can live on removable
+media, with dynamically allocated major/minor numbers and persistent naming
+issues requiring a full udev implementation to sort out.  They can be
+compressed, encrypted, copy-on-write, loopback mounted, strangely partitioned,
+and so on.
+
+This kind of complexity (which inevitably includes policy) is rightly handled
+in userspace.  Both klibc and busybox/uClibc are working on simple initramfs
+packages to drop into a kernel build, and when standard solutions are ready
+and widely deployed, the kernel's legacy early boot code will become obsolete
+and a candidate for the feature removal schedule.
+
+But that's a while off yet.

+ 148 - 286
Documentation/filesystems/vfs.txt

@@ -3,7 +3,7 @@
 
 	Original author: Richard Gooch <rgooch@atnf.csiro.au>
 
-		  Last updated on August 25, 2005
+		  Last updated on October 28, 2005
 
   Copyright (C) 1999 Richard Gooch
   Copyright (C) 2005 Pekka Enberg
@@ -11,62 +11,61 @@
   This file is released under the GPLv2.
 
 
-What is it?
-===========
+Introduction
+============
 
-The Virtual File System (otherwise known as the Virtual Filesystem
-Switch) is the software layer in the kernel that provides the
-filesystem interface to userspace programs. It also provides an
-abstraction within the kernel which allows different filesystem
-implementations to coexist.
+The Virtual File System (also known as the Virtual Filesystem Switch)
+is the software layer in the kernel that provides the filesystem
+interface to userspace programs. It also provides an abstraction
+within the kernel which allows different filesystem implementations to
+coexist.
 
+VFS system calls open(2), stat(2), read(2), write(2), chmod(2) and so
+on are called from a process context. Filesystem locking is described
+in the document Documentation/filesystems/Locking.
 
-A Quick Look At How It Works
-============================
 
-In this section I'll briefly describe how things work, before
-launching into the details. I'll start with describing what happens
-when user programs open and manipulate files, and then look from the
-other view which is how a filesystem is supported and subsequently
-mounted.
-
-
-Opening a File
---------------
-
-The VFS implements the open(2), stat(2), chmod(2) and similar system
-calls. The pathname argument is used by the VFS to search through the
-directory entry cache (dentry cache or "dcache"). This provides a very
-fast look-up mechanism to translate a pathname (filename) into a
-specific dentry.
-
-An individual dentry usually has a pointer to an inode. Inodes are the
-things that live on disc drives, and can be regular files (you know:
-those things that you write data into), directories, FIFOs and other
-beasts. Dentries live in RAM and are never saved to disc: they exist
-only for performance. Inodes live on disc and are copied into memory
-when required. Later any changes are written back to disc. The inode
-that lives in RAM is a VFS inode, and it is this which the dentry
-points to. A single inode can be pointed to by multiple dentries
-(think about hardlinks).
-
-The dcache is meant to be a view into your entire filespace. Unlike
-Linus, most of us losers can't fit enough dentries into RAM to cover
-all of our filespace, so the dcache has bits missing. In order to
-resolve your pathname into a dentry, the VFS may have to resort to
-creating dentries along the way, and then loading the inode. This is
-done by looking up the inode.
-
-To look up an inode (usually read from disc) requires that the VFS
-calls the lookup() method of the parent directory inode. This method
-is installed by the specific filesystem implementation that the inode
-lives in. There will be more on this later.
+Directory Entry Cache (dcache)
+------------------------------
 
-Once the VFS has the required dentry (and hence the inode), we can do
-all those boring things like open(2) the file, or stat(2) it to peek
-at the inode data. The stat(2) operation is fairly simple: once the
-VFS has the dentry, it peeks at the inode data and passes some of it
-back to userspace.
+The VFS implements the open(2), stat(2), chmod(2), and similar system
+calls. The pathname argument that is passed to them is used by the VFS
+to search through the directory entry cache (also known as the dentry
+cache or dcache). This provides a very fast look-up mechanism to
+translate a pathname (filename) into a specific dentry. Dentries live
+in RAM and are never saved to disc: they exist only for performance.
+
+The dentry cache is meant to be a view into your entire filespace. As
+most computers cannot fit all dentries in RAM at the same time, some
+bits of the cache are missing. In order to resolve your pathname
+into a dentry, the VFS may have to resort to creating dentries along
+the way, and then loading the inode. This is done by looking up the
+inode.
+
+
+The Inode Object
+----------------
+
+An individual dentry usually has a pointer to an inode. Inodes are
+filesystem objects such as regular files, directories, FIFOs and other
+beasts.  They live either on the disc (for block device filesystems)
+or in the memory (for pseudo filesystems). Inodes that live on the
+disc are copied into the memory when required and changes to the inode
+are written back to disc. A single inode can be pointed to by multiple
+dentries (hard links, for example, do this).
+
+To look up an inode requires that the VFS calls the lookup() method of
+the parent directory inode. This method is installed by the specific
+filesystem implementation that the inode lives in. Once the VFS has
+the required dentry (and hence the inode), we can do all those boring
+things like open(2) the file, or stat(2) it to peek at the inode
+data. The stat(2) operation is fairly simple: once the VFS has the
+dentry, it peeks at the inode data and passes some of it back to
+userspace.
+
+
+The File Object
+---------------
 
 Opening a file requires another operation: allocation of a file
 structure (this is the kernel-side implementation of file
@@ -74,51 +73,39 @@ descriptors). The freshly allocated file structure is initialized with
 a pointer to the dentry and a set of file operation member functions.
 These are taken from the inode data. The open() file method is then
 called so the specific filesystem implementation can do it's work. You
-can see that this is another switch performed by the VFS.
-
-The file structure is placed into the file descriptor table for the
-process.
+can see that this is another switch performed by the VFS. The file
+structure is placed into the file descriptor table for the process.
 
 Reading, writing and closing files (and other assorted VFS operations)
 is done by using the userspace file descriptor to grab the appropriate
-file structure, and then calling the required file structure method
-function to do whatever is required.
-
-For as long as the file is open, it keeps the dentry "open" (in use),
-which in turn means that the VFS inode is still in use.
-
-All VFS system calls (i.e. open(2), stat(2), read(2), write(2),
-chmod(2) and so on) are called from a process context. You should
-assume that these calls are made without any kernel locks being
-held. This means that the processes may be executing the same piece of
-filesystem or driver code at the same time, on different
-processors. You should ensure that access to shared resources is
-protected by appropriate locks.
+file structure, and then calling the required file structure method to
+do whatever is required. For as long as the file is open, it keeps the
+dentry in use, which in turn means that the VFS inode is still in use.
 
 
 Registering and Mounting a Filesystem
--------------------------------------
+=====================================
 
-If you want to support a new kind of filesystem in the kernel, all you
-need to do is call register_filesystem(). You pass a structure
-describing the filesystem implementation (struct file_system_type)
-which is then added to an internal table of supported filesystems. You
-can do:
+To register and unregister a filesystem, use the following API
+functions:
 
-% cat /proc/filesystems
+   #include <linux/fs.h>
 
-to see what filesystems are currently available on your system.
+   extern int register_filesystem(struct file_system_type *);
+   extern int unregister_filesystem(struct file_system_type *);
 
-When a request is made to mount a block device onto a directory in
-your filespace the VFS will call the appropriate method for the
-specific filesystem. The dentry for the mount point will then be
-updated to point to the root inode for the new filesystem.
+The passed struct file_system_type describes your filesystem. When a
+request is made to mount a device onto a directory in your filespace,
+the VFS will call the appropriate get_sb() method for the specific
+filesystem. The dentry for the mount point will then be updated to
+point to the root inode for the new filesystem.
 
-It's now time to look at things in more detail.
+You can see all filesystems that are registered to the kernel in the
+file /proc/filesystems.
 
 
 struct file_system_type
-=======================
+-----------------------
 
 This describes the filesystem. As of kernel 2.6.13, the following
 members are defined:
@@ -197,8 +184,14 @@ A fill_super() method implementation has the following arguments:
   int silent: whether or not to be silent on error
 
 
+The Superblock Object
+=====================
+
+A superblock object represents a mounted filesystem.
+
+
 struct super_operations
-=======================
+-----------------------
 
 This describes how the VFS can manipulate the superblock of your
 filesystem. As of kernel 2.6.13, the following members are defined:
@@ -286,9 +279,9 @@ or bottom half).
   	a superblock. The second parameter indicates whether the method
 	should wait until the write out has been completed. Optional.
 
-  write_super_lockfs: called when VFS is locking a filesystem and forcing
-  	it into a consistent state.  This function is currently used by the
-	Logical Volume Manager (LVM).
+  write_super_lockfs: called when VFS is locking a filesystem and
+  	forcing it into a consistent state.  This method is currently
+  	used by the Logical Volume Manager (LVM).
 
   unlockfs: called when VFS is unlocking a filesystem and making it writable
   	again.
@@ -317,8 +310,14 @@ field. This is a pointer to a "struct inode_operations" which
 describes the methods that can be performed on individual inodes.
 
 
+The Inode Object
+================
+
+An inode object represents an object within the filesystem.
+
+
 struct inode_operations
-=======================
+-----------------------
 
 This describes how the VFS can manipulate an inode in your
 filesystem. As of kernel 2.6.13, the following members are defined:
@@ -394,51 +393,62 @@ otherwise noted.
 	will probably need to call d_instantiate() just as you would
 	in the create() method
 
+  rename: called by the rename(2) system call to rename the object to
+	have the parent and name given by the second inode and dentry.
+
   readlink: called by the readlink(2) system call. Only required if
 	you want to support reading symbolic links
 
   follow_link: called by the VFS to follow a symbolic link to the
 	inode it points to.  Only required if you want to support
-	symbolic links.  This function returns a void pointer cookie
+	symbolic links.  This method returns a void pointer cookie
 	that is passed to put_link().
 
   put_link: called by the VFS to release resources allocated by
-  	follow_link().  The cookie returned by follow_link() is passed to
-	to this function as the last parameter.  It is used by filesystems
-	such as NFS where page cache is not stable (i.e. page that was
-	installed when the symbolic link walk started might not be in the
-	page cache at the end of the walk).
-
-  truncate: called by the VFS to change the size of a file.  The i_size
- 	field of the inode is set to the desired size by the VFS before
-	this function is called.  This function is called by the truncate(2)
-	system call and related functionality.
+  	follow_link().  The cookie returned by follow_link() is passed
+  	to this method as the last parameter.  It is used by
+  	filesystems such as NFS where page cache is not stable
+  	(i.e. page that was installed when the symbolic link walk
+  	started might not be in the page cache at the end of the
+  	walk).
+
+  truncate: called by the VFS to change the size of a file.  The
+ 	i_size field of the inode is set to the desired size by the
+ 	VFS before this method is called.  This method is called by
+ 	the truncate(2) system call and related functionality.
 
   permission: called by the VFS to check for access rights on a POSIX-like
   	filesystem.
 
-  setattr: called by the VFS to set attributes for a file.  This function is
-  	called by chmod(2) and related system calls.
+  setattr: called by the VFS to set attributes for a file. This method
+  	is called by chmod(2) and related system calls.
 
-  getattr: called by the VFS to get attributes of a file.  This function is
-  	called by stat(2) and related system calls.
+  getattr: called by the VFS to get attributes of a file. This method
+  	is called by stat(2) and related system calls.
 
   setxattr: called by the VFS to set an extended attribute for a file.
-  	Extended attribute is a name:value pair associated with an inode. This
-	function is called by setxattr(2) system call.
+  	An extended attribute is a name:value pair associated with an
+  	inode. This method is called by the setxattr(2) system call.
+
+  getxattr: called by the VFS to retrieve the value of an extended
+  	attribute name. This method is called by the getxattr(2) system
+  	call.
 
-  getxattr: called by the VFS to retrieve the value of an extended attribute
-  	name.  This function is called by getxattr(2) function call.
+  listxattr: called by the VFS to list all extended attributes for a
+  	given file. This method is called by the listxattr(2) system call.
 
-  listxattr: called by the VFS to list all extended attributes for a given
-  	file.  This function is called by listxattr(2) system call.
+  removexattr: called by the VFS to remove an extended attribute from
+  	a file. This method is called by the removexattr(2) system call.
 
-  removexattr: called by the VFS to remove an extended attribute from a file.
-  	This function is called by removexattr(2) system call.
+
+The Address Space Object
+========================
+
+The address space object is used to identify pages in the page cache.
 
 
 struct address_space_operations
-===============================
+-------------------------------
 
 This describes how the VFS can manipulate mapping of a file to page cache in
 your filesystem. As of kernel 2.6.13, the following members are defined:
@@ -502,8 +512,14 @@ struct address_space_operations {
 	it.  An example implementation can be found in fs/ext2/xip.c.
 
 
+The File Object
+===============
+
+A file object represents a file opened by a process.
+
+
 struct file_operations
-======================
+----------------------
 
 This describes how the VFS can manipulate an open file. As of kernel
 2.6.13, the following members are defined:
@@ -661,7 +677,7 @@ of child dentries. Child dentries are basically like files in a
 directory.
 
 
-Directory Entry Cache APIs
+Directory Entry Cache API
 --------------------------
 
 There are a number of functions defined which permit a filesystem to
@@ -705,178 +721,24 @@ manipulate dentries:
 	and the dentry is returned. The caller must use d_put()
 	to free the dentry when it finishes using it.
 
+For further information on dentry locking, please refer to the document
+Documentation/filesystems/dentry-locking.txt.
 
-RCU-based dcache locking model
-------------------------------
 
-On many workloads, the most common operation on dcache is
-to look up a dentry, given a parent dentry and the name
-of the child. Typically, for every open(), stat() etc.,
-the dentry corresponding to the pathname will be looked
-up by walking the tree starting with the first component
-of the pathname and using that dentry along with the next
-component to look up the next level and so on. Since it
-is a frequent operation for workloads like multiuser
-environments and web servers, it is important to optimize
-this path.
-
-Prior to 2.5.10, dcache_lock was acquired in d_lookup and thus
-in every component during path look-up. Since 2.5.10 onwards,
-fast-walk algorithm changed this by holding the dcache_lock
-at the beginning and walking as many cached path component
-dentries as possible. This significantly decreases the number
-of acquisition of dcache_lock. However it also increases the
-lock hold time significantly and affects performance in large
-SMP machines. Since 2.5.62 kernel, dcache has been using
-a new locking model that uses RCU to make dcache look-up
-lock-free.
-
-The current dcache locking model is not very different from the existing
-dcache locking model. Prior to 2.5.62 kernel, dcache_lock
-protected the hash chain, d_child, d_alias, d_lru lists as well
-as d_inode and several other things like mount look-up. RCU-based
-changes affect only the way the hash chain is protected. For everything
-else the dcache_lock must be taken for both traversing as well as
-updating. The hash chain updates too take the dcache_lock.
-The significant change is the way d_lookup traverses the hash chain,
-it doesn't acquire the dcache_lock for this and rely on RCU to
-ensure that the dentry has not been *freed*.
-
-
-Dcache locking details
-----------------------
+Resources
+=========
+
+(Note some of these resources are not up-to-date with the latest kernel
+ version.)
+
+Creating Linux virtual filesystems. 2002
+    <http://lwn.net/Articles/13325/>
+
+The Linux Virtual File-system Layer by Neil Brown. 1999
+    <http://www.cse.unsw.edu.au/~neilb/oss/linux-commentary/vfs.html>
+
+A tour of the Linux VFS by Michael K. Johnson. 1996
+    <http://www.tldp.org/LDP/khg/HyperNews/get/fs/vfstour.html>
 
-For many multi-user workloads, open() and stat() on files are
-very frequently occurring operations. Both involve walking
-of path names to find the dentry corresponding to the
-concerned file. In 2.4 kernel, dcache_lock was held
-during look-up of each path component. Contention and
-cache-line bouncing of this global lock caused significant
-scalability problems. With the introduction of RCU
-in Linux kernel, this was worked around by making
-the look-up of path components during path walking lock-free.
-
-
-Safe lock-free look-up of dcache hash table
-===========================================
-
-Dcache is a complex data structure with the hash table entries
-also linked together in other lists. In 2.4 kernel, dcache_lock
-protected all the lists. We applied RCU only on hash chain
-walking. The rest of the lists are still protected by dcache_lock.
-Some of the important changes are :
-
-1. The deletion from hash chain is done using hlist_del_rcu() macro which
-   doesn't initialize next pointer of the deleted dentry and this
-   allows us to walk safely lock-free while a deletion is happening.
-
-2. Insertion of a dentry into the hash table is done using
-   hlist_add_head_rcu() which take care of ordering the writes -
-   the writes to the dentry must be visible before the dentry
-   is inserted. This works in conjunction with hlist_for_each_rcu()
-   while walking the hash chain. The only requirement is that
-   all initialization to the dentry must be done before hlist_add_head_rcu()
-   since we don't have dcache_lock protection while traversing
-   the hash chain. This isn't different from the existing code.
-
-3. The dentry looked up without holding dcache_lock by cannot be
-   returned for walking if it is unhashed. It then may have a NULL
-   d_inode or other bogosity since RCU doesn't protect the other
-   fields in the dentry. We therefore use a flag DCACHE_UNHASHED to
-   indicate unhashed  dentries and use this in conjunction with a
-   per-dentry lock (d_lock). Once looked up without the dcache_lock,
-   we acquire the per-dentry lock (d_lock) and check if the
-   dentry is unhashed. If so, the look-up is failed. If not, the
-   reference count of the dentry is increased and the dentry is returned.
-
-4. Once a dentry is looked up, it must be ensured during the path
-   walk for that component it doesn't go away. In pre-2.5.10 code,
-   this was done holding a reference to the dentry. dcache_rcu does
-   the same.  In some sense, dcache_rcu path walking looks like
-   the pre-2.5.10 version.
-
-5. All dentry hash chain updates must take the dcache_lock as well as
-   the per-dentry lock in that order. dput() does this to ensure
-   that a dentry that has just been looked up in another CPU
-   doesn't get deleted before dget() can be done on it.
-
-6. There are several ways to do reference counting of RCU protected
-   objects. One such example is in ipv4 route cache where
-   deferred freeing (using call_rcu()) is done as soon as
-   the reference count goes to zero. This cannot be done in
-   the case of dentries because tearing down of dentries
-   require blocking (dentry_iput()) which isn't supported from
-   RCU callbacks. Instead, tearing down of dentries happen
-   synchronously in dput(), but actual freeing happens later
-   when RCU grace period is over. This allows safe lock-free
-   walking of the hash chains, but a matched dentry may have
-   been partially torn down. The checking of DCACHE_UNHASHED
-   flag with d_lock held detects such dentries and prevents
-   them from being returned from look-up.
-
-
-Maintaining POSIX rename semantics
-==================================
-
-Since look-up of dentries is lock-free, it can race against
-a concurrent rename operation. For example, during rename
-of file A to B, look-up of either A or B must succeed.
-So, if look-up of B happens after A has been removed from the
-hash chain but not added to the new hash chain, it may fail.
-Also, a comparison while the name is being written concurrently
-by a rename may result in false positive matches violating
-rename semantics.  Issues related to race with rename are
-handled as described below :
-
-1. Look-up can be done in two ways - d_lookup() which is safe
-   from simultaneous renames and __d_lookup() which is not.
-   If __d_lookup() fails, it must be followed up by a d_lookup()
-   to correctly determine whether a dentry is in the hash table
-   or not. d_lookup() protects look-ups using a sequence
-   lock (rename_lock).
-
-2. The name associated with a dentry (d_name) may be changed if
-   a rename is allowed to happen simultaneously. To avoid memcmp()
-   in __d_lookup() go out of bounds due to a rename and false
-   positive comparison, the name comparison is done while holding the
-   per-dentry lock. This prevents concurrent renames during this
-   operation.
-
-3. Hash table walking during look-up may move to a different bucket as
-   the current dentry is moved to a different bucket due to rename.
-   But we use hlists in dcache hash table and they are null-terminated.
-   So, even if a dentry moves to a different bucket, hash chain
-   walk will terminate. [with a list_head list, it may not since
-   termination is when the list_head in the original bucket is reached].
-   Since we redo the d_parent check and compare name while holding
-   d_lock, lock-free look-up will not race against d_move().
-
-4. There can be a theoretical race when a dentry keeps coming back
-   to original bucket due to double moves. Due to this look-up may
-   consider that it has never moved and can end up in a infinite loop.
-   But this is not any worse that theoretical livelocks we already
-   have in the kernel.
-
-
-Important guidelines for filesystem developers related to dcache_rcu
-====================================================================
-
-1. Existing dcache interfaces (pre-2.5.62) exported to filesystem
-   don't change. Only dcache internal implementation changes. However
-   filesystems *must not* delete from the dentry hash chains directly
-   using the list macros like allowed earlier. They must use dcache
-   APIs like d_drop() or __d_drop() depending on the situation.
-
-2. d_flags is now protected by a per-dentry lock (d_lock). All
-   access to d_flags must be protected by it.
-
-3. For a hashed dentry, checking of d_count needs to be protected
-   by d_lock.
-
-
-Papers and other documentation on dcache locking
-================================================
-
-1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
-
-2. http://lse.sourceforge.net/locking/dcache/dcache.html
+A small trail through the Linux kernel by Andries Brouwer. 2001
+    <http://www.win.tue.nl/~aeb/linux/vfs/trail.html>

+ 18 - 16
Documentation/hpet.txt

@@ -1,18 +1,21 @@
 		High Precision Event Timer Driver for Linux
 
-The High Precision Event Timer (HPET) hardware is the future replacement for the 8254 and Real
-Time Clock (RTC) periodic timer functionality.  Each HPET can have up two 32 timers.  It is possible
-to configure the first two timers as legacy replacements for 8254 and RTC periodic.  A specification
-done by INTEL and Microsoft can be found at http://www.intel.com/labs/platcomp/hpet/hpetspec.htm.
-
-The driver supports detection of HPET driver allocation and initialization of the HPET before the
-driver module_init routine is called.  This enables platform code which uses timer 0 or 1 as the
-main timer to intercept HPET initialization.  An example of this initialization can be found in
+The High Precision Event Timer (HPET) hardware is the future replacement
+for the 8254 and Real Time Clock (RTC) periodic timer functionality.
+Each HPET can have up to 32 timers.  It is possible to configure the
+first two timers as legacy replacements for 8254 and RTC periodic timers.
+A specification done by Intel and Microsoft can be found at
+<http://www.intel.com/hardwaredesign/hpetspec.htm>.
+
+The driver supports detection of HPET driver allocation and initialization
+of the HPET before the driver module_init routine is called.  This enables
+platform code which uses timer 0 or 1 as the main timer to intercept HPET
+initialization.  An example of this initialization can be found in
 arch/i386/kernel/time_hpet.c.
 
-The driver provides two APIs which are very similar to the API found in the rtc.c driver.
-There is a user space API and a kernel space API.  An example user space program is provided
-below.
+The driver provides two APIs which are very similar to the API found in
+the rtc.c driver.  There is a user space API and a kernel space API.
+An example user space program is provided below.
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -290,9 +293,8 @@ The kernel API has three interfaces exported from the driver:
 	hpet_unregister(struct hpet_task *tp)
 	hpet_control(struct hpet_task *tp, unsigned int cmd, unsigned long arg)
 
-The kernel module using this interface fills in the ht_func and ht_data members of the
-hpet_task structure before calling hpet_register.  hpet_control simply vectors to the hpet_ioctl
-routine and has the same commands and respective arguments as the user API.  hpet_unregister
+The kernel module using this interface fills in the ht_func and ht_data
+members of the hpet_task structure before calling hpet_register.
+hpet_control simply vectors to the hpet_ioctl routine and has the same
+commands and respective arguments as the user API.  hpet_unregister
 is used to terminate usage of the HPET timer reserved by hpet_register.
-
-

+ 1 - 1
Documentation/magic-number.txt

@@ -120,7 +120,7 @@ ISDN_NET_MAGIC        0x49344C02  isdn_net_local_s  drivers/isdn/i4l/isdn_net_li
 SAVEKMSG_MAGIC2       0x4B4D5347  savekmsg          arch/*/amiga/config.c
 STLI_BOARDMAGIC       0x4bc6c825  stlibrd           include/linux/istallion.h
 CS_STATE_MAGIC        0x4c4f4749  cs_state          sound/oss/cs46xx.c
-SLAB_C_MAGIC          0x4f17a36d  kmem_cache_s      mm/slab.c
+SLAB_C_MAGIC          0x4f17a36d  kmem_cache        mm/slab.c
 COW_MAGIC             0x4f4f4f4d  cow_header_v1     arch/um/drivers/ubd_user.c
 I810_CARD_MAGIC       0x5072696E  i810_card         sound/oss/i810_audio.c
 TRIDENT_CARD_MAGIC    0x5072696E  trident_card      sound/oss/trident.c

+ 0 - 2
Documentation/networking/decnet.txt

@@ -176,8 +176,6 @@ information (_most_ of which _is_ _essential_) includes:
  - Which client caused the problem ?
  - How much data was being transferred ?
  - Was the network congested ?
- - If there was a kernel panic, please run the output through ksymoops
-   before sending it to me, otherwise its _useless_.
  - How can the problem be reproduced ?
  - Can you use tcpdump to get a trace ? (N.B. Most (all?) versions of 
    tcpdump don't understand how to dump DECnet properly, so including

+ 1 - 1
Documentation/oops-tracing.txt

@@ -1,6 +1,6 @@
 NOTE: ksymoops is useless on 2.6.  Please use the Oops in its original format
 (from dmesg, etc).  Ignore any references in this or other docs to "decoding
-the Oops" or "running it through ksymoops".  If you post an Oops fron 2.6 that
+the Oops" or "running it through ksymoops".  If you post an Oops from 2.6 that
 has been run through ksymoops, people will just tell you to repost it.
 
 Quick Summary

+ 12 - 5
Documentation/power/video.txt

@@ -11,9 +11,9 @@ boot video card. (Kernel usually does not even contain video card
 driver -- vesafb and vgacon are widely used).
 
 This is not problem for swsusp, because during swsusp resume, BIOS is
-run normally so video card is normally initialized. S3 has absolutely
-no chance of working with SMP/HT. Be sure it to turn it off before
-testing (swsusp should work ok, OTOH).
+run normally so the video card is normally initialized. It should not
+be a problem for S1 standby, because the hardware should retain its
+state over that.
 
 There are a few types of systems where video works after S3 resume:
 
@@ -64,7 +64,7 @@ your video card (good luck getting docs :-(). Maybe suspending from X
 (proper X, knowing your hardware, not XF68_FBcon) might have better
 chance of working.
 
-Table of known working systems:
+Table of known working notebooks:
 
 Model                           hack (or "how to do it")
 ------------------------------------------------------------------------------
@@ -73,7 +73,7 @@ Acer TM 242FX			vbetool (6)
 Acer TM C110			video_post (8)
 Acer TM C300                    vga=normal (only suspend on console, not in X), vbetool (6) or video_post (8)
 Acer TM 4052LCi		        s3_bios (2)
-Acer TM 636Lci			s3_bios vga=normal (2)
+Acer TM 636Lci			s3_bios,s3_mode (4)
 Acer TM 650 (Radeon M7)		vga=normal plus boot-radeon (5) gets text console back
 Acer TM 660			??? (*)
 Acer TM 800			vga=normal, X patches, see webpage (5) or vbetool (6)
@@ -137,6 +137,13 @@ Toshiba Satellite P10-554       s3_bios,s3_mode (4)(****)
 Toshiba M30                     (2) xor X with nvidia driver using internal AGP
 Uniwill 244IIO			??? (*)
 
+Known working desktop systems
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Mainboard	    Graphics card                 hack (or "how to do it")
+------------------------------------------------------------------------------
+Asus A7V8X	    nVidia RIVA TNT2 model 64	  s3_bios,s3_mode (4)
+
 
 (*) from http://www.ubuntulinux.org/wiki/HoaryPMResults, not sure
     which options to use. If you know, please tell me.

+ 12 - 9
Documentation/s390/driver-model.txt

@@ -8,11 +8,10 @@ All devices which can be addressed by means of ccws are called 'CCW devices' -
 even if they aren't actually driven by ccws.
 
 All ccw devices are accessed via a subchannel, this is reflected in the 
-structures under root/:
+structures under devices/:
 
-root/
-     - sys
-     - legacy
+devices/
+     - system/
      - css0/
            - 0.0.0000/0.0.0815/
 	   - 0.0.0001/0.0.4711/
@@ -36,7 +35,7 @@ availability: Can be 'good' or 'boxed'; 'no path' or 'no device' for
 
 online:     An interface to set the device online and offline.
 	    In the special case of the device being disconnected (see the
-	    notify function under 1.2), piping 0 to online will focibly delete
+	    notify function under 1.2), piping 0 to online will forcibly delete
 	    the device.
 
 The device drivers can add entries to export per-device data and interfaces.
@@ -222,7 +221,7 @@ and are called 'chp0.<chpid>'. They have no driver and do not belong to any bus.
 Please note, that unlike /proc/chpids in 2.4, the channel path objects reflect
 only the logical state and not the physical state, since we cannot track the
 latter consistently due to lacking machine support (we don't need to be aware
-of anyway).
+of it anyway).
 
 status - Can be 'online' or 'offline'.
 	 Piping 'on' or 'off' sets the chpid logically online/offline.
@@ -235,12 +234,16 @@ status - Can be 'online' or 'offline'.
 3. System devices
 -----------------
 
-Note: cpus may yet be added here.
-
 3.1 xpram 
 ---------
 
-xpram shows up under sys/ as 'xpram'.
+xpram shows up under devices/system/ as 'xpram'.
+
+3.2 cpus
+--------
+
+For each cpu, a directory is created under devices/system/cpu/. Each cpu has an
+attribute 'online' which can be 0 or 1.
 
 
 4. Other devices

+ 20 - 9
Documentation/sound/alsa/ALSA-Configuration.txt

@@ -167,7 +167,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
     spdif           - Support SPDIF I/O
     		    - Default: disabled
 
-    Module supports autoprobe and multiple chips (max 8).
+    This module supports one chip and autoprobe.
 
     The power-management is supported.
 
@@ -206,7 +206,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 			  See "AC97 Quirk Option" section below.
     spdif_aclink	- S/PDIF transfer over AC-link (default = 1)
 
-    This module supports up to 8 cards and autoprobe.
+    This module supports one card and autoprobe.
 
     ATI IXP has two different methods to control SPDIF output.  One is
     over AC-link and another is over the "direct" SPDIF output.  The
@@ -218,7 +218,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 
     Module for ATI IXP 150/200/250 AC97 modem controllers.
 
-    Module supports up to 8 cards.
+    This module supports one card and autoprobe.
 
     Note: The default index value of this module is -2, i.e. the first
           slot is excluded.
@@ -637,7 +637,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
     model	- force the model name
     position_fix - Fix DMA pointer (0 = auto, 1 = none, 2 = POSBUF, 3 = FIFO size)
 
-    Module supports up to 8 cards.
+    This module supports one card and autoprobe.
 
     Each codec may have a model table for different configurations.
     If your machine isn't listed there, the default (usually minimal)
@@ -663,6 +663,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 			adjusted.  Appearing only when compiled with
 			$CONFIG_SND_DEBUG=y
 
+	ALC260
+	  hp		HP machines
+	  fujitsu	Fujitsu S7020
+
 	CMI9880
 	  minimal	3-jack in back
 	  min_fp	3-jack in back, 2-jack in front
@@ -811,7 +815,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 		    semaphores (e.g. on some ASUS laptops)
 		    (default off)
 
-    Module supports autoprobe and multiple bus-master chips (max 8).
+    This module supports one chip and autoprobe.
 
     Note: the latest driver supports auto-detection of chip clock.
     if you still encounter too fast playback, specify the clock
@@ -830,7 +834,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 
     ac97_clock	  - AC'97 codec clock base (0 = auto-detect)
 
-    This module supports up to 8 cards and autoprobe.
+    This module supports one card and autoprobe.
 
     Note: The default index value of this module is -2, i.e. the first
           slot is excluded.
@@ -950,8 +954,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
     use_cache        - 0 or 1 (disabled by default)
     vaio_hack        - alias buffer_top=0x25a800
     reset_workaround - enable AC97 RESET workaround for some laptops
+    reset_workaround2 - enable extended AC97 RESET workaround for some
+		      other laptops
 
-    Module supports autoprobe and multiple chips (max 8).
+    This module supports one chip and autoprobe.
 
     The power-management is supported.
 
@@ -980,6 +986,11 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
     workaround is enabled automatically.  For other laptops with a
     hard freeze, you can try reset_workaround=1 option.
 
+    Note: Dell Latitude CSx laptops have another problem regarding
+    AC97 RESET.  On these laptops, the reset_workaround2 option is
+    turned on by default.  This option is worth trying if the
+    previous reset_workaround option doesn't help.
+
     Note: This driver is really crappy.  It's a porting from the
     OSS driver, which is a result of black-magic reverse engineering.
     The detection of codec will fail if the driver is loaded *after*
@@ -1310,7 +1321,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
     ac97_quirk  - AC'97 workaround for strange hardware
 		  See "AC97 Quirk Option" section below.
 
-    Module supports autoprobe and multiple bus-master chips (max 8).
+    This module supports one chip and autoprobe.
 
     Note: on some SMP motherboards like MSI 694D the interrupts might
           not be generated properly.  In such a case, please try to
@@ -1352,7 +1363,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
 
     ac97_clock	- AC'97 codec clock base (default 48000Hz)
 
-    Module supports up to 8 cards.
+    This module supports one card and autoprobe.
 
     Note: The default index value of this module is -2, i.e. the first
           slot is excluded.

+ 15 - 41
Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl

@@ -18,8 +18,8 @@
       </affiliation>
      </author>
 
-     <date>March 6, 2005</date>
-     <edition>0.3.4</edition>
+     <date>October 6, 2005</date>
+     <edition>0.3.5</edition>
 
     <abstract>
       <para>
@@ -30,7 +30,7 @@
 
     <legalnotice>
     <para>
-    Copyright (c) 2002-2004  Takashi Iwai <email>tiwai@suse.de</email>
+    Copyright (c) 2002-2005  Takashi Iwai <email>tiwai@suse.de</email>
     </para>
 
     <para>
@@ -1433,25 +1433,10 @@
         <informalexample>
           <programlisting>
 <![CDATA[
-  if (chip->res_port) {
-          release_resource(chip->res_port);
-          kfree_nocheck(chip->res_port);
-  }
+  release_and_free_resource(chip->res_port);
 ]]>
           </programlisting>
         </informalexample>
-
-      As you can see, the resource pointer is also to be freed
-      via <function>kfree_nocheck()</function> after
-      <function>release_resource()</function> is called. You
-      cannot use <function>kfree()</function> here, because on ALSA,
-      <function>kfree()</function> may be a wrapper to its own
-      allocator with the memory debugging. Since the resource pointer
-      is allocated externally outside the ALSA, it must be released
-      via the native
-      <function>kfree()</function>.
-      <function>kfree_nocheck()</function> is used for that; it calls
-      the native <function>kfree()</function> without wrapper. 
       </para>
 
       <para>
@@ -2190,8 +2175,7 @@ struct _snd_pcm_runtime {
 	unsigned int rate_den;
 
 	/* -- SW params -- */
-	int tstamp_timespec;		/* use timeval (0) or timespec (1) */
-	snd_pcm_tstamp_t tstamp_mode;	/* mmap timestamp is updated */
+	struct timespec tstamp_mode;	/* mmap timestamp is updated */
   	unsigned int period_step;
 	unsigned int sleep_min;		/* min ticks to sleep */
 	snd_pcm_uframes_t xfer_align;	/* xfer size need to be a multiple */
@@ -3709,8 +3693,7 @@ struct _snd_pcm_runtime {
         <para>
           Here, the chip instance is retrieved via
         <function>snd_kcontrol_chip()</function> macro.  This macro
-        converts from kcontrol-&gt;private_data to the type defined by
-        <type>chip_t</type>. The
+        just accesses kcontrol-&gt;private_data. The
         kcontrol-&gt;private_data field is 
         given as the argument of <function>snd_ctl_new()</function>
         (see the later subsection
@@ -5998,32 +5981,23 @@ struct _snd_pcm_runtime {
         The first argument is the expression to evaluate, and the
       second argument is the action if it fails. When
       <constant>CONFIG_SND_DEBUG</constant>, is set, it will show an
-      error message such as <computeroutput>BUG? (xxx) (called from
-      yyy)</computeroutput>. When no debug flag is set, this is
-      ignored. 
+      error message such as <computeroutput>BUG? (xxx)</computeroutput>
+      together with a stack trace.
       </para>
-    </section>
-
-    <section id="useful-functions-snd-runtime-check">
-      <title><function>snd_runtime_check()</function></title>
       <para>
-        This macro is quite similar with
-      <function>snd_assert()</function>. Unlike
-      <function>snd_assert()</function>, the expression is always
-      evaluated regardless of
-      <constant>CONFIG_SND_DEBUG</constant>. When
-      <constant>CONFIG_SND_DEBUG</constant> is set, the macro will
-      show a message like <computeroutput>ERROR (xx) (called from
-      yyy)</computeroutput>. 
+	 When no debug flag is set, this macro is ignored. 
       </para>
     </section>
 
     <section id="useful-functions-snd-bug">
       <title><function>snd_BUG()</function></title>
       <para>
-        It calls <function>snd_assert(0,)</function> -- that is, just
-      prints the error message at the point. It's useful to show that
-      a fatal error happens there. 
+        It shows a <computeroutput>BUG?</computeroutput> message and a
+      stack trace at the point, just as <function>snd_assert()</function>
+      does.  It's useful to show that a fatal error happens there.
+      </para>
+      <para>
+	 When no debug flag is set, this macro is ignored. 
       </para>
     </section>
   </chapter>
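
For reference, a hedged usage sketch of the snd_assert() behaviour
described above (the chip type and callback reuse the document's
hypothetical "mychip" example and are reconstructed here, not code from
this patch):

/* Illustrative only: snd_assert() evaluates the expression and, when
 * CONFIG_SND_DEBUG is set and the expression is false, prints a
 * "BUG? (...)" message with a stack trace, then runs the second
 * argument; without the debug option the macro is ignored. */
#include <sound/driver.h>
#include <sound/core.h>
#include <sound/pcm.h>

typedef struct mychip { int irq; } mychip_t;	/* stand-in chip type */

static int mychip_playback_open(snd_pcm_substream_t *substream)
{
	mychip_t *chip = snd_pcm_substream_chip(substream);

	snd_assert(chip != NULL, return -ENXIO);
	return 0;
}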

+ 2 - 2
Documentation/sparse.txt

@@ -41,9 +41,9 @@ sure that bitwise types don't get mixed up (little-endian vs big-endian
 vs cpu-endian vs whatever), and there the constant "0" really _is_
 special.
 
-Modify top-level Makefile to say
+Use
 
-CHECK           = sparse -Wbitwise
+	make C=[12] CF=-Wbitwise
 
 or you don't get any checking at all.
 

+ 3 - 3
Documentation/video4linux/bttv/README.freeze

@@ -27,9 +27,9 @@ information out of a register+stack dump printed by the kernel on
 protection faults (so-called "kernel oops").
 
 If you run into some kind of deadlock, you can try to dump a call trace
-for each process using sysrq-t (see Documentation/sysrq.txt).  ksymoops
-will translate these dumps into kernel symbols too.  This way it is
-possible to figure where *exactly* some process in "D" state is stuck.
+for each process using sysrq-t (see Documentation/sysrq.txt).
+This way it is possible to figure out where *exactly* some process in
+"D" state is stuck.
 
 I've seen reports that bttv 0.7.x crashes whereas 0.8.x works rock solid
 for some people.  Thus probably a small buglet left somewhere in bttv

+ 13 - 12
Documentation/vm/hugetlbpage.txt

@@ -13,12 +13,13 @@ This optimization is more critical now as bigger and bigger physical memories
 Users can use the huge page support in Linux kernel by either using the mmap
 system call or standard SYSv shared memory system calls (shmget, shmat).
 
-First the Linux kernel needs to be built with CONFIG_HUGETLB_PAGE (present
-under Processor types and feature)  and CONFIG_HUGETLBFS (present under file
-system option on config menu) config options.
+First the Linux kernel needs to be built with the CONFIG_HUGETLBFS
+(present under "File systems") and CONFIG_HUGETLB_PAGE (selected
+automatically when CONFIG_HUGETLBFS is selected) configuration
+options.
 
 The kernel built with hugepage support should show the number of configured
-hugepages in the system by running the "cat /proc/meminfo" command.  
+hugepages in the system by running the "cat /proc/meminfo" command.
 
 /proc/meminfo also provides information about the total number of hugetlb
 pages configured in the kernel.  It also displays information about the
@@ -38,19 +39,19 @@ in the kernel.
 
 /proc/sys/vm/nr_hugepages indicates the current number of configured hugetlb
 pages in the kernel.  Super user can dynamically request more (or free some
-pre-configured) hugepages. 
-The allocation( or deallocation) of hugetlb pages is posible only if there are
+pre-configured) hugepages.
+The allocation (or deallocation) of hugetlb pages is possible only if there are
 enough physically contiguous free pages in system (freeing of hugepages is
-possible only if there are enough hugetlb pages free that can be transfered 
+possible only if there are enough hugetlb pages free that can be transferred
 back to regular memory pool).
 
 Pages that are used as hugetlb pages are reserved inside the kernel and can
-not be used for other purposes. 
+not be used for other purposes.
 
 Once the kernel with Hugetlb page support is built and running, a user can
 use either the mmap system call or shared memory system calls to start using
 the huge pages.  It is required that the system administrator preallocate
-enough memory for huge page purposes.  
+enough memory for huge page purposes.
 
 Use the following command to dynamically allocate/deallocate hugepages:
 
@@ -80,9 +81,9 @@ memory (huge pages) allowed for that filesystem (/mnt/huge). The size is
 rounded down to HPAGE_SIZE.  The option nr_inode sets the maximum number of
 inodes that /mnt/huge can use.  If the size or nr_inode options are not
 provided on command line then no limits are set.  For size and nr_inodes
-options, you can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For 
-example, size=2K has the same meaning as size=2048. An example is given at 
-the end of this document. 
+options, you can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For
+example, size=2K has the same meaning as size=2048. An example is given at
+the end of this document.
 
 read and write system calls are not supported on files that reside on hugetlb
 file systems.
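
Since read(2) and write(2) do not work there, a hedged sketch of how a
process would typically use a file on a mounted hugetlbfs via mmap(2)
follows (the mount point /mnt/huge and the 4 MB length are assumptions;
the length must be a multiple of the huge page size):

/* hugepage-mmap sketch - illustrative only. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define LENGTH (4UL * 1024 * 1024)

int main(void)
{
	char *addr;
	unsigned long i;
	int fd = open("/mnt/huge/example", O_CREAT | O_RDWR, 0755);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	addr = mmap(NULL, LENGTH, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}
	for (i = 0; i < LENGTH; i++)	/* touching the pages faults in huge pages */
		addr[i] = (char) (i & 0xff);
	munmap(addr, LENGTH);
	close(fd);
	return 0;
}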

+ 29 - 3
MAINTAINERS

@@ -1077,6 +1077,26 @@ P:	Jaroslav Kysela
 M:	perex@suse.cz
 S:	Maintained
 
+HPET:	High Precision Event Timers driver (hpet.c)
+P:	Clemens Ladisch
+M:	clemens@ladisch.de
+S:	Maintained
+
+HPET:	i386
+P:	Venkatesh Pallipadi (Venki)
+M:	venkatesh.pallipadi@intel.com
+S:	Maintained
+
+HPET:	x86_64
+P:	Andi Kleen and Vojtech Pavlik
+M:	ak@muc.de and vojtech@suse.cz
+S:	Maintained
+
+HPET:	ACPI hpet.c
+P:	Bob Picco
+M:	bob.picco@hp.com
+S:	Maintained
+
 HPFS FILESYSTEM
 P:	Mikulas Patocka
 M:	mikulas@artax.karlin.mff.cuni.cz
@@ -2051,6 +2071,12 @@ P:	Matt Mackall
 M:	mpm@selenic.com
 S:	Maintained
 
+RAPIDIO SUBSYSTEM
+P:	Matt Porter
+M:	mporter@kernel.crashing.org
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+
 REAL TIME CLOCK DRIVER
 P:	Paul Gortmaker
 M:	p_gortmaker@yahoo.com
@@ -2455,10 +2481,10 @@ L:	linux-kernel@vger.kernel.org
 S:	Maintained
 
 TRIVIAL PATCHES
-P:      Rusty Russell
-M:      trivial@rustcorp.com.au
+P:      Adrian Bunk
+M:      trivial@kernel.org
 L:      linux-kernel@vger.kernel.org
-W:      http://www.kernel.org/pub/linux/kernel/people/rusty/trivial/
+W:      http://www.kernel.org/pub/linux/kernel/people/bunk/trivial/
 S:      Maintained
 
 TMS380 TOKEN-RING NETWORK DRIVER

+ 1 - 1
Makefile

@@ -583,7 +583,7 @@ export MODLIB
 
 
 ifeq ($(KBUILD_EXTMOD),)
-core-y		+= kernel/ mm/ fs/ ipc/ security/ crypto/
+core-y		+= kernel/ mm/ fs/ ipc/ security/ crypto/ block/
 
 vmlinux-dirs	:= $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
 		     $(core-y) $(core-m) $(drivers-y) $(drivers-m) \

+ 1 - 48
arch/arm/kernel/ptrace.c

@@ -648,7 +648,7 @@ static int ptrace_setwmmxregs(struct task_struct *tsk, void __user *ufp)
 
 #endif
 
-static int do_ptrace(int request, struct task_struct *child, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
 	unsigned long tmp;
 	int ret;
@@ -782,53 +782,6 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
 	return ret;
 }
 
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
-{
-	struct task_struct *child;
-	int ret;
-
-	lock_kernel();
-	ret = -EPERM;
-	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED)
-			goto out;
-		ret = security_ptrace(current->parent, current);
-		if (ret)
-			goto out;
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
-		ret = 0;
-		goto out;
-	}
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
-	if (!child)
-		goto out;
-
-	ret = -EPERM;
-	if (pid == 1)		/* you may not mess with init */
-		goto out_tsk;
-
-	if (request == PTRACE_ATTACH) {
-		ret = ptrace_attach(child);
-		goto out_tsk;
-	}
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret == 0)
-		ret = do_ptrace(request, child, addr, data);
-
-out_tsk:
-	put_task_struct(child);
-out:
-	unlock_kernel();
-	return ret;
-}
-
 asmlinkage void syscall_trace(int why, struct pt_regs *regs)
 {
 	unsigned long ip;

+ 1 - 0
arch/arm/mach-aaec2000/clock.c

@@ -14,6 +14,7 @@
 #include <linux/list.h>
 #include <linux/errno.h>
 #include <linux/err.h>
+#include <linux/string.h>
 
 #include <asm/semaphore.h>
 #include <asm/hardware/clock.h>

+ 1 - 0
arch/arm/mach-epxa10db/mm.c

@@ -25,6 +25,7 @@
 #include <asm/hardware.h>
 #include <asm/io.h>
 #include <asm/sizes.h>
+#include <asm/page.h>
  
 #include <asm/mach/map.h>
 

+ 1 - 2
arch/arm/mach-integrator/impd1.c

@@ -420,8 +420,7 @@ static int impd1_probe(struct lm_device *dev)
  free_impd1:
 	if (impd1 && impd1->base)
 		iounmap(impd1->base);
-	if (impd1)
-		kfree(impd1);
+	kfree(impd1);
  release_lm:
 	release_mem_region(dev->resource.start, SZ_4K);
 	return ret;

+ 1 - 0
arch/arm/mach-pxa/corgi_lcd.c

@@ -19,6 +19,7 @@
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/module.h>
+#include <linux/string.h>
 #include <asm/arch/akita.h>
 #include <asm/arch/corgi.h>
 #include <asm/arch/hardware.h>

+ 1 - 48
arch/arm26/kernel/ptrace.c

@@ -546,7 +546,7 @@ static int ptrace_setfpregs(struct task_struct *tsk, void *ufp)
 			      sizeof(struct user_fp)) ? -EFAULT : 0;
 }
 
-static int do_ptrace(int request, struct task_struct *child, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
 	unsigned long tmp;
 	int ret;
@@ -665,53 +665,6 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
 	return ret;
 }
 
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
-{
-	struct task_struct *child;
-	int ret;
-
-	lock_kernel();
-	ret = -EPERM;
-	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED)
-			goto out;
-		ret = security_ptrace(current->parent, current);
-		if (ret)
-			goto out;
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
-		ret = 0;
-		goto out;
-	}
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
-	if (!child)
-		goto out;
-
-	ret = -EPERM;
-	if (pid == 1)		/* you may not mess with init */
-		goto out_tsk;
-
-	if (request == PTRACE_ATTACH) {
-		ret = ptrace_attach(child);
-		goto out_tsk;
-	}
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret == 0)
-		ret = do_ptrace(request, child, addr, data);
-
-out_tsk:
-	put_task_struct(child);
-out:
-	unlock_kernel();
-	return ret;
-}
-
 asmlinkage void syscall_trace(int why, struct pt_regs *regs)
 {
 	unsigned long ip;

+ 3 - 3
arch/cris/arch-v10/README.mm

@@ -177,7 +177,7 @@ The example address is 0xd004000c; in binary this is:
 Given the top-level Page Directory, the offset in that directory is calculated
 using the upper 8 bits:
 
-extern inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address)
+static inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address)
 {
 	return mm->pgd + (address >> PGDIR_SHIFT);
 }
@@ -190,14 +190,14 @@ The pgd_t from our example will therefore be the 208'th (0xd0) entry in mm->pgd.
 
 Since the Middle Directory does not exist, it is a unity mapping:
 
-extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
+static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
 {
 	return (pmd_t *) dir;
 }
 
 The Page Table provides the final lookup by using bits 13 to 23 as index:
 
-extern inline pte_t * pte_offset(pmd_t * dir, unsigned long address)
+static inline pte_t * pte_offset(pmd_t * dir, unsigned long address)
 {
 	return (pte_t *) pmd_page(*dir) + ((address >> PAGE_SHIFT) &
 					   (PTRS_PER_PTE - 1));
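
Putting the three helpers quoted above together, the translation of an
address can be sketched as follows (illustrative only, using the
README's pre-2.6 style pte_offset() name; error checks omitted):

#include <linux/mm.h>
#include <asm/pgtable.h>

/* Sketch: walk the two-level table for 'address' in 'mm'. */
static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd = pgd_offset(mm, address);
	pmd_t *pmd = pmd_offset(pgd, address);	/* unity mapping, see above */

	return pte_offset(pmd, address);
}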

+ 2 - 49
arch/cris/arch-v10/kernel/ptrace.c

@@ -76,55 +76,11 @@ ptrace_disable(struct task_struct *child)
  * (in user space) where the result of the ptrace call is written (instead of
  * being returned).
  */
-asmlinkage int 
-sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	struct task_struct *child;
 	int ret;
 	unsigned long __user *datap = (unsigned long __user *)data;
 
-	lock_kernel();
-	ret = -EPERM;
-	
-	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED)
-			goto out;
-		ret = security_ptrace(current->parent, current);
-		if (ret)
-			goto out;
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
-		ret = 0;
-		goto out;
-	}
-	
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	
-	if (child)
-		get_task_struct(child);
-	
-	read_unlock(&tasklist_lock);
-	
-	if (!child)
-		goto out;
-	
-	ret = -EPERM;
-	
-	if (pid == 1)		/* Leave the init process alone! */
-		goto out_tsk;
-	
-	if (request == PTRACE_ATTACH) {
-		ret = ptrace_attach(child);
-		goto out_tsk;
-	}
-	
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		goto out_tsk;
-
 	switch (request) {
 		/* Read word at location address. */ 
 		case PTRACE_PEEKTEXT:
@@ -289,10 +245,7 @@ sys_ptrace(long request, long pid, long addr, long data)
 			ret = ptrace_request(child, request, addr, data);
 			break;
 	}
-out_tsk:
-	put_task_struct(child);
-out:
-	unlock_kernel();
+
 	return ret;
 }
 

+ 1 - 1
arch/cris/arch-v10/kernel/signal.c

@@ -476,7 +476,7 @@ give_sigsegv:
  * OK, we're invoking a handler
  */	
 
-extern inline void
+static inline void
 handle_signal(int canrestart, unsigned long sig,
 	      siginfo_t *info, struct k_sigaction *ka,
               sigset_t *oldset, struct pt_regs * regs)

+ 7 - 7
arch/cris/arch-v32/drivers/cryptocop.c

@@ -277,7 +277,7 @@ struct file_operations cryptocop_fops = {
 static void free_cdesc(struct cryptocop_dma_desc *cdesc)
 {
 	DEBUG(printk("free_cdesc: cdesc 0x%p, from_pool=%d\n", cdesc, cdesc->from_pool));
-	if (cdesc->free_buf) kfree(cdesc->free_buf);
+	kfree(cdesc->free_buf);
 
 	if (cdesc->from_pool) {
 		unsigned long int flags;
@@ -2950,15 +2950,15 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig
 		put_page(outpages[i]);
 	}
 
-	if (digest_result) kfree(digest_result);
-	if (inpages) kfree(inpages);
-	if (outpages) kfree(outpages);
+	kfree(digest_result);
+	kfree(inpages);
+	kfree(outpages);
 	if (cop){
-		if (cop->tfrm_op.indata) kfree(cop->tfrm_op.indata);
-		if (cop->tfrm_op.outdata) kfree(cop->tfrm_op.outdata);
+		kfree(cop->tfrm_op.indata);
+		kfree(cop->tfrm_op.outdata);
 		kfree(cop);
 	}
-	if (jc) kfree(jc);
+	kfree(jc);
 
 	DEBUG(print_lock_status());
 

+ 2 - 49
arch/cris/arch-v32/kernel/ptrace.c

@@ -99,55 +99,11 @@ ptrace_disable(struct task_struct *child)
 }
 
 
-asmlinkage int
-sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	struct task_struct *child;
 	int ret;
 	unsigned long __user *datap = (unsigned long __user *)data;
 
-	lock_kernel();
-	ret = -EPERM;
-
-	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED)
-			goto out;
-		ret = security_ptrace(current->parent, current);
-		if (ret)
-			goto out;
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
-		ret = 0;
-		goto out;
-	}
-
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-
-	if (child)
-		get_task_struct(child);
-
-	read_unlock(&tasklist_lock);
-
-	if (!child)
-		goto out;
-
-	ret = -EPERM;
-
-	if (pid == 1)		/* Leave the init process alone! */
-		goto out_tsk;
-
-	if (request == PTRACE_ATTACH) {
-		ret = ptrace_attach(child);
-		goto out_tsk;
-	}
-
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		goto out_tsk;
-
 	switch (request) {
 		/* Read word at location address. */
 		case PTRACE_PEEKTEXT:
@@ -347,10 +303,7 @@ sys_ptrace(long request, long pid, long addr, long data)
 			ret = ptrace_request(child, request, addr, data);
 			break;
 	}
-out_tsk:
-	put_task_struct(child);
-out:
-	unlock_kernel();
+
 	return ret;
 }
 

+ 1 - 1
arch/cris/arch-v32/kernel/signal.c

@@ -513,7 +513,7 @@ give_sigsegv:
 }
 
 /* Invoke a singal handler to, well, handle the signal. */
-extern inline void
+static inline void
 handle_signal(int canrestart, unsigned long sig,
 	      siginfo_t *info, struct k_sigaction *ka,
               sigset_t *oldset, struct pt_regs * regs)

+ 1 - 1
arch/cris/mm/ioremap.c

@@ -16,7 +16,7 @@
 #include <asm/tlbflush.h>
 #include <asm/arch/memmap.h>
 
-extern inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
+static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
 	unsigned long phys_addr, pgprot_t prot)
 {
 	unsigned long end;

+ 1 - 42
arch/frv/kernel/ptrace.c

@@ -106,48 +106,11 @@ void ptrace_enable(struct task_struct *child)
 	child->thread.frame0->__status |= REG__STATUS_STEP;
 }
 
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	struct task_struct *child;
 	unsigned long tmp;
 	int ret;
 
-	lock_kernel();
-	ret = -EPERM;
-	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED)
-			goto out;
-		ret = security_ptrace(current->parent, current);
-		if (ret)
-			goto out;
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
-		ret = 0;
-		goto out;
-	}
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
-	if (!child)
-		goto out;
-
-	ret = -EPERM;
-	if (pid == 1)		/* you may not mess with init */
-		goto out_tsk;
-
-	if (request == PTRACE_ATTACH) {
-		ret = ptrace_attach(child);
-		goto out_tsk;
-	}
-
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		goto out_tsk;
-
 	switch (request) {
 		/* when I and D space are separate, these will need to be fixed. */
 	case PTRACE_PEEKTEXT: /* read word at location addr. */
@@ -351,10 +314,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 		ret = -EIO;
 		break;
 	}
-out_tsk:
-	put_task_struct(child);
-out:
-	unlock_kernel();
 	return ret;
 }
 

+ 1 - 38
arch/h8300/kernel/ptrace.c

@@ -57,43 +57,10 @@ void ptrace_disable(struct task_struct *child)
 	h8300_disable_trace(child);
 }
 
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	struct task_struct *child;
 	int ret;
 
-	lock_kernel();
-	ret = -EPERM;
-	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED)
-			goto out;
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
-		ret = 0;
-		goto out;
-	}
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
-	if (!child)
-		goto out;
-
-	ret = -EPERM;
-	if (pid == 1)		/* you may not mess with init */
-		goto out_tsk;
-
-	if (request == PTRACE_ATTACH) {
-		ret = ptrace_attach(child);
-		goto out_tsk;
-	}
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		goto out_tsk;
-
 	switch (request) {
 		case PTRACE_PEEKTEXT: /* read word at location addr. */ 
 		case PTRACE_PEEKDATA: {
@@ -251,10 +218,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 			ret = -EIO;
 			break;
 	}
-out_tsk:
-	put_task_struct(child);
-out:
-	unlock_kernel();
 	return ret;
 }
 

+ 13 - 0
arch/i386/Kconfig

@@ -997,8 +997,21 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
+menu "Instrumentation Support"
+	depends on EXPERIMENTAL
+
 source "arch/i386/oprofile/Kconfig"
 
+config KPROBES
+	bool "Kprobes (EXPERIMENTAL)"
+	help
+	  Kprobes allows you to trap at almost any kernel address and
+	  execute a callback function.  register_kprobe() establishes
+	  a probepoint and specifies the callback.  Kprobes is useful
+	  for kernel debugging, non-intrusive instrumentation and testing.
+	  If in doubt, say "N".
+endmenu
+
 source "arch/i386/Kconfig.debug"
 
 source "security/Kconfig"

+ 0 - 10
arch/i386/Kconfig.debug

@@ -22,16 +22,6 @@ config DEBUG_STACKOVERFLOW
 	  This option will cause messages to be printed if free stack space
 	  drops below a certain limit.
 
-config KPROBES
-	bool "Kprobes"
-	depends on DEBUG_KERNEL
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.  register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-
 config DEBUG_STACK_USAGE
 	bool "Stack utilization instrumentation"
 	depends on DEBUG_KERNEL

+ 8 - 2
arch/i386/kernel/apic.c

@@ -559,14 +559,20 @@ void __devinit setup_local_APIC(void)
  * If Linux enabled the LAPIC against the BIOS default
  * disable it down before re-entering the BIOS on shutdown.
  * Otherwise the BIOS may get confused and not power-off.
+ * Additionally clear all LVT entries before disable_local_APIC
+ * for the case where Linux didn't enable the LAPIC.
  */
 void lapic_shutdown(void)
 {
-	if (!cpu_has_apic || !enabled_via_apicbase)
+	if (!cpu_has_apic)
 		return;
 
 	local_irq_disable();
-	disable_local_APIC();
+	clear_local_APIC();
+
+	if (enabled_via_apicbase)
+		disable_local_APIC();
+
 	local_irq_enable();
 }
 

+ 2 - 3
arch/i386/kernel/apm.c

@@ -447,8 +447,7 @@ static char *	apm_event_name[] = {
 	"system standby resume",
 	"capabilities change"
 };
-#define NR_APM_EVENT_NAME	\
-		(sizeof(apm_event_name) / sizeof(apm_event_name[0]))
+#define NR_APM_EVENT_NAME ARRAY_SIZE(apm_event_name)
 
 typedef struct lookup_t {
 	int	key;
@@ -479,7 +478,7 @@ static const lookup_t error_table[] = {
 	{ APM_NO_ERROR,		"BIOS did not set a return code" },
 	{ APM_NOT_PRESENT,	"No APM present" }
 };
-#define ERROR_COUNT	(sizeof(error_table)/sizeof(lookup_t))
+#define ERROR_COUNT	ARRAY_SIZE(error_table)
 
 /**
  *	apm_error	-	display an APM error
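
Note: several files in this merge replace the open-coded sizeof(x)/sizeof(x[0]) with ARRAY_SIZE(). The kernel macro is essentially the one-liner below; the win is readability, and the expression only makes sense on real arrays, never on pointers. A standalone sketch (the names[] table is made up for illustration):

    #include <stdio.h>

    /* same shape as the kernel's ARRAY_SIZE(): element count of an array */
    #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

    static const char *names[] = { "standby", "suspend", "resume" };

    int main(void)
    {
            for (size_t i = 0; i < ARRAY_SIZE(names); i++)
                    printf("%zu: %s\n", i, names[i]);
            return 0;
    }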

+ 1 - 4
arch/i386/kernel/cpu/common.c

@@ -30,8 +30,6 @@ static int disable_x86_serial_nr __devinitdata = 1;
 
 struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
 
-extern void mcheck_init(struct cpuinfo_x86 *c);
-
 extern int disable_pse;
 
 static void default_init(struct cpuinfo_x86 * c)
@@ -429,9 +427,8 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
 	}
 
 	/* Init Machine Check Exception if available. */
-#ifdef CONFIG_X86_MCE
 	mcheck_init(c);
-#endif
+
 	if (c == &boot_cpu_data)
 		sysenter_setup();
 	enable_sep_cpu();

+ 1 - 1
arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c

@@ -67,7 +67,7 @@ static const struct cpu_id cpu_ids[] = {
 	[CPU_MP4HT_D0]	= {15,  3, 4 },
 	[CPU_MP4HT_E0]	= {15,  4, 1 },
 };
-#define N_IDS	(sizeof(cpu_ids)/sizeof(cpu_ids[0]))
+#define N_IDS	ARRAY_SIZE(cpu_ids)
 
 struct cpu_model
 {

+ 1 - 1
arch/i386/kernel/cpu/mcheck/k7.c

@@ -68,7 +68,7 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
 
 
 /* AMD K7 machine check is Intel like */
-void __devinit amd_mcheck_init(struct cpuinfo_x86 *c)
+void amd_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
 	int i;

+ 2 - 2
arch/i386/kernel/cpu/mcheck/mce.c

@@ -16,7 +16,7 @@
 
 #include "mce.h"
 
-int mce_disabled __devinitdata = 0;
+int mce_disabled = 0;
 int nr_mce_banks;
 
 EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
@@ -31,7 +31,7 @@ static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_
 void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
 
 /* This has to be run for each processor */
-void __devinit mcheck_init(struct cpuinfo_x86 *c)
+void mcheck_init(struct cpuinfo_x86 *c)
 {
 	if (mce_disabled==1)
 		return;

+ 2 - 2
arch/i386/kernel/cpu/mcheck/p4.c

@@ -77,7 +77,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs)
 }
 
 /* P4/Xeon Thermal regulation detect and init */
-static void __devinit intel_init_thermal(struct cpuinfo_x86 *c)
+static void intel_init_thermal(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
 	unsigned int cpu = smp_processor_id();
@@ -231,7 +231,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
 }
 
 
-void __devinit intel_p4_mcheck_init(struct cpuinfo_x86 *c)
+void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
 	int i;

+ 1 - 1
arch/i386/kernel/cpu/mcheck/p5.c

@@ -28,7 +28,7 @@ static fastcall void pentium_machine_check(struct pt_regs * regs, long error_cod
 }
 
 /* Set up machine check reporting for processors with Intel style MCE */
-void __devinit intel_p5_mcheck_init(struct cpuinfo_x86 *c)
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
 	

+ 1 - 1
arch/i386/kernel/cpu/mcheck/p6.c

@@ -79,7 +79,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
 }
 
 /* Set up machine check reporting for processors with Intel style MCE */
-void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c)
+void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
 	int i;

+ 1 - 1
arch/i386/kernel/cpu/mcheck/winchip.c

@@ -22,7 +22,7 @@ static fastcall void winchip_machine_check(struct pt_regs * regs, long error_cod
 }
 
 /* Set up machine check reporting on the Winchip C6 series */
-void __devinit winchip_mcheck_init(struct cpuinfo_x86 *c)
+void winchip_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 lo, hi;
 	machine_check_vector = winchip_machine_check;

+ 95 - 85
arch/i386/kernel/kprobes.c

@@ -31,22 +31,16 @@
 #include <linux/config.h>
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
-#include <linux/spinlock.h>
 #include <linux/preempt.h>
 #include <asm/cacheflush.h>
 #include <asm/kdebug.h>
 #include <asm/desc.h>
 
-static struct kprobe *current_kprobe;
-static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags;
-static struct kprobe *kprobe_prev;
-static unsigned long kprobe_status_prev, kprobe_old_eflags_prev, kprobe_saved_eflags_prev;
-static struct pt_regs jprobe_saved_regs;
-static long *jprobe_saved_esp;
-/* copy of the kernel stack at the probe fire time */
-static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
 void jprobe_return_end(void);
 
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
 /*
  * returns non-zero if opcode modifies the interrupt flag.
  */
@@ -91,29 +85,30 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 }
 
-static inline void save_previous_kprobe(void)
+static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
-	kprobe_prev = current_kprobe;
-	kprobe_status_prev = kprobe_status;
-	kprobe_old_eflags_prev = kprobe_old_eflags;
-	kprobe_saved_eflags_prev = kprobe_saved_eflags;
+	kcb->prev_kprobe.kp = kprobe_running();
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+	kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags;
+	kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags;
 }
 
-static inline void restore_previous_kprobe(void)
+static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
-	current_kprobe = kprobe_prev;
-	kprobe_status = kprobe_status_prev;
-	kprobe_old_eflags = kprobe_old_eflags_prev;
-	kprobe_saved_eflags = kprobe_saved_eflags_prev;
+	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+	kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags;
+	kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags;
 }
 
-static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs)
+static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+				struct kprobe_ctlblk *kcb)
 {
-	current_kprobe = p;
-	kprobe_saved_eflags = kprobe_old_eflags
+	__get_cpu_var(current_kprobe) = p;
+	kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags
 		= (regs->eflags & (TF_MASK | IF_MASK));
 	if (is_IF_modifier(p->opcode))
-		kprobe_saved_eflags &= ~IF_MASK;
+		kcb->kprobe_saved_eflags &= ~IF_MASK;
 }
 
 static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
@@ -127,6 +122,7 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 		regs->eip = (unsigned long)&p->ainsn.insn;
 }
 
+/* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
 				      struct pt_regs *regs)
 {
@@ -157,9 +153,15 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	int ret = 0;
 	kprobe_opcode_t *addr = NULL;
 	unsigned long *lp;
+	struct kprobe_ctlblk *kcb;
 
-	/* We're in an interrupt, but this is clear and BUG()-safe. */
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing
+	 */
 	preempt_disable();
+	kcb = get_kprobe_ctlblk();
+
 	/* Check if the application is using LDT entry for its code segment and
 	 * calculate the address by reading the base address from the LDT entry.
 	 */
@@ -173,15 +175,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	}
 	/* Check we're not actually recursing */
 	if (kprobe_running()) {
-		/* We *are* holding lock here, so this is safe.
-		   Disarm the probe we just hit, and ignore it. */
 		p = get_kprobe(addr);
 		if (p) {
-			if (kprobe_status == KPROBE_HIT_SS &&
+			if (kcb->kprobe_status == KPROBE_HIT_SS &&
 				*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
 				regs->eflags &= ~TF_MASK;
-				regs->eflags |= kprobe_saved_eflags;
-				unlock_kprobes();
+				regs->eflags |= kcb->kprobe_saved_eflags;
 				goto no_kprobe;
 			}
 			/* We have reentered the kprobe_handler(), since
@@ -190,26 +189,23 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 			 * just single step on the instruction of the new probe
 			 * without calling any user handlers.
 			 */
-			save_previous_kprobe();
-			set_current_kprobe(p, regs);
+			save_previous_kprobe(kcb);
+			set_current_kprobe(p, regs, kcb);
 			p->nmissed++;
 			prepare_singlestep(p, regs);
-			kprobe_status = KPROBE_REENTER;
+			kcb->kprobe_status = KPROBE_REENTER;
 			return 1;
 		} else {
-			p = current_kprobe;
+			p = __get_cpu_var(current_kprobe);
 			if (p->break_handler && p->break_handler(p, regs)) {
 				goto ss_probe;
 			}
 		}
-		/* If it's not ours, can't be delete race, (we hold lock). */
 		goto no_kprobe;
 	}
 
-	lock_kprobes();
 	p = get_kprobe(addr);
 	if (!p) {
-		unlock_kprobes();
 		if (regs->eflags & VM_MASK) {
 			/* We are in virtual-8086 mode. Return 0 */
 			goto no_kprobe;
@@ -232,8 +228,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 		goto no_kprobe;
 	}
 
-	kprobe_status = KPROBE_HIT_ACTIVE;
-	set_current_kprobe(p, regs);
+	set_current_kprobe(p, regs, kcb);
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
 
 	if (p->pre_handler && p->pre_handler(p, regs))
 		/* handler has already set things up, so skip ss setup */
@@ -241,7 +237,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 
 ss_probe:
 	prepare_singlestep(p, regs);
-	kprobe_status = KPROBE_HIT_SS;
+	kcb->kprobe_status = KPROBE_HIT_SS;
 	return 1;
 
 no_kprobe:
@@ -269,9 +265,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
         struct kretprobe_instance *ri = NULL;
         struct hlist_head *head;
         struct hlist_node *node, *tmp;
-	unsigned long orig_ret_address = 0;
+	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
+	spin_lock_irqsave(&kretprobe_lock, flags);
         head = kretprobe_inst_table_head(current);
 
 	/*
@@ -310,14 +307,15 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
 	regs->eip = orig_ret_address;
 
-	unlock_kprobes();
+	reset_current_kprobe();
+	spin_unlock_irqrestore(&kretprobe_lock, flags);
 	preempt_enable_no_resched();
 
-        /*
-         * By returning a non-zero value, we are telling
-         * kprobe_handler() that we have handled unlocking
-         * and re-enabling preemption.
-         */
+	/*
+	 * By returning a non-zero value, we are telling
+	 * kprobe_handler() that we don't want the post_handler
+	 * to run (and have re-enabled preemption)
+	 */
         return 1;
 }
 
@@ -343,7 +341,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
  * that is atop the stack is the address following the copied instruction.
  * We need to make it the address following the original instruction.
  */
-static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes resume_execution(struct kprobe *p,
+		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
 {
 	unsigned long *tos = (unsigned long *)&regs->esp;
 	unsigned long next_eip = 0;
@@ -353,7 +352,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
 	switch (p->ainsn.insn[0]) {
 	case 0x9c:		/* pushfl */
 		*tos &= ~(TF_MASK | IF_MASK);
-		*tos |= kprobe_old_eflags;
+		*tos |= kcb->kprobe_old_eflags;
 		break;
 	case 0xc3:		/* ret/lret */
 	case 0xcb:
@@ -394,27 +393,30 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
 
 /*
  * Interrupts are disabled on entry as trap1 is an interrupt gate and they
- * remain disabled thoroughout this function.  And we hold kprobe lock.
+ * remain disabled thoroughout this function.
  */
 static inline int post_kprobe_handler(struct pt_regs *regs)
 {
-	if (!kprobe_running())
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (!cur)
 		return 0;
 
-	if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) {
-		kprobe_status = KPROBE_HIT_SSDONE;
-		current_kprobe->post_handler(current_kprobe, regs, 0);
+	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
 	}
 
-	resume_execution(current_kprobe, regs);
-	regs->eflags |= kprobe_saved_eflags;
+	resume_execution(cur, regs, kcb);
+	regs->eflags |= kcb->kprobe_saved_eflags;
 
 	/*Restore back the original saved kprobes variables and continue. */
-	if (kprobe_status == KPROBE_REENTER) {
-		restore_previous_kprobe();
+	if (kcb->kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe(kcb);
 		goto out;
 	}
-	unlock_kprobes();
+	reset_current_kprobe();
 out:
 	preempt_enable_no_resched();
 
@@ -429,18 +431,19 @@ out:
 	return 1;
 }
 
-/* Interrupts disabled, kprobe_lock held. */
 static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
-	if (current_kprobe->fault_handler
-	    && current_kprobe->fault_handler(current_kprobe, regs, trapnr))
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
 		return 1;
 
-	if (kprobe_status & KPROBE_HIT_SS) {
-		resume_execution(current_kprobe, regs);
-		regs->eflags |= kprobe_old_eflags;
+	if (kcb->kprobe_status & KPROBE_HIT_SS) {
+		resume_execution(cur, regs, kcb);
+		regs->eflags |= kcb->kprobe_old_eflags;
 
-		unlock_kprobes();
+		reset_current_kprobe();
 		preempt_enable_no_resched();
 	}
 	return 0;
@@ -453,39 +456,41 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 				       unsigned long val, void *data)
 {
 	struct die_args *args = (struct die_args *)data;
+	int ret = NOTIFY_DONE;
+
 	switch (val) {
 	case DIE_INT3:
 		if (kprobe_handler(args->regs))
-			return NOTIFY_STOP;
+			ret = NOTIFY_STOP;
 		break;
 	case DIE_DEBUG:
 		if (post_kprobe_handler(args->regs))
-			return NOTIFY_STOP;
+			ret = NOTIFY_STOP;
 		break;
 	case DIE_GPF:
-		if (kprobe_running() &&
-		    kprobe_fault_handler(args->regs, args->trapnr))
-			return NOTIFY_STOP;
-		break;
 	case DIE_PAGE_FAULT:
+		/* kprobe_running() needs smp_processor_id() */
+		preempt_disable();
 		if (kprobe_running() &&
 		    kprobe_fault_handler(args->regs, args->trapnr))
-			return NOTIFY_STOP;
+			ret = NOTIFY_STOP;
+		preempt_enable();
 		break;
 	default:
 		break;
 	}
-	return NOTIFY_DONE;
+	return ret;
 }
 
 int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 	unsigned long addr;
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
-	jprobe_saved_regs = *regs;
-	jprobe_saved_esp = &regs->esp;
-	addr = (unsigned long)jprobe_saved_esp;
+	kcb->jprobe_saved_regs = *regs;
+	kcb->jprobe_saved_esp = &regs->esp;
+	addr = (unsigned long)(kcb->jprobe_saved_esp);
 
 	/*
 	 * TBD: As Linus pointed out, gcc assumes that the callee
@@ -494,7 +499,8 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	 * we also save and restore enough stack bytes to cover
 	 * the argument area.
 	 */
-	memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr));
+	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
+			MIN_STACK_SIZE(addr));
 	regs->eflags &= ~IF_MASK;
 	regs->eip = (unsigned long)(jp->entry);
 	return 1;
@@ -502,36 +508,40 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 
 void __kprobes jprobe_return(void)
 {
-	preempt_enable_no_resched();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
 	asm volatile ("       xchgl   %%ebx,%%esp     \n"
 		      "       int3			\n"
 		      "       .globl jprobe_return_end	\n"
 		      "       jprobe_return_end:	\n"
 		      "       nop			\n"::"b"
-		      (jprobe_saved_esp):"memory");
+		      (kcb->jprobe_saved_esp):"memory");
 }
 
 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	u8 *addr = (u8 *) (regs->eip - 1);
-	unsigned long stack_addr = (unsigned long)jprobe_saved_esp;
+	unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp);
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 
 	if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
-		if (&regs->esp != jprobe_saved_esp) {
+		if (&regs->esp != kcb->jprobe_saved_esp) {
 			struct pt_regs *saved_regs =
-			    container_of(jprobe_saved_esp, struct pt_regs, esp);
+			    container_of(kcb->jprobe_saved_esp,
+					    struct pt_regs, esp);
 			printk("current esp %p does not match saved esp %p\n",
-			       &regs->esp, jprobe_saved_esp);
+			       &regs->esp, kcb->jprobe_saved_esp);
 			printk("Saved registers for jprobe %p\n", jp);
 			show_registers(saved_regs);
 			printk("Current registers\n");
 			show_registers(regs);
 			BUG();
 		}
-		*regs = jprobe_saved_regs;
-		memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack,
+		*regs = kcb->jprobe_saved_regs;
+		memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
 		       MIN_STACK_SIZE(stack_addr));
+		preempt_enable_no_resched();
 		return 1;
 	}
 	return 0;
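
Note: the kprobes rework above (repeated for ia64 below) trades a pile of file-scope globals plus the old kprobe lock for a per-CPU struct kprobe_ctlblk, relying on preemption staying disabled for the whole time a probe is handled so the state cannot migrate between CPUs. A rough userspace analogue of the data layout, with an explicit CPU index standing in for DEFINE_PER_CPU()/get_kprobe_ctlblk(); everything here is illustrative:

    #include <stdio.h>

    #define NR_CPUS 4

    /* stand-in for the per-CPU kprobe control block: each CPU records its
     * own active-probe state, so probes hit simultaneously on two CPUs no
     * longer race on shared globals */
    struct ctlblk {
            int status;             /* e.g. KPROBE_HIT_ACTIVE */
            unsigned long saved_flags;
    };

    static struct ctlblk per_cpu_ctlblk[NR_CPUS];

    static struct ctlblk *get_ctlblk(int cpu)
    {
            /* the kernel resolves this through get_kprobe_ctlblk(), which
             * is only safe because preemption is off at the call sites */
            return &per_cpu_ctlblk[cpu];
    }

    int main(void)
    {
            struct ctlblk *kcb = get_ctlblk(0);

            kcb->status = 1;
            printf("cpu0 status=%d\n", kcb->status);
            return 0;
    }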

+ 1 - 0
arch/i386/kernel/ldt.c

@@ -18,6 +18,7 @@
 #include <asm/system.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
+#include <asm/mmu_context.h>
 
 #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
 static void flush_ldt(void *null)

+ 1 - 1
arch/i386/kernel/mca.c

@@ -132,7 +132,7 @@ static struct resource mca_standard_resources[] = {
 	{ .start = 0x100, .end = 0x107, .name = "POS (MCA)" }
 };
 
-#define MCA_STANDARD_RESOURCES	(sizeof(mca_standard_resources)/sizeof(struct resource))
+#define MCA_STANDARD_RESOURCES	ARRAY_SIZE(mca_standard_resources)
 
 /**
  *	mca_read_and_store_pos - read the POS registers into a memory buffer

+ 2 - 42
arch/i386/kernel/ptrace.c

@@ -354,49 +354,12 @@ ptrace_set_thread_area(struct task_struct *child,
 	return 0;
 }
 
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	struct task_struct *child;
 	struct user * dummy = NULL;
 	int i, ret;
 	unsigned long __user *datap = (unsigned long __user *)data;
 
-	lock_kernel();
-	ret = -EPERM;
-	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED)
-			goto out;
-		ret = security_ptrace(current->parent, current);
-		if (ret)
-			goto out;
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
-		ret = 0;
-		goto out;
-	}
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
-	if (!child)
-		goto out;
-
-	ret = -EPERM;
-	if (pid == 1)		/* you may not mess with init */
-		goto out_tsk;
-
-	if (request == PTRACE_ATTACH) {
-		ret = ptrace_attach(child);
-		goto out_tsk;
-	}
-
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		goto out_tsk;
-
 	switch (request) {
 	/* when I and D space are separate, these will need to be fixed. */
 	case PTRACE_PEEKTEXT: /* read word at location addr. */ 
@@ -663,10 +626,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 		ret = ptrace_request(child, request, addr, data);
 		break;
 	}
-out_tsk:
-	put_task_struct(child);
-out:
-	unlock_kernel();
+ out_tsk:
 	return ret;
 }
 

+ 2 - 1
arch/i386/kernel/reboot_fixups.c

@@ -10,6 +10,7 @@
 
 #include <asm/delay.h>
 #include <linux/pci.h>
+#include <linux/reboot_fixups.h>
 
 static void cs5530a_warm_reset(struct pci_dev *dev)
 {
@@ -42,7 +43,7 @@ void mach_reboot_fixups(void)
 	struct pci_dev *dev;
 	int i;
 
-	for (i=0; i < (sizeof(fixups_table)/sizeof(fixups_table[0])); i++) {
+	for (i=0; i < ARRAY_SIZE(fixups_table); i++) {
 		cur = &(fixups_table[i]);
 		dev = pci_get_device(cur->vendor, cur->device, NULL);
 		if (!dev)

+ 1 - 0
arch/i386/kernel/scx200.c

@@ -12,6 +12,7 @@
 #include <linux/pci.h>
 
 #include <linux/scx200.h>
+#include <linux/scx200_gpio.h>
 
 /* Verify that the configuration block really is there */
 #define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))

+ 1 - 3
arch/i386/kernel/smpboot.c

@@ -68,11 +68,9 @@ EXPORT_SYMBOL(smp_num_siblings);
 
 /* Package ID of each logical CPU */
 int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-EXPORT_SYMBOL(phys_proc_id);
 
 /* Core ID of each logical CPU */
 int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-EXPORT_SYMBOL(cpu_core_id);
 
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_sibling_map);
@@ -612,7 +610,7 @@ static inline void __inquire_remote_apic(int apicid)
 
 	printk("Inquiring remote APIC #%d...\n", apicid);
 
-	for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+	for (i = 0; i < ARRAY_SIZE(regs); i++) {
 		printk("... APIC #%d %s: ", apicid, names[i]);
 
 		/*

+ 0 - 6
arch/i386/oprofile/Kconfig

@@ -1,7 +1,3 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
 config PROFILING
 	bool "Profiling support (EXPERIMENTAL)"
 	help
@@ -19,5 +15,3 @@ config OPROFILE
 
 	  If unsure, say N.
 
-endmenu
-

+ 1 - 0
arch/i386/power/cpu.c

@@ -118,6 +118,7 @@ void __restore_processor_state(struct saved_context *ctxt)
 	fix_processor_context();
 	do_fpu_end();
 	mtrr_ap_init();
+	mcheck_init(&boot_cpu_data);
 }
 
 void restore_processor_state(void)

+ 13 - 0
arch/ia64/Kconfig

@@ -426,8 +426,21 @@ config GENERIC_PENDING_IRQ
 
 source "arch/ia64/hp/sim/Kconfig"
 
+menu "Instrumentation Support"
+        depends on EXPERIMENTAL
+
 source "arch/ia64/oprofile/Kconfig"
 
+config KPROBES
+	bool "Kprobes (EXPERIMENTAL)"
+	help
+	  Kprobes allows you to trap at almost any kernel address and
+	  execute a callback function.  register_kprobe() establishes
+	  a probepoint and specifies the callback.  Kprobes is useful
+	  for kernel debugging, non-intrusive instrumentation and testing.
+	  If in doubt, say "N".
+endmenu
+
 source "arch/ia64/Kconfig.debug"
 
 source "security/Kconfig"

+ 0 - 11
arch/ia64/Kconfig.debug

@@ -2,17 +2,6 @@ menu "Kernel hacking"
 
 source "lib/Kconfig.debug"
 
-config KPROBES
-        bool "Kprobes"
-        depends on DEBUG_KERNEL
-        help
-          Kprobes allows you to trap at almost any kernel address and
-          execute a callback function.  register_kprobe() establishes
-          a probepoint and specifies the callback.  Kprobes is useful
-          for kernel debugging, non-intrusive instrumentation and testing.
-          If in doubt, say "N".
-
-
 choice
 	prompt "Physical memory granularity"
 	default IA64_GRANULE_64MB

+ 2 - 4
arch/ia64/hp/sim/simserial.c

@@ -642,10 +642,8 @@ static void rs_close(struct tty_struct *tty, struct file * filp)
 	info->event = 0;
 	info->tty = 0;
 	if (info->blocked_open) {
-		if (info->close_delay) {
-			current->state = TASK_INTERRUPTIBLE;
-			schedule_timeout(info->close_delay);
-		}
+		if (info->close_delay)
+			schedule_timeout_interruptible(info->close_delay);
 		wake_up_interruptible(&info->open_wait);
 	}
 	info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
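
Note: simserial.c here, and atari/time.c and irixsig.c further down, all drop the two-step "set TASK_INTERRUPTIBLE, then schedule_timeout()" dance in favour of schedule_timeout_interruptible(). As far as I can tell the helper is just that pattern wrapped up, roughly:

    /* sketch of the helper the callers above now use; the real definition
     * lives in the core timer code and looks essentially like this */
    signed long schedule_timeout_interruptible(signed long timeout)
    {
            __set_current_state(TASK_INTERRUPTIBLE);
            return schedule_timeout(timeout);
    }

Besides being shorter, it removes the easy mistake of setting the task state and then forgetting to actually sleep.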

+ 69 - 57
arch/ia64/kernel/kprobes.c

@@ -26,7 +26,6 @@
 #include <linux/config.h>
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
-#include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/preempt.h>
@@ -38,13 +37,8 @@
 
 extern void jprobe_inst_return(void);
 
-/* kprobe_status settings */
-#define KPROBE_HIT_ACTIVE	0x00000001
-#define KPROBE_HIT_SS		0x00000002
-
-static struct kprobe *current_kprobe, *kprobe_prev;
-static unsigned long kprobe_status, kprobe_status_prev;
-static struct pt_regs jprobe_saved_regs;
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 enum instruction_type {A, I, M, F, B, L, X, u};
 static enum instruction_type bundle_encoding[32][3] = {
@@ -313,21 +307,22 @@ static int __kprobes valid_kprobe_addr(int template, int slot,
 	return 0;
 }
 
-static inline void save_previous_kprobe(void)
+static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
-	kprobe_prev = current_kprobe;
-	kprobe_status_prev = kprobe_status;
+	kcb->prev_kprobe.kp = kprobe_running();
+	kcb->prev_kprobe.status = kcb->kprobe_status;
 }
 
-static inline void restore_previous_kprobe(void)
+static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
 {
-	current_kprobe = kprobe_prev;
-	kprobe_status = kprobe_status_prev;
+	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+	kcb->kprobe_status = kcb->prev_kprobe.status;
 }
 
-static inline void set_current_kprobe(struct kprobe *p)
+static inline void set_current_kprobe(struct kprobe *p,
+			struct kprobe_ctlblk *kcb)
 {
-	current_kprobe = p;
+	__get_cpu_var(current_kprobe) = p;
 }
 
 static void kretprobe_trampoline(void)
@@ -347,10 +342,11 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head;
 	struct hlist_node *node, *tmp;
-	unsigned long orig_ret_address = 0;
+	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address =
 		((struct fnptr *)kretprobe_trampoline)->ip;
 
+	spin_lock_irqsave(&kretprobe_lock, flags);
         head = kretprobe_inst_table_head(current);
 
 	/*
@@ -389,17 +385,19 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
 	regs->cr_iip = orig_ret_address;
 
-	unlock_kprobes();
+	reset_current_kprobe();
+	spin_unlock_irqrestore(&kretprobe_lock, flags);
 	preempt_enable_no_resched();
 
-        /*
-         * By returning a non-zero value, we are telling
-         * kprobe_handler() that we have handled unlocking
-         * and re-enabling preemption.
-         */
+	/*
+	 * By returning a non-zero value, we are telling
+	 * kprobe_handler() that we don't want the post_handler
+	 * to run (and have re-enabled preemption)
+	 */
         return 1;
 }
 
+/* Called with kretprobe_lock held */
 void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
 				      struct pt_regs *regs)
 {
@@ -606,17 +604,22 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 	int ret = 0;
 	struct pt_regs *regs = args->regs;
 	kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs);
+	struct kprobe_ctlblk *kcb;
 
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing
+	 */
 	preempt_disable();
+	kcb = get_kprobe_ctlblk();
 
 	/* Handle recursion cases */
 	if (kprobe_running()) {
 		p = get_kprobe(addr);
 		if (p) {
-			if ( (kprobe_status == KPROBE_HIT_SS) &&
+			if ((kcb->kprobe_status == KPROBE_HIT_SS) &&
 	 		     (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) {
   				ia64_psr(regs)->ss = 0;
-				unlock_kprobes();
 				goto no_kprobe;
 			}
 			/* We have reentered the pre_kprobe_handler(), since
@@ -625,17 +628,17 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 			 * just single step on the instruction of the new probe
 			 * without calling any user handlers.
 			 */
-			save_previous_kprobe();
-			set_current_kprobe(p);
+			save_previous_kprobe(kcb);
+			set_current_kprobe(p, kcb);
 			p->nmissed++;
 			prepare_ss(p, regs);
-			kprobe_status = KPROBE_REENTER;
+			kcb->kprobe_status = KPROBE_REENTER;
 			return 1;
 		} else if (args->err == __IA64_BREAK_JPROBE) {
 			/*
 			 * jprobe instrumented function just completed
 			 */
-			p = current_kprobe;
+			p = __get_cpu_var(current_kprobe);
 			if (p->break_handler && p->break_handler(p, regs)) {
 				goto ss_probe;
 			}
@@ -645,10 +648,8 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 		}
 	}
 
-	lock_kprobes();
 	p = get_kprobe(addr);
 	if (!p) {
-		unlock_kprobes();
 		if (!is_ia64_break_inst(regs)) {
 			/*
 			 * The breakpoint instruction was removed right
@@ -665,8 +666,8 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 		goto no_kprobe;
 	}
 
-	kprobe_status = KPROBE_HIT_ACTIVE;
-	set_current_kprobe(p);
+	set_current_kprobe(p, kcb);
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
 
 	if (p->pre_handler && p->pre_handler(p, regs))
 		/*
@@ -678,7 +679,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 
 ss_probe:
 	prepare_ss(p, regs);
-	kprobe_status = KPROBE_HIT_SS;
+	kcb->kprobe_status = KPROBE_HIT_SS;
 	return 1;
 
 no_kprobe:
@@ -688,23 +689,25 @@ no_kprobe:
 
 static int __kprobes post_kprobes_handler(struct pt_regs *regs)
 {
-	if (!kprobe_running())
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (!cur)
 		return 0;
 
-	if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) {
-		kprobe_status = KPROBE_HIT_SSDONE;
-		current_kprobe->post_handler(current_kprobe, regs, 0);
+	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
 	}
 
-	resume_execution(current_kprobe, regs);
+	resume_execution(cur, regs);
 
 	/*Restore back the original saved kprobes variables and continue. */
-	if (kprobe_status == KPROBE_REENTER) {
-		restore_previous_kprobe();
+	if (kcb->kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe(kcb);
 		goto out;
 	}
-
-	unlock_kprobes();
+	reset_current_kprobe();
 
 out:
 	preempt_enable_no_resched();
@@ -713,16 +716,15 @@ out:
 
 static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
 {
-	if (!kprobe_running())
-		return 0;
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
-	if (current_kprobe->fault_handler &&
-	    current_kprobe->fault_handler(current_kprobe, regs, trapnr))
+	if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
 		return 1;
 
-	if (kprobe_status & KPROBE_HIT_SS) {
-		resume_execution(current_kprobe, regs);
-		unlock_kprobes();
+	if (kcb->kprobe_status & KPROBE_HIT_SS) {
+		resume_execution(cur, regs);
+		reset_current_kprobe();
 		preempt_enable_no_resched();
 	}
 
@@ -733,31 +735,38 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 				       unsigned long val, void *data)
 {
 	struct die_args *args = (struct die_args *)data;
+	int ret = NOTIFY_DONE;
+
 	switch(val) {
 	case DIE_BREAK:
 		if (pre_kprobes_handler(args))
-			return NOTIFY_STOP;
+			ret = NOTIFY_STOP;
 		break;
 	case DIE_SS:
 		if (post_kprobes_handler(args->regs))
-			return NOTIFY_STOP;
+			ret = NOTIFY_STOP;
 		break;
 	case DIE_PAGE_FAULT:
-		if (kprobes_fault_handler(args->regs, args->trapnr))
-			return NOTIFY_STOP;
+		/* kprobe_running() needs smp_processor_id() */
+		preempt_disable();
+		if (kprobe_running() &&
+			kprobes_fault_handler(args->regs, args->trapnr))
+			ret = NOTIFY_STOP;
+		preempt_enable();
 	default:
 		break;
 	}
-	return NOTIFY_DONE;
+	return ret;
 }
 
 int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 	unsigned long addr = ((struct fnptr *)(jp->entry))->ip;
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
 	/* save architectural state */
-	jprobe_saved_regs = *regs;
+	kcb->jprobe_saved_regs = *regs;
 
 	/* after rfi, execute the jprobe instrumented function */
 	regs->cr_iip = addr & ~0xFULL;
@@ -775,7 +784,10 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 
 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
-	*regs = jprobe_saved_regs;
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	*regs = kcb->jprobe_saved_regs;
+	preempt_enable_no_resched();
 	return 1;
 }
 

+ 1 - 1
arch/ia64/kernel/perfmon.c

@@ -4940,7 +4940,7 @@ abort_locked:
 	if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
 
 error_args:
-	if (args_k) kfree(args_k);
+	kfree(args_k);
 
 	DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
 

+ 7 - 0
arch/ia64/kernel/setup.c

@@ -92,6 +92,13 @@ extern void efi_initialize_iomem_resources(struct resource *,
 extern char _text[], _end[], _etext[];
 
 unsigned long ia64_max_cacheline_size;
+
+int dma_get_cache_alignment(void)
+{
+        return ia64_max_cacheline_size;
+}
+EXPORT_SYMBOL(dma_get_cache_alignment);
+
 unsigned long ia64_iobase;	/* virtual address for I/O accesses */
 EXPORT_SYMBOL(ia64_iobase);
 struct io_space io_space[MAX_IO_SPACES];
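
Note: dma_get_cache_alignment() is exported here so drivers can size DMA buffers to the largest cache line in the machine. A hypothetical caller (not from this patch) would round its buffer length up to that alignment, assuming, as the DMA API does, that the value is a power of two:

    /* hypothetical driver-side helper: round a length up to the DMA cache
     * alignment so a streaming mapping never shares a cache line with
     * unrelated data (alignment assumed to be a power of two) */
    static size_t dma_safe_len(size_t len)
    {
            size_t align = dma_get_cache_alignment();

            return (len + align - 1) & ~(align - 1);
    }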

+ 0 - 6
arch/ia64/oprofile/Kconfig

@@ -1,7 +1,3 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
 config PROFILING
 	bool "Profiling support (EXPERIMENTAL)"
 	help
@@ -22,5 +18,3 @@ config OPROFILE
 
 	  If unsure, say N.
 
-endmenu
-

+ 2 - 4
arch/m68k/atari/time.c

@@ -212,10 +212,8 @@ int atari_tt_hwclk( int op, struct rtc_time *t )
      * additionally the RTC_SET bit is set to prevent an update cycle.
      */
 
-    while( RTC_READ(RTC_FREQ_SELECT) & RTC_UIP ) {
-        current->state = TASK_INTERRUPTIBLE;
-        schedule_timeout(HWCLK_POLL_INTERVAL);
-    }
+    while( RTC_READ(RTC_FREQ_SELECT) & RTC_UIP )
+        schedule_timeout_interruptible(HWCLK_POLL_INTERVAL);
 
     local_irq_save(flags);
     RTC_WRITE( RTC_CONTROL, ctrl | RTC_SET );

+ 3 - 44
arch/m68k/kernel/ptrace.c

@@ -121,48 +121,11 @@ void ptrace_disable(struct task_struct *child)
 	child->thread.work.syscall_trace = 0;
 }
 
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	struct task_struct *child;
 	unsigned long tmp;
 	int i, ret = 0;
 
-	lock_kernel();
-	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED) {
-			ret = -EPERM;
-			goto out;
-		}
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
-		goto out;
-	}
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
-	if (unlikely(!child)) {
-		ret = -ESRCH;
-		goto out;
-	}
-
-	/* you may not mess with init */
-	if (unlikely(pid == 1)) {
-		ret = -EPERM;
-		goto out_tsk;
-	}
-
-	if (request == PTRACE_ATTACH) {
-		ret = ptrace_attach(child);
-		goto out_tsk;
-	}
-
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret)
-		goto out_tsk;
-
 	switch (request) {
 	/* when I and D space are separate, these will need to be fixed. */
 	case PTRACE_PEEKTEXT:	/* read word at location addr. */
@@ -317,14 +280,10 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 		ret = ptrace_request(child, request, addr, data);
 		break;
 	}
-out_tsk:
-	put_task_struct(child);
-out:
-	unlock_kernel();
+
 	return ret;
 out_eio:
-	ret = -EIO;
-	goto out_tsk;
+	return -EIO;
 }
 
 asmlinkage void syscall_trace(void)

+ 14 - 3
arch/m68knommu/Kconfig

@@ -71,6 +71,11 @@ config M5206e
 	help
 	  Motorola ColdFire 5206e processor support.
 
+config M520x
+	bool "MCF520x"
+	help
+	   Freescale Coldfire 5207/5208 processor support.
+
 config M523x
 	bool "MCF523x"
 	help
@@ -120,7 +125,7 @@ config M527x
 
 config COLDFIRE
 	bool
-	depends on (M5206 || M5206e || M523x || M5249 || M527x || M5272 || M528x || M5307 || M5407)
+	depends on (M5206 || M5206e || M520x || M523x || M5249 || M527x || M5272 || M528x || M5307 || M5407)
 	default y
 
 choice
@@ -322,6 +327,12 @@ config ELITE
 	help
 	  Support for the Motorola M5206eLITE board.
 
+config M5208EVB
+	bool "Freescale M5208EVB board support"
+	depends on M520x
+	help
+	  Support for the Freescale Coldfire M5208EVB.
+
 config M5235EVB
 	bool "Freescale M5235EVB support"
 	depends on M523x
@@ -465,10 +476,10 @@ config ARNEWSH
 	default y
 	depends on (ARN5206 || ARN5307)
 
-config MOTOROLA
+config FREESCALE
 	bool
 	default y
-	depends on (M5206eC3 || M5235EVB || M5249C3 || M5271EVB || M5272C3 || M5275EVB || M5282EVB || M5307C3 || M5407C3)
+	depends on (M5206eC3 || M5208EVB || M5235EVB || M5249C3 || M5271EVB || M5272C3 || M5275EVB || M5282EVB || M5307C3 || M5407C3)
 
 config HW_FEITH
 	bool

+ 5 - 2
arch/m68knommu/Makefile

@@ -14,6 +14,7 @@ platform-$(CONFIG_M68VZ328)	:= 68VZ328
 platform-$(CONFIG_M68360)	:= 68360
 platform-$(CONFIG_M5206)	:= 5206
 platform-$(CONFIG_M5206e)	:= 5206e
+platform-$(CONFIG_M520x)	:= 520x
 platform-$(CONFIG_M523x)	:= 523x
 platform-$(CONFIG_M5249)	:= 5249
 platform-$(CONFIG_M527x)	:= 527x
@@ -29,7 +30,7 @@ board-$(CONFIG_UCDIMM)		:= ucdimm
 board-$(CONFIG_UCQUICC)		:= uCquicc
 board-$(CONFIG_DRAGEN2)		:= de2
 board-$(CONFIG_ARNEWSH)		:= ARNEWSH
-board-$(CONFIG_MOTOROLA)	:= MOTOROLA
+board-$(CONFIG_FREESCALE)	:= FREESCALE
 board-$(CONFIG_M5235EVB)	:= M5235EVB
 board-$(CONFIG_M5271EVB)	:= M5271EVB
 board-$(CONFIG_M5275EVB)	:= M5275EVB
@@ -41,6 +42,7 @@ board-$(CONFIG_SECUREEDGEMP3)	:= MP3
 board-$(CONFIG_CLEOPATRA)	:= CLEOPATRA
 board-$(CONFIG_senTec)		:= senTec
 board-$(CONFIG_SNEHA) 	        := SNEHA
+board-$(CONFIG_M5208EVB)	:= M5208EVB
 board-$(CONFIG_MOD5272)		:= MOD5272
 BOARD := $(board-y)
 
@@ -56,6 +58,7 @@ MODEL := $(model-y)
 #
 cpuclass-$(CONFIG_M5206)	:= 5307
 cpuclass-$(CONFIG_M5206e)	:= 5307
+cpuclass-$(CONFIG_M520x)	:= 5307
 cpuclass-$(CONFIG_M523x)	:= 5307
 cpuclass-$(CONFIG_M5249)	:= 5307
 cpuclass-$(CONFIG_M527x)	:= 5307
@@ -80,6 +83,7 @@ export PLATFORM BOARD MODEL CPUCLASS
 #
 cflags-$(CONFIG_M5206)		:= -m5200 -Wa,-S -Wa,-m5200
 cflags-$(CONFIG_M5206e)		:= -m5200 -Wa,-S -Wa,-m5200
+cflags-$(CONFIG_M520x)		:= -m5307 -Wa,-S -Wa,-m5307
 cflags-$(CONFIG_M523x)		:= -m5307 -Wa,-S -Wa,-m5307
 cflags-$(CONFIG_M5249)		:= -m5200 -Wa,-S -Wa,-m5200
 cflags-$(CONFIG_M527x)		:= -m5307 -Wa,-S -Wa,-m5307
@@ -95,7 +99,6 @@ cflags-$(CONFIG_M68360)		:= -m68332
 AFLAGS += $(cflags-y)
 
 CFLAGS += $(cflags-y)
-CFLAGS += -fno-builtin
 CFLAGS += -O1 -g
 CFLAGS += -D__linux__
 CFLAGS += -DUTS_SYSNAME=\"uClinux\"

+ 1 - 0
arch/m68knommu/kernel/asm-offsets.c

@@ -15,6 +15,7 @@
 #include <linux/hardirq.h>
 #include <asm/bootinfo.h>
 #include <asm/irq.h>
+#include <asm/irqnode.h>
 #include <asm/thread_info.h>
 
 #define DEFINE(sym, val) \

+ 1 - 38
arch/m68knommu/kernel/ptrace.c

@@ -101,43 +101,10 @@ void ptrace_disable(struct task_struct *child)
 	put_reg(child, PT_SR, tmp);
 }
 
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	struct task_struct *child;
 	int ret;
 
-	lock_kernel();
-	ret = -EPERM;
-	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED)
-			goto out;
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
-		ret = 0;
-		goto out;
-	}
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
-	if (!child)
-		goto out;
-
-	ret = -EPERM;
-	if (pid == 1)		/* you may not mess with init */
-		goto out_tsk;
-
-	if (request == PTRACE_ATTACH) {
-		ret = ptrace_attach(child);
-		goto out_tsk;
-	}
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		goto out_tsk;
-
 	switch (request) {
 		/* when I and D space are separate, these will need to be fixed. */
 		case PTRACE_PEEKTEXT: /* read word at location addr. */ 
@@ -357,10 +324,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 			ret = -EIO;
 			break;
 	}
-out_tsk:
-	put_task_struct(child);
-out:
-	unlock_kernel();
 	return ret;
 }
 

+ 4 - 1
arch/m68knommu/kernel/setup.c

@@ -107,6 +107,9 @@ void (*mach_power_off)( void ) = NULL;
 #if defined(CONFIG_M5206e)
 	#define	CPU "COLDFIRE(m5206e)"
 #endif
+#if defined(CONFIG_M520x)
+	#define CPU "COLDFIRE(m520x)"
+#endif
 #if defined(CONFIG_M523x)
 	#define CPU "COLDFIRE(m523x)"
 #endif
@@ -132,7 +135,7 @@ void (*mach_power_off)( void ) = NULL;
 	#define	CPU "COLDFIRE(m5407)"
 #endif
 #ifndef CPU
-	#define	CPU "UNKOWN"
+	#define	CPU "UNKNOWN"
 #endif
 
 /* (es) */

+ 9 - 0
arch/m68knommu/kernel/vmlinux.lds.S

@@ -124,6 +124,14 @@
 #define	RAM_LENGTH	0x3e0000
 #endif
 
+/*
+ *	The Freescale 5208EVB board has 32MB of RAM.
+ */
+#if defined(CONFIG_M5208EVB)
+#define	RAM_START	0x40020000
+#define	RAM_LENGTH	0x01e00000
+#endif
+
 /*
  *	The senTec COBRA5272 board has nearly the same memory layout as 
  *	the M5272C3. We assume 16MiB ram.
@@ -275,6 +283,7 @@ SECTIONS {
 		*(__ksymtab_strings)
 
 		/* Built-in module parameters */
+		. = ALIGN(4) ;
 		__start___param = .;
 		*(__param)
 		__stop___param = .;

+ 19 - 0
arch/m68knommu/platform/520x/Makefile

@@ -0,0 +1,19 @@
+#
+# Makefile for the M5208 specific file.
+#
+
+#
+# If you want to play with the HW breakpoints then you will
+# need to add define this,  which will give you a stack backtrace
+# on the console port whenever a DBG interrupt occurs.  You have to
+# set up you HW breakpoints to trigger a DBG interrupt:
+#
+# EXTRA_CFLAGS += -DTRAP_DBG_INTERRUPT
+# EXTRA_AFLAGS += -DTRAP_DBG_INTERRUPT
+#
+
+ifdef CONFIG_FULLDEBUG
+AFLAGS += -DDEBUGGER_COMPATIBLE_CACHE=1
+endif
+
+obj-y := config.o

+ 65 - 0
arch/m68knommu/platform/520x/config.c

@@ -0,0 +1,65 @@
+/***************************************************************************/
+
+/*
+ *  linux/arch/m68knommu/platform/520x/config.c
+ *
+ *  Copyright (C) 2005,      Freescale (www.freescale.com)
+ *  Copyright (C) 2005,      Intec Automation (mike@steroidmicros.com)
+ *  Copyright (C) 1999-2003, Greg Ungerer (gerg@snapgear.com)
+ *  Copyright (C) 2001-2003, SnapGear Inc. (www.snapgear.com)
+ */
+
+/***************************************************************************/
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <asm/machdep.h>
+#include <asm/dma.h>
+
+/***************************************************************************/
+
+/*
+ *	DMA channel base address table.
+ */
+unsigned int dma_base_addr[MAX_M68K_DMA_CHANNELS];
+unsigned int dma_device_address[MAX_M68K_DMA_CHANNELS];
+
+/***************************************************************************/
+
+void coldfire_pit_tick(void);
+void coldfire_pit_init(irqreturn_t (*handler)(int, void *, struct pt_regs *));
+unsigned long coldfire_pit_offset(void);
+void coldfire_trap_init(void);
+void coldfire_reset(void);
+
+/***************************************************************************/
+
+/*
+ *  Program the vector to be an auto-vectored.
+ */
+
+void mcf_autovector(unsigned int vec)
+{
+    /* Everything is auto-vectored on the 520x devices */
+}
+
+/***************************************************************************/
+
+void config_BSP(char *commandp, int size)
+{
+#ifdef CONFIG_BOOTPARAM
+    strncpy(commandp, CONFIG_BOOTPARAM_STRING, size);
+    commandp[size-1] = 0;
+#else
+    memset(commandp, 0, size);
+#endif
+
+    mach_sched_init = coldfire_pit_init;
+    mach_tick = coldfire_pit_tick;
+    mach_gettimeoffset = coldfire_pit_offset;
+    mach_trap_init = coldfire_trap_init;
+    mach_reset = coldfire_reset;
+}
+
+/***************************************************************************/

+ 1 - 0
arch/m68knommu/platform/5307/Makefile

@@ -19,6 +19,7 @@ endif
 obj-$(CONFIG_COLDFIRE)	+= entry.o vectors.o ints.o
 obj-$(CONFIG_M5206)	+= timers.o
 obj-$(CONFIG_M5206e)	+= timers.o
+obj-$(CONFIG_M520x)	+= pit.o
 obj-$(CONFIG_M523x)	+= pit.o
 obj-$(CONFIG_M5249)	+= timers.o
 obj-$(CONFIG_M527x)     += pit.o

+ 3 - 0
arch/m68knommu/platform/5307/head.S

@@ -113,6 +113,9 @@
 #define MEM_BASE	0x02000000
 #define VBR_BASE	0x20000000	/* vectors in SRAM */
 #endif
+#if defined(CONFIG_M5208EVB)
+#define MEM_BASE	0x40000000
+#endif
 
 #ifndef MEM_BASE
 #define	MEM_BASE	0x00000000	/* memory base at address 0 */

+ 1 - 0
arch/m68knommu/platform/5307/ints.c

@@ -26,6 +26,7 @@
 
 #include <asm/system.h>
 #include <asm/irq.h>
+#include <asm/irqnode.h>
 #include <asm/traps.h>
 #include <asm/page.h>
 #include <asm/machdep.h>

+ 6 - 6
arch/m68knommu/platform/5307/pit.c

@@ -3,7 +3,7 @@
 /*
  *	pit.c -- Motorola ColdFire PIT timer. Currently this type of
  *	         hardware timer only exists in the Motorola ColdFire
- *		 5270/5271 and 5282 CPUs.
+ *		 5270/5271, 5282 and other CPUs.
  *
  *	Copyright (C) 1999-2004, Greg Ungerer (gerg@snapgear.com)
  *	Copyright (C) 2001-2004, SnapGear Inc. (www.snapgear.com)
@@ -47,10 +47,10 @@ void coldfire_pit_init(irqreturn_t (*handler)(int, void *, struct pt_regs *))
 
 	icrp = (volatile unsigned char *) (MCF_IPSBAR + MCFICM_INTC0 +
 		MCFINTC_ICR0 + MCFINT_PIT1);
-	*icrp = 0x2b; /* PIT1 with level 5, priority 3 */
+	*icrp = ICR_INTRCONF;
 
-	imrp = (volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH);
-	*imrp &= ~(1 << (MCFINT_PIT1 - 32));
+	imrp = (volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFPIT_IMR);
+	*imrp &= ~MCFPIT_IMR_IBIT;
 
 	/* Set up PIT timer 1 as poll clock */
 	tp = (volatile struct mcfpit *) (MCF_IPSBAR + MCFPIT_BASE1);
@@ -70,7 +70,7 @@ unsigned long coldfire_pit_offset(void)
 	unsigned long pmr, pcntr, offset;
 
 	tp = (volatile struct mcfpit *) (MCF_IPSBAR + MCFPIT_BASE1);
-	ipr = (volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IPRH);
+	ipr = (volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFPIT_IMR);
 
 	pmr = *(&tp->pmr);
 	pcntr = *(&tp->pcntr);
@@ -80,7 +80,7 @@ unsigned long coldfire_pit_offset(void)
 	 * timer interupt is pending, then add on a ticks worth of time.
 	 */
 	offset = ((pmr - pcntr) * (1000000 / HZ)) / pmr;
-	if ((offset < (1000000 / HZ / 2)) && (*ipr & (1 << (MCFINT_PIT1 - 32))))
+	if ((offset < (1000000 / HZ / 2)) && (*ipr & MCFPIT_IMR_IBIT))
 		offset += 1000000 / HZ;
 	return offset;	
 }

+ 1 - 1
arch/mips/Kconfig

@@ -958,7 +958,7 @@ config SOC_PNX8550
 	bool
 	select DMA_NONCOHERENT
 	select HW_HAS_PCI
-	select SYS_HAS_CPU_R4X00
+	select SYS_HAS_CPU_MIPS32_R1
 	select SYS_SUPPORTS_32BIT_KERNEL
 
 config SWAP_IO_SPACE

+ 4 - 0
arch/mips/boot/.gitignore

@@ -0,0 +1,4 @@
+mkboot
+elf2ecoff
+zImage
+zImage.tmp

+ 6 - 5
arch/mips/configs/pnx8550-jbs_defconfig

@@ -129,7 +129,7 @@ CONFIG_MIPS_L1_CACHE_SHIFT=5
 #
 # CPU selection
 #
-# CONFIG_CPU_MIPS32_R1 is not set
+CONFIG_CPU_MIPS32_R1=y
 # CONFIG_CPU_MIPS32_R2 is not set
 # CONFIG_CPU_MIPS64_R1 is not set
 # CONFIG_CPU_MIPS64_R2 is not set
@@ -137,7 +137,7 @@ CONFIG_MIPS_L1_CACHE_SHIFT=5
 # CONFIG_CPU_TX39XX is not set
 # CONFIG_CPU_VR41XX is not set
 # CONFIG_CPU_R4300 is not set
-CONFIG_CPU_R4X00=y
+# CONFIG_CPU_R4X00 is not set
 # CONFIG_CPU_TX49XX is not set
 # CONFIG_CPU_R5000 is not set
 # CONFIG_CPU_R5432 is not set
@@ -148,10 +148,11 @@ CONFIG_CPU_R4X00=y
 # CONFIG_CPU_RM7000 is not set
 # CONFIG_CPU_RM9000 is not set
 # CONFIG_CPU_SB1 is not set
-CONFIG_SYS_HAS_CPU_R4X00=y
+CONFIG_SYS_HAS_CPU_MIPS32_R1=y
+CONFIG_CPU_MIPS32=y
+CONFIG_CPU_MIPSR1=y
 CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y
 CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y
-CONFIG_CPU_SUPPORTS_64BIT_KERNEL=y
 
 #
 # Kernel type
@@ -162,11 +163,11 @@ CONFIG_PAGE_SIZE_4KB=y
 # CONFIG_PAGE_SIZE_8KB is not set
 # CONFIG_PAGE_SIZE_16KB is not set
 # CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_CPU_HAS_PREFETCH=y
 # CONFIG_MIPS_MT is not set
 # CONFIG_64BIT_PHYS_ADDR is not set
 # CONFIG_CPU_ADVANCED is not set
 CONFIG_CPU_HAS_LLSC=y
-CONFIG_CPU_HAS_LLDSCD=y
 CONFIG_CPU_HAS_SYNC=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_IRQ_PROBE=y

+ 6 - 4
arch/mips/configs/pnx8550-v2pci_defconfig

@@ -128,7 +128,7 @@ CONFIG_MIPS_L1_CACHE_SHIFT=5
 #
 # CPU selection
 #
-# CONFIG_CPU_MIPS32_R1 is not set
+CONFIG_CPU_MIPS32_R1=y
 # CONFIG_CPU_MIPS32_R2 is not set
 # CONFIG_CPU_MIPS64_R1 is not set
 # CONFIG_CPU_MIPS64_R2 is not set
@@ -136,7 +136,7 @@ CONFIG_MIPS_L1_CACHE_SHIFT=5
 # CONFIG_CPU_TX39XX is not set
 # CONFIG_CPU_VR41XX is not set
 # CONFIG_CPU_R4300 is not set
-CONFIG_CPU_R4X00=y
+# CONFIG_CPU_R4X00 is not set
 # CONFIG_CPU_TX49XX is not set
 # CONFIG_CPU_R5000 is not set
 # CONFIG_CPU_R5432 is not set
@@ -147,10 +147,11 @@ CONFIG_CPU_R4X00=y
 # CONFIG_CPU_RM7000 is not set
 # CONFIG_CPU_RM9000 is not set
 # CONFIG_CPU_SB1 is not set
-CONFIG_SYS_HAS_CPU_R4X00=y
+CONFIG_SYS_HAS_CPU_MIPS32_R1=y
+CONFIG_CPU_MIPS32=y
+CONFIG_CPU_MIPSR1=y
 CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y
 CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y
-CONFIG_CPU_SUPPORTS_64BIT_KERNEL=y
 
 #
 # Kernel type
@@ -161,6 +162,7 @@ CONFIG_PAGE_SIZE_4KB=y
 # CONFIG_PAGE_SIZE_8KB is not set
 # CONFIG_PAGE_SIZE_16KB is not set
 # CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_CPU_HAS_PREFETCH=y
 # CONFIG_MIPS_MT is not set
 # CONFIG_64BIT_PHYS_ADDR is not set
 CONFIG_CPU_ADVANCED=y

+ 6 - 0
arch/mips/ddb5xxx/common/rtc_ds1386.c

@@ -41,7 +41,9 @@ rtc_ds1386_get_time(void)
 	u8 byte;
 	u8 temp;
 	unsigned int year, month, day, hour, minute, second;
+	unsigned long flags;
 
+	spin_lock_irqsave(&rtc_lock, flags);
 	/* let us freeze external registers */
 	byte = READ_RTC(0xB);
 	byte &= 0x3f;
@@ -60,6 +62,7 @@ rtc_ds1386_get_time(void)
 	/* enable time transfer */
 	byte |= 0x80;
 	WRITE_RTC(0xB, byte);
+	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	/* calc hour */
 	if (temp & 0x40) {
@@ -81,7 +84,9 @@ rtc_ds1386_set_time(unsigned long t)
 	u8 byte;
 	u8 temp;
 	u8 year, month, day, hour, minute, second;
+	unsigned long flags;
 
+	spin_lock_irqsave(&rtc_lock, flags);
 	/* let us freeze external registers */
 	byte = READ_RTC(0xB);
 	byte &= 0x3f;
@@ -133,6 +138,7 @@ rtc_ds1386_set_time(unsigned long t)
 	if (second != READ_RTC(0x1)) {
 		WRITE_RTC(0x1, second);
 	}
+	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	return 0;
 }
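
Note: this and the next two RTC patches wrap every burst of CMOS/RTC register accesses in rtc_lock with interrupts disabled, so the timer tick, or a reader on another CPU, cannot interleave its own read/write sequence halfway through an update. The condensed pattern, with an illustrative helper name:

    /* illustrative helper showing the locking pattern added above */
    static unsigned int rtc_read_seconds(void)
    {
            unsigned long flags;
            unsigned int sec;

            spin_lock_irqsave(&rtc_lock, flags);
            sec = CMOS_READ(RTC_SECONDS);
            spin_unlock_irqrestore(&rtc_lock, flags);

            return sec;
    }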

+ 22 - 2
arch/mips/dec/time.c

@@ -37,10 +37,25 @@
 #include <asm/dec/machtype.h>
 
 
+/*
+ * Returns true if a clock update is in progress
+ */
+static inline unsigned char dec_rtc_is_updating(void)
+{
+	unsigned char uip;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rtc_lock, flags);
+	uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP);
+	spin_unlock_irqrestore(&rtc_lock, flags);
+	return uip;
+}
+
 static unsigned long dec_rtc_get_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec, real_year;
 	int i;
+	unsigned long flags;
 
 	/* The Linux interpretation of the DS1287 clock register contents:
 	 * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
@@ -49,11 +64,12 @@ static unsigned long dec_rtc_get_time(void)
 	 */
 	/* read RTC exactly on falling edge of update flag */
 	for (i = 0; i < 1000000; i++)	/* may take up to 1 second... */
-		if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
+		if (dec_rtc_is_updating())
 			break;
 	for (i = 0; i < 1000000; i++)	/* must try at least 2.228 ms */
-		if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
+		if (!dec_rtc_is_updating())
 			break;
+	spin_lock_irqsave(&rtc_lock, flags);
 	/* Isn't this overkill?  UIP above should guarantee consistency */
 	do {
 		sec = CMOS_READ(RTC_SECONDS);
@@ -77,6 +93,7 @@ static unsigned long dec_rtc_get_time(void)
 	 * of unused BBU RAM locations.
 	 */
 	real_year = CMOS_READ(RTC_DEC_YEAR);
+	spin_unlock_irqrestore(&rtc_lock, flags);
 	year += real_year - 72 + 2000;
 
 	return mktime(year, mon, day, hour, min, sec);
@@ -95,6 +112,8 @@ static int dec_rtc_set_mmss(unsigned long nowtime)
 	int real_seconds, real_minutes, cmos_minutes;
 	unsigned char save_control, save_freq_select;
 
+	/* irq are locally disabled here */
+	spin_lock(&rtc_lock);
 	/* tell the clock it's being set */
 	save_control = CMOS_READ(RTC_CONTROL);
 	CMOS_WRITE((save_control | RTC_SET), RTC_CONTROL);
@@ -141,6 +160,7 @@ static int dec_rtc_set_mmss(unsigned long nowtime)
 	 */
 	CMOS_WRITE(save_control, RTC_CONTROL);
 	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+	spin_unlock(&rtc_lock);
 
 	return retval;
 }

+ 6 - 0
arch/mips/jmr3927/common/rtc_ds1742.c

@@ -57,7 +57,9 @@ rtc_ds1742_get_time(void)
 {
 	unsigned int year, month, day, hour, minute, second;
 	unsigned int century;
+	unsigned long flags;
 
+	spin_lock_irqsave(&rtc_lock, flags);
 	CMOS_WRITE(RTC_READ, RTC_CONTROL);
 	second = BCD2BIN(CMOS_READ(RTC_SECONDS) & RTC_SECONDS_MASK);
 	minute = BCD2BIN(CMOS_READ(RTC_MINUTES));
@@ -67,6 +69,7 @@ rtc_ds1742_get_time(void)
 	year = BCD2BIN(CMOS_READ(RTC_YEAR));
 	century = BCD2BIN(CMOS_READ(RTC_CENTURY) & RTC_CENTURY_MASK);
 	CMOS_WRITE(0, RTC_CONTROL);
+	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	year += century * 100;
 
@@ -81,7 +84,9 @@ rtc_ds1742_set_time(unsigned long t)
 	u8 year, month, day, hour, minute, second;
 	u8 cmos_year, cmos_month, cmos_day, cmos_hour, cmos_minute, cmos_second;
 	int cmos_century;
+	unsigned long flags;
 
+	spin_lock_irqsave(&rtc_lock, flags);
 	CMOS_WRITE(RTC_READ, RTC_CONTROL);
 	cmos_second = (u8)(CMOS_READ(RTC_SECONDS) & RTC_SECONDS_MASK);
 	cmos_minute = (u8)CMOS_READ(RTC_MINUTES);
@@ -139,6 +144,7 @@ rtc_ds1742_set_time(unsigned long t)
 
 	/* RTC_CENTURY and RTC_CONTROL share same address... */
 	CMOS_WRITE(cmos_century, RTC_CONTROL);
+	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	return 0;
 }

+ 1 - 2
arch/mips/kernel/irixsig.c

@@ -502,8 +502,7 @@ asmlinkage int irix_sigpoll_sys(unsigned long __user *set,
 	while(1) {
 		long tmp = 0;
 
-		current->state = TASK_INTERRUPTIBLE;
-		expire = schedule_timeout(expire);
+		expire = schedule_timeout_interruptible(expire);
 
 		for (i=0; i<=4; i++)
 			tmp |= (current->pending.signal.sig[i] & kset.sig[i]);
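
schedule_timeout_interruptible() folds the removed two-step idiom (set the task state, then call schedule_timeout()) into one call. A rough sketch of the equivalence the conversion relies on; this is an illustration, not the kernel's exact implementation:

/*
 * Roughly what the helper does for the caller above: mark the task
 * interruptible and sleep for at most 'timeout' jiffies, returning the
 * remaining time.  Illustrative only.
 */
#include <linux/sched.h>

static signed long example_sleep_interruptible(signed long timeout)
{
	__set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}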

+ 5 - 50
arch/mips/kernel/ptrace.c

@@ -174,51 +174,10 @@ int ptrace_setfpregs (struct task_struct *child, __u32 __user *data)
 	return 0;
 }
 
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	struct task_struct *child;
 	int ret;
 
-#if 0
-	printk("ptrace(r=%d,pid=%d,addr=%08lx,data=%08lx)\n",
-	       (int) request, (int) pid, (unsigned long) addr,
-	       (unsigned long) data);
-#endif
-	lock_kernel();
-	ret = -EPERM;
-	if (request == PTRACE_TRACEME) {
-		/* are we already being traced? */
-		if (current->ptrace & PT_PTRACED)
-			goto out;
-		if ((ret = security_ptrace(current->parent, current)))
-			goto out;
-		/* set the ptrace bit in the process flags. */
-		current->ptrace |= PT_PTRACED;
-		ret = 0;
-		goto out;
-	}
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
-	if (child)
-		get_task_struct(child);
-	read_unlock(&tasklist_lock);
-	if (!child)
-		goto out;
-
-	ret = -EPERM;
-	if (pid == 1)		/* you may not mess with init */
-		goto out_tsk;
-
-	if (request == PTRACE_ATTACH) {
-		ret = ptrace_attach(child);
-		goto out_tsk;
-	}
-
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		goto out_tsk;
-
 	switch (request) {
 	/* when I and D space are separate, these will need to be fixed. */
 	case PTRACE_PEEKTEXT: /* read word at location addr. */
@@ -319,7 +278,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 			if (!cpu_has_dsp) {
 				tmp = 0;
 				ret = -EIO;
-				goto out_tsk;
+				goto out;
 			}
 			if (child->thread.dsp.used_dsp) {
 				dregs = __get_dsp_regs(child);
@@ -333,14 +292,14 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 			if (!cpu_has_dsp) {
 				tmp = 0;
 				ret = -EIO;
-				goto out_tsk;
+				goto out;
 			}
 			tmp = child->thread.dsp.dspcontrol;
 			break;
 		default:
 			tmp = 0;
 			ret = -EIO;
-			goto out_tsk;
+			goto out;
 		}
 		ret = put_user(tmp, (unsigned long __user *) data);
 		break;
@@ -495,11 +454,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 		ret = ptrace_request(child, request, addr, data);
 		break;
 	}
-
-out_tsk:
-	put_task_struct(child);
-out:
-	unlock_kernel();
+ out:
 	return ret;
 }
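
After this conversion, the generic sys_ptrace() performs the task lookup, the PTRACE_TRACEME/PTRACE_ATTACH handling and the attach check that the deleted lines used to do, and the architecture only supplies arch_ptrace(). A minimal outline of the resulting shape, with the MIPS-specific request handling elided (see the real code above):

/*
 * Hedged outline of an arch_ptrace() hook after the conversion: the
 * child has already been found, referenced and attach-checked by the
 * generic ptrace code, so the hook only dispatches on the request and
 * falls back to ptrace_request() for everything common.
 */
#include <linux/ptrace.h>
#include <linux/sched.h>

long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
	int ret;

	switch (request) {
	/* architecture-specific PEEK/POKE and register requests go here */
	default:
		ret = ptrace_request(child, request, addr, data);
		break;
	}

	return ret;
}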
 

+ 93 - 104
arch/mips/kernel/rtlx.c

@@ -20,42 +20,42 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/init.h>
-#include <asm/uaccess.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/vmalloc.h>
-#include <linux/elf.h>
-#include <linux/seq_file.h>
-#include <linux/syscalls.h>
-#include <linux/moduleloader.h>
-#include <linux/interrupt.h>
 #include <linux/poll.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <asm/mipsmtregs.h>
-#include <asm/cacheflush.h>
-#include <asm/atomic.h>
+#include <asm/bitops.h>
 #include <asm/cpu.h>
 #include <asm/processor.h>
-#include <asm/system.h>
 #include <asm/rtlx.h>
+#include <asm/uaccess.h>
 
-#define RTLX_MAJOR 64
 #define RTLX_TARG_VPE 1
 
-struct rtlx_info *rtlx;
+static struct rtlx_info *rtlx;
 static int major;
 static char module_name[] = "rtlx";
-static inline int spacefree(int read, int write, int size);
+static struct irqaction irq;
+static int irq_num;
+
+static inline int spacefree(int read, int write, int size)
+{
+	if (read == write) {
+		/*
+		 * never fill the buffer completely, so indexes are always
+		 * equal if empty and only empty, or !equal if data available
+		 */
+		return size - 1;
+	}
+
+	return ((read + size - write) % size) - 1;
+}
 
 static struct chan_waitqueues {
 	wait_queue_head_t rt_queue;
 	wait_queue_head_t lx_queue;
 } channel_wqs[RTLX_CHANNELS];
 
-static struct irqaction irq;
-static int irq_num;
-
 extern void *vpe_get_shared(int index);
 
 static void rtlx_dispatch(struct pt_regs *regs)
@@ -63,9 +63,8 @@ static void rtlx_dispatch(struct pt_regs *regs)
 	do_IRQ(MIPSCPU_INT_BASE + MIPS_CPU_RTLX_IRQ, regs);
 }
 
-irqreturn_t rtlx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t rtlx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
-	irqreturn_t r = IRQ_HANDLED;
 	int i;
 
 	for (i = 0; i < RTLX_CHANNELS; i++) {
@@ -75,30 +74,7 @@ irqreturn_t rtlx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 			wake_up_interruptible(&channel_wqs[i].lx_queue);
 	}
 
-	return r;
-}
-
-void dump_rtlx(void)
-{
-	int i;
-
-	printk("id 0x%lx state %d\n", rtlx->id, rtlx->state);
-
-	for (i = 0; i < RTLX_CHANNELS; i++) {
-		struct rtlx_channel *chan = &rtlx->channel[i];
-
-		printk(" rt_state %d lx_state %d buffer_size %d\n",
-		       chan->rt_state, chan->lx_state, chan->buffer_size);
-
-		printk(" rt_read %d rt_write %d\n",
-		       chan->rt_read, chan->rt_write);
-
-		printk(" lx_read %d lx_write %d\n",
-		       chan->lx_read, chan->lx_write);
-
-		printk(" rt_buffer <%s>\n", chan->rt_buffer);
-		printk(" lx_buffer <%s>\n", chan->lx_buffer);
-	}
+	return IRQ_HANDLED;
 }
 
 /* call when we have the address of the shared structure from the SP side. */
@@ -108,7 +84,7 @@ static int rtlx_init(struct rtlx_info *rtlxi)
 
 	if (rtlxi->id != RTLX_ID) {
 		printk(KERN_WARNING "no valid RTLX id at 0x%p\n", rtlxi);
-		return (-ENOEXEC);
+		return -ENOEXEC;
 	}
 
 	/* initialise the wait queues */
@@ -120,9 +96,8 @@ static int rtlx_init(struct rtlx_info *rtlxi)
 	/* set up for interrupt handling */
 	memset(&irq, 0, sizeof(struct irqaction));
 
-	if (cpu_has_vint) {
+	if (cpu_has_vint)
 		set_vi_handler(MIPS_CPU_RTLX_IRQ, rtlx_dispatch);
-	}
 
 	irq_num = MIPSCPU_INT_BASE + MIPS_CPU_RTLX_IRQ;
 	irq.handler = rtlx_interrupt;
@@ -132,7 +107,8 @@ static int rtlx_init(struct rtlx_info *rtlxi)
 	setup_irq(irq_num, &irq);
 
 	rtlx = rtlxi;
-	return (0);
+
+	return 0;
 }
 
 /* only allow one open process at a time to open each channel */
@@ -147,36 +123,36 @@ static int rtlx_open(struct inode *inode, struct file *filp)
 	if (rtlx == NULL) {
 		struct rtlx_info **p;
 		if( (p = vpe_get_shared(RTLX_TARG_VPE)) == NULL) {
-			printk(" vpe_get_shared is NULL. Has an SP program been loaded?\n");
-			return (-EFAULT);
+			printk(KERN_ERR "vpe_get_shared is NULL. "
+			       "Has an SP program been loaded?\n");
+			return -EFAULT;
 		}
 
 		if (*p == NULL) {
-			printk(" vpe_shared %p %p\n", p, *p);
-			return (-EFAULT);
+			printk(KERN_ERR "vpe_shared %p %p\n", p, *p);
+			return -EFAULT;
 		}
 
 		if ((ret = rtlx_init(*p)) < 0)
-			return (ret);
+			return ret;
 	}
 
 	chan = &rtlx->channel[minor];
 
-	/* already open? */
-	if (chan->lx_state == RTLX_STATE_OPENED)
-		return (-EBUSY);
+	if (test_and_set_bit(RTLX_STATE_OPENED, &chan->lx_state))
+		return -EBUSY;
 
-	chan->lx_state = RTLX_STATE_OPENED;
-	return (0);
+	return 0;
 }
 
 static int rtlx_release(struct inode *inode, struct file *filp)
 {
-	int minor;
+	int minor = MINOR(inode->i_rdev);
 
-	minor = MINOR(inode->i_rdev);
-	rtlx->channel[minor].lx_state = RTLX_STATE_UNUSED;
-	return (0);
+	clear_bit(RTLX_STATE_OPENED, &rtlx->channel[minor].lx_state);
+	smp_mb__after_clear_bit();
+
+	return 0;
 }
 
 static unsigned int rtlx_poll(struct file *file, poll_table * wait)
@@ -199,12 +175,13 @@ static unsigned int rtlx_poll(struct file *file, poll_table * wait)
 	if (spacefree(chan->rt_read, chan->rt_write, chan->buffer_size))
 		mask |= POLLOUT | POLLWRNORM;
 
-	return (mask);
+	return mask;
 }
 
 static ssize_t rtlx_read(struct file *file, char __user * buffer, size_t count,
 			 loff_t * ppos)
 {
+	unsigned long failed;
 	size_t fl = 0L;
 	int minor;
 	struct rtlx_channel *lx;
@@ -216,7 +193,7 @@ static ssize_t rtlx_read(struct file *file, char __user * buffer, size_t count,
 	/* data available? */
 	if (lx->lx_write == lx->lx_read) {
 		if (file->f_flags & O_NONBLOCK)
-			return (0);	// -EAGAIN makes cat whinge
+			return 0;	/* -EAGAIN makes cat whinge */
 
 		/* go to sleep */
 		add_wait_queue(&channel_wqs[minor].lx_queue, &wait);
@@ -232,39 +209,39 @@ static ssize_t rtlx_read(struct file *file, char __user * buffer, size_t count,
 	}
 
 	/* find out how much in total */
-	count = min( count,
-		     (size_t)(lx->lx_write + lx->buffer_size - lx->lx_read) % lx->buffer_size);
+	count = min(count,
+		    (size_t)(lx->lx_write + lx->buffer_size - lx->lx_read) % lx->buffer_size);
 
 	/* then how much from the read pointer onwards */
-	fl = min( count, (size_t)lx->buffer_size - lx->lx_read);
+	fl = min(count, (size_t)lx->buffer_size - lx->lx_read);
 
-	copy_to_user (buffer, &lx->lx_buffer[lx->lx_read], fl);
+	failed = copy_to_user (buffer, &lx->lx_buffer[lx->lx_read], fl);
+	if (failed) {
+		count = fl - failed;
+		goto out;
+	}
 
 	/* and if there is anything left at the beginning of the buffer */
-	if ( count - fl )
-		copy_to_user (buffer + fl, lx->lx_buffer, count - fl);
+	if (count - fl) {
+		failed = copy_to_user (buffer + fl, lx->lx_buffer, count - fl);
+		if (failed) {
+			count -= failed;
+			goto out;
+		}
+	}
 
+out:
 	/* update the index */
 	lx->lx_read += count;
 	lx->lx_read %= lx->buffer_size;
 
-	return (count);
-}
-
-static inline int spacefree(int read, int write, int size)
-{
-	if (read == write) {
-		/* never fill the buffer completely, so indexes are always equal if empty
-		   and only empty, or !equal if data available */
-		return (size - 1);
-	}
-
-	return ((read + size - write) % size) - 1;
+	return count;
 }
 
 static ssize_t rtlx_write(struct file *file, const char __user * buffer,
 			  size_t count, loff_t * ppos)
 {
+	unsigned long failed;
 	int minor;
 	struct rtlx_channel *rt;
 	size_t fl;
@@ -277,7 +254,7 @@ static ssize_t rtlx_write(struct file *file, const char __user * buffer,
 	if (!spacefree(rt->rt_read, rt->rt_write, rt->buffer_size)) {
 
 		if (file->f_flags & O_NONBLOCK)
-			return (-EAGAIN);
+			return -EAGAIN;
 
 		add_wait_queue(&channel_wqs[minor].rt_queue, &wait);
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -290,52 +267,64 @@ static ssize_t rtlx_write(struct file *file, const char __user * buffer,
 	}
 
 	/* total number of bytes to copy */
-	count = min( count, (size_t)spacefree(rt->rt_read, rt->rt_write, rt->buffer_size) );
+	count = min(count, (size_t)spacefree(rt->rt_read, rt->rt_write, rt->buffer_size) );
 
 	/* first bit from write pointer to the end of the buffer, or count */
 	fl = min(count, (size_t) rt->buffer_size - rt->rt_write);
 
-	copy_from_user(&rt->rt_buffer[rt->rt_write], buffer, fl);
+	failed = copy_from_user(&rt->rt_buffer[rt->rt_write], buffer, fl);
+	if (failed) {
+		count = fl - failed;
+		goto out;
+	}
 
 	/* if there's any left copy to the beginning of the buffer */
-	if( count - fl )
-		copy_from_user(rt->rt_buffer, buffer + fl, count - fl);
+	if (count - fl) {
+		failed = copy_from_user(rt->rt_buffer, buffer + fl, count - fl);
+		if (failed) {
+			count -= failed;
+			goto out;
+		}
+	}
 
+out:
 	rt->rt_write += count;
 	rt->rt_write %= rt->buffer_size;
 
-	return(count);
+	return count;
 }
 
 static struct file_operations rtlx_fops = {
-	.owner = THIS_MODULE,
-	.open = rtlx_open,
-	.release = rtlx_release,
-	.write = rtlx_write,
-	.read = rtlx_read,
-	.poll = rtlx_poll
+	.owner		= THIS_MODULE,
+	.open		= rtlx_open,
+	.release	= rtlx_release,
+	.write		= rtlx_write,
+	.read		= rtlx_read,
+	.poll		= rtlx_poll
 };
 
-static int rtlx_module_init(void)
+static char register_chrdev_failed[] __initdata =
+	KERN_ERR "rtlx_module_init: unable to register device\n";
+
+static int __init rtlx_module_init(void)
 {
-	if ((major = register_chrdev(RTLX_MAJOR, module_name, &rtlx_fops)) < 0) {
-		printk("rtlx_module_init: unable to register device\n");
-		return (-EBUSY);
+	major = register_chrdev(0, module_name, &rtlx_fops);
+	if (major < 0) {
+		printk(register_chrdev_failed);
+		return major;
 	}
 
-	if (major == 0)
-		major = RTLX_MAJOR;
-
-	return (0);
+	return 0;
 }
 
-static void rtlx_module_exit(void)
+static void __exit rtlx_module_exit(void)
 {
 	unregister_chrdev(major, module_name);
 }
 
 module_init(rtlx_module_init);
 module_exit(rtlx_module_exit);
+
 MODULE_DESCRIPTION("MIPS RTLX");
-MODULE_AUTHOR("Elizabeth Clarke, MIPS Technologies, Inc");
+MODULE_AUTHOR("Elizabeth Clarke, MIPS Technologies, Inc.");
 MODULE_LICENSE("GPL");
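
The spacefree() helper moved to the top of the file implements the classic "one slot always free" ring-buffer rule, so read == write unambiguously means empty, and the new partial-failure handling after copy_to_user()/copy_from_user() advances the indexes only by the bytes actually transferred. A small stand-alone illustration of the index arithmetic (plain user-space C, not part of the driver):

/*
 * Stand-alone illustration of the ring-buffer maths used by spacefree()
 * above: one slot is always left unused so the full and empty states
 * stay distinguishable.
 */
#include <assert.h>

static int spacefree(int read, int write, int size)
{
	if (read == write)
		return size - 1;		/* empty: all but the reserved slot */

	return ((read + size - write) % size) - 1;
}

int main(void)
{
	assert(spacefree(0, 0, 8) == 7);	/* empty ring */
	assert(spacefree(2, 5, 8) == 4);	/* 3 bytes queued, 4 writable */
	assert(spacefree(5, 4, 8) == 0);	/* full: writer must wait */
	return 0;
}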

+ 0 - 3
arch/mips/kernel/signal.c

@@ -384,9 +384,6 @@ give_sigsegv:
 	return 0;
 }
 
-extern void setup_rt_frame_n32(struct k_sigaction * ka,
-	struct pt_regs *regs, int signr, sigset_t *set, siginfo_t *info);
-
 static inline int handle_signal(unsigned long sig, siginfo_t *info,
 	struct k_sigaction *ka, sigset_t *oldset, struct pt_regs *regs)
 {

+ 8 - 5
arch/mips/kernel/signal32.c

@@ -647,8 +647,8 @@ static inline void *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
 	return (void *)((sp - frame_size) & ALMASK);
 }
 
-void setup_frame_32(struct k_sigaction * ka, struct pt_regs *regs,
-			       int signr, sigset_t *set)
+int setup_frame_32(struct k_sigaction * ka, struct pt_regs *regs,
+	int signr, sigset_t *set)
 {
 	struct sigframe *frame;
 	int err = 0;
@@ -694,13 +694,15 @@ void setup_frame_32(struct k_sigaction * ka, struct pt_regs *regs,
 	       current->comm, current->pid,
 	       frame, regs->cp0_epc, frame->sf_code);
 #endif
-        return;
+	return 1;
 
 give_sigsegv:
 	force_sigsegv(signr, current);
+	return 0;
 }
 
-void setup_rt_frame_32(struct k_sigaction * ka, struct pt_regs *regs, int signr,	sigset_t *set, siginfo_t *info)
+int setup_rt_frame_32(struct k_sigaction * ka, struct pt_regs *regs,
+	int signr, sigset_t *set, siginfo_t *info)
 {
 	struct rt_sigframe32 *frame;
 	int err = 0;
@@ -763,10 +765,11 @@ void setup_rt_frame_32(struct k_sigaction * ka, struct pt_regs *regs, int signr,
 	       current->comm, current->pid,
 	       frame, regs->cp0_epc, frame->rs_code);
 #endif
-	return;
+	return 1;
 
 give_sigsegv:
 	force_sigsegv(signr, current);
+	return 0;
 }
 
 static inline int handle_signal(unsigned long sig, siginfo_t *info,
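
Changing setup_frame_32()/setup_rt_frame_32() to return int lets the caller tell a delivered frame apart from the force_sigsegv() failure path instead of carrying on blindly. A hypothetical caller fragment, assuming the surrounding signal32.c context for the types and prototypes; this is not the real handle_signal() body:

/*
 * Hypothetical use of the new return value: only proceed with signal
 * bookkeeping once the frame was actually written to the user stack.
 */
static int example_deliver_frame(struct k_sigaction *ka, struct pt_regs *regs,
				 int signr, sigset_t *oldset)
{
	if (!setup_frame_32(ka, regs, signr, oldset))
		return 0;	/* frame not written; SIGSEGV already forced */

	/* ... block the signal in current->blocked, etc. ... */
	return 1;
}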

+ 44 - 56
arch/mips/kernel/vpe.c

@@ -58,10 +58,6 @@
 
 typedef void *vpe_handle;
 
-// defined here because the kernel module loader doesn't have
-// anything to do with it.
-#define SHN_MIPS_SCOMMON 0xff03
-
 #ifndef ARCH_SHF_SMALL
 #define ARCH_SHF_SMALL 0
 #endif
@@ -69,11 +65,8 @@ typedef void *vpe_handle;
 /* If this is set, the section belongs in the init part of the module */
 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
 
-// temp number,
-#define VPE_MAJOR 63
-
 static char module_name[] = "vpe";
-static int major = 0;
+static int major;
 
 /* grab the likely amount of memory we will need. */
 #ifdef CONFIG_MIPS_VPE_LOADER_TOM
@@ -98,22 +91,7 @@ enum tc_state {
 	TC_STATE_DYNAMIC
 };
 
-struct vpe;
-typedef struct tc {
-	enum tc_state state;
-	int index;
-
-	/* parent VPE */
-	struct vpe *pvpe;
-
-	/* The list of TC's with this VPE */
-	struct list_head tc;
-
-	/* The global list of tc's */
-	struct list_head list;
-} tc_t;
-
-typedef struct vpe {
+struct vpe {
 	enum vpe_state state;
 
 	/* (device) minor associated with this vpe */
@@ -135,7 +113,21 @@ typedef struct vpe {
 
 	/* shared symbol address */
 	void *shared_ptr;
-} vpe_t;
+};
+
+struct tc {
+	enum tc_state state;
+	int index;
+
+	/* parent VPE */
+	struct vpe *pvpe;
+
+	/* The list of TC's with this VPE */
+	struct list_head tc;
+
+	/* The global list of tc's */
+	struct list_head list;
+};
 
 struct vpecontrol_ {
 	/* Virtual processing elements */
@@ -146,7 +138,7 @@ struct vpecontrol_ {
 } vpecontrol;
 
 static void release_progmem(void *ptr);
-static void dump_vpe(vpe_t * v);
+static void dump_vpe(struct vpe * v);
 extern void save_gp_address(unsigned int secbase, unsigned int rel);
 
 /* get the vpe associated with this minor */
@@ -197,13 +189,11 @@ struct vpe *alloc_vpe(int minor)
 {
 	struct vpe *v;
 
-	if ((v = kmalloc(sizeof(struct vpe), GFP_KERNEL)) == NULL) {
+	if ((v = kzalloc(sizeof(struct vpe), GFP_KERNEL)) == NULL) {
 		printk(KERN_WARNING "VPE: alloc_vpe no mem\n");
 		return NULL;
 	}
 
-	memset(v, 0, sizeof(struct vpe));
-
 	INIT_LIST_HEAD(&v->tc);
 	list_add_tail(&v->list, &vpecontrol.vpe_list);
 
@@ -216,13 +206,11 @@ struct tc *alloc_tc(int index)
 {
 	struct tc *t;
 
-	if ((t = kmalloc(sizeof(struct tc), GFP_KERNEL)) == NULL) {
+	if ((t = kzalloc(sizeof(struct tc), GFP_KERNEL)) == NULL) {
 		printk(KERN_WARNING "VPE: alloc_tc no mem\n");
 		return NULL;
 	}
 
-	memset(t, 0, sizeof(struct tc));
-
 	INIT_LIST_HEAD(&t->tc);
 	list_add_tail(&t->list, &vpecontrol.tc_list);
 
@@ -412,16 +400,17 @@ static int apply_r_mips_26(struct module *me, uint32_t *location,
 		return -ENOEXEC;
 	}
 
-/* Not desperately convinced this is a good check of an overflow condition
-   anyway. But it gets in the way of handling undefined weak symbols which
-   we want to set to zero.
-   if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
-   printk(KERN_ERR
-   "module %s: relocation overflow\n",
-   me->name);
-   return -ENOEXEC;
-   }
-*/
+/*
+ * Not desperately convinced this is a good check of an overflow condition
+ * anyway. But it gets in the way of handling undefined weak symbols which
+ * we want to set to zero.
+ * if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
+ * printk(KERN_ERR
+ * "module %s: relocation overflow\n",
+ * me->name);
+ * return -ENOEXEC;
+ * }
+ */
 
 	*location = (*location & ~0x03ffffff) |
 		((*location + (v >> 2)) & 0x03ffffff);
@@ -681,7 +670,7 @@ static void dump_tclist(void)
 }
 
 /* We are prepared so configure and start the VPE... */
-int vpe_run(vpe_t * v)
+int vpe_run(struct vpe * v)
 {
 	unsigned long val;
 	struct tc *t;
@@ -772,7 +761,7 @@ int vpe_run(vpe_t * v)
 	return 0;
 }
 
-static unsigned long find_vpe_symbols(vpe_t * v, Elf_Shdr * sechdrs,
+static unsigned long find_vpe_symbols(struct vpe * v, Elf_Shdr * sechdrs,
 				      unsigned int symindex, const char *strtab,
 				      struct module *mod)
 {
@@ -792,10 +781,12 @@ static unsigned long find_vpe_symbols(vpe_t * v, Elf_Shdr * sechdrs,
 	return 0;
 }
 
-/* Allocates a VPE with some program code space(the load address), copies the contents
-   of the program (p)buffer performing relocatations/etc, free's it when finished.
+/*
+ * Allocates a VPE with some program code space(the load address), copies
+ * the contents of the program (p)buffer performing relocatations/etc,
+ * free's it when finished.
 */
-int vpe_elfload(vpe_t * v)
+int vpe_elfload(struct vpe * v)
 {
 	Elf_Ehdr *hdr;
 	Elf_Shdr *sechdrs;
@@ -931,7 +922,7 @@ cleanup:
 	return err;
 }
 
-static void dump_vpe(vpe_t * v)
+static void dump_vpe(struct vpe * v)
 {
 	struct tc *t;
 
@@ -947,7 +938,7 @@ static void dump_vpe(vpe_t * v)
 static int vpe_open(struct inode *inode, struct file *filp)
 {
 	int minor;
-	vpe_t *v;
+	struct vpe *v;
 
 	/* assume only 1 device at the mo. */
 	if ((minor = MINOR(inode->i_rdev)) != 1) {
@@ -1001,7 +992,7 @@ static int vpe_open(struct inode *inode, struct file *filp)
 static int vpe_release(struct inode *inode, struct file *filp)
 {
 	int minor, ret = 0;
-	vpe_t *v;
+	struct vpe *v;
 	Elf_Ehdr *hdr;
 
 	minor = MINOR(inode->i_rdev);
@@ -1035,7 +1026,7 @@ static ssize_t vpe_write(struct file *file, const char __user * buffer,
 {
 	int minor;
 	size_t ret = count;
-	vpe_t *v;
+	struct vpe *v;
 
 	minor = MINOR(file->f_dentry->d_inode->i_rdev);
 	if ((v = get_vpe(minor)) == NULL)
@@ -1180,14 +1171,11 @@ static int __init vpe_module_init(void)
 		return -ENODEV;
 	}
 
-	if ((major = register_chrdev(VPE_MAJOR, module_name, &vpe_fops) < 0)) {
+	if ((major = register_chrdev(0, module_name, &vpe_fops) < 0)) {
 		printk("VPE loader: unable to register character device\n");
-		return -EBUSY;
+		return major;
 	}
 
-	if (major == 0)
-		major = VPE_MAJOR;
-
 	dmt();
 	dvpe();
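
Like rtlx.c above, vpe.c drops its hard-coded major in favour of register_chrdev(0, ...), which allocates a free major dynamically and returns it (or a negative errno). A hedged sketch of that registration pattern for a hypothetical driver; the "example" names are placeholders, not part of the patch:

/*
 * Sketch of dynamic character-device major allocation as adopted by the
 * rtlx and vpe loaders: passing 0 asks register_chrdev() to pick a free
 * major, which must be remembered for unregister_chrdev() later.
 */
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/module.h>

static struct file_operations example_fops;	/* handlers omitted */
static int example_major;

static int __init example_init(void)
{
	example_major = register_chrdev(0, "example", &example_fops);
	if (example_major < 0)
		return example_major;	/* propagate the errno from the core */

	return 0;
}

static void __exit example_exit(void)
{
	unregister_chrdev(example_major, "example");
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

Keeping the returned major in a static variable is what lets the exit path hand the same number back to the character-device core.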
 

+ 9 - 0
arch/mips/lasat/ds1603.c

@@ -8,6 +8,7 @@
 #include <asm/lasat/lasat.h>
 #include <linux/delay.h>
 #include <asm/lasat/ds1603.h>
+#include <asm/time.h>
 
 #include "ds1603.h"
 
@@ -138,19 +139,27 @@ static void rtc_end_op(void)
 unsigned long ds1603_read(void)
 {
 	unsigned long word;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rtc_lock, flags);
 	rtc_init_op();
 	rtc_write_byte(READ_TIME_CMD);
 	word = rtc_read_word();
 	rtc_end_op();
+	spin_unlock_irqrestore(&rtc_lock, flags);
 	return word;
 }
 
 int ds1603_set(unsigned long time)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&rtc_lock, flags);
 	rtc_init_op();
 	rtc_write_byte(SET_TIME_CMD);
 	rtc_write_word(time);
 	rtc_end_op();
+	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	return 0;
 }

+ 6 - 0
arch/mips/momentum/jaguar_atx/setup.c

@@ -149,7 +149,9 @@ arch_initcall(per_cpu_mappings);
 unsigned long m48t37y_get_time(void)
 {
 	unsigned int year, month, day, hour, min, sec;
+	unsigned long flags;
 
+	spin_lock_irqsave(&rtc_lock, flags);
 	/* stop the update */
 	rtc_base[0x7ff8] = 0x40;
 
@@ -166,6 +168,7 @@ unsigned long m48t37y_get_time(void)
 
 	/* start the update */
 	rtc_base[0x7ff8] = 0x00;
+	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	return mktime(year, month, day, hour, min, sec);
 }
@@ -173,11 +176,13 @@ unsigned long m48t37y_get_time(void)
 int m48t37y_set_time(unsigned long sec)
 {
 	struct rtc_time tm;
+	unsigned long flags;
 
 	/* convert to a more useful format -- note months count from 0 */
 	to_tm(sec, &tm);
 	tm.tm_mon += 1;
 
+	spin_lock_irqsave(&rtc_lock, flags);
 	/* enable writing */
 	rtc_base[0x7ff8] = 0x80;
 
@@ -201,6 +206,7 @@ int m48t37y_set_time(unsigned long sec)
 
 	/* disable writing */
 	rtc_base[0x7ff8] = 0x00;
+	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	return 0;
 }

Not all modified files are shown because too many files changed