Browse Source

Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus

Lachlan McIlroy 17 years ago
parent
commit
c58310bf49
100 changed files with 3537 additions and 500 deletions
  1. 11 26
      Documentation/00-INDEX
  2. 22 0
      Documentation/ABI/testing/procfs-diskstats
  3. 28 0
      Documentation/ABI/testing/sysfs-block
  4. 99 0
      Documentation/ABI/testing/sysfs-firmware-acpi
  5. 1 1
      Documentation/ABI/testing/sysfs-kernel-uids
  6. 17 0
      Documentation/BUG-HUNTING
  7. 1 1
      Documentation/DocBook/Makefile
  8. 20 0
      Documentation/DocBook/filesystems.tmpl
  9. 13 13
      Documentation/DocBook/genericirq.tmpl
  10. 1 85
      Documentation/DocBook/kernel-api.tmpl
  11. 16 16
      Documentation/DocBook/kernel-locking.tmpl
  12. 1 1
      Documentation/DocBook/lsm.tmpl
  13. 29 29
      Documentation/DocBook/mtdnand.tmpl
  14. 106 0
      Documentation/DocBook/networking.tmpl
  15. 8 8
      Documentation/DocBook/procfs-guide.tmpl
  16. 9 9
      Documentation/DocBook/rapidio.tmpl
  17. 16 5
      Documentation/DocBook/s390-drivers.tmpl
  18. 1 1
      Documentation/DocBook/scsi.tmpl
  19. 17 17
      Documentation/DocBook/videobook.tmpl
  20. 6 6
      Documentation/DocBook/z8530book.tmpl
  21. 1 1
      Documentation/RCU/NMI-RCU.txt
  22. 493 0
      Documentation/Smack.txt
  23. 5 11
      Documentation/SubmitChecklist
  24. 3 3
      Documentation/accounting/getdelays.c
  25. 15 0
      Documentation/acpi/dsdt-override.txt
  26. 43 0
      Documentation/acpi/initramfs-add-dsdt.sh
  27. 26 0
      Documentation/acpi/method-tracing.txt
  28. 2 0
      Documentation/aoe/mkdevs.sh
  29. 4 1
      Documentation/aoe/udev-install.sh
  30. 9 7
      Documentation/aoe/udev.txt
  31. 11 11
      Documentation/cgroups.txt
  32. 279 0
      Documentation/controllers/memory.txt
  33. 23 0
      Documentation/cpuidle/core.txt
  34. 31 0
      Documentation/cpuidle/driver.txt
  35. 29 0
      Documentation/cpuidle/governor.txt
  36. 79 0
      Documentation/cpuidle/sysfs.txt
  37. 8 15
      Documentation/cpusets.txt
  38. 0 0
      Documentation/edac.txt
  39. 0 1
      Documentation/email-clients.txt
  40. 3 3
      Documentation/fb/deferred_io.txt
  41. 11 43
      Documentation/feature-removal-schedule.txt
  42. 4 0
      Documentation/filesystems/00-INDEX
  43. 0 3
      Documentation/filesystems/Locking
  44. 5 5
      Documentation/filesystems/dnotify.txt
  45. 1 0
      Documentation/filesystems/isofs.txt
  46. 19 11
      Documentation/filesystems/porting
  47. 86 17
      Documentation/filesystems/proc.txt
  48. 0 0
      Documentation/filesystems/sharedsubtree.txt
  49. 50 17
      Documentation/filesystems/vfs.txt
  50. 121 12
      Documentation/gpio.txt
  51. 36 0
      Documentation/hwmon/ads7828
  52. 1 1
      Documentation/hwmon/it87
  53. 2 2
      Documentation/hwmon/lm78
  54. 8 3
      Documentation/hwmon/lm87
  55. 1 1
      Documentation/hwmon/userspace-tools
  56. 3 2
      Documentation/hwmon/w83627ehf
  57. 1 2
      Documentation/hwmon/w83627hf
  58. 8 14
      Documentation/hwmon/w83781d
  59. 54 0
      Documentation/hwmon/w83l786ng
  60. 1 1
      Documentation/i2c/busses/i2c-piix4
  61. 3 0
      Documentation/i2c/chips/pca9539
  62. 9 6
      Documentation/ia64/aliasing-test.c
  63. 1 1
      Documentation/input/input-programming.txt
  64. 14 1
      Documentation/iostats.txt
  65. 10 5
      Documentation/kernel-parameters.txt
  66. 74 18
      Documentation/kprobes.txt
  67. 10 10
      Documentation/kref.txt
  68. 10 0
      Documentation/laptops/00-INDEX
  69. 202 0
      Documentation/laptops/acer-wmi.txt
  70. 0 1
      Documentation/laptops/sony-laptop.txt
  71. 0 0
      Documentation/laptops/sonypi.txt
  72. 108 8
      Documentation/laptops/thinkpad-acpi.txt
  73. 23 6
      Documentation/leds-class.txt
  74. 10 0
      Documentation/md.txt
  75. 149 0
      Documentation/mn10300/ABI.txt
  76. 60 0
      Documentation/mn10300/compartmentalisation.txt
  77. 2 2
      Documentation/pcmcia/driver-changes.txt
  78. 59 0
      Documentation/pm_qos_interface.txt
  79. 5 0
      Documentation/power/swsusp.txt
  80. 42 0
      Documentation/powerpc/booting-without-of.txt
  81. 21 21
      Documentation/rtc.txt
  82. 59 0
      Documentation/sched-rt-group.txt
  83. 16 0
      Documentation/scheduler/00-INDEX
  84. 0 0
      Documentation/scheduler/sched-arch.txt
  85. 0 0
      Documentation/scheduler/sched-coding.txt
  86. 0 0
      Documentation/scheduler/sched-design-CFS.txt
  87. 0 0
      Documentation/scheduler/sched-design.txt
  88. 0 0
      Documentation/scheduler/sched-domains.txt
  89. 0 0
      Documentation/scheduler/sched-nice-design.txt
  90. 0 0
      Documentation/scheduler/sched-stats.txt
  91. 41 0
      Documentation/scsi/ChangeLog.arcmsr
  92. 1 1
      Documentation/scsi/scsi_mid_low_api.txt
  93. 10 0
      Documentation/sysctl/fs.txt
  94. 30 1
      Documentation/sysctl/kernel.txt
  95. 26 3
      Documentation/sysctl/vm.txt
  96. 245 0
      Documentation/thermal/sysfs-api.txt
  97. 226 0
      Documentation/unaligned-memory-access.txt
  98. 138 11
      Documentation/vm/slabinfo.c
  99. 2 0
      Documentation/w1/masters/00-INDEX
  100. 33 0
      Documentation/w1/masters/w1-gpio

+ 11 - 26
Documentation/00-INDEX

@@ -14,6 +14,7 @@ Following translations are available on the WWW:
 	- this file.
 ABI/
 	- info on kernel <-> userspace ABI and relative interface stability.
+
 BUG-HUNTING
 	- brute force method of doing binary search of patches to find bug.
 Changes
@@ -66,6 +67,8 @@ VGA-softcursor.txt
 	- how to change your VGA cursor from a blinking underscore.
 accounting/
 	- documentation on accounting and taskstats.
+acpi/
+	- info on ACPI-specific hooks in the kernel.
 aoe/
 	- description of AoE (ATA over Ethernet) along with config examples.
 applying-patches.txt
@@ -106,6 +109,8 @@ cpu-hotplug.txt
 	- document describing CPU hotplug support in the Linux kernel.
 cpu-load.txt
 	- document describing how CPU load statistics are collected.
+cpuidle/
+	- info on CPU_IDLE, CPU idle state management subsystem.
 cpusets.txt
 	- documents the cpusets feature; assign CPUs and Mem to a set of tasks.
 cputopology.txt
@@ -126,18 +131,16 @@ devices.txt
 	- plain ASCII listing of all the nodes in /dev/ with major minor #'s.
 digiepca.txt
 	- info on Digi Intl. {PC,PCI,EISA}Xx and Xem series cards.
-dnotify.txt
-	- info about directory notification in Linux.
 dontdiff
 	- file containing a list of files that should never be diff'ed.
 driver-model/
 	- directory with info about Linux driver model.
-drivers/
-	- directory with driver documentation (currently only EDAC).
 dvb/
 	- info on Linux Digital Video Broadcast (DVB) subsystem.
 early-userspace/
 	- info about initramfs, klibc, and userspace early during boot.
+edac.txt
+	- information on EDAC - Error Detection And Correction
 eisa.txt
 	- info on EISA bus support.
 exception.txt
@@ -226,6 +229,8 @@ kref.txt
 	- docs on adding reference counters (krefs) to kernel objects.
 laptop-mode.txt
 	- how to conserve battery power using laptop-mode.
+laptops/
+	- directory with laptop related info and laptop driver documentation.
 ldm.txt
 	- a brief description of LDM (Windows Dynamic Disks).
 leds-class.txt
@@ -334,20 +339,8 @@ rtc.txt
 	- notes on how to use the Real Time Clock (aka CMOS clock) driver.
 s390/
 	- directory with info on using Linux on the IBM S390.
-sched-arch.txt
-	- CPU Scheduler implementation hints for architecture specific code.
-sched-coding.txt
-	- reference for various scheduler-related methods in the O(1) scheduler.
-sched-design.txt
-	- goals, design and implementation of the Linux O(1) scheduler.
-sched-design-CFS.txt
-	- goals, design and implementation of the Complete Fair Scheduler.
-sched-domains.txt
-	- information on scheduling domains.
-sched-nice-design.txt
-	- How and why the scheduler's nice levels are implemented.
-sched-stats.txt
-	- information on schedstats (Linux Scheduler Statistics).
+scheduler/
+	- directory with info on the scheduler.
 scsi/
 	- directory with info on Linux scsi support.
 serial/
@@ -360,14 +353,8 @@ sgi-visws.txt
 	- short blurb on the SGI Visual Workstations.
 sh/
 	- directory with info on porting Linux to a new architecture.
-sharedsubtree.txt
-	- a description of shared subtrees for namespaces.
 smart-config.txt
 	- description of the Smart Config makefile feature.
-sony-laptop.txt
-	- Sony Notebook Control Driver (SNC) Readme.
-sonypi.txt
-	- info on Linux Sony Programmable I/O Device support.
 sound/
 	- directory with info on sound card support.
 sparc/
@@ -398,8 +385,6 @@ sysrq.txt
 	- info on the magic SysRq key.
 telephony/
 	- directory with info on telephony (e.g. voice over IP) support.
-thinkpad-acpi.txt
-	- information on the (IBM and Lenovo) ThinkPad ACPI Extras driver.
 time_interpolators.txt
 	- info on time interpolators.
 tipar.txt

+ 22 - 0
Documentation/ABI/testing/procfs-diskstats

@@ -0,0 +1,22 @@
+What:		/proc/diskstats
+Date:		February 2008
+Contact:	Jerome Marchand <jmarchan@redhat.com>
+Description:
+		The /proc/diskstats file displays the I/O statistics
+		of block devices. Each line contains the following 14
+		fields:
+		 1 - major number
+		 2 - minor number
+		 3 - device name
+		 4 - reads completed successfully
+		 5 - reads merged
+		 6 - sectors read
+		 7 - time spent reading (ms)
+		 8 - writes completed
+		 9 - writes merged
+		10 - sectors written
+		11 - time spent writing (ms)
+		12 - I/Os currently in progress
+		13 - time spent doing I/Os (ms)
+		14 - weighted time spent doing I/Os (ms)
+		For more details refer to Documentation/iostats.txt

+ 28 - 0
Documentation/ABI/testing/sysfs-block

@@ -0,0 +1,28 @@
+What:		/sys/block/<disk>/stat
+Date:		February 2008
+Contact:	Jerome Marchand <jmarchan@redhat.com>
+Description:
+		The /sys/block/<disk>/stat files display the I/O
+		statistics of disk <disk>. They contain 11 fields:
+		 1 - reads completed successfully
+		 2 - reads merged
+		 3 - sectors read
+		 4 - time spent reading (ms)
+		 5 - writes completed
+		 6 - writes merged
+		 7 - sectors written
+		 8 - time spent writing (ms)
+		 9 - I/Os currently in progress
+		10 - time spent doing I/Os (ms)
+		11 - weighted time spent doing I/Os (ms)
+		For more details refer to Documentation/iostats.txt
+
+
+What:		/sys/block/<disk>/<part>/stat
+Date:		February 2008
+Contact:	Jerome Marchand <jmarchan@redhat.com>
+Description:
+		The /sys/block/<disk>/<part>/stat files display the
+		I/O statistics of partition <part>. The format is the
+		same as the above-written /sys/block/<disk>/stat
+		format.

+ 99 - 0
Documentation/ABI/testing/sysfs-firmware-acpi

@@ -0,0 +1,99 @@
+What:		/sys/firmware/acpi/interrupts/
+Date:		February 2008
+Contact:	Len Brown <lenb@kernel.org>
+Description:
+		All ACPI interrupts are handled via a single IRQ,
+		the System Control Interrupt (SCI), which appears
+		as "acpi" in /proc/interrupts.
+
+		However, one of the main functions of ACPI is to make
+		the platform understand random hardware without
+		special driver support.  So while the SCI handles a few
+		well known (fixed feature) interrupt sources, such
+		as the power button, it can also handle a variable
+		number of "General Purpose Events" (GPEs).
+
+		A GPE vectors to a specified handler in AML, which
+		can do anything the BIOS writer wants from
+		OS context.  GPE 0x12, for example, would vector
+		to a level or edge handler called _L12 or _E12.
+		The handler may do its business and return.
+		Or the handler may send a Notify event
+		to a Linux device driver registered on an ACPI device,
+		such as a battery, or a processor.
+
+		To figure out where all the SCI's are coming from,
+		/sys/firmware/acpi/interrupts contains a file listing
+		every possible source, and the count of how many
+		times it has triggered.
+
+		$ cd /sys/firmware/acpi/interrupts
+		$ grep . *
+		error:0
+		ff_gbl_lock:0
+		ff_pmtimer:0
+		ff_pwr_btn:0
+		ff_rt_clk:0
+		ff_slp_btn:0
+		gpe00:0
+		gpe01:0
+		gpe02:0
+		gpe03:0
+		gpe04:0
+		gpe05:0
+		gpe06:0
+		gpe07:0
+		gpe08:0
+		gpe09:174
+		gpe0A:0
+		gpe0B:0
+		gpe0C:0
+		gpe0D:0
+		gpe0E:0
+		gpe0F:0
+		gpe10:0
+		gpe11:60
+		gpe12:0
+		gpe13:0
+		gpe14:0
+		gpe15:0
+		gpe16:0
+		gpe17:0
+		gpe18:0
+		gpe19:7
+		gpe1A:0
+		gpe1B:0
+		gpe1C:0
+		gpe1D:0
+		gpe1E:0
+		gpe1F:0
+		gpe_all:241
+		sci:241
+
+		sci - The total number of times the ACPI SCI
+		has claimed an interrupt.
+
+		gpe_all - count of SCI caused by GPEs.
+
+		gpeXX - count for individual GPE source
+
+		ff_gbl_lock - Global Lock
+
+		ff_pmtimer - PM Timer
+
+		ff_pwr_btn - Power Button
+
+		ff_rt_clk - Real Time Clock
+
+		ff_slp_btn - Sleep Button
+
+		error - an interrupt that can't be accounted for above.
+
+		Root has permission to clear any of these counters.  Eg.
+		# echo 0 > gpe11
+
+		All counters can be cleared by clearing the total "sci":
+		# echo 0 > sci
+
+		None of these counters has an effect on the function
+		of the system; they are simply statistics.

+ 1 - 1
Documentation/ABI/testing/sysfs-kernel-uids

@@ -11,4 +11,4 @@ Description:
 		example would be, if User A has shares = 1024 and user
 		B has shares = 2048, User B will get twice the CPU
 		bandwidth user A will. For more details refer
-		Documentation/sched-design-CFS.txt
+		Documentation/scheduler/sched-design-CFS.txt

+ 17 - 0
Documentation/BUG-HUNTING

@@ -214,6 +214,23 @@ And recompile the kernel with CONFIG_DEBUG_INFO enabled:
   gdb vmlinux
   (gdb) p vt_ioctl
   (gdb) l *(0x<address of vt_ioctl> + 0xda8)
+or, as one command
+  (gdb) l *(vt_ioctl + 0xda8)
+
+If you have a call trace, such as :-
+>Call Trace:
+> [<ffffffff8802c8e9>] :jbd:log_wait_commit+0xa3/0xf5
+> [<ffffffff810482d9>] autoremove_wake_function+0x0/0x2e
+> [<ffffffff8802770b>] :jbd:journal_stop+0x1be/0x1ee
+> ...
+this shows the problem in the :jbd: module. You can load that module in gdb
+and list the relevant code.
+  gdb fs/jbd/jbd.ko
+  (gdb) p log_wait_commit
+  (gdb) l *(0x<address> + 0xa3)
+or
+  (gdb) l *(log_wait_commit + 0xa3)
+
 
 Another very useful option of the Kernel Hacking section in menuconfig is
 Debug memory allocations. This will help you see whether data has been

+ 1 - 1
Documentation/DocBook/Makefile

@@ -8,7 +8,7 @@
 
 DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
 	    kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
-	    procfs-guide.xml writing_usb_driver.xml \
+	    procfs-guide.xml writing_usb_driver.xml networking.xml \
 	    kernel-api.xml filesystems.xml lsm.xml usb.xml \
 	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
 	    genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml

+ 20 - 0
Documentation/DocBook/filesystems.tmpl

@@ -398,4 +398,24 @@ an example.
 
   </chapter>
 
+  <chapter id="splice">
+      <title>splice API</title>
+  <para>
+	splice is a method for moving blocks of data around inside the
+	kernel, without continually transferring them between the kernel
+	and user space.
+  </para>
+!Ffs/splice.c
+  </chapter>
+
+  <chapter id="pipes">
+      <title>pipes API</title>
+  <para>
+	Pipe interfaces are all for in-kernel (builtin image) use.
+	They are not exported for use by modules.
+  </para>
+!Iinclude/linux/pipe_fs_i.h
+!Ffs/pipe.c
+  </chapter>
+
 </book>

+ 13 - 13
Documentation/DocBook/genericirq.tmpl

@@ -172,7 +172,7 @@
 	  <listitem><para>Chiplevel hardware encapsulation</para></listitem>
 	</orderedlist>
     </para>
-    <sect1>
+    <sect1 id="Interrupt_control_flow">
 	<title>Interrupt control flow</title>
 	<para>
 	Each interrupt is described by an interrupt descriptor structure
@@ -190,7 +190,7 @@
 	referenced by the assigned chip descriptor structure.
 	</para>
     </sect1>
-    <sect1>
+    <sect1 id="Highlevel_Driver_API">
 	<title>Highlevel Driver API</title>
 	<para>
 	  The highlevel Driver API consists of following functions:
@@ -210,7 +210,7 @@
 	  See the autogenerated function documentation for details.
 	</para>
     </sect1>
-    <sect1>
+    <sect1 id="Highlevel_IRQ_flow_handlers">
 	<title>Highlevel IRQ flow handlers</title>
 	<para>
 	  The generic layer provides a set of pre-defined irq-flow methods:
@@ -224,9 +224,9 @@
 	  specific) are assigned to specific interrupts by the architecture
 	  either during bootup or during device initialization.
 	</para>
-	<sect2>
+	<sect2 id="Default_flow_implementations">
 	<title>Default flow implementations</title>
-	    <sect3>
+	    <sect3 id="Helper_functions">
 	 	<title>Helper functions</title>
 		<para>
 		The helper functions call the chip primitives and
@@ -267,9 +267,9 @@ noop(irq)
 	        </para>
 	    </sect3>
 	</sect2>
-	<sect2>
+	<sect2 id="Default_flow_handler_implementations">
 	<title>Default flow handler implementations</title>
-	    <sect3>
+	    <sect3 id="Default_Level_IRQ_flow_handler">
 	 	<title>Default Level IRQ flow handler</title>
 		<para>
 		handle_level_irq provides a generic implementation
@@ -284,7 +284,7 @@ desc->chip->end();
 		</programlisting>
 		</para>
    	    </sect3>
-	    <sect3>
+	    <sect3 id="Default_Edge_IRQ_flow_handler">
 	 	<title>Default Edge IRQ flow handler</title>
 		<para>
 		handle_edge_irq provides a generic implementation
@@ -311,7 +311,7 @@ desc->chip->end();
 		</programlisting>
 		</para>
    	    </sect3>
-	    <sect3>
+	    <sect3 id="Default_simple_IRQ_flow_handler">
 	 	<title>Default simple IRQ flow handler</title>
 		<para>
 		handle_simple_irq provides a generic implementation
@@ -328,7 +328,7 @@ handle_IRQ_event(desc->action);
 		</programlisting>
 		</para>
    	    </sect3>
-	    <sect3>
+	    <sect3 id="Default_per_CPU_flow_handler">
 	 	<title>Default per CPU flow handler</title>
 		<para>
 		handle_percpu_irq provides a generic implementation
@@ -349,7 +349,7 @@ desc->chip->end();
 		</para>
    	    </sect3>
 	</sect2>
-	<sect2>
+	<sect2 id="Quirks_and_optimizations">
 	<title>Quirks and optimizations</title>
 	<para>
 	The generic functions are intended for 'clean' architectures and chips,
@@ -358,7 +358,7 @@ desc->chip->end();
 	overriding the highlevel irq-flow handler.
 	</para>
 	</sect2>
-	<sect2>
+	<sect2 id="Delayed_interrupt_disable">
 	<title>Delayed interrupt disable</title>
 	<para>
 	This per interrupt selectable feature, which was introduced by Russell
@@ -380,7 +380,7 @@ desc->chip->end();
 	</para>
 	</sect2>
     </sect1>
-    <sect1>
+    <sect1 id="Chiplevel_hardware_encapsulation">
 	<title>Chiplevel hardware encapsulation</title>
 	<para>
 	The chip level hardware descriptor structure irq_chip

+ 1 - 85
Documentation/DocBook/kernel-api.tmpl

@@ -165,6 +165,7 @@ X!Ilib/string.c
 !Emm/vmalloc.c
 !Imm/page_alloc.c
 !Emm/mempool.c
+!Emm/dmapool.c
 !Emm/page-writeback.c
 !Emm/truncate.c
      </sect1>
@@ -203,65 +204,6 @@ X!Ilib/string.c
      </sect1>
   </chapter>
 
-  <chapter id="netcore">
-     <title>Linux Networking</title>
-     <sect1><title>Networking Base Types</title>
-!Iinclude/linux/net.h
-     </sect1>
-     <sect1><title>Socket Buffer Functions</title>
-!Iinclude/linux/skbuff.h
-!Iinclude/net/sock.h
-!Enet/socket.c
-!Enet/core/skbuff.c
-!Enet/core/sock.c
-!Enet/core/datagram.c
-!Enet/core/stream.c
-     </sect1>
-     <sect1><title>Socket Filter</title>
-!Enet/core/filter.c
-     </sect1>
-     <sect1><title>Generic Network Statistics</title>
-!Iinclude/linux/gen_stats.h
-!Enet/core/gen_stats.c
-!Enet/core/gen_estimator.c
-     </sect1>
-     <sect1><title>SUN RPC subsystem</title>
-<!-- The !D functionality is not perfect, garbage has to be protected by comments
-!Dnet/sunrpc/sunrpc_syms.c
--->
-!Enet/sunrpc/xdr.c
-!Enet/sunrpc/svcsock.c
-!Enet/sunrpc/sched.c
-     </sect1>
-  </chapter>
-
-  <chapter id="netdev">
-     <title>Network device support</title>
-     <sect1><title>Driver Support</title>
-!Enet/core/dev.c
-!Enet/ethernet/eth.c
-!Enet/sched/sch_generic.c
-!Iinclude/linux/etherdevice.h
-!Iinclude/linux/netdevice.h
-     </sect1>
-     <sect1><title>PHY Support</title>
-!Edrivers/net/phy/phy.c
-!Idrivers/net/phy/phy.c
-!Edrivers/net/phy/phy_device.c
-!Idrivers/net/phy/phy_device.c
-!Edrivers/net/phy/mdio_bus.c
-!Idrivers/net/phy/mdio_bus.c
-     </sect1>
-<!-- FIXME: Removed for now since no structured comments in source
-     <sect1><title>Wireless</title>
-X!Enet/core/wireless.c
-     </sect1>
--->
-     <sect1><title>Synchronous PPP</title>
-!Edrivers/net/wan/syncppp.c
-     </sect1>
-  </chapter>
-
   <chapter id="modload">
      <title>Module Support</title>
      <sect1><title>Module Loading</title>
@@ -371,7 +313,6 @@ X!Iinclude/linux/device.h
 !Edrivers/base/class.c
 !Edrivers/base/firmware_class.c
 !Edrivers/base/transport_class.c
-!Edrivers/base/dmapool.c
 <!-- Cannot be included, because
      attribute_container_add_class_device_adapter
  and attribute_container_classdev_to_container
@@ -508,11 +449,6 @@ X!Isound/sound_firmware.c
 !Edrivers/serial/8250.c
   </chapter>
 
-  <chapter id="z85230">
-     <title>Z85230 Support Library</title>
-!Edrivers/net/wan/z85230.c
-  </chapter>
-
   <chapter id="fbdev">
      <title>Frame Buffer Library</title>
 
@@ -712,24 +648,4 @@ X!Idrivers/video/console/fonts.c
 !Edrivers/i2c/i2c-core.c
   </chapter>
 
-  <chapter id="splice">
-      <title>splice API</title>
-  <para>
-	splice is a method for moving blocks of data around inside the
-	kernel, without continually transferring them between the kernel
-	and user space.
-  </para>
-!Ffs/splice.c
-  </chapter>
-
-  <chapter id="pipes">
-      <title>pipes API</title>
-  <para>
-	Pipe interfaces are all for in-kernel (builtin image) use.
-	They are not exported for use by modules.
-  </para>
-!Iinclude/linux/pipe_fs_i.h
-!Ffs/pipe.c
-  </chapter>
-
 </book>

+ 16 - 16
Documentation/DocBook/kernel-locking.tmpl

@@ -717,7 +717,7 @@ used, and when it gets full, throws out the least used one.
     <para>
 For our first example, we assume that all operations are in user
 context (ie. from system calls), so we can sleep.  This means we can
-use a semaphore to protect the cache and all the objects within
+use a mutex to protect the cache and all the objects within
 it.  Here's the code:
     </para>
 
@@ -725,7 +725,7 @@ it.  Here's the code:
 #include &lt;linux/list.h&gt;
 #include &lt;linux/slab.h&gt;
 #include &lt;linux/string.h&gt;
-#include &lt;asm/semaphore.h&gt;
+#include &lt;linux/mutex.h&gt;
 #include &lt;asm/errno.h&gt;
 
 struct object
@@ -737,7 +737,7 @@ struct object
 };
 
 /* Protects the cache, cache_num, and the objects within it */
-static DECLARE_MUTEX(cache_lock);
+static DEFINE_MUTEX(cache_lock);
 static LIST_HEAD(cache);
 static unsigned int cache_num = 0;
 #define MAX_CACHE_SIZE 10
@@ -789,17 +789,17 @@ int cache_add(int id, const char *name)
         obj-&gt;id = id;
         obj-&gt;popularity = 0;
 
-        down(&amp;cache_lock);
+        mutex_lock(&amp;cache_lock);
         __cache_add(obj);
-        up(&amp;cache_lock);
+        mutex_unlock(&amp;cache_lock);
         return 0;
 }
 
 void cache_delete(int id)
 {
-        down(&amp;cache_lock);
+        mutex_lock(&amp;cache_lock);
         __cache_delete(__cache_find(id));
-        up(&amp;cache_lock);
+        mutex_unlock(&amp;cache_lock);
 }
 
 int cache_find(int id, char *name)
@@ -807,13 +807,13 @@ int cache_find(int id, char *name)
         struct object *obj;
         int ret = -ENOENT;
 
-        down(&amp;cache_lock);
+        mutex_lock(&amp;cache_lock);
         obj = __cache_find(id);
         if (obj) {
                 ret = 0;
                 strcpy(name, obj-&gt;name);
         }
-        up(&amp;cache_lock);
+        mutex_unlock(&amp;cache_lock);
         return ret;
 }
 </programlisting>
@@ -853,7 +853,7 @@ The change is shown below, in standard patch format: the
          int popularity;
  };
 
--static DECLARE_MUTEX(cache_lock);
+-static DEFINE_MUTEX(cache_lock);
 +static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED;
  static LIST_HEAD(cache);
  static unsigned int cache_num = 0;
@@ -870,22 +870,22 @@ The change is shown below, in standard patch format: the
          obj-&gt;id = id;
          obj-&gt;popularity = 0;
 
--        down(&amp;cache_lock);
+-        mutex_lock(&amp;cache_lock);
 +        spin_lock_irqsave(&amp;cache_lock, flags);
          __cache_add(obj);
--        up(&amp;cache_lock);
+-        mutex_unlock(&amp;cache_lock);
 +        spin_unlock_irqrestore(&amp;cache_lock, flags);
          return 0;
  }
 
  void cache_delete(int id)
  {
--        down(&amp;cache_lock);
+-        mutex_lock(&amp;cache_lock);
 +        unsigned long flags;
 +
 +        spin_lock_irqsave(&amp;cache_lock, flags);
          __cache_delete(__cache_find(id));
--        up(&amp;cache_lock);
+-        mutex_unlock(&amp;cache_lock);
 +        spin_unlock_irqrestore(&amp;cache_lock, flags);
  }
 
@@ -895,14 +895,14 @@ The change is shown below, in standard patch format: the
          int ret = -ENOENT;
 +        unsigned long flags;
 
--        down(&amp;cache_lock);
+-        mutex_lock(&amp;cache_lock);
 +        spin_lock_irqsave(&amp;cache_lock, flags);
          obj = __cache_find(id);
          if (obj) {
                  ret = 0;
                  strcpy(name, obj-&gt;name);
          }
--        up(&amp;cache_lock);
+-        mutex_unlock(&amp;cache_lock);
 +        spin_unlock_irqrestore(&amp;cache_lock, flags);
          return ret;
  }

+ 1 - 1
Documentation/DocBook/lsm.tmpl

@@ -33,7 +33,7 @@
  </authorgroup>
  </articleinfo>
 
-<sect1><title>Introduction</title>
+<sect1 id="Introduction"><title>Introduction</title>
 
 <para>
 In March 2001, the National Security Agency (NSA) gave a presentation

+ 29 - 29
Documentation/DocBook/mtdnand.tmpl

@@ -80,7 +80,7 @@
      struct member has a short description which is marked with an [XXX] identifier.
      The following chapters explain the meaning of those identifiers.
      </para>
-     <sect1>   
+     <sect1 id="Function_identifiers_XXX">
 	<title>Function identifiers [XXX]</title>
      	<para>
 	The functions are marked with [XXX] identifiers in the short
@@ -115,7 +115,7 @@
 		</para></listitem>
 	</itemizedlist>
      </sect1>
-     <sect1>   
+     <sect1 id="Struct_member_identifiers_XXX">
 	<title>Struct member identifiers [XXX]</title>
      	<para>
 	The struct members are marked with [XXX] identifiers in the 
@@ -159,7 +159,7 @@
 		basic functions and fill out some really board dependent
 		members in the nand chip description structure.
 	</para>
-	<sect1>
+	<sect1 id="Basic_defines">
 		<title>Basic defines</title>
 		<para>
 			At least you have to provide a mtd structure and
@@ -185,7 +185,7 @@ static struct nand_chip board_chip;
 static unsigned long baseaddr;
 		</programlisting>
 	</sect1>
-	<sect1>
+	<sect1 id="Partition_defines">
 		<title>Partition defines</title>
 		<para>
 			If you want to divide your device into partitions, then
@@ -204,7 +204,7 @@ static struct mtd_partition partition_info[] = {
 };
 		</programlisting>
 	</sect1>
-	<sect1>
+	<sect1 id="Hardware_control_functions">
 		<title>Hardware control function</title>
 		<para>
 			The hardware control function provides access to the 
@@ -246,7 +246,7 @@ static void board_hwcontrol(struct mtd_info *mtd, int cmd)
 }
 		</programlisting>
 	</sect1>
-	<sect1>
+	<sect1 id="Device_ready_function">
 		<title>Device ready function</title>
 		<para>
 			If the hardware interface has the ready busy pin of the NAND chip connected to a
@@ -257,7 +257,7 @@ static void board_hwcontrol(struct mtd_info *mtd, int cmd)
 			the function must not be defined and the function pointer this->dev_ready is set to NULL.		
 		</para>
 	</sect1>
-	<sect1>
+	<sect1 id="Init_function">
 		<title>Init function</title>
 		<para>
 			The init function allocates memory and sets up all the board
@@ -325,7 +325,7 @@ out:
 module_init(board_init);
 		</programlisting>
 	</sect1>
-	<sect1>
+	<sect1 id="Exit_function">
 		<title>Exit function</title>
 		<para>
 			The exit function is only neccecary if the driver is
@@ -359,7 +359,7 @@ module_exit(board_cleanup);
 		driver. For a list of functions which can be overridden by the board
 		driver see the documentation of the nand_chip structure.
 	</para>
-	<sect1>
+	<sect1 id="Multiple_chip_control">
 		<title>Multiple chip control</title>
 		<para>
 			The nand driver can control chip arrays. Therefor the
@@ -419,9 +419,9 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 }
 		</programlisting>
 	</sect1>
-	<sect1>
+	<sect1 id="Hardware_ECC_support">
 		<title>Hardware ECC support</title>
-		<sect2>
+		<sect2 id="Functions_and_constants">
 			<title>Functions and constants</title>
 			<para>
 				The nand driver supports three different types of
@@ -475,7 +475,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 				</itemizedlist>
 			</para>
 		</sect2>
-		<sect2>
+		<sect2 id="Hardware_ECC_with_syndrome_calculation">
 		<title>Hardware ECC with syndrome calculation</title>
 			<para>
 				Many hardware ECC implementations provide Reed-Solomon
@@ -500,7 +500,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 			</para>
 		</sect2>
 	</sect1>
-	<sect1>
+	<sect1 id="Bad_Block_table_support">
 		<title>Bad block table support</title>
 		<para>
 			Most NAND chips mark the bad blocks at a defined
@@ -552,7 +552,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 			allows faster access than always checking the
 			bad block information on the flash chip itself.
 		</para>
-		<sect2>
+		<sect2 id="Flash_based_tables">
 			<title>Flash based tables</title>
 			<para>
 				It may be desired or neccecary to keep a bad block table in FLASH. 
@@ -587,7 +587,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 				</itemizedlist>
 			</para>
 		</sect2>
-		<sect2>
+		<sect2 id="User_defined_tables">
 			<title>User defined tables</title>
 			<para>
 				User defined tables are created by filling out a 
@@ -676,7 +676,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 			</para>
 		</sect2>
 	</sect1>
-	<sect1>
+	<sect1 id="Spare_area_placement">
 		<title>Spare area (auto)placement</title>
 		<para>
 			The nand driver implements different possibilities for
@@ -730,7 +730,7 @@ struct nand_oobinfo {
 			</para></listitem>
 			</itemizedlist>
 		</para>
-		<sect2>
+		<sect2 id="Placement_defined_by_fs_driver">
 			<title>Placement defined by fs driver</title>
 			<para>
 				The calling function provides a pointer to a nand_oobinfo
@@ -760,7 +760,7 @@ struct nand_oobinfo {
 				done according to the given scheme in the nand_oobinfo structure.
 			</para>
 		</sect2>
-		<sect2>
+		<sect2 id="Automatic_placement">
 			<title>Automatic placement</title>
 			<para>
 				Automatic placement uses the built in defaults to place the
@@ -774,7 +774,7 @@ struct nand_oobinfo {
 				done according to the default builtin scheme.
 			</para>
 		</sect2>
-		<sect2>
+		<sect2 id="User_space_placement_selection">
 			<title>User space placement selection</title>
 		<para>
 			All non ecc functions like mtd->read and mtd->write use an internal 
@@ -789,9 +789,9 @@ struct nand_oobinfo {
 		</para>
 		</sect2>
 	</sect1>	
-	<sect1>
+	<sect1 id="Spare_area_autoplacement_default">
 		<title>Spare area autoplacement default schemes</title>
-		<sect2>
+		<sect2 id="pagesize_256">
 			<title>256 byte pagesize</title>
 <informaltable><tgroup cols="3"><tbody>
 <row>
@@ -843,7 +843,7 @@ pages this byte is reserved</entry>
 </row>
 </tbody></tgroup></informaltable>
 		</sect2>
-		<sect2>
+		<sect2 id="pagesize_512">
 			<title>512 byte pagesize</title>
 <informaltable><tgroup cols="3"><tbody>
 <row>
@@ -906,7 +906,7 @@ in this page</entry>
 </row>
 </tbody></tgroup></informaltable>
 		</sect2>
-		<sect2>
+		<sect2 id="pagesize_2048">
 			<title>2048 byte pagesize</title>
 <informaltable><tgroup cols="3"><tbody>
 <row>
@@ -1126,9 +1126,9 @@ in this page</entry>
      <para>
      This chapter describes the constants which might be relevant for a driver developer.
      </para>
-     <sect1>   
+     <sect1 id="Chip_option_constants">
 	<title>Chip option constants</title>
-     	<sect2>   
+     	<sect2 id="Constants_for_chip_id_table">
 		<title>Constants for chip id table</title>
      		<para>
 		These constants are defined in nand.h. They are ored together to describe
@@ -1153,7 +1153,7 @@ in this page</entry>
 		</programlisting>
      		</para>
      	</sect2>
-     	<sect2>   
+     	<sect2 id="Constants_for_runtime_options">
 		<title>Constants for runtime options</title>
      		<para>
 		These constants are defined in nand.h. They are ored together to describe
@@ -1171,7 +1171,7 @@ in this page</entry>
      	</sect2>
      </sect1>	
 
-     <sect1>   
+     <sect1 id="ECC_selection_constants">
 	<title>ECC selection constants</title>
 	<para>
 	Use these constants to select the ECC algorithm.
@@ -1192,7 +1192,7 @@ in this page</entry>
 	</para>
      </sect1>	
 
-     <sect1>   
+     <sect1 id="Hardware_control_related_constants">
 	<title>Hardware control related constants</title>
 	<para>
 	These constants describe the requested hardware access function when
@@ -1218,7 +1218,7 @@ in this page</entry>
 	</para>
      </sect1>	
 
-     <sect1>   
+     <sect1 id="Bad_block_table_constants">
 	<title>Bad block table related constants</title>
 	<para>
 	These constants describe the options used for bad block

+ 106 - 0
Documentation/DocBook/networking.tmpl

@@ -0,0 +1,106 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="LinuxNetworking">
+ <bookinfo>
+  <title>Linux Networking and Network Devices APIs</title>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+  <chapter id="netcore">
+     <title>Linux Networking</title>
+     <sect1><title>Networking Base Types</title>
+!Iinclude/linux/net.h
+     </sect1>
+     <sect1><title>Socket Buffer Functions</title>
+!Iinclude/linux/skbuff.h
+!Iinclude/net/sock.h
+!Enet/socket.c
+!Enet/core/skbuff.c
+!Enet/core/sock.c
+!Enet/core/datagram.c
+!Enet/core/stream.c
+     </sect1>
+     <sect1><title>Socket Filter</title>
+!Enet/core/filter.c
+     </sect1>
+     <sect1><title>Generic Network Statistics</title>
+!Iinclude/linux/gen_stats.h
+!Enet/core/gen_stats.c
+!Enet/core/gen_estimator.c
+     </sect1>
+     <sect1><title>SUN RPC subsystem</title>
+<!-- The !D functionality is not perfect, garbage has to be protected by comments
+!Dnet/sunrpc/sunrpc_syms.c
+-->
+!Enet/sunrpc/xdr.c
+!Enet/sunrpc/svc_xprt.c
+!Enet/sunrpc/xprt.c
+!Enet/sunrpc/sched.c
+!Enet/sunrpc/socklib.c
+!Enet/sunrpc/stats.c
+!Enet/sunrpc/rpc_pipe.c
+!Enet/sunrpc/rpcb_clnt.c
+!Enet/sunrpc/clnt.c
+     </sect1>
+  </chapter>
+
+  <chapter id="netdev">
+     <title>Network device support</title>
+     <sect1><title>Driver Support</title>
+!Enet/core/dev.c
+!Enet/ethernet/eth.c
+!Enet/sched/sch_generic.c
+!Iinclude/linux/etherdevice.h
+!Iinclude/linux/netdevice.h
+     </sect1>
+     <sect1><title>PHY Support</title>
+!Edrivers/net/phy/phy.c
+!Idrivers/net/phy/phy.c
+!Edrivers/net/phy/phy_device.c
+!Idrivers/net/phy/phy_device.c
+!Edrivers/net/phy/mdio_bus.c
+!Idrivers/net/phy/mdio_bus.c
+     </sect1>
+<!-- FIXME: Removed for now since no structured comments in source
+     <sect1><title>Wireless</title>
+X!Enet/core/wireless.c
+     </sect1>
+-->
+     <sect1><title>Synchronous PPP</title>
+!Edrivers/net/wan/syncppp.c
+     </sect1>
+  </chapter>
+
+</book>

+ 8 - 8
Documentation/DocBook/procfs-guide.tmpl

@@ -85,7 +85,7 @@
 
 
 
-  <preface>
+  <preface id="Preface">
     <title>Preface</title>
 
     <para>
@@ -230,7 +230,7 @@
 
 
 
-    <sect1>
+    <sect1 id="Creating_a_symlink">
       <title>Creating a symlink</title>
 
       <funcsynopsis>
@@ -254,7 +254,7 @@
       </para>
     </sect1>
 
-    <sect1>
+    <sect1 id="Creating_a_directory">
       <title>Creating a directory</title>
       
       <funcsynopsis>
@@ -274,7 +274,7 @@
 
 
 
-    <sect1>
+    <sect1 id="Removing_an_entry">
       <title>Removing an entry</title>
       
       <funcsynopsis>
@@ -340,7 +340,7 @@ entry->write_proc = write_proc_foo;
 
 
 
-    <sect1>
+    <sect1 id="Reading_data">
       <title>Reading data</title>
 
       <para>
@@ -448,7 +448,7 @@ entry->write_proc = write_proc_foo;
 
 
 
-    <sect1>
+    <sect1 id="Writing_data">
       <title>Writing data</title>
 
       <para>
@@ -579,7 +579,7 @@ int foo_read_func(char *page, char **start, off_t off,
 
 
 
-    <sect1>
+    <sect1 id="Modules">
       <title>Modules</title>
 
       <para>
@@ -599,7 +599,7 @@ entry->owner = THIS_MODULE;
 
 
 
-    <sect1>
+    <sect1 id="Mode_and_ownership">
       <title>Mode and ownership</title>
 
       <para>

+ 9 - 9
Documentation/DocBook/rapidio.tmpl

@@ -77,11 +77,11 @@
   <chapter id="bugs">
      <title>Known Bugs and Limitations</title>
 
-     <sect1>
+     <sect1 id="known_bugs">
      	<title>Bugs</title>
 	  <para>None. ;)</para>
      </sect1>
-     <sect1>
+     <sect1 id="Limitations">
      	<title>Limitations</title>
 	  <para>
 	    <orderedlist>
@@ -100,7 +100,7 @@
 		on devices, request/map memory region resources,
 		and manage mailboxes/doorbells.
 	</para>
-	<sect1>
+	<sect1 id="Functions">
 		<title>Functions</title>
 !Iinclude/linux/rio_drv.h
 !Edrivers/rapidio/rio-driver.c
@@ -116,23 +116,23 @@
      subsystem.
      </para>
 
-     <sect1><title>Structures</title>
+     <sect1 id="Structures"><title>Structures</title>
 !Iinclude/linux/rio.h
      </sect1>
-     <sect1><title>Enumeration and Discovery</title>
+     <sect1 id="Enumeration_and_Discovery"><title>Enumeration and Discovery</title>
 !Idrivers/rapidio/rio-scan.c
      </sect1>
-     <sect1><title>Driver functionality</title>
+     <sect1 id="Driver_functionality"><title>Driver functionality</title>
 !Idrivers/rapidio/rio.c
 !Idrivers/rapidio/rio-access.c
      </sect1>
-     <sect1><title>Device model support</title>
+     <sect1 id="Device_model_support"><title>Device model support</title>
 !Idrivers/rapidio/rio-driver.c
      </sect1>
-     <sect1><title>Sysfs support</title>
+     <sect1 id="Sysfs_support"><title>Sysfs support</title>
 !Idrivers/rapidio/rio-sysfs.c
      </sect1>
-     <sect1><title>PPC32 support</title>
+     <sect1 id="PPC32_support"><title>PPC32 support</title>
 !Iarch/powerpc/kernel/rio.c
 !Earch/powerpc/sysdev/fsl_rio.c
 !Iarch/powerpc/sysdev/fsl_rio.c

+ 16 - 5
Documentation/DocBook/s390-drivers.tmpl

@@ -59,7 +59,7 @@
    <title>Introduction</title>
   <para>
     This document describes the interfaces available for device drivers that
-    drive s390 based channel attached devices. This includes interfaces for
+    drive s390 based channel attached I/O devices. This includes interfaces for
     interaction with the hardware and interfaces for interacting with the
     common driver core. Those interfaces are provided by the s390 common I/O
     layer.
@@ -86,9 +86,10 @@
 	The ccw bus typically contains the majority of devices available to
 	a s390 system. Named after the channel command word (ccw), the basic
 	command structure used to address its devices, the ccw bus contains
-	so-called channel attached devices. They are addressed via subchannels,
-	visible on the css bus. A device driver, however, will never interact
-	with the subchannel directly, but only via the device on the ccw bus,
+	so-called channel attached devices. They are addressed via I/O
+	subchannels, visible on the css bus. A device driver for
+	channel-attached devices, however, will never interact with the
+	subchannel directly, but only via the I/O device on the ccw bus,
 	the ccw device.
   </para>
     <sect1 id="channelIO">
@@ -116,7 +117,6 @@
 !Iinclude/asm-s390/ccwdev.h
 !Edrivers/s390/cio/device.c
 !Edrivers/s390/cio/device_ops.c
-!Edrivers/s390/cio/airq.c
     </sect1>
     <sect1 id="cmf">
      <title>The channel-measurement facility</title>
@@ -147,4 +147,15 @@
    </sect1>
   </chapter>
 
+  <chapter id="genericinterfaces">
+   <title>Generic interfaces</title>
+  <para>
+	Some interfaces are available to other drivers that do not necessarily
+	have anything to do with the busses described above, but still are
+	indirectly using basic infrastructure in the common I/O layer.
+	One example is the support for adapter interrupts.
+  </para>
+!Edrivers/s390/cio/airq.c
+  </chapter>
+
 </book>

+ 1 - 1
Documentation/DocBook/scsi.tmpl

@@ -12,7 +12,7 @@
         <surname>Bottomley</surname>
         <affiliation>
           <address>
-            <email>James.Bottomley@steeleye.com</email>
+            <email>James.Bottomley@hansenpartnership.com</email>
           </address>
         </affiliation>
       </author>

+ 17 - 17
Documentation/DocBook/videobook.tmpl

@@ -170,7 +170,7 @@ int __init myradio_init(struct video_init *v)
   <para>
         The types available are
   </para>
-   <table frame="all"><title>Device Types</title>
+   <table frame="all" id="Device_Types"><title>Device Types</title>
    <tgroup cols="3" align="left">
    <tbody>
    <row>
@@ -291,7 +291,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg)
         allows the applications to find out what sort of a card they have found and
         to figure out what they want to do about it. The fields in the structure are
   </para>
-   <table frame="all"><title>struct video_capability fields</title>
+   <table frame="all" id="video_capability_fields"><title>struct video_capability fields</title>
    <tgroup cols="2" align="left">
    <tbody>
    <row>
@@ -365,7 +365,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg)
   <para>
         The video_tuner structure has the following fields
   </para>
-   <table frame="all"><title>struct video_tuner fields</title>
+   <table frame="all" id="video_tuner_fields"><title>struct video_tuner fields</title>
    <tgroup cols="2" align="left">
    <tbody>
    <row>
@@ -398,7 +398,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg)
     </tgroup>
     </table>
 
-   <table frame="all"><title>struct video_tuner flags</title>
+   <table frame="all" id="video_tuner_flags"><title>struct video_tuner flags</title>
    <tgroup cols="2" align="left">
    <tbody>
    <row>
@@ -421,7 +421,7 @@ static int radio_ioctl(struct video_device *dev, unsigned int cmd, void *arg)
     </tgroup>
     </table>
 
-   <table frame="all"><title>struct video_tuner modes</title>
+   <table frame="all" id="video_tuner_modes"><title>struct video_tuner modes</title>
    <tgroup cols="2" align="left">
    <tbody>
    <row>
@@ -572,7 +572,7 @@ static int current_volume=0;
   <para>
         Then we fill in the video_audio structure. This has the following format
   </para>
-   <table frame="all"><title>struct video_audio fields</title>
+   <table frame="all" id="video_audio_fields"><title>struct video_audio fields</title>
    <tgroup cols="2" align="left">
    <tbody>
    <row>
@@ -607,7 +607,7 @@ static int current_volume=0;
    </tgroup>
    </table>
 
-   <table frame="all"><title>struct video_audio flags</title>
+   <table frame="all" id="video_audio_flags"><title>struct video_audio flags</title>
    <tgroup cols="2" align="left">
    <tbody>
    <row>
@@ -625,7 +625,7 @@ static int current_volume=0;
    </tgroup>
    </table>
 
-   <table frame="all"><title>struct video_audio modes</title>
+   <table frame="all" id="video_audio_modes"><title>struct video_audio modes</title>
    <tgroup cols="2" align="left">
    <tbody>
    <row>
@@ -775,7 +775,7 @@ module_exit(cleanup);
   </para>
   </sect1>
   </chapter>
-  <chapter>
+  <chapter id="Video_Capture_Devices">
         <title>Video Capture Devices</title>
   <sect1 id="introvid">
   <title>Video Capture Device Types</title>
@@ -855,7 +855,7 @@ static struct video_device my_camera
         We use the extra video capability flags that did not apply to the
         radio interface. The video related flags are
   </para>
-   <table frame="all"><title>Capture Capabilities</title>
+   <table frame="all" id="Capture_Capabilities"><title>Capture Capabilities</title>
    <tgroup cols="2" align="left">
    <tbody>
    <row>
@@ -1195,7 +1195,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg)
         inputs to the video card). Our example card has a single camera input. The
         fields in the structure are
   </para>
-   <table frame="all"><title>struct video_channel fields</title>
+   <table frame="all" id="video_channel_fields"><title>struct video_channel fields</title>
    <tgroup cols="2" align="left">
    <tbody>
    <row>
@@ -1218,7 +1218,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg)
     </tbody>
     </tgroup>
     </table>
-    <table frame="all"><title>struct video_channel flags</title>
+    <table frame="all" id="video_channel_flags"><title>struct video_channel flags</title>
     <tgroup cols="2" align="left">
     <tbody>
     <row>
@@ -1229,7 +1229,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg)
     </tbody>
     </tgroup>
     </table>
-    <table frame="all"><title>struct video_channel types</title>
+    <table frame="all" id="video_channel_types"><title>struct video_channel types</title>
     <tgroup cols="2" align="left">
     <tbody>
     <row>
@@ -1242,7 +1242,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg)
     </tbody>
     </tgroup>
     </table>
-    <table frame="all"><title>struct video_channel norms</title>
+    <table frame="all" id="video_channel_norms"><title>struct video_channel norms</title>
     <tgroup cols="2" align="left">
     <tbody>
     <row>
@@ -1328,7 +1328,7 @@ static int camera_ioctl(struct video_device *dev, unsigned int cmd, void *arg)
         for every other pixel in the image. The other common formats the interface 
         defines are
   </para>
-   <table frame="all"><title>Framebuffer Encodings</title>
+   <table frame="all" id="Framebuffer_Encodings"><title>Framebuffer Encodings</title>
    <tgroup cols="2" align="left">
    <tbody>
    <row>
@@ -1466,7 +1466,7 @@ static struct video_buffer capture_fb;
         display. The video_window structure is used to describe the way the image 
         should be displayed. 
    </para>
-   <table frame="all"><title>struct video_window fields</title>
+   <table frame="all" id="video_window_fields"><title>struct video_window fields</title>
    <tgroup cols="2" align="left">
    <tbody>
    <row>
@@ -1503,7 +1503,7 @@ static struct video_buffer capture_fb;
     <para>
         Each clip is a struct video_clip which has the following fields
    </para>
-   <table frame="all"><title>video_clip fields</title>
+   <table frame="all" id="video_clip_fields"><title>video_clip fields</title>
    <tgroup cols="2" align="left">
    <tbody>
    <row>

+ 6 - 6
Documentation/DocBook/z8530book.tmpl

@@ -77,7 +77,7 @@
   </para>
   </chapter>
   
-  <chapter>
+  <chapter id="Driver_Modes">
  	<title>Driver Modes</title>
   <para>
 	The Z85230 driver layer can drive Z8530, Z85C30 and Z85230 devices
@@ -108,7 +108,7 @@
   </para>
   </chapter>
 
-  <chapter>
+  <chapter id="Using_the_Z85230_driver">
  	<title>Using the Z85230 driver</title>
   <para>
 	The Z85230 driver provides the back end interface to your board. To
@@ -174,7 +174,7 @@
   </para>
   </chapter>
 
-  <chapter>
+  <chapter id="Attaching_Network_Interfaces">
  	<title>Attaching Network Interfaces</title>
   <para>
 	If you wish to use the network interface facilities of the driver,
@@ -216,7 +216,7 @@
   </para>
   </chapter>
 
-  <chapter>
+  <chapter id="Configuring_And_Activating_The_Port">
  	<title>Configuring And Activating The Port</title>
   <para>
 	The Z85230 driver provides helper functions and tables to load the
@@ -300,7 +300,7 @@
   </para>
   </chapter>
 
-  <chapter>
+  <chapter id="Network_Layer_Functions">
  	<title>Network Layer Functions</title>
   <para>
 	The Z8530 layer provides functions to queue packets for
@@ -327,7 +327,7 @@
   </para>
   </chapter>
 
-  <chapter>
+  <chapter id="Porting_The_Z8530_Driver">
      <title>Porting The Z8530 Driver</title>
   <para>
 	The Z8530 driver is written to be portable. In DMA mode it makes

+ 1 - 1
Documentation/RCU/NMI-RCU.txt

@@ -25,7 +25,7 @@ the NMI handler to take the default machine-specific action.
 This nmi_callback variable is a global function pointer to the current
 NMI handler.
 
-	fastcall void do_nmi(struct pt_regs * regs, long error_code)
+	void do_nmi(struct pt_regs * regs, long error_code)
 	{
 		int cpu;
 

+ 493 - 0
Documentation/Smack.txt

@@ -0,0 +1,493 @@
+
+
+    "Good for you, you've decided to clean the elevator!"
+    - The Elevator, from Dark Star
+
+Smack is the Simplified Mandatory Access Control Kernel.
+Smack is a kernel based implementation of mandatory access
+control that includes simplicity in its primary design goals.
+
+Smack is not the only Mandatory Access Control scheme
+available for Linux. Those new to Mandatory Access Control
+are encouraged to compare Smack with the other mechanisms
+available to determine which is best suited to the problem
+at hand.
+
+Smack consists of three major components:
+    - The kernel
+    - A start-up script and a few modified applications
+    - Configuration data
+
+The kernel component of Smack is implemented as a Linux
+Security Modules (LSM) module. It requires netlabel and
+works best with file systems that support extended attributes,
+although xattr support is not strictly required.
+It is safe to run a Smack kernel under a "vanilla" distribution.
+Smack kernels use the CIPSO IP option. Some network
+configurations are intolerant of IP options and can impede
+access to systems that use them as Smack does.
+
+The startup script etc-init.d-smack should be installed
+in /etc/init.d/smack and should be invoked early in the
+start-up process. On Fedora rc5.d/S02smack is recommended.
+This script ensures that certain devices have the correct
+Smack attributes and loads the Smack configuration if
+any is defined. This script invokes two programs that
+ensure configuration data is properly formatted. These
+programs are /usr/sbin/smackload and /usr/sbin/smackcipso.
+The system will run just fine without these programs,
+but it will be difficult to set access rules properly.
+
+A version of "ls" that provides a "-M" option to display
+Smack labels on long listing is available.
+
+A hacked version of sshd that allows network logins by users
+with specific Smack labels is available. This version does
+not work for scp. You must set the /etc/ssh/sshd_config
+line:
+   UsePrivilegeSeparation no
+
+The format of /etc/smack/usr is:
+
+   username smack
+
+In keeping with the intent of Smack, configuration data is
+minimal and not strictly required. The most important
+configuration step is mounting the smackfs pseudo filesystem.
+
+Add this line to /etc/fstab:
+
+    smackfs /smack smackfs smackfsdef=* 0 0
+
+and create the /smack directory for mounting.
+
+Smack uses extended attributes (xattrs) to store file labels.
+The command to set a Smack label on a file is:
+
+    # attr -S -s SMACK64 -V "value" path
+
+NOTE: Smack labels are limited to 23 characters. The attr command
+      does not enforce this restriction and can be used to set
+      invalid Smack labels on files.
+
+If you don't do anything special all users will get the floor ("_")
+label when they log in. If you do want to log in via the hacked ssh
+at other labels use the attr command to set the smack value on the
+home directory and its contents.
+
+You can add access rules in /etc/smack/accesses. They take the form:
+
+    subjectlabel objectlabel access
+
+access is a combination of the letters rwxa which specify the
+kind of access permitted a subject with subjectlabel on an
+object with objectlabel. If there is no rule no access is allowed.
+
+A process can see the smack label it is running with by
+reading /proc/self/attr/current. A privileged process can
+set the process smack by writing there.
+
+Look for additional programs on http://schaufler-ca.com
+
+From the Smack Whitepaper:
+
+The Simplified Mandatory Access Control Kernel
+
+Casey Schaufler
+casey@schaufler-ca.com
+
+Mandatory Access Control
+
+Computer systems employ a variety of schemes to constrain how information is
+shared among the people and services using the machine. Some of these schemes
+allow the program or user to decide what other programs or users are allowed
+access to pieces of data. These schemes are called discretionary access
+control mechanisms because the access control is specified at the discretion
+of the user. Other schemes do not leave the decision regarding what a user or
+program can access up to users or programs. These schemes are called mandatory
+access control mechanisms because you don't have a choice regarding the users
+or programs that have access to pieces of data.
+
+Bell & LaPadula
+
+From the middle of the 1980's until the turn of the century Mandatory Access
+Control (MAC) was very closely associated with the Bell & LaPadula security
+model, a mathematical description of the United States Department of Defense
+policy for marking paper documents. MAC in this form enjoyed a following
+within the Capital Beltway and Scandinavian supercomputer centers but was
+often cited as failing to address general needs.
+
+Domain Type Enforcement
+
+Around the turn of the century Domain Type Enforcement (DTE) became popular.
+This scheme organizes users, programs, and data into domains that are
+protected from each other. This scheme has been widely deployed as a component
+of popular Linux distributions. The administrative overhead required to
+maintain this scheme and the detailed understanding of the whole system
+necessary to provide a secure domain mapping leads to the scheme being
+disabled or used in limited ways in the majority of cases.
+
+Smack
+
+Smack is a Mandatory Access Control mechanism designed to provide useful MAC
+while avoiding the pitfalls of its predecessors. The limitations of Bell &
+LaPadula are addressed by providing a scheme whereby access can be controlled
+according to the requirements of the system and its purpose rather than those
+imposed by an arcane government policy. The complexity of Domain Type
+Enforcement is avoided by defining access controls in terms of the access
+modes already in use.
+
+Smack Terminology
+
+The jargon used to talk about Smack will be familiar to those who have dealt
+with other MAC systems and shouldn't be too difficult for the uninitiated to
+pick up. There are four terms that are used in a specific way and that are
+especially important:
+
+	Subject: A subject is an active entity on the computer system.
+	On Smack a subject is a task, which is in turn the basic unit
+	of execution.
+
+	Object: An object is a passive entity on the computer system.
+	On Smack files of all types, IPC, and tasks can be objects.
+
+	Access: Any attempt by a subject to put information into or get
+	information from an object is an access.
+
+	Label: Data that identifies the Mandatory Access Control
+	characteristics of a subject or an object.
+
+These definitions are consistent with the traditional use in the security
+community. There are also some terms from Linux that are likely to crop up:
+
+	Capability: A task that possesses a capability has permission to
+	violate an aspect of the system security policy, as identified by
+	the specific capability. A task that possesses one or more
+	capabilities is a privileged task, whereas a task with no
+	capabilities is an unprivileged task.
+
+	Privilege: A task that is allowed to violate the system security
+	policy is said to have privilege. As of this writing a task can
+	have privilege either by possessing capabilities or by having an
+	effective user of root.
+
+Smack Basics
+
+Smack is an extension to a Linux system. It enforces additional restrictions
+on what subjects can access which objects, based on the labels attached to
+each of the subject and the object.
+
+Labels
+
+Smack labels are ASCII character strings, one to twenty-three characters in
+length. Single character labels using special characters, that being anything
+other than a letter or digit, are reserved for use by the Smack development
+team. Smack labels are unstructured, case sensitive, and the only operation
+ever performed on them is comparison for equality. Smack labels cannot
+contain unprintable characters or the "/" (slash) character.
+
+There are some predefined labels:
+
+	_ Pronounced "floor", a single underscore character.
+	^ Pronounced "hat", a single circumflex character.
+	* Pronounced "star", a single asterisk character.
+	? Pronounced "huh", a single question mark character.
+
+Every task on a Smack system is assigned a label. System tasks, such as
+init(8) and system daemons, are run with the floor ("_") label. User tasks
+are assigned labels according to the specification found in the
+/etc/smack/user configuration file.
+
+Access Rules
+
+Smack uses the traditional access modes of Linux. These modes are read,
+execute, write, and occasionally append. There are a few cases where the
+access mode may not be obvious. These include:
+
+	Signals: A signal is a write operation from the subject task to
+	the object task.
+	Internet Domain IPC: Transmission of a packet is considered a
+	write operation from the source task to the destination task.
+
+Smack restricts access based on the label attached to a subject and the label
+attached to the object it is trying to access. The rules enforced are, in
+order:
+
+	1. Any access requested by a task labeled "*" is denied.
+	2. A read or execute access requested by a task labeled "^"
+	   is permitted.
+	3. A read or execute access requested on an object labeled "_"
+	   is permitted.
+	4. Any access requested on an object labeled "*" is permitted.
+	5. Any access requested by a task on an object with the same
+	   label is permitted.
+	6. Any access requested that is explicitly defined in the loaded
+	   rule set is permitted.
+	7. Any other access is denied.
+
+Smack Access Rules
+
+With the isolation provided by Smack access separation is simple. There are
+many interesting cases where limited access by subjects to objects with
+different labels is desired. One example is the familiar spy model of
+sensitivity, where a scientist working on a highly classified project would be
+able to read documents of lower classifications and anything she writes will
+be "born" highly classified. To accommodate such schemes Smack includes a
+mechanism for specifying rules allowing access between labels.
+
+Access Rule Format
+
+The format of an access rule is:
+
+	subject-label object-label access
+
+Where subject-label is the Smack label of the task, object-label is the Smack
+label of the thing being accessed, and access is a string specifying the sort
+of access allowed. The Smack labels are limited to 23 characters. The access
+specification is searched for letters that describe access modes:
+
+	a: indicates that append access should be granted.
+	r: indicates that read access should be granted.
+	w: indicates that write access should be granted.
+	x: indicates that execute access should be granted.
+
+Uppercase values for the specification letters are allowed as well.
+Access mode specifications can be in any order. Examples of acceptable rules
+are:
+
+	TopSecret Secret  rx
+	Secret    Unclass R
+	Manager   Game    x
+	User      HR      w
+	New       Old     rRrRr
+	Closed    Off     -
+
+Examples of unacceptable rules are:
+
+	Top Secret Secret     rx
+	Ace        Ace        r
+	Odd        spells     waxbeans
+
+Spaces are not allowed in labels. Since a subject always has access to files
+with the same label specifying a rule for that case is pointless. Only
+valid letters (rwxaRWXA) and the dash ('-') character are allowed in
+access specifications. The dash is a placeholder, so "a-r" is the same
+as "ar". A lone dash is used to specify that no access should be allowed.
+
+Applying Access Rules
+
+The developers of Linux rarely define new sorts of things, usually importing
+schemes and concepts from other systems. Most often, the other systems are
+variants of Unix. Unix has many endearing properties, but consistency of
+access control models is not one of them. Smack strives to treat accesses as
+uniformly as is sensible while keeping with the spirit of the underlying
+mechanism.
+
+File system objects including files, directories, named pipes, symbolic links,
+and devices require access permissions that closely match those used by mode
+bit access. To open a file for reading read access is required on the file. To
+search a directory requires execute access. Creating a file with write access
+requires both read and write access on the containing directory. Deleting a
+file requires read and write access to the file and to the containing
+directory. It is possible that a user may be able to see that a file exists
+but not any of its attributes by the circumstance of having read access to the
+containing directory but not to the differently labeled file. This is an
+artifact of the file name being data in the directory, not a part of the file.
+
+IPC objects, message queues, semaphore sets, and memory segments exist in flat
+namespaces and access requests are only required to match the object in
+question.
+
+Process objects reflect tasks on the system and the Smack label used to access
+them is the same Smack label that the task would use for its own access
+attempts. Sending a signal via the kill() system call is a write operation
+from the signaler to the recipient. Debugging a process requires both reading
+and writing. Creating a new task is an internal operation that results in two
+tasks with identical Smack labels and requires no access checks.
+
+Sockets are data structures attached to processes and sending a packet from
+one process to another requires that the sender have write access to the
+receiver. The receiver is not required to have read access to the sender.
+
+Setting Access Rules
+
+The configuration file /etc/smack/accesses contains the rules to be set at
+system startup. The contents are written to the special file /smack/load.
+Rules can be written to /smack/load at any time and take effect immediately.
+For any pair of subject and object labels there can be only one rule, with the
+most recently specified overriding any earlier specification.
+
+The program smackload is provided to ensure data is formatted
+properly when written to /smack/load. This program reads lines
+of the form
+
+    subjectlabel objectlabel mode.
+
+Task Attribute
+
+The Smack label of a process can be read from /proc/<pid>/attr/current. A
+process can read its own Smack label from /proc/self/attr/current. A
+privileged process can change its own Smack label by writing to
+/proc/self/attr/current but not the label of another process.
+
+File Attribute
+
+The Smack label of a filesystem object is stored as an extended attribute
+named SMACK64 on the file. This attribute is in the security namespace. It can
+only be changed by a process with privilege.
+
+Privilege
+
+A process with CAP_MAC_OVERRIDE is privileged.
+
+Smack Networking
+
+As mentioned before, Smack enforces access control on network protocol
+transmissions. Every packet sent by a Smack process is tagged with its Smack
+label. This is done by adding a CIPSO tag to the header of the IP packet. Each
+packet received is expected to have a CIPSO tag that identifies the label and
+if it lacks such a tag the network ambient label is assumed. Before the packet
+is delivered a check is made to determine that a subject with the label on the
+packet has write access to the receiving process and if that is not the case
+the packet is dropped.
+
+CIPSO Configuration
+
+It is normally unnecessary to specify the CIPSO configuration. The default
+values used by the system handle all internal cases. Smack will compose CIPSO
+label values to match the Smack labels being used without administrative
+intervention. Unlabeled packets that come into the system will be given the
+ambient label.
+
+Smack requires configuration in the case where packets from a system that is
+not Smack but speaks CIPSO may be encountered. Usually this will be a Trusted
+Solaris system, but there are other, less widely deployed systems out there.
+CIPSO provides 3 important values, a Domain Of Interpretation (DOI), a level,
+and a category set with each packet. The DOI is intended to identify a group
+of systems that use compatible labeling schemes, and the DOI specified on the
+smack system must match that of the remote system or packets will be
+discarded. The DOI is 3 by default. The value can be read from /smack/doi and
+can be changed by writing to /smack/doi.
+
+The level and category set are mapped to a Smack label as defined in
+/etc/smack/cipso.
+
+A Smack/CIPSO mapping has the form:
+
+	smack level [category [category]*]
+
+Smack does not expect the level or category sets to be related in any
+particular way and does not assume or assign accesses based on them. Some
+examples of mappings:
+
+	TopSecret 7
+	TS:A,B    7 1 2
+	SecBDE    5 2 4 6
+	RAFTERS   7 12 26
+
+The ":" and "," characters are permitted in a Smack label but have no special
+meaning.
+
+The mapping of Smack labels to CIPSO values is defined by writing to
+/smack/cipso. Again, the format of data written to this special file
+is highly restrictive, so the program smackcipso is provided to
+ensure the writes are done properly. This program takes mappings
+on the standard input and sends them to /smack/cipso properly.
+
+In addition to explicit mappings Smack supports direct CIPSO mappings. One
+CIPSO level is used to indicate that the category set passed in the packet is
+in fact an encoding of the Smack label. The level used is 250 by default. The
+value can be read from /smack/direct and changed by writing to /smack/direct.
+
+Socket Attributes
+
+There are two attributes that are associated with sockets. These attributes
+can only be set by privileged tasks, but any task can read them for their own
+sockets.
+
+	SMACK64IPIN: The Smack label of the task object. A privileged
+	program that will enforce policy may set this to the star label.
+
+	SMACK64IPOUT: The Smack label transmitted with outgoing packets.
+	A privileged program may set this to match the label of another
+	task with which it hopes to communicate.
+
+Writing Applications for Smack
+
+There are three sorts of applications that will run on a Smack system. How an
+application interacts with Smack will determine what it will have to do to
+work properly under Smack.
+
+Smack Ignorant Applications
+
+By far the majority of applications have no reason whatever to care about the
+unique properties of Smack. Since invoking a program has no impact on the
+Smack label associated with the process the only concern likely to arise is
+whether the process has execute access to the program.
+
+Smack Relevant Applications
+
+Some programs can be improved by teaching them about Smack, but do not make
+any security decisions themselves. The utility ls(1) is one example of such a
+program.
+
+Smack Enforcing Applications
+
+These are special programs that not only know about Smack, but participate in
+the enforcement of system policy. In most cases these are the programs that
+set up user sessions. There are also network services that provide information
+to processes running with various labels.
+
+File System Interfaces
+
+Smack maintains labels on file system objects using extended attributes. The
+Smack label of a file, directory, or other file system object can be obtained
+using getxattr(2).
+
+	len = getxattr("/", "security.SMACK64", value, sizeof (value));
+
+will put the Smack label of the root directory into value. A privileged
+process can set the Smack label of a file system object with setxattr(2).
+
+	len = strlen("Rubble");
+	rc = setxattr("/foo", "security.SMACK64", "Rubble", len, 0);
+
+will set the Smack label of /foo to "Rubble" if the program has appropriate
+privilege.
+
+Socket Interfaces
+
+The socket attributes can be read using fgetxattr(2).
+
+A privileged process can set the Smack label of outgoing packets with
+fsetxattr(2).
+
+	len = strlen("Rubble");
+	rc = fsetxattr(fd, "security.SMACK64IPOUT", "Rubble", len, 0);
+
+will set the Smack label "Rubble" on packets going out from the socket if the
+program has appropriate privilege.
+
+	rc = fsetxattr(fd, "security.SMACK64IPIN", "*", strlen("*"), 0);
+
+will set the Smack label "*" as the object label against which incoming
+packets will be checked if the program has appropriate privilege.
+
+Administration
+
+Smack supports some mount options:
+
+	smackfsdef=label: specifies the label to give files that lack
+	the Smack label extended attribute.
+
+	smackfsroot=label: specifies the label to assign the root of the
+	file system if it lacks the Smack extended attribute.
+
+	smackfshat=label: specifies a label that must have read access to
+	all labels set on the filesystem. Not yet enforced.
+
+	smackfsfloor=label: specifies a label to which all labels set on the
+	filesystem must have read access. Not yet enforced.
+
+These mount options apply to all file system types.
+

+ 5 - 11
Documentation/SubmitChecklist

@@ -20,7 +20,11 @@ kernel patches.
 4: ppc64 is a good architecture for cross-compilation checking because it
    tends to use `unsigned long' for 64-bit quantities.
 
-5: Matches kernel coding style(!)
+5: Check your patch for general style as detailed in
+   Documentation/CodingStyle.  Check for trivial violations with the
+   patch style checker prior to submission (scripts/checkpatch.pl).
+   You should be able to justify all violations that remain in
+   your patch.
 
 6: Any new or modified CONFIG options don't muck up the config menu.
 
@@ -79,13 +83,3 @@ kernel patches.
 23: Tested after it has been merged into the -mm patchset to make sure
     that it still works with all of the other queued patches and various
     changes in the VM, VFS, and other subsystems.
-
-24: Avoid whitespace damage such as indenting with spaces or whitespace
-    at the end of lines.  You can test this by feeding the patch to
-    "git apply --check --whitespace=error-all"
-
-25: Check your patch for general style as detailed in
-    Documentation/CodingStyle.  Check for trivial violations with the
-    patch style checker prior to submission (scripts/checkpatch.pl).
-    You should be able to justify all violations that remain in
-    your patch.

+ 3 - 3
Documentation/accounting/getdelays.c

@@ -168,7 +168,7 @@ int get_family_id(int sd)
 		char buf[256];
 	} ans;
 
-	int id, rc;
+	int id = 0, rc;
 	struct nlattr *na;
 	int rep_len;
 
@@ -209,7 +209,7 @@ void print_delayacct(struct taskstats *t)
 void task_context_switch_counts(struct taskstats *t)
 {
 	printf("\n\nTask   %15s%15s\n"
-	       "       %15lu%15lu\n",
+	       "       %15llu%15llu\n",
 	       "voluntary", "nonvoluntary",
 	       t->nvcsw, t->nivcsw);
 }
@@ -399,7 +399,7 @@ int main(int argc, char *argv[])
 			goto done;
 		}
 
-		PRINTF("nlmsghdr size=%d, nlmsg_len=%d, rep_len=%d\n",
+		PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n",
 		       sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
 
 

+ 15 - 0
Documentation/acpi/dsdt-override.txt

@@ -0,0 +1,15 @@
+Linux supports two methods of overriding the BIOS DSDT:
+
+CONFIG_ACPI_CUSTOM_DSDT builds the image into the kernel.
+
+CONFIG_ACPI_CUSTOM_DSDT_INITRD adds the image to the initrd.
+
+When to use these methods is described in detail on the
+Linux/ACPI home page:
+http://www.lesswatts.org/projects/acpi/overridingDSDT.php
+
+Note that if both options are used, the DSDT supplied
+by the INITRD method takes precedence.
+
+Documentation/acpi/initramfs-add-dsdt.sh is provided for convenience
+for use with the CONFIG_ACPI_CUSTOM_DSDT_INITRD method.

+ 43 - 0
Documentation/acpi/initramfs-add-dsdt.sh

@@ -0,0 +1,43 @@
+#!/bin/bash
+# Adds a DSDT file to the initrd (if it's an initramfs)
+# first argument is the name of archive
+# second argument is the name of the file to add
+# The file will be copied as /DSDT.aml
+
+# 20060126: fix "Premature end of file" with some old cpio (Roland Robic)
+# 20060205: this time it should really work
+
+# check the arguments
+if [ $# -ne 2 ]; then
+	program_name=$(basename $0)
+	echo "\
+$program_name: too few arguments
+Usage: $program_name initrd-name.img DSDT-to-add.aml
+Adds a DSDT file to an initrd (in initramfs format)
+
+  initrd-name.img: filename of the initrd in initramfs format
+  DSDT-to-add.aml: filename of the DSDT file to add
+  " 1>&2
+    exit 1
+fi
+
+# we should check it's an initramfs
+
+tempcpio=$(mktemp -d)
+# cleanup on exit, hangup, interrupt, quit, termination
+trap 'rm -rf $tempcpio' 0 1 2 3 15
+
+# extract the archive
+gunzip -c "$1" > "$tempcpio"/initramfs.cpio || exit 1
+
+# copy the DSDT file at the root of the directory so that we can call it "/DSDT.aml"
+cp -f "$2" "$tempcpio"/DSDT.aml
+
+# add the file
+cd "$tempcpio"
+(echo DSDT.aml | cpio --quiet -H newc -o -A -O "$tempcpio"/initramfs.cpio) || exit 1
+cd "$OLDPWD"
+
+# re-compress the archive
+gzip -c "$tempcpio"/initramfs.cpio > "$1"
+

+ 26 - 0
Documentation/acpi/method-tracing.txt

@@ -0,0 +1,26 @@
+/sys/module/acpi/parameters/:
+
+trace_method_name
+	The AML method name that the user wants to trace
+
+trace_debug_layer
+	The temporary debug_layer used when tracing the method.
+	Using 0xffffffff by default if it is 0.
+
+trace_debug_level
+	The temporary debug_level used when tracing the method.
+	Using 0x00ffffff by default if it is 0.
+
+trace_state
+	The status of the tracing feature.
+
+	"enabled" means this feature is enabled
+	and the AML method is traced every time it's executed.
+
+	"1" means this feature is enabled and the AML method
+	will only be traced during the next execution.
+
+	"disabled" means this feature is disabled.
+	Users can enable/disable this debug tracing feature by
+	"echo string > /sys/module/acpi/parameters/trace_state".
+	"string" should be one of "enable", "disable" and "1".

+ 2 - 0
Documentation/aoe/mkdevs.sh

@@ -29,6 +29,8 @@ rm -f $dir/interfaces
 mknod -m 0200 $dir/interfaces c $MAJOR 4
 rm -f $dir/revalidate
 mknod -m 0200 $dir/revalidate c $MAJOR 5
+rm -f $dir/flush
+mknod -m 0200 $dir/flush c $MAJOR 6
 
 export n_partitions
 mkshelf=`echo $0 | sed 's!mkdevs!mkshelf!'`

+ 4 - 1
Documentation/aoe/udev-install.sh

@@ -23,7 +23,10 @@ fi
 # /etc/udev/rules.d
 #
 rules_d="`sed -n '/^udev_rules=/{ s!udev_rules=!!; s!\"!!g; p; }' $conf`"
-if test -z "$rules_d" || test ! -d "$rules_d"; then
+if test -z "$rules_d" ; then
+	rules_d=/etc/udev/rules.d
+fi
+if test ! -d "$rules_d"; then
 	echo "$me Error: cannot find udev rules directory" 1>&2
 	exit 1
 fi

+ 9 - 7
Documentation/aoe/udev.txt

@@ -1,6 +1,7 @@
 # These rules tell udev what device nodes to create for aoe support.
-# They may be installed along the following lines (adjusted to what
-# you see on your system).
+# They may be installed along the following lines.  Check the section
+# 8 udev manpage to see whether your udev supports SUBSYSTEM, and
+# whether it uses one or two equal signs for SUBSYSTEM and KERNEL.
 # 
 #   ecashin@makki ~$ su
 #   Password:
@@ -15,10 +16,11 @@
 #  
 
 # aoe char devices
-SUBSYSTEM="aoe", KERNEL="discover",	NAME="etherd/%k", GROUP="disk", MODE="0220"
-SUBSYSTEM="aoe", KERNEL="err",		NAME="etherd/%k", GROUP="disk", MODE="0440"
-SUBSYSTEM="aoe", KERNEL="interfaces",	NAME="etherd/%k", GROUP="disk", MODE="0220"
-SUBSYSTEM="aoe", KERNEL="revalidate",	NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="discover",	NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="err",	NAME="etherd/%k", GROUP="disk", MODE="0440"
+SUBSYSTEM=="aoe", KERNEL=="interfaces",	NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="revalidate",	NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="flush",	NAME="etherd/%k", GROUP="disk", MODE="0220"
 
 # aoe block devices     
-KERNEL="etherd*",       NAME="%k", GROUP="disk"
+KERNEL=="etherd*",       NAME="%k", GROUP="disk"

+ 11 - 11
Documentation/cgroups.txt

@@ -456,7 +456,7 @@ methods are create/destroy. Any others that are null are presumed to
 be successful no-ops.
 
 struct cgroup_subsys_state *create(struct cgroup *cont)
-LL=cgroup_mutex
+(cgroup_mutex held by caller)
 
 Called to create a subsystem state object for a cgroup. The
 subsystem should allocate its subsystem state object for the passed
@@ -471,14 +471,19 @@ it's the root of the hierarchy) and may be an appropriate place for
 initialization code.
 
 void destroy(struct cgroup *cont)
-LL=cgroup_mutex
+(cgroup_mutex held by caller)
 
-The cgroup system is about to destroy the passed cgroup; the
-subsystem should do any necessary cleanup
+The cgroup system is about to destroy the passed cgroup; the subsystem
+should do any necessary cleanup and free its subsystem state
+object. By the time this method is called, the cgroup has already been
+unlinked from the file system and from the child list of its parent;
+cgroup->parent is still valid. (Note - can also be called for a
+newly-created cgroup if an error occurs after this subsystem's
+create() method has been called for the new cgroup).
 
 int can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
 	       struct task_struct *task)
-LL=cgroup_mutex
+(cgroup_mutex held by caller)
 
 Called prior to moving a task into a cgroup; if the subsystem
 returns an error, this will abort the attach operation.  If a NULL
@@ -489,25 +494,20 @@ remain valid while the caller holds cgroup_mutex.
 
 void attach(struct cgroup_subsys *ss, struct cgroup *cont,
 	    struct cgroup *old_cont, struct task_struct *task)
-LL=cgroup_mutex
-
 
 Called after the task has been attached to the cgroup, to allow any
 post-attachment activity that requires memory allocations or blocking.
 
 void fork(struct cgroup_subsys *ss, struct task_struct *task)
-LL=callback_mutex, maybe read_lock(tasklist_lock)
 
 Called when a task is forked into a cgroup. Also called during
 registration for all existing tasks.
 
 void exit(struct cgroup_subsys *ss, struct task_struct *task)
-LL=callback_mutex
 
 Called during task exit
 
 int populate(struct cgroup_subsys *ss, struct cgroup *cont)
-LL=none
 
 Called after creation of a cgroup to allow a subsystem to populate
 the cgroup directory with file entries.  The subsystem should make
@@ -524,7 +524,7 @@ example in cpusets, no task may attach before 'cpus' and 'mems' are set
 up.
 
 void bind(struct cgroup_subsys *ss, struct cgroup *root)
-LL=callback_mutex
+(cgroup_mutex held by caller)
 
 Called when a cgroup subsystem is rebound to a different hierarchy
 and root cgroup. Currently this will only involve movement between

+ 279 - 0
Documentation/controllers/memory.txt

@@ -0,0 +1,279 @@
+Memory Controller
+
+Salient features
+
+a. Enable control of both RSS (mapped) and Page Cache (unmapped) pages
+b. The infrastructure allows easy addition of other types of memory to control
+c. Provides *zero overhead* for non memory controller users
+d. Provides a double LRU: global memory pressure causes reclaim from the
+   global LRU; a cgroup on hitting a limit, reclaims from the per
+   cgroup LRU
+
+NOTE: Swap Cache (unmapped) is not accounted now.
+
+Benefits and Purpose of the memory controller
+
+The memory controller isolates the memory behaviour of a group of tasks
+from the rest of the system. The article on LWN [12] mentions some probable
+uses of the memory controller. The memory controller can be used to
+
+a. Isolate an application or a group of applications
+   Memory hungry applications can be isolated and limited to a smaller
+   amount of memory.
+b. Create a cgroup with limited amount of memory, this can be used
+   as a good alternative to booting with mem=XXXX.
+c. Virtualization solutions can control the amount of memory they want
+   to assign to a virtual machine instance.
+d. A CD/DVD burner could control the amount of memory used by the
+   rest of the system to ensure that burning does not fail due to lack
+   of available memory.
+e. There are several other use cases, find one or use the controller just
+   for fun (to learn and hack on the VM subsystem).
+
+1. History
+
+The memory controller has a long history. A request for comments for the memory
+controller was posted by Balbir Singh [1]. At the time the RFC was posted
+there were several implementations for memory control. The goal of the
+RFC was to build consensus and agreement for the minimal features required
+for memory control. The first RSS controller was posted by Balbir Singh[2]
+in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the
+RSS controller. At OLS, at the resource management BoF, everyone suggested
+that we handle both page cache and RSS together. Another request was raised
+to allow user space handling of OOM. The current memory controller is
+at version 6; it combines both mapped (RSS) and unmapped Page
+Cache Control [11].
+
+2. Memory Control
+
+Memory is a unique resource in the sense that it is present in a limited
+amount. If a task requires a lot of CPU processing, the task can spread
+its processing over a period of hours, days, months or years, but with
+memory, the same physical memory needs to be reused to accomplish the task.
+
+The memory controller implementation has been divided into phases. These
+are:
+
+1. Memory controller
+2. mlock(2) controller
+3. Kernel user memory accounting and slab control
+4. user mappings length controller
+
+The memory controller is the first controller developed.
+
+2.1. Design
+
+The core of the design is a counter called the res_counter. The res_counter
+tracks the current memory usage and limit of the group of processes associated
+with the controller. Each cgroup has a memory controller specific data
+structure (mem_cgroup) associated with it.
+
+2.2. Accounting
+
+		+--------------------+
+		|  mem_cgroup        |
+		|  (res_counter)     |
+		+--------------------+
+		 /            ^      \
+		/             |       \
+           +---------------+  |        +---------------+
+           | mm_struct     |  |....    | mm_struct     |
+           |               |  |        |               |
+           +---------------+  |        +---------------+
+                              |
+                              + --------------+
+                                              |
+           +---------------+           +------+--------+
+           | page          +---------->  page_cgroup   |
+           |               |           |               |
+           +---------------+           +---------------+
+
+             (Figure 1: Hierarchy of Accounting)
+
+
+Figure 1 shows the important aspects of the controller
+
+1. Accounting happens per cgroup
+2. Each mm_struct knows about which cgroup it belongs to
+3. Each page has a pointer to the page_cgroup, which in turn knows the
+   cgroup it belongs to
+
+The accounting is done as follows: mem_cgroup_charge() is invoked to setup
+the necessary data structures and check if the cgroup that is being charged
+is over its limit. If it is then reclaim is invoked on the cgroup.
+More details can be found in the reclaim section of this document.
+If everything goes well, a page meta-data-structure called page_cgroup is
+allocated and associated with the page.  This routine also adds the page to
+the per cgroup LRU.
+
+2.2.1 Accounting details
+
+All mapped pages (RSS) and unmapped user pages (Page Cache) are accounted.
+RSS pages are accounted at the time of page_add_*_rmap() unless they've already
+been accounted for earlier. A file page will be accounted for as Page Cache;
+when it's mapped into the page tables of a process, duplicate accounting is
+carefully avoided. Page Cache pages are accounted at the time of
+add_to_page_cache(). The corresponding routines that remove a page from the
+page tables or remove a page from Page Cache are used to decrement the
+accounting counters of the cgroup.
+
+2.3 Shared Page Accounting
+
+Shared pages are accounted on the basis of the first touch approach. The
+cgroup that first touches a page is accounted for the page. The principle
+behind this approach is that a cgroup that aggressively uses a shared
+page will eventually get charged for it (once it is uncharged from
+the cgroup that brought it in -- this will happen on memory pressure).
+
+2.4 Reclaim
+
+Each cgroup maintains a per cgroup LRU that consists of an active
+and inactive list. When a cgroup goes over its limit, we first try
+to reclaim memory from the cgroup so as to make space for the new
+pages that the cgroup has touched. If the reclaim is unsuccessful,
+an OOM routine is invoked to select and kill the bulkiest task in the
+cgroup.
+
+The reclaim algorithm has not been modified for cgroups, except that
+pages that are selected for reclaiming come from the per cgroup LRU
+list.
+
+2.5 Locking
+
+The memory controller uses the following hierarchy
+
+1. zone->lru_lock is used for selecting pages to be isolated
+2. mem->per_zone->lru_lock protects the per cgroup LRU (per zone)
+3. lock_page_cgroup() is used to protect page->page_cgroup
+
+3. User Interface
+
+0. Configuration
+
+a. Enable CONFIG_CGROUPS
+b. Enable CONFIG_RESOURCE_COUNTERS
+c. Enable CONFIG_CGROUP_MEM_CONT
+
+1. Prepare the cgroups
+# mkdir -p /cgroups
+# mount -t cgroup none /cgroups -o memory
+
+2. Make the new group and move bash into it
+# mkdir /cgroups/0
+# echo $$ >  /cgroups/0/tasks
+
+Since now we're in the 0 cgroup,
+We can alter the memory limit:
+# echo -n 4M > /cgroups/0/memory.limit_in_bytes
+
+NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
+mega or gigabytes.
+
+# cat /cgroups/0/memory.limit_in_bytes
+4194304 Bytes
+
+NOTE: The interface has now changed to display the usage in bytes
+instead of pages
+
+We can check the usage:
+# cat /cgroups/0/memory.usage_in_bytes
+1216512 Bytes
+
+A successful write to this file does not guarantee a successful set of
+this limit to the value written into the file.  This can be due to a
+number of factors, such as rounding up to page boundaries or the total
+availability of memory on the system.  The user is required to re-read
+this file after a write to guarantee the value committed by the kernel.
+
+# echo -n 1 > memory.limit_in_bytes
+# cat memory.limit_in_bytes
+4096 Bytes
+
+The memory.failcnt field gives the number of times that the cgroup limit was
+exceeded.
+
+The memory.stat file gives accounting information. Now, the number of
+caches, RSS and Active pages/Inactive pages are shown.
+
+The memory.force_empty gives an interface to drop *all* charges by force.
+
+# echo -n 1 > memory.force_empty
+
+will drop all charges in cgroup. Currently, this is maintained for test.
+
+4. Testing
+
+Balbir posted lmbench, AIM9, LTP and vmmstress results [10] and [11].
+Apart from that v6 has been tested with several applications and regular
+daily use. The controller has also been tested on the PPC64, x86_64 and
+UML platforms.
+
+4.1 Troubleshooting
+
+Sometimes a user might find that the application under a cgroup is
+terminated. There are several causes for this:
+
+1. The cgroup limit is too low (just too low to do anything useful)
+2. The user is using anonymous memory and swap is turned off or too low
+
+A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of
+some of the pages cached in the cgroup (page cache pages).
+
+4.2 Task migration
+
+When a task migrates from one cgroup to another, its charge is not
+carried forward. The pages allocated from the original cgroup still
+remain charged to it; the charge is dropped when the page is freed or
+reclaimed.
+
+4.3 Removing a cgroup
+
+A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
+cgroup might have some charge associated with it, even though all
+tasks have migrated away from it. Such charges are automatically dropped at
+rmdir() if there are no tasks.
+
+4.4 Choosing what to account  -- Page Cache (unmapped) vs RSS (mapped)?
+
+The type of memory accounted by the cgroup can be limited to just
+mapped pages by writing "1" to memory.control_type field
+
+echo -n 1 > memory.control_type
+
+5. TODO
+
+1. Add support for accounting huge pages (as a separate controller)
+2. Make per-cgroup scanner reclaim not-shared pages first
+3. Teach controller to account for shared-pages
+4. Start reclamation when the limit is lowered
+5. Start reclamation in the background when the limit is
+   not yet hit but the usage is getting closer
+
+Summary
+
+Overall, the memory controller has been a stable controller and has been
+commented and discussed quite extensively in the community.
+
+References
+
+1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/
+2. Singh, Balbir. Memory Controller (RSS Control),
+   http://lwn.net/Articles/222762/
+3. Emelianov, Pavel. Resource controllers based on process cgroups
+   http://lkml.org/lkml/2007/3/6/198
+4. Emelianov, Pavel. RSS controller based on process cgroups (v2)
+   http://lkml.org/lkml/2007/4/9/74
+5. Emelianov, Pavel. RSS controller based on process cgroups (v3)
+   http://lkml.org/lkml/2007/5/30/244
+6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/
+7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control
+   subsystem (v3), http://lwn.net/Articles/235534/
+8. Singh, Balbir. RSS controller V2 test results (lmbench),
+   http://lkml.org/lkml/2007/5/17/232
+9. Singh, Balbir. RSS controller V2 AIM9 results
+   http://lkml.org/lkml/2007/5/18/1
+10. Singh, Balbir. Memory controller v6 results,
+    http://lkml.org/lkml/2007/8/19/36
+11. Singh, Balbir. Memory controller v6, http://lkml.org/lkml/2007/8/17/69
+12. Corbet, Jonathan, Controlling memory use in cgroups,
+    http://lwn.net/Articles/243795/

+ 23 - 0
Documentation/cpuidle/core.txt

@@ -0,0 +1,23 @@
+
+		Supporting multiple CPU idle levels in kernel
+
+				cpuidle
+
+General Information:
+
+Various CPUs today support multiple idle levels that are differentiated
+by varying exit latencies and power consumption during idle.
+cpuidle is a generic in-kernel infrastructure that separates
+idle policy (governor) from idle mechanism (driver) and provides a
+standardized infrastructure to support independent development of
+governors and drivers.
+
+cpuidle resides under drivers/cpuidle.
+
+Boot options:
+"cpuidle_sysfs_switch"
+enables current_governor interface in /sys/devices/system/cpu/cpuidle/,
+which can be used to switch governors at run time. This boot option
+is meant for developer testing only. In normal usage, kernel picks the
+best governor based on governor ratings.
+SEE ALSO: sysfs.txt in this directory.

+ 31 - 0
Documentation/cpuidle/driver.txt

@@ -0,0 +1,31 @@
+
+
+		Supporting multiple CPU idle levels in kernel
+
+				cpuidle drivers
+
+
+
+
+cpuidle driver hooks into the cpuidle infrastructure and handles the
+architecture/platform dependent part of CPU idle states. Driver
+provides the platform idle state detection capability and also
+has mechanisms in place to support actual entry-exit into CPU idle states.
+
+cpuidle driver initializes the cpuidle_device structure for each CPU device
+and registers with cpuidle using cpuidle_register_device.
+
+It can also support the dynamic changes (like battery <-> AC), by using
+cpuidle_pause_and_lock, cpuidle_disable_device and cpuidle_enable_device,
+cpuidle_resume_and_unlock.
+
+Interfaces:
+extern int cpuidle_register_driver(struct cpuidle_driver *drv);
+extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
+extern int cpuidle_register_device(struct cpuidle_device *dev);
+extern void cpuidle_unregister_device(struct cpuidle_device *dev);
+
+extern void cpuidle_pause_and_lock(void);
+extern void cpuidle_resume_and_unlock(void);
+extern int cpuidle_enable_device(struct cpuidle_device *dev);
+extern void cpuidle_disable_device(struct cpuidle_device *dev);

+ 29 - 0
Documentation/cpuidle/governor.txt

@@ -0,0 +1,29 @@
+
+
+
+		Supporting multiple CPU idle levels in kernel
+
+				cpuidle governors
+
+
+
+
+A cpuidle governor is a policy routine that decides what idle state to enter
+at any given time. cpuidle core uses different callbacks to the governor.
+
+* enable() to enable governor for a particular device
+* disable() to disable governor for a particular device
+* select() to select an idle state to enter
+* reflect() called after returning from the idle state, which can be used
+  by the governor for some record keeping.
+
+More than one governor can be registered at the same time and
+users can switch between governors using the sysfs interface (when enabled).
+Registering multiple governors is supported so that developers can easily
+experiment with different governors. By default, the best governor for your
+kernel configuration and platform will be selected by cpuidle.
+
+Interfaces:
+extern int cpuidle_register_governor(struct cpuidle_governor *gov);
+extern void cpuidle_unregister_governor(struct cpuidle_governor *gov);
+struct cpuidle_governor

+ 79 - 0
Documentation/cpuidle/sysfs.txt

@@ -0,0 +1,79 @@
+
+
+		Supporting multiple CPU idle levels in kernel
+
+				cpuidle sysfs
+
+System global cpuidle related information and tunables are under
+/sys/devices/system/cpu/cpuidle
+
+The current interfaces in this directory have self-explanatory names:
+* current_driver
+* current_governor_ro
+
+With cpuidle_sysfs_switch boot option (meant for developer testing)
+following objects are visible instead.
+* current_driver
+* available_governors
+* current_governor
+In this case users can switch the governor at run time by writing
+to current_governor.
+
+
+Per logical CPU specific cpuidle information is under
+/sys/devices/system/cpu/cpuX/cpuidle
+for each online cpu X
+
+--------------------------------------------------------------------------------
+# ls -lR /sys/devices/system/cpu/cpu0/cpuidle/
+/sys/devices/system/cpu/cpu0/cpuidle/:
+total 0
+drwxr-xr-x 2 root root 0 Feb  8 10:42 state0
+drwxr-xr-x 2 root root 0 Feb  8 10:42 state1
+drwxr-xr-x 2 root root 0 Feb  8 10:42 state2
+drwxr-xr-x 2 root root 0 Feb  8 10:42 state3
+
+/sys/devices/system/cpu/cpu0/cpuidle/state0:
+total 0
+-r--r--r-- 1 root root 4096 Feb  8 10:42 desc
+-r--r--r-- 1 root root 4096 Feb  8 10:42 latency
+-r--r--r-- 1 root root 4096 Feb  8 10:42 name
+-r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 time
+-r--r--r-- 1 root root 4096 Feb  8 10:42 usage
+
+/sys/devices/system/cpu/cpu0/cpuidle/state1:
+total 0
+-r--r--r-- 1 root root 4096 Feb  8 10:42 desc
+-r--r--r-- 1 root root 4096 Feb  8 10:42 latency
+-r--r--r-- 1 root root 4096 Feb  8 10:42 name
+-r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 time
+-r--r--r-- 1 root root 4096 Feb  8 10:42 usage
+
+/sys/devices/system/cpu/cpu0/cpuidle/state2:
+total 0
+-r--r--r-- 1 root root 4096 Feb  8 10:42 desc
+-r--r--r-- 1 root root 4096 Feb  8 10:42 latency
+-r--r--r-- 1 root root 4096 Feb  8 10:42 name
+-r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 time
+-r--r--r-- 1 root root 4096 Feb  8 10:42 usage
+
+/sys/devices/system/cpu/cpu0/cpuidle/state3:
+total 0
+-r--r--r-- 1 root root 4096 Feb  8 10:42 desc
+-r--r--r-- 1 root root 4096 Feb  8 10:42 latency
+-r--r--r-- 1 root root 4096 Feb  8 10:42 name
+-r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 time
+-r--r--r-- 1 root root 4096 Feb  8 10:42 usage
+--------------------------------------------------------------------------------
+
+
+* desc : Small description about the idle state (string)
+* latency : Latency to exit out of this idle state (in microseconds)
+* name : Name of the idle state (string)
+* power : Power consumed while in this idle state (in milliwatts)
+* time : Total time spent in this idle state (in microseconds)
+* usage : Number of times this state was entered (count)

+ 8 - 15
Documentation/cpusets.txt

@@ -523,21 +523,14 @@ from one cpuset to another, then the kernel will adjust the tasks
 memory placement, as above, the next time that the kernel attempts
 to allocate a page of memory for that task.
 
-If a cpuset has its CPUs modified, then each task using that
-cpuset does _not_ change its behavior automatically.  In order to
-minimize the impact on the critical scheduling code in the kernel,
-tasks will continue to use their prior CPU placement until they
-are rebound to their cpuset, by rewriting their pid to the 'tasks'
-file of their cpuset.  If a task had been bound to some subset of its
-cpuset using the sched_setaffinity() call, and if any of that subset
-is still allowed in its new cpuset settings, then the task will be
-restricted to the intersection of the CPUs it was allowed on before,
-and its new cpuset CPU placement.  If, on the other hand, there is
-no overlap between a tasks prior placement and its new cpuset CPU
-placement, then the task will be allowed to run on any CPU allowed
-in its new cpuset.  If a task is moved from one cpuset to another,
-its CPU placement is updated in the same way as if the tasks pid is
-rewritten to the 'tasks' file of its current cpuset.
+If a cpuset has its 'cpus' modified, then each task in that cpuset
+will have its allowed CPU placement changed immediately.  Similarly,
+if a task's pid is written to a cpuset's 'tasks' file, in either its
+current cpuset or another cpuset, then its allowed CPU placement is
+changed immediately.  If such a task had been bound to some subset
+of its cpuset using the sched_setaffinity() call, the task will be
+allowed to run on any CPU allowed in its new cpuset, negating the
+effect of the prior sched_setaffinity() call.
 
 In summary, the memory placement of a task whose cpuset is changed is
 updated by the kernel, on the next allocation of a page for that task,

+ 0 - 0
Documentation/drivers/edac/edac.txt → Documentation/edac.txt


+ 0 - 1
Documentation/email-clients.txt

@@ -170,7 +170,6 @@ Sylpheed (GUI)
 
 - Works well for inlining text (or using attachments).
 - Allows use of an external editor.
-- Not good for IMAP.
 - Is slow on large folders.
 - Won't do TLS SMTP auth over a non-SSL connection.
 - Has a helpful ruler bar in the compose window.

+ 3 - 3
Documentation/fb/deferred_io.txt

@@ -7,10 +7,10 @@ IO. The following example may be a useful explanation of how one such setup
 works:
 
 - userspace app like Xfbdev mmaps framebuffer
-- deferred IO and driver sets up nopage and page_mkwrite handlers
+- deferred IO and driver sets up fault and page_mkwrite handlers
 - userspace app tries to write to mmaped vaddress
-- we get pagefault and reach nopage handler
-- nopage handler finds and returns physical page
+- we get pagefault and reach fault handler
+- fault handler finds and returns physical page
 - we get page_mkwrite where we add this page to a list
 - schedule a workqueue task to be run after a delay
 - app continues writing to that page with no additional cost. this is

+ 11 - 43
Documentation/feature-removal-schedule.txt

@@ -6,14 +6,6 @@ be removed from this file.
 
 ---------------------------
 
-What:	MXSER
-When:	December 2007
-Why:	Old mxser driver is obsoleted by the mxser_new. Give it some time yet
-	and remove it.
-Who:	Jiri Slaby <jirislaby@gmail.com>
-
----------------------------
-
 What:	dev->power.power_state
 When:	July 2007
 Why:	Broken design for runtime control over driver power states, confusing
@@ -107,17 +99,6 @@ Who:	Eric Biederman <ebiederm@xmission.com>
 
 ---------------------------
 
-What:  a.out interpreter support for ELF executables
-When:  2.6.25
-Files: fs/binfmt_elf.c
-Why:   Using a.out interpreters for ELF executables was a feature for
-       transition from a.out to ELF. But now it is unlikely to be still
-       needed anymore and removing it would simplify the hairy ELF
-       loader code.
-Who:   Andi Kleen <ak@suse.de>
-
----------------------------
-
 What:	remove EXPORT_SYMBOL(kernel_thread)
 When:	August 2006
 Files:	arch/*/kernel/*_ksyms.c
@@ -130,15 +111,6 @@ Who:	Christoph Hellwig <hch@lst.de>
 
 ---------------------------
 
-What:	CONFIG_FORCED_INLINING
-When:	June 2006
-Why:	Config option is there to see if gcc is good enough. (in january
-        2006). If it is, the behavior should just be the default. If it's not,
-	the option should just go away entirely.
-Who:    Arjan van de Ven
-
----------------------------
-
 What:   eepro100 network driver
 When:   January 2007
 Why:    replaced by the e100 driver
@@ -200,21 +172,6 @@ Who:	Len Brown <len.brown@intel.com>
 
 ---------------------------
 
-What:	'time' kernel boot parameter
-When:	January 2008
-Why:	replaced by 'printk.time=<value>' so that printk timestamps can be
-	enabled or disabled as needed
-Who:	Randy Dunlap <randy.dunlap@oracle.com>
-
----------------------------
-
-What:  drivers depending on OSS_OBSOLETE
-When:  options in 2.6.23, code in 2.6.25
-Why:   obsolete OSS drivers
-Who:   Adrian Bunk <bunk@stusta.de>
-
----------------------------
-
 What: libata spindown skipping and warning
 When: Dec 2008
 Why:  Some halt(8) implementations synchronize caches for and spin
@@ -338,3 +295,14 @@ Why:	The support code for the old firmware hurts code readability/maintainabilit
 	and slightly hurts runtime performance. Bugfixes for the old firmware
 	are not provided by Broadcom anymore.
 Who:	Michael Buesch <mb@bu3sch.de>
+
+---------------------------
+
+What:	Solaris/SunOS syscall and binary support on Sparc
+When:	2.6.26
+Why:	Largely unmaintained and almost entirely unused.  File system
+	layering used to divert library and dynamic linker searches to
+	/usr/gnemul is extremely buggy and unfixable.  Making it work
+	is largely pointless as without a lot of work only the most
+	trivial of Solaris binaries can work with the emulation code.
+Who:	David S. Miller <davem@davemloft.net>

+ 4 - 0
Documentation/filesystems/00-INDEX

@@ -32,6 +32,8 @@ directory-locking
 	- info about the locking scheme used for directory operations.
 dlmfs.txt
 	- info on the userspace interface to the OCFS2 DLM.
+dnotify.txt
+	- info about directory notification in Linux.
 ecryptfs.txt
 	- docs on eCryptfs: stacked cryptographic filesystem for Linux.
 ext2.txt
@@ -80,6 +82,8 @@ relay.txt
 	- info on relay, for efficient streaming from kernel to user space.
 romfs.txt
 	- description of the ROMFS filesystem.
+sharedsubtree.txt
+	- a description of shared subtrees for namespaces.
 smbfs.txt
 	- info on using filesystems with the SMB protocol (Win 3.11 and NT).
 spufs.txt

+ 0 - 3
Documentation/filesystems/Locking

@@ -90,7 +90,6 @@ of the locking scheme for directory operations.
 prototypes:
 	struct inode *(*alloc_inode)(struct super_block *sb);
 	void (*destroy_inode)(struct inode *);
-	void (*read_inode) (struct inode *);
 	void (*dirty_inode) (struct inode *);
 	int (*write_inode) (struct inode *, int);
 	void (*put_inode) (struct inode *);
@@ -114,7 +113,6 @@ locking rules:
 			BKL	s_lock	s_umount
 alloc_inode:		no	no	no
 destroy_inode:		no
-read_inode:		no				(see below)
 dirty_inode:		no				(must not sleep)
 write_inode:		no
 put_inode:		no
@@ -133,7 +131,6 @@ show_options:		no				(vfsmount->sem)
 quota_read:		no	no	no		(see below)
 quota_write:		no	no	no		(see below)
 
-->read_inode() is not a method - it's a callback used in iget().
 ->remount_fs() will have the s_umount lock if it's already mounted.
 When called from get_sb_single, it does NOT have the s_umount lock.
 ->quota_read() and ->quota_write() functions are both guaranteed to

+ 5 - 5
Documentation/dnotify.txt → Documentation/filesystems/dnotify.txt

@@ -69,24 +69,24 @@ Example
 	#include <signal.h>
 	#include <stdio.h>
 	#include <unistd.h>
-	
+
 	static volatile int event_fd;
-	
+
 	static void handler(int sig, siginfo_t *si, void *data)
 	{
 		event_fd = si->si_fd;
 	}
-	
+
 	int main(void)
 	{
 		struct sigaction act;
 		int fd;
-		
+
 		act.sa_sigaction = handler;
 		sigemptyset(&act.sa_mask);
 		act.sa_flags = SA_SIGINFO;
 		sigaction(SIGRTMIN + 1, &act, NULL);
-		
+
 		fd = open(".", O_RDONLY);
 		fcntl(fd, F_SETSIG, SIGRTMIN + 1);
 		fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT);

+ 1 - 0
Documentation/filesystems/isofs.txt

@@ -24,6 +24,7 @@ Mount options unique to the isofs filesystem.
   map=normal    Map non-Rock Ridge filenames to lower case
   map=acorn     As map=normal but also apply Acorn extensions if present
   mode=xxx      Sets the permissions on files to xxx
+  dmode=xxx     Sets the permissions on directories to xxx
   nojoliet      Ignore Joliet extensions if they are present.
   norock        Ignore Rock Ridge extensions if they are present.
   hide		Completely strip hidden files from the file system.

+ 19 - 11
Documentation/filesystems/porting

@@ -34,8 +34,8 @@ FOO_I(inode) (see in-tree filesystems for examples).
 
 Make them ->alloc_inode and ->destroy_inode in your super_operations.
 
-Keep in mind that now you need explicit initialization of private data -
-typically in ->read_inode() and after getting an inode from new_inode().
+Keep in mind that now you need explicit initialization of private data
+typically between calling iget_locked() and unlocking the inode.
 
 At some point that will become mandatory.
 
@@ -173,10 +173,10 @@ should be a non-blocking function that initializes those parts of a
 newly created inode to allow the test function to succeed. 'data' is
 passed as an opaque value to both test and set functions.
 
-When the inode has been created by iget5_locked(), it will be returned with
-the I_NEW flag set and will still be locked. read_inode has not been
-called so the file system still has to finalize the initialization. Once
-the inode is initialized it must be unlocked by calling unlock_new_inode().
+When the inode has been created by iget5_locked(), it will be returned with the
+I_NEW flag set and will still be locked.  The filesystem then needs to finalize
+the initialization. Once the inode is initialized it must be unlocked by
+calling unlock_new_inode().
 
 The filesystem is responsible for setting (and possibly testing) i_ino
 when appropriate. There is also a simpler iget_locked function that
@@ -184,11 +184,19 @@ just takes the superblock and inode number as arguments and does the
 test and set for you.
 
 e.g.
-       inode = iget_locked(sb, ino);
-       if (inode->i_state & I_NEW) {
-               read_inode_from_disk(inode);
-               unlock_new_inode(inode);
-       }
+	inode = iget_locked(sb, ino);
+	if (inode->i_state & I_NEW) {
+		err = read_inode_from_disk(inode);
+		if (err < 0) {
+			iget_failed(inode);
+			return err;
+		}
+		unlock_new_inode(inode);
+	}
+
+Note that if the process of setting up a new inode fails, then iget_failed()
+should be called on the inode to render it dead, and an appropriate error
+should be passed back to the caller.
 
 ---
 [recommended]

+ 86 - 17
Documentation/filesystems/proc.txt

@@ -1029,6 +1029,14 @@ nr_inodes
 Denotes the  number  of  inodes the system has allocated. This number will
 grow and shrink dynamically.
 
+nr_open
+-------
+
+Denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
 nr_free_inodes
 --------------
 
@@ -1315,13 +1323,28 @@ for writeout by the pdflush daemons.  It is expressed in 100'ths of a second.
 Data which has been dirty in-memory for longer than this interval will be
 written out next time a pdflush daemon wakes up.
 
+highmem_is_dirtyable
+--------------------
+
+Only present if CONFIG_HIGHMEM is set.
+
+This defaults to 0 (false), meaning that the ratios set above are calculated
+as a percentage of lowmem only.  This protects against excessive scanning
+in page reclaim, swapping and general VM distress.
+
+Setting this to 1 can be useful on 32 bit machines where you want to make
+random changes within an MMAPed file that is larger than your available
+lowmem without causing large quantities of random IO.  It is safe if the
+behavior of all programs running on the machine is known and memory will
+not be otherwise stressed.
+
 legacy_va_layout
 ----------------
 
 If non-zero, this sysctl disables the new 32-bit mmap mmap layout - the kernel
 will use the legacy (2.4) layout for all processes.
 
-lower_zone_protection
+lowmem_reserve_ratio
 ---------------------
 
 For some specialised workloads on highmem machines it is dangerous for
@@ -1341,25 +1364,71 @@ captured into pinned user memory.
 mechanism will also defend that region from allocations which could use
 highmem or lowmem).
 
-The `lower_zone_protection' tunable determines how aggressive the kernel is
-in defending these lower zones.  The default value is zero - no
-protection at all.
+The `lowmem_reserve_ratio' tunable determines how aggressive the kernel is
+in defending these lower zones.
 
 If you have a machine which uses highmem or ISA DMA and your
 applications are using mlock(), or if you are running with no swap then
-you probably should increase the lower_zone_protection setting.
-
-The units of this tunable are fairly vague.  It is approximately equal
-to "megabytes," so setting lower_zone_protection=100 will protect around 100
-megabytes of the lowmem zone from user allocations.  It will also make
-those 100 megabytes unavailable for use by applications and by
-pagecache, so there is a cost.
-
-The effects of this tunable may be observed by monitoring
-/proc/meminfo:LowFree.  Write a single huge file and observe the point
-at which LowFree ceases to fall.
-
-A reasonable value for lower_zone_protection is 100.
+you probably should change the lowmem_reserve_ratio setting.
+
+The lowmem_reserve_ratio is an array. You can see its values by reading this file.
+-
+% cat /proc/sys/vm/lowmem_reserve_ratio
+256     256     32
+-
+Note: the number of elements is one fewer than the number of zones, because
+      the highest zone's value is not needed for the following calculation.
+
+However, these values are not used directly. The kernel calculates the number
+of protection pages for each zone from them. These are shown as an array of
+protection pages in /proc/zoneinfo, as follows (this example is from an x86-64
+box).  Each zone has an array of protection pages like this.
+
+-
+Node 0, zone      DMA
+  pages free     1355
+        min      3
+        low      3
+        high     4
+	:
+	:
+    numa_other   0
+        protection: (0, 2004, 2004, 2004)
+	^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  pagesets
+    cpu: 0 pcp: 0
+        :
+-
+These protections are added to the score used to judge whether this zone
+should be used for page allocation or should be reclaimed.
+
+In this example, if normal pages (index=2) are requested from this DMA zone and
+pages_high is used as the watermark, the kernel judges that this zone should
+not be used because pages_free (1355) is smaller than watermark + protection[2]
+(4 + 2004 = 2008). If this protection value were 0, this zone would be used for
+normal page requests. If the request is for the DMA zone (index=0),
+protection[0] (=0) is used.
+
+zone[i]'s protection[j] is calculated by the following expression.
+
+(i < j):
+  zone[i]->protection[j]
+  = (total sums of present_pages from zone[i+1] to zone[j] on the node)
+    / lowmem_reserve_ratio[i];
+(i = j):
+   (should not be protected. = 0)
+(i > j):
+   (not necessary, but looks 0)
+
+The default values of lowmem_reserve_ratio[i] are
+    256 (if zone[i] means DMA or DMA32 zone)
+    32  (others).
+As the above expression shows, they are the reciprocals of the ratio.
+256 means 1/256.  The number of protection pages becomes about "0.39%" of the
+total present pages of higher zones on the node.
+
+If you would like to protect more pages, smaller values are effective.
+The minimum value is 1 (1/1 -> 100%).
 
 page-cluster
 ------------

+ 0 - 0
Documentation/sharedsubtree.txt → Documentation/filesystems/sharedsubtree.txt


+ 50 - 17
Documentation/filesystems/vfs.txt

@@ -151,7 +151,7 @@ The get_sb() method has the following arguments:
   const char *dev_name: the device name we are mounting.
 
   void *data: arbitrary mount options, usually comes as an ASCII
-	string
+	string (see "Mount Options" section)
 
   struct vfsmount *mnt: a vfs-internal representation of a mount point
 
@@ -182,7 +182,7 @@ A fill_super() method implementation has the following arguments:
   	must initialize this properly.
 
   void *data: arbitrary mount options, usually comes as an ASCII
-	string
+	string (see "Mount Options" section)
 
   int silent: whether or not to be silent on error
 
@@ -203,8 +203,6 @@ struct super_operations {
         struct inode *(*alloc_inode)(struct super_block *sb);
         void (*destroy_inode)(struct inode *);
 
-        void (*read_inode) (struct inode *);
-
         void (*dirty_inode) (struct inode *);
         int (*write_inode) (struct inode *, int);
         void (*put_inode) (struct inode *);
@@ -242,15 +240,6 @@ or bottom half).
   	->alloc_inode was defined and simply undoes anything done by
 	->alloc_inode.
 
-  read_inode: this method is called to read a specific inode from the
-        mounted filesystem.  The i_ino member in the struct inode is
-	initialized by the VFS to indicate which inode to read. Other
-	members are filled in by this method.
-
-	You can set this to NULL and use iget5_locked() instead of iget()
-	to read inodes.  This is necessary for filesystems for which the
-	inode number is not sufficient to identify an inode.
-
   dirty_inode: this method is called by the VFS to mark an inode dirty.
 
   write_inode: this method is called when the VFS needs to write an
@@ -302,15 +291,16 @@ or bottom half).
 
   umount_begin: called when the VFS is unmounting a filesystem.
 
-  show_options: called by the VFS to show mount options for /proc/<pid>/mounts.
+  show_options: called by the VFS to show mount options for
+	/proc/<pid>/mounts.  (see "Mount Options" section)
 
   quota_read: called by the VFS to read from filesystem quota file.
 
   quota_write: called by the VFS to write to filesystem quota file.
 
-The read_inode() method is responsible for filling in the "i_op"
-field. This is a pointer to a "struct inode_operations" which
-describes the methods that can be performed on individual inodes.
+Whoever sets up the inode is responsible for filling in the "i_op" field. This
+is a pointer to a "struct inode_operations" which describes the methods that
+can be performed on individual inodes.
 
 
 The Inode Object
@@ -980,6 +970,49 @@ manipulate dentries:
 For further information on dentry locking, please refer to the document
 Documentation/filesystems/dentry-locking.txt.
 
+Mount Options
+=============
+
+Parsing options
+---------------
+
+On mount and remount the filesystem is passed a string containing a
+comma separated list of mount options.  The options can have either of
+these forms:
+
+  option
+  option=value
+
+The <linux/parser.h> header defines an API that helps parse these
+options.  There are plenty of examples on how to use it in existing
+filesystems.
+
+Showing options
+---------------
+
+If a filesystem accepts mount options, it must define show_options()
+to show all the currently active options.  The rules are:
+
+  - options MUST be shown which are not default or their values differ
+    from the default
+
+  - options MAY be shown which are enabled by default or have their
+    default value
+
+Options used only internally between a mount helper and the kernel
+(such as file descriptors), or which only have an effect during the
+mounting (such as ones controlling the creation of a journal) are exempt
+from the above rules.
+
+The underlying reason for the above rules is to make sure that a
+mount can be accurately replicated (e.g. umounting and mounting again)
+based on the information found in /proc/mounts.
+
+A simple method of saving options at mount/remount time and showing
+them is provided with the save_mount_options() and
+generic_show_options() helper functions.  Please note that using
+these may have drawbacks.  For more info see header comments for these
+functions in fs/namespace.c.
 
 Resources
 =========

+ 121 - 12
Documentation/gpio.txt

@@ -32,7 +32,7 @@ The exact capabilities of GPIOs vary between systems.  Common options:
   - Input values are likewise readable (1, 0).  Some chips support readback
     of pins configured as "output", which is very useful in such "wire-OR"
     cases (to support bidirectional signaling).  GPIO controllers may have
-    input de-glitch logic, sometimes with software controls.
+    input de-glitch/debounce logic, sometimes with software controls.
 
   - Inputs can often be used as IRQ signals, often edge triggered but
     sometimes level triggered.  Such IRQs may be configurable as system
@@ -60,10 +60,13 @@ used on a board that's wired differently.  Only least-common-denominator
 functionality can be very portable.  Other features are platform-specific,
 and that can be critical for glue logic.
 
-Plus, this doesn't define an implementation framework, just an interface.
+Plus, this doesn't require any implementation framework, just an interface.
 One platform might implement it as simple inline functions accessing chip
 registers; another might implement it by delegating through abstractions
-used for several very different kinds of GPIO controller.
+used for several very different kinds of GPIO controller.  (There is some
+optional code supporting such an implementation strategy, described later
+in this document, but drivers acting as clients to the GPIO interface must
+not care how it's implemented.)
 
 That said, if the convention is supported on their platform, drivers should
 use it when possible.  Platforms should declare GENERIC_GPIO support in
@@ -121,6 +124,11 @@ before tasking is enabled, as part of early board setup.
 For output GPIOs, the value provided becomes the initial output value.
 This helps avoid signal glitching during system startup.
 
+For compatibility with legacy interfaces to GPIOs, setting the direction
+of a GPIO implicitly requests that GPIO (see below) if it has not been
+requested already.  That compatibility may be removed in the future;
+explicitly requesting GPIOs is strongly preferred.
+
 Setting the direction can fail if the GPIO number is invalid, or when
 that particular GPIO can't be used in that mode.  It's generally a bad
 idea to rely on boot firmware to have set the direction correctly, since
@@ -133,6 +141,7 @@ Spinlock-Safe GPIO access
 -------------------------
 Most GPIO controllers can be accessed with memory read/write instructions.
 That doesn't need to sleep, and can safely be done from inside IRQ handlers.
+(That includes hardirq contexts on RT kernels.)
 
 Use these calls to access such GPIOs:
 
@@ -145,7 +154,7 @@ Use these calls to access such GPIOs:
 The values are boolean, zero for low, nonzero for high.  When reading the
 value of an output pin, the value returned should be what's seen on the
 pin ... that won't always match the specified output value, because of
-issues including wire-OR and output latencies.
+issues including open-drain signaling and output latencies.
 
 The get/set calls have no error returns because "invalid GPIO" should have
 been reported earlier from gpio_direction_*().  However, note that not all
@@ -170,7 +179,8 @@ get to the head of a queue to transmit a command and get its response.
 This requires sleeping, which can't be done from inside IRQ handlers.
 
 Platforms that support this type of GPIO distinguish them from other GPIOs
-by returning nonzero from this call:
+by returning nonzero from this call (which requires a valid GPIO number,
+either explicitly or implicitly requested):
 
 	int gpio_cansleep(unsigned gpio);
 
@@ -209,8 +219,11 @@ before tasking is enabled, as part of early board setup.
 These calls serve two basic purposes.  One is marking the signals which
 are actually in use as GPIOs, for better diagnostics; systems may have
 several hundred potential GPIOs, but often only a dozen are used on any
-given board.  Another is to catch conflicts between drivers, reporting
-errors when drivers wrongly think they have exclusive use of that signal.
+given board.  Another is to catch conflicts, identifying errors when
+(a) two or more drivers wrongly think they have exclusive use of that
+signal, or (b) something wrongly believes it's safe to remove drivers
+needed to manage a signal that's in active use.  That is, requesting a
+GPIO can serve as a kind of lock.
 
 These two calls are optional because not not all current Linux platforms
 offer such functionality in their GPIO support; a valid implementation
@@ -223,6 +236,9 @@ Note that requesting a GPIO does NOT cause it to be configured in any
 way; it just marks that GPIO as in use.  Separate code must handle any
 pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown).
 
+Also note that it's your responsibility to have stopped using a GPIO
+before you free it.
+
 
 GPIOs mapped to IRQs
 --------------------
@@ -238,7 +254,7 @@ map between them using calls like:
 
 Those return either the corresponding number in the other namespace, or
 else a negative errno code if the mapping can't be done.  (For example,
-some GPIOs can't used as IRQs.)  It is an unchecked error to use a GPIO
+some GPIOs can't be used as IRQs.)  It is an unchecked error to use a GPIO
 number that wasn't set up as an input using gpio_direction_input(), or
 to use an IRQ number that didn't originally come from gpio_to_irq().
 
@@ -299,17 +315,110 @@ Related to multiplexing is configuration and enabling of the pullups or
 pulldowns integrated on some platforms.  Not all platforms support them,
 or support them in the same way; and any given board might use external
 pullups (or pulldowns) so that the on-chip ones should not be used.
+(When a circuit needs 5 kOhm, on-chip 100 kOhm resistors won't do.)
 
 There are other system-specific mechanisms that are not specified here,
 like the aforementioned options for input de-glitching and wire-OR output.
 Hardware may support reading or writing GPIOs in gangs, but that's usually
 configuration dependent:  for GPIOs sharing the same bank.  (GPIOs are
 commonly grouped in banks of 16 or 32, with a given SOC having several such
-banks.)  Some systems can trigger IRQs from output GPIOs.  Code relying on
-such mechanisms will necessarily be nonportable.
+banks.)  Some systems can trigger IRQs from output GPIOs, or read values
+from pins not managed as GPIOs.  Code relying on such mechanisms will
+necessarily be nonportable.
 
-Dynamic definition of GPIOs is not currently supported; for example, as
+Dynamic definition of GPIOs is not currently standard; for example, as
 a side effect of configuring an add-on board with some GPIO expanders.
 
 These calls are purely for kernel space, but a userspace API could be built
-on top of it.
+on top of them.
+
+
+GPIO implementor's framework (OPTIONAL)
+=======================================
+As noted earlier, there is an optional implementation framework making it
+easier for platforms to support different kinds of GPIO controller using
+the same programming interface.
+
+As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file
+will be found there.  That will list all the controllers registered through
+this framework, and the state of the GPIOs currently in use.
+
+
+Controller Drivers: gpio_chip
+-----------------------------
+In this framework each GPIO controller is packaged as a "struct gpio_chip"
+with information common to each controller of that type:
+
+ - methods to establish GPIO direction
+ - methods used to access GPIO values
+ - flag saying whether calls to its methods may sleep
+ - optional debugfs dump method (showing extra state like pullup config)
+ - label for diagnostics
+
+There is also per-instance data, which may come from device.platform_data:
+the number of its first GPIO, and how many GPIOs it exposes.
+
+The code implementing a gpio_chip should support multiple instances of the
+controller, possibly using the driver model.  That code will configure each
+gpio_chip and issue gpiochip_add().  Removing a GPIO controller should be
+rare; use gpiochip_remove() when it is unavoidable.
+
+Most often a gpio_chip is part of an instance-specific structure with state
+not exposed by the GPIO interfaces, such as addressing, power management,
+and more.  Chips such as codecs will have complex non-GPIO state.
+
+Any debugfs dump method should normally ignore signals which haven't been
+requested as GPIOs.  They can use gpiochip_is_requested(), which returns
+either NULL or the label associated with that GPIO when it was requested.
+
+
+Platform Support
+----------------
+To support this framework, a platform's Kconfig will "select HAVE_GPIO_LIB"
+and arrange that its <asm/gpio.h> includes <asm-generic/gpio.h> and defines
+three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep().
+They may also want to provide a custom value for ARCH_NR_GPIOS.
+
+Trivial implementations of those functions can directly use framework
+code, which always dispatches through the gpio_chip:
+
+  #define gpio_get_value	__gpio_get_value
+  #define gpio_set_value	__gpio_set_value
+  #define gpio_cansleep		__gpio_cansleep
+
+Fancier implementations could instead define those as inline functions with
+logic optimizing access to specific SOC-based GPIOs.  For example, if the
+referenced GPIO is the constant "12", getting or setting its value could
+cost as little as two or three instructions, never sleeping.  When such an
+optimization is not possible those calls must delegate to the framework
+code, costing at least a few dozen instructions.  For bitbanged I/O, such
+instruction savings can be significant.
+
+For SOCs, platform-specific code defines and registers gpio_chip instances
+for each bank of on-chip GPIOs.  Those GPIOs should be numbered/labeled to
+match chip vendor documentation, and directly match board schematics.  They
+may well start at zero and go up to a platform-specific limit.  Such GPIOs
+are normally integrated into platform initialization to make them always be
+available, from arch_initcall() or earlier; they can often serve as IRQs.
+
+
+Board Support
+-------------
+For external GPIO controllers -- such as I2C or SPI expanders, ASICs, multi
+function devices, FPGAs or CPLDs -- most often board-specific code handles
+registering controller devices and ensures that their drivers know what GPIO
+numbers to use with gpiochip_add().  Their numbers often start right after
+platform-specific GPIOs.
+
+For example, board setup code could create structures identifying the range
+of GPIOs that chip will expose, and pass them to each GPIO expander chip
+using platform_data.  Then the chip driver's probe() routine could pass that
+data to gpiochip_add().
+
+Initialization order can be important.  For example, when a device relies on
+an I2C-based GPIO, its probe() routine should only be called after that GPIO
+becomes available.  That may mean the device should not be registered until
+calls for that GPIO can work.  One way to address such dependencies is for
+such gpio_chip controllers to provide setup() and teardown() callbacks to
+board specific code; those board specific callbacks would register devices
+once all the necessary resources are available.

+ 36 - 0
Documentation/hwmon/ads7828

@@ -0,0 +1,36 @@
+Kernel driver ads7828
+=====================
+
+Supported chips:
+  * Texas Instruments/Burr-Brown ADS7828
+    Prefix: 'ads7828'
+    Addresses scanned: I2C 0x48, 0x49, 0x4a, 0x4b
+    Datasheet: Publicly available at the Texas Instruments website :
+               http://focus.ti.com/lit/ds/symlink/ads7828.pdf
+
+Authors:
+        Steve Hardy <steve@linuxrealtime.co.uk>
+
+Module Parameters
+-----------------
+
+* se_input: bool (default Y)
+  Single ended operation - set to N for differential mode
+* int_vref: bool (default Y)
+  Operate with the internal 2.5V reference - set to N for external reference
+* vref_mv: int (default 2500)
+  If using an external reference, set this to the reference voltage in mV
+
+Description
+-----------
+
+This driver implements support for the Texas Instruments ADS7828.
+
+This device is a 12-bit 8-channel A-D converter.
+
+It can operate in single ended mode (8 +ve inputs) or in differential mode,
+where 4 differential pairs can be measured.
+
+The chip also has the facility to use an external voltage reference.  This
+may be required if your hardware supplies the ADS7828 from a 5V supply, see
+the datasheet for more details.

+ 1 - 1
Documentation/hwmon/it87

@@ -30,7 +30,7 @@ Supported chips:
     Datasheet: No longer available
 
 Authors:
-    Christophe Gauthron <chrisg@0-in.com>
+    Christophe Gauthron
     Jean Delvare <khali@linux-fr.org>
 
 

+ 2 - 2
Documentation/hwmon/lm78

@@ -4,12 +4,12 @@ Kernel driver lm78
 Supported chips:
   * National Semiconductor LM78 / LM78-J
     Prefix: 'lm78'
-    Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports)
+    Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports)
     Datasheet: Publicly available at the National Semiconductor website
                http://www.national.com/
   * National Semiconductor LM79
     Prefix: 'lm79'
-    Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports)
+    Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports)
     Datasheet: Publicly available at the National Semiconductor website
                http://www.national.com/
 

+ 8 - 3
Documentation/hwmon/lm87

@@ -4,8 +4,12 @@ Kernel driver lm87
 Supported chips:
   * National Semiconductor LM87
     Prefix: 'lm87'
-    Addresses scanned: I2C 0x2c - 0x2f
+    Addresses scanned: I2C 0x2c - 0x2e
     Datasheet: http://www.national.com/pf/LM/LM87.html
+  * Analog Devices ADM1024
+    Prefix: 'adm1024'
+    Addresses scanned: I2C 0x2c - 0x2e
+    Datasheet: http://www.analog.com/en/prod/0,2877,ADM1024,00.html
 
 Authors:
         Frodo Looijaard <frodol@dds.nl>,
@@ -19,11 +23,12 @@ Authors:
 Description
 -----------
 
-This driver implements support for the National Semiconductor LM87.
+This driver implements support for the National Semiconductor LM87
+and the Analog Devices ADM1024.
 
 The LM87 implements up to three temperature sensors, up to two fan
 rotation speed sensors, up to seven voltage sensors, alarms, and some
-miscellaneous stuff.
+miscellaneous stuff. The ADM1024 is fully compatible.
 
 Temperatures are measured in degrees Celsius. Each input has a high
 and low alarm settings. A high limit produces an alarm when the value

+ 1 - 1
Documentation/hwmon/userspace-tools

@@ -14,7 +14,7 @@ Lm-sensors
 
 Core set of utilities that will allow you to obtain health information,
 setup monitoring limits etc. You can get them on their homepage
-http://www.lm-sensors.nu/ or as a package from your Linux distribution.
+http://www.lm-sensors.org/ or as a package from your Linux distribution.
 
 If from website:
 Get lm-sensors from project web site. Please note, you need only userspace

+ 3 - 2
Documentation/hwmon/w83627ehf

@@ -23,8 +23,9 @@ W83627DHG super I/O chips. We will refer to them collectively as Winbond chips.
 
 The chips implement three temperature sensors, five fan rotation
 speed sensors, ten analog voltage sensors (only nine for the 627DHG), one
-VID (6 pins), alarms with beep warnings (control unimplemented), and
-some automatic fan regulation strategies (plus manual fan control mode).
+VID (6 pins for the 627EHF/EHG, 8 pins for the 627DHG), alarms with beep
+warnings (control unimplemented), and some automatic fan regulation
+strategies (plus manual fan control mode).
 
 Temperatures are measured in degrees Celsius and measurement resolution is 1
 degC for temp1 and 0.5 degC for temp2 and temp3. An alarm is triggered when

+ 1 - 2
Documentation/hwmon/w83627hf

@@ -73,5 +73,4 @@ doesn't help, you may just ignore the bogus VID reading with no harm done.
 
 For further information on this driver see the w83781d driver documentation.
 
-[1] http://www2.lm-sensors.nu/~lm78/cvs/browse.cgi/lm_sensors2/doc/vid
-
+[1] http://www.lm-sensors.org/browser/lm-sensors/trunk/doc/vid

+ 8 - 14
Documentation/hwmon/w83781d

@@ -4,20 +4,16 @@ Kernel driver w83781d
 Supported chips:
   * Winbond W83781D
     Prefix: 'w83781d'
-    Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports)
+    Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports)
     Datasheet: http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/w83781d.pdf
   * Winbond W83782D
     Prefix: 'w83782d'
-    Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports)
+    Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports)
     Datasheet: http://www.winbond.com/PDF/sheet/w83782d.pdf
   * Winbond W83783S
     Prefix: 'w83783s'
     Addresses scanned: I2C 0x2d
     Datasheet: http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/w83783s.pdf
-  * Winbond W83627HF
-    Prefix: 'w83627hf'
-    Addresses scanned: I2C 0x20 - 0x2f, ISA 0x290 (8 I/O ports)
-    Datasheet: http://www.winbond.com/PDF/sheet/w83627hf.pdf
   * Asus AS99127F
     Prefix: 'as99127f'
     Addresses scanned: I2C 0x28 - 0x2f
@@ -50,20 +46,18 @@ force_subclients=bus,caddr,saddr,saddr
 Description
 -----------
 
-This driver implements support for the Winbond W83781D, W83782D, W83783S,
-W83627HF chips, and the Asus AS99127F chips. We will refer to them
-collectively as W8378* chips.
+This driver implements support for the Winbond W83781D, W83782D, W83783S
+chips, and the Asus AS99127F chips. We will refer to them collectively as
+W8378* chips.
 
 There is quite some difference between these chips, but they are similar
 enough that it was sensible to put them together in one driver.
-The W83627HF chip is assumed to be identical to the ISA W83782D.
 The Asus chips are similar to an I2C-only W83782D.
 
 Chip        #vin    #fanin  #pwm    #temp   wchipid vendid  i2c     ISA
 as99127f    7       3       0       3       0x31    0x12c3  yes     no
 as99127f rev.2 (type_name = as99127f)       0x31    0x5ca3  yes     no
 w83781d     7       3       0       3       0x10-1  0x5ca3  yes     yes
-w83627hf    9       3       2       3       0x21    0x5ca3  yes     yes(LPC)
 w83782d     9       3       2-4     3       0x30    0x5ca3  yes     yes
 w83783s     5-6     3       2       1-2     0x40    0x5ca3  yes     no
 
@@ -143,9 +137,9 @@ Individual alarm and beep bits:
 0x000400: in6
 0x000800: fan3
 0x001000: chassis
-0x002000: temp3 (W83782D and W83627HF only)
-0x010000: in7 (W83782D and W83627HF only)
-0x020000: in8 (W83782D and W83627HF only)
+0x002000: temp3 (W83782D only)
+0x010000: in7 (W83782D only)
+0x020000: in8 (W83782D only)
 
 If an alarm triggers, it will remain triggered until the hardware register
 is read at least once. This means that the cause for the alarm may

+ 54 - 0
Documentation/hwmon/w83l786ng

@@ -0,0 +1,54 @@
+Kernel driver w83l786ng
+=======================
+
+Supported chips:
+  * Winbond W83L786NG/W83L786NR
+    Prefix: 'w83l786ng'
+    Addresses scanned: I2C 0x2e - 0x2f
+    Datasheet: http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/W83L786NRNG09.pdf
+
+Author: Kevin Lo <kevlo@kevlo.org>
+
+
+Module Parameters
+-----------------
+
+* reset boolean
+  (default 0)
+  Use 'reset=1' to reset the chip (via index 0x40, bit 7). The default
+  behavior is no chip reset to preserve BIOS settings
+
+
+Description
+-----------
+
+This driver implements support for Winbond W83L786NG/W83L786NR chips.
+
+The driver implements two temperature sensors, two fan rotation speed
+sensors, and three voltage sensors.
+
+Temperatures are measured in degrees Celsius and measurement resolution is 1
+degC for temp1 and temp2.
+
+Fan rotation speeds are reported in RPM (rotations per minute). Fan readings
+can be divided by a programmable divider (1, 2, 4, 8, 16, 32, 64
+or 128 for fan 1/2) to give the readings more range or accuracy.
+
+Voltage sensors (also known as IN sensors) report their values in millivolts.
+An alarm is triggered if the voltage has crossed a programmable minimum
+or maximum limit.
+
+/sys files
+----------
+
+pwm[1-2] - this file stores PWM duty cycle or DC value (fan speed) in range:
+	    0 (stop) to 255 (full)
+pwm[1-2]_enable - this file controls mode of fan/temperature control:
+            * 0 Manual Mode
+            * 1 Thermal Cruise
+            * 2 Smart Fan II
+            * 4 FAN_SET
+pwm[1-2]_mode - Select PWM or DC mode
+            * 0 DC
+            * 1 PWM
+tolerance[1-2] - Value in degrees of Celsius (degC) for +- T

+ 1 - 1
Documentation/i2c/busses/i2c-piix4

@@ -95,4 +95,4 @@ of all affected systems, so the only safe solution was to prevent access to
 the SMBus on all IBM systems (detected using DMI data.)
 
 For additional information, read:
-http://www2.lm-sensors.nu/~lm78/cvs/lm_sensors2/README.thinkpad
+http://www.lm-sensors.org/browser/lm-sensors/trunk/README.thinkpad

+ 3 - 0
Documentation/i2c/chips/pca9539

@@ -1,6 +1,9 @@
 Kernel driver pca9539
 =====================
 
+NOTE: this driver is deprecated and will be dropped soon, use
+drivers/gpio/pca9539.c instead.
+
 Supported chips:
   * Philips PCA9539
     Prefix: 'pca9539'

+ 9 - 6
Documentation/ia64/aliasing-test.c

@@ -16,6 +16,7 @@
 #include <fcntl.h>
 #include <fnmatch.h>
 #include <string.h>
+#include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <unistd.h>
@@ -65,7 +66,7 @@ int scan_tree(char *path, char *file, off_t offset, size_t length, int touch)
 {
 	struct dirent **namelist;
 	char *name, *path2;
-	int i, n, r, rc, result = 0;
+	int i, n, r, rc = 0, result = 0;
 	struct stat buf;
 
 	n = scandir(path, &namelist, 0, alphasort);
@@ -113,7 +114,7 @@ skip:
 		free(namelist[i]);
 	}
 	free(namelist);
-	return rc;
+	return result;
 }
 
 char buf[1024];
@@ -149,7 +150,7 @@ int scan_rom(char *path, char *file)
 {
 	struct dirent **namelist;
 	char *name, *path2;
-	int i, n, r, rc, result = 0;
+	int i, n, r, rc = 0, result = 0;
 	struct stat buf;
 
 	n = scandir(path, &namelist, 0, alphasort);
@@ -180,7 +181,7 @@ int scan_rom(char *path, char *file)
 			 * important thing is that no MCA happened.
 			 */
 			if (rc > 0)
-				fprintf(stderr, "PASS: %s read %ld bytes\n", path2, rc);
+				fprintf(stderr, "PASS: %s read %d bytes\n", path2, rc);
 			else {
 				fprintf(stderr, "PASS: %s not readable\n", path2);
 				return rc;
@@ -201,10 +202,10 @@ skip:
 		free(namelist[i]);
 	}
 	free(namelist);
-	return rc;
+	return result;
 }
 
-int main()
+int main(void)
 {
 	int rc;
 
@@ -256,4 +257,6 @@ int main()
 	scan_tree("/proc/bus/pci", "??.?", 0xA0000, 0x20000, 0);
 	scan_tree("/proc/bus/pci", "??.?", 0xC0000, 0x40000, 1);
 	scan_tree("/proc/bus/pci", "??.?", 0, 1024*1024, 0);
+
+	return rc;
 }

+ 1 - 1
Documentation/input/input-programming.txt

@@ -22,7 +22,7 @@ static struct input_dev *button_dev;
 
 static void button_interrupt(int irq, void *dummy, struct pt_regs *fp)
 {
-	input_report_key(button_dev, BTN_1, inb(BUTTON_PORT) & 1);
+	input_report_key(button_dev, BTN_0, inb(BUTTON_PORT) & 1);
 	input_sync(button_dev);
 }
 

+ 14 - 1
Documentation/iostats.txt

@@ -58,7 +58,7 @@ they should not wrap twice before you notice them.
 Each set of stats only applies to the indicated device; if you want
 system-wide stats you'll have to find all the devices and sum them all up.
 
-Field  1 -- # of reads issued
+Field  1 -- # of reads completed
     This is the total number of reads completed successfully.
 Field  2 -- # of reads merged, field 6 -- # of writes merged
     Reads and writes which are adjacent to each other may be merged for
@@ -132,6 +132,19 @@ words, the number of reads for partitions is counted slightly before time
 of queuing for partitions, and at completion for whole disks.  This is
 a subtle distinction that is probably uninteresting for most cases.
 
+More significant is the error induced by counting the numbers of
+reads/writes before merges for partitions and after for disks. Since a
+typical workload usually contains a lot of successive and adjacent requests,
+the number of reads/writes issued can be several times higher than the
+number of reads/writes completed.
+
+In 2.6.25, the full statistic set is again available for partitions and
+disk and partition statistics are consistent again. Since we still don't
+keep record of the partition-relative address, an operation is attributed to
+the partition which contains the first sector of the request after the
+eventual merges. As requests can be merged across partitions, this could lead
+to some (probably insignificant) inaccuracy.
+
 Additional notes
 ----------------
 

+ 10 - 5
Documentation/kernel-parameters.txt

@@ -147,8 +147,10 @@ and is between 256 and 4096 characters. It is defined in the file
 			default: 0
 
 	acpi_sleep=	[HW,ACPI] Sleep options
-			Format: { s3_bios, s3_mode }
-			See Documentation/power/video.txt
+			Format: { s3_bios, s3_mode, s3_beep }
+			See Documentation/power/video.txt for s3_bios and s3_mode.
+			s3_beep is for debugging; it makes the PC's speaker beep
+			as soon as the kernel's real-mode entry point is called.
 
 	acpi_sci=	[HW,ACPI] ACPI System Control Interrupt trigger mode
 			Format: { level | edge | high | low }
@@ -175,6 +177,9 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	acpi_no_auto_ssdt	[HW,ACPI] Disable automatic loading of SSDT
 
+	acpi_no_initrd_override	[KNL,ACPI]
+			Disable loading custom ACPI tables from the initramfs
+
 	acpi_os_name=	[HW,ACPI] Tell ACPI BIOS the name of the OS
 			Format: To spoof as Windows 98: ="Microsoft Windows"
 
@@ -780,6 +785,9 @@ and is between 256 and 4096 characters. It is defined in the file
 			loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same
 			as idle=poll.
 
+	ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
+			Claim all unknown PCI IDE storage controllers.
+
 	ignore_loglevel	[KNL]
 			Ignore loglevel setting - this will print /all/
 			kernel messages to the console. Useful for debugging.
@@ -1965,9 +1973,6 @@ and is between 256 and 4096 characters. It is defined in the file
 			<deci-seconds>: poll all this frequency
 			0: no polling (default)
 
-	time		Show timing data prefixed to each printk message line
-			[deprecated, see 'printk.time']
-
 	tipar.timeout=	[HW,PPT]
 			Set communications timeout in tenths of a second
 			(default 15).

+ 74 - 18
Documentation/kprobes.txt

@@ -92,11 +92,12 @@ handler has run.  Up to MAX_STACK_SIZE bytes are copied -- e.g.,
 64 bytes on i386.
 
 Note that the probed function's args may be passed on the stack
-or in registers (e.g., for x86_64 or for an i386 fastcall function).
-The jprobe will work in either case, so long as the handler's
-prototype matches that of the probed function.
+or in registers.  The jprobe will work in either case, so long as the
+handler's prototype matches that of the probed function.
 
-1.3 How Does a Return Probe Work?
+1.3 Return Probes
+
+1.3.1 How Does a Return Probe Work?
 
 When you call register_kretprobe(), Kprobes establishes a kprobe at
 the entry to the function.  When the probed function is called and this
@@ -107,9 +108,9 @@ At boot time, Kprobes registers a kprobe at the trampoline.
 
 When the probed function executes its return instruction, control
 passes to the trampoline and that probe is hit.  Kprobes' trampoline
-handler calls the user-specified handler associated with the kretprobe,
-then sets the saved instruction pointer to the saved return address,
-and that's where execution resumes upon return from the trap.
+handler calls the user-specified return handler associated with the
+kretprobe, then sets the saved instruction pointer to the saved return
+address, and that's where execution resumes upon return from the trap.
 
 While the probed function is executing, its return address is
 stored in an object of type kretprobe_instance.  Before calling
@@ -131,6 +132,30 @@ zero when the return probe is registered, and is incremented every
 time the probed function is entered but there is no kretprobe_instance
 object available for establishing the return probe.
 
+1.3.2 Kretprobe entry-handler
+
+Kretprobes also provides an optional user-specified handler which runs
+on function entry. This handler is specified by setting the entry_handler
+field of the kretprobe struct. Whenever the kprobe placed by kretprobe at the
+function entry is hit, the user-defined entry_handler, if any, is invoked.
+If the entry_handler returns 0 (success) then a corresponding return handler
+is guaranteed to be called upon function return. If the entry_handler
+returns a non-zero error then Kprobes leaves the return address as is, and
+the kretprobe has no further effect for that particular function instance.
+
+Multiple entry and return handler invocations are matched using the unique
+kretprobe_instance object associated with them. Additionally, a user
+may also specify per return-instance private data to be part of each
+kretprobe_instance object. This is especially useful when sharing private
+data between corresponding user entry and return handlers. The size of each
+private data object can be specified at kretprobe registration time by
+setting the data_size field of the kretprobe struct. This data can be
+accessed through the data field of each kretprobe_instance object.
+
+In case the probed function is entered but there is no kretprobe_instance
+object available, then in addition to incrementing the nmissed count,
+the user entry_handler invocation is also skipped.
+
 2. Architectures Supported
 
 Kprobes, jprobes, and return probes are implemented on the following
@@ -244,9 +269,9 @@ Kprobes runs the handler whose address is jp->entry.
 The handler should have the same arg list and return type as the probed
 function; and just before it returns, it must call jprobe_return().
 (The handler never actually returns, since jprobe_return() returns
-control to Kprobes.)  If the probed function is declared asmlinkage,
-fastcall, or anything else that affects how args are passed, the
-handler's declaration must match.
+control to Kprobes.)  If the probed function is declared asmlinkage
+or anything else that affects how args are passed, the handler's
+declaration must match.
 
 register_jprobe() returns 0 on success, or a negative errno otherwise.
 
@@ -274,6 +299,8 @@ of interest:
 - ret_addr: the return address
 - rp: points to the corresponding kretprobe object
 - task: points to the corresponding task struct
+- data: points to per return-instance private data; see "Kretprobe
+	entry-handler" for details.
 
 The regs_return_value(regs) macro provides a simple abstraction to
 extract the return value from the appropriate register as defined by
@@ -556,23 +583,52 @@ report failed calls to sys_open().
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
+#include <linux/ktime.h>
+
+/* per-instance private data */
+struct my_data {
+	ktime_t entry_stamp;
+};
 
 static const char *probed_func = "sys_open";
 
-/* Return-probe handler: If the probed function fails, log the return value. */
-static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+/* Timestamp function entry. */
+static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	struct my_data *data;
+
+	if(!current->mm)
+		return 1; /* skip kernel threads */
+
+	data = (struct my_data *)ri->data;
+	data->entry_stamp = ktime_get();
+	return 0;
+}
+
+/* If the probed function failed, log the return value and duration.
+ * Duration may turn out to be zero consistently, depending upon the
+ * granularity of time accounting on the platform. */
+static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	int retval = regs_return_value(regs);
+	struct my_data *data = (struct my_data *)ri->data;
+	s64 delta;
+	ktime_t now;
+
 	if (retval < 0) {
-		printk("%s returns %d\n", probed_func, retval);
+		now = ktime_get();
+		delta = ktime_to_ns(ktime_sub(now, data->entry_stamp));
+		printk("%s: return val = %d (duration = %lld ns)\n",
+		       probed_func, retval, delta);
 	}
 	return 0;
 }
 
 static struct kretprobe my_kretprobe = {
-	.handler = ret_handler,
-	/* Probe up to 20 instances concurrently. */
-	.maxactive = 20
+	.handler = return_handler,
+	.entry_handler = entry_handler,
+	.data_size = sizeof(struct my_data),
+	.maxactive = 20, /* probe up to 20 instances concurrently */
 };
 
 static int __init kretprobe_init(void)
@@ -584,7 +640,7 @@ static int __init kretprobe_init(void)
 		printk("register_kretprobe failed, returned %d\n", ret);
 		return -1;
 	}
-	printk("Planted return probe at %p\n", my_kretprobe.kp.addr);
+	printk("Kretprobe active on %s\n", my_kretprobe.kp.symbol_name);
 	return 0;
 }
 
@@ -594,7 +650,7 @@ static void __exit kretprobe_exit(void)
 	printk("kretprobe unregistered\n");
 	/* nmissed > 0 suggests that maxactive was set too low. */
 	printk("Missed probing %d instances of %s\n",
-		my_kretprobe.nmissed, probed_func);
+	       my_kretprobe.nmissed, probed_func);
 }
 
 module_init(kretprobe_init)

+ 10 - 10
Documentation/kref.txt

@@ -141,10 +141,10 @@ The last rule (rule 3) is the nastiest one to handle.  Say, for
 instance, you have a list of items that are each kref-ed, and you wish
 to get the first one.  You can't just pull the first item off the list
 and kref_get() it.  That violates rule 3 because you are not already
-holding a valid pointer.  You must add locks or semaphores.  For
-instance:
+holding a valid pointer.  You must add a mutex (or some other lock).
+For instance:
 
-static DECLARE_MUTEX(sem);
+static DEFINE_MUTEX(mutex);
 static LIST_HEAD(q);
 struct my_data
 {
@@ -155,12 +155,12 @@ struct my_data
 static struct my_data *get_entry()
 {
 	struct my_data *entry = NULL;
-	down(&sem);
+	mutex_lock(&mutex);
 	if (!list_empty(&q)) {
 		entry = container_of(q.next, struct my_q_entry, link);
 		kref_get(&entry->refcount);
 	}
-	up(&sem);
+	mutex_unlock(&mutex);
 	return entry;
 }
 
@@ -174,9 +174,9 @@ static void release_entry(struct kref *ref)
 
 static void put_entry(struct my_data *entry)
 {
-	down(&sem);
+	mutex_lock(&mutex);
 	kref_put(&entry->refcount, release_entry);
-	up(&sem);
+	mutex_unlock(&mutex);
 }
 
 The kref_put() return value is useful if you do not want to hold the
@@ -191,13 +191,13 @@ static void release_entry(struct kref *ref)
 
 static void put_entry(struct my_data *entry)
 {
-	down(&sem);
+	mutex_lock(&mutex);
 	if (kref_put(&entry->refcount, release_entry)) {
 		list_del(&entry->link);
-		up(&sem);
+		mutex_unlock(&mutex);
 		kfree(entry);
 	} else
-		up(&sem);
+		mutex_unlock(&mutex);
 }
 
 This is really more useful if you have to call other routines as part

+ 10 - 0
Documentation/laptops/00-INDEX

@@ -0,0 +1,10 @@
+00-INDEX
+	- This file
+acer-wmi.txt
+	- information on the Acer Laptop WMI Extras driver.
+sony-laptop.txt
+	- Sony Notebook Control Driver (SNC) Readme.
+sonypi.txt
+	- info on Linux Sony Programmable I/O Device support.
+thinkpad-acpi.txt
+	- information on the (IBM and Lenovo) ThinkPad ACPI Extras driver.

+ 202 - 0
Documentation/laptops/acer-wmi.txt

@@ -0,0 +1,202 @@
+Acer Laptop WMI Extras Driver
+http://code.google.com/p/aceracpi
+Version 0.1
+9th February 2008
+
+Copyright 2007-2008 Carlos Corbacho <carlos@strangeworlds.co.uk>
+
+acer-wmi is a driver to allow you to control various parts of your Acer laptop
+hardware under Linux which are exposed via ACPI-WMI.
+
+This driver completely replaces the old out-of-tree acer_acpi, which I am
+currently maintaining for bug fixes only on pre-2.6.25 kernels. All development
+work is now focused solely on acer-wmi.
+
+Disclaimer
+**********
+
+Acer and Wistron have provided nothing towards the development of acer_acpi or
+acer-wmi. All information we have has been through the efforts of the developers
+and the users to discover as much as possible about the hardware.
+
+As such, I do warn that this could break your hardware - this is extremely
+unlikely of course, but please bear this in mind.
+
+Background
+**********
+
+acer-wmi is derived from acer_acpi, originally developed by Mark
+Smith in 2005, then taken over by Carlos Corbacho in 2007, in order to activate
+the wireless LAN card under a 64-bit version of Linux, as acerhk[1] (the
+previous solution to the problem) relied on making 32 bit BIOS calls which are
+not possible in kernel space from a 64 bit OS.
+
+[1] acerhk: http://www.cakey.de/acerhk/
+
+Supported Hardware
+******************
+
+Please see the website for the current list of known working hardware:
+
+http://code.google.com/p/aceracpi/wiki/SupportedHardware
+
+If your laptop is not listed, or listed as unknown, and works with acer-wmi,
+please contact me with a copy of the DSDT.
+
+If your Acer laptop doesn't work with acer-wmi, I would also like to see the
+DSDT.
+
+To send me the DSDT, as root/sudo:
+
+cat /sys/firmware/acpi/DSDT > dsdt
+
+And send me the resulting 'dsdt' file.
+
+Usage
+*****
+
+On Acer laptops, acer-wmi should already be autoloaded based on DMI matching.
+For non-Acer laptops, until WMI based autoloading support is added, you will
+need to manually load acer-wmi.
+
+acer-wmi creates /sys/devices/platform/acer-wmi, and fills it with various
+files whose usage is detailed below, which enables you to control some of the
+following (varies between models):
+
+* the wireless LAN card radio
+* inbuilt Bluetooth adapter
+* inbuilt 3G card
+* mail LED of your laptop
+* brightness of the LCD panel
+
+Wireless
+********
+
+With regards to wireless, all acer-wmi does is enable the radio on the card. It
+is not responsible for the wireless LED - once the radio is enabled, this is
+down to the wireless driver for your card. So the behaviour of the wireless LED,
+once you enable the radio, will depend on your hardware and driver combination.
+
+e.g. With the BCM4318 on the Acer Aspire 5020 series:
+
+ndiswrapper: Light blinks on when transmitting
+bcm43xx/b43: Solid light, blinks off when transmitting
+
+Wireless radio control is unconditionally enabled - all Acer laptops that support
+acer-wmi come with built-in wireless. However, should you feel so inclined to
+ever wish to remove the card, or swap it out at some point, please get in touch
+with me, as we may well be able to gain some data on wireless card detection.
+
+To read the status of the wireless radio (0=off, 1=on):
+cat /sys/devices/platform/acer-wmi/wireless
+
+To enable the wireless radio:
+echo 1 > /sys/devices/platform/acer-wmi/wireless
+
+To disable the wireless radio:
+echo 0 > /sys/devices/platform/acer-wmi/wireless
+
+To set the state of the wireless radio when loading acer-wmi, pass:
+wireless=X (where X is 0 or 1)
+
+Bluetooth
+*********
+
+For bluetooth, this is an internal USB dongle, so once enabled, you will get
+a USB device connection event, and a new USB device appears. When you disable
+bluetooth, you get the reverse - a USB device disconnect event, followed by the
+device disappearing again.
+
+Bluetooth is autodetected by acer-wmi, so if you do not have a bluetooth module
+installed in your laptop, this file won't exist (please be aware that it is
+quite common for Acer not to fit bluetooth to their laptops - so just because
+you have a bluetooth button on the laptop, doesn't mean that bluetooth is
+installed).
+
+For the adventurously minded - if you want to buy an internal bluetooth
+module off the internet that is compatible with your laptop and fit it, then
+it will work just fine with acer-wmi.
+
+To read the status of the bluetooth module (0=off, 1=on):
+cat /sys/devices/platform/acer-wmi/bluetooth
+
+To enable the bluetooth module:
+echo 1 > /sys/devices/platform/acer-wmi/bluetooth
+
+To disable the bluetooth module:
+echo 0 > /sys/devices/platform/acer-wmi/bluetooth
+
+To set the state of the bluetooth module when loading acer-wmi, pass:
+bluetooth=X (where X is 0 or 1)
+
+3G
+**
+
+3G is currently not autodetected, so the 'threeg' file is always created under
+sysfs. So far, no-one in possession of an Acer laptop with 3G built-in appears to
+have tried Linux, or reported back, so we don't have any information on this.
+
+If you have an Acer laptop that does have a 3G card in, please contact me so we
+can properly detect these, and find out a bit more about them.
+
+To read the status of the 3G card (0=off, 1=on):
+cat /sys/devices/platform/acer-wmi/threeg
+
+To enable the 3G card:
+echo 1 > /sys/devices/platform/acer-wmi/threeg
+
+To disable the 3G card:
+echo 0 > /sys/devices/platform/acer-wmi/threeg
+
+To set the state of the 3G card when loading acer-wmi, pass:
+threeg=X (where X is 0 or 1)
+
+Mail LED
+********
+
+This can be found in most older Acer laptops supported by acer-wmi, and many
+newer ones - it is built into the 'mail' button, and blinks when active.
+
+On newer (WMID) laptops though, we have no way of detecting the mail LED. If
+your laptop identifies itself in dmesg as a WMID model, then please try loading
+acer-wmi with:
+
+force_series=2490
+
+This will use a known alternative method of reading/writing the mail LED. If
+it works, please report back to me with the DMI data from your laptop so this
+can be added to acer-wmi.
+
+The LED is exposed through the LED subsystem, and can be found in:
+
+/sys/devices/platform/acer-wmi/leds/acer-mail:green/
+
+The mail LED is autodetected, so if you don't have one, the LED device won't
+be registered.
+
+If you have a mail LED that is not green, please report this to me.
+
+Backlight
+*********
+
+The backlight brightness control is available on all acer-wmi supported
+hardware. The maximum brightness level is usually 15, but on some newer laptops
+it's 10 (this is again autodetected).
+
+The backlight is exposed through the backlight subsystem, and can be found in:
+
+/sys/devices/platform/acer-wmi/backlight/acer-wmi/
+
+Credits
+*******
+
+Olaf Tauber, who did the real hard work when he developed acerhk
+http://www.informatik.hu-berlin.de/~tauber/acerhk
+All the authors of laptop ACPI modules in the kernel, whose work
+was an inspiration in the early days of acer_acpi
+Mathieu Segaud, who solved the problem with having to modprobe the driver
+twice in acer_acpi 0.2.
+Jim Ramsay, who added support for the WMID interface
+Mark Smith, who started the original acer_acpi
+
+And the many people who have used both acer_acpi and acer-wmi.

+ 0 - 1
Documentation/sony-laptop.txt → Documentation/laptops/sony-laptop.txt

@@ -114,4 +114,3 @@ Bugs/Limitations:
   sonypi driver (through /dev/sonypi) does not try to use the
   sony-laptop driver. In the future, spicctrl could try sonypi first,
   and if it isn't present, try sony-laptop instead.
-

+ 0 - 0
Documentation/sonypi.txt → Documentation/laptops/sonypi.txt


+ 108 - 8
Documentation/thinkpad-acpi.txt → Documentation/laptops/thinkpad-acpi.txt

@@ -1,7 +1,7 @@
 		     ThinkPad ACPI Extras Driver
 
-                            Version 0.17
-                         October 04th, 2007
+                            Version 0.19
+                         January 06th, 2008
 
                Borislav Deianov <borislav@users.sf.net>
              Henrique de Moraes Holschuh <hmh@hmh.eng.br>
@@ -215,6 +215,11 @@ The following commands can be written to the /proc/acpi/ibm/hotkey file:
 	... any other 8-hex-digit mask ...
 	echo reset > /proc/acpi/ibm/hotkey -- restore the original mask
 
+The procfs interface does not support NVRAM polling control.  So as to
+maintain maximum bug-to-bug compatibility, it does not report any masks,
+nor does it allow one to manipulate the hot key mask when the firmware
+does not support masks at all, even if NVRAM polling is in use.
+
 sysfs notes:
 
 	hotkey_bios_enabled:
@@ -231,17 +236,26 @@ sysfs notes:
 		to this value.
 
 	hotkey_enable:
-		Enables/disables the hot keys feature, and reports
-		current status of the hot keys feature.
+		Enables/disables the hot keys feature in the ACPI
+		firmware, and reports current status of the hot keys
+		feature.  Has no effect on the NVRAM hot key polling
+		functionality.
 
 		0: disables the hot keys feature / feature disabled
 		1: enables the hot keys feature / feature enabled
 
 	hotkey_mask:
-		bit mask to enable driver-handling and ACPI event
-		generation for each hot key (see above).  Returns the
-		current status of the hot keys mask, and allows one to
-		modify it.
+		bit mask to enable driver-handling (and depending on
+		the firmware, ACPI event generation) for each hot key
+		(see above).  Returns the current status of the hot keys
+		mask, and allows one to modify it.
+
+		Note: when NVRAM polling is active, the firmware mask
+		will be different from the value returned by
+		hotkey_mask.  The driver will retain enabled bits for
+		hotkeys that are under NVRAM polling even if the
+		firmware refuses them, and will not set these bits on
+		the firmware hot key mask.
 
 	hotkey_all_mask:
 		bit mask that should enable event reporting for all
@@ -257,12 +271,48 @@ sysfs notes:
 		handled by the firmware anyway.  Echo it to
 		hotkey_mask above, to use.
 
+	hotkey_source_mask:
+		bit mask that selects which hot keys the driver will
+		poll the NVRAM for.  This is auto-detected by the driver
+		based on the capabilities reported by the ACPI firmware,
+		but it can be overridden at runtime.
+
+		Hot keys whose bits are set in both hotkey_source_mask
+		and also on hotkey_mask are polled for in NVRAM.  Only a
+		few hot keys are available through CMOS NVRAM polling.
+
+		Warning: when in NVRAM mode, the volume up/down/mute
+		keys are synthesized according to changes in the mixer,
+		so you have to use volume up or volume down to unmute,
+		as per the ThinkPad volume mixer user interface.  When
+		in ACPI event mode, volume up/down/mute are reported as
+		separate events, but this behaviour may be corrected in
+		future releases of this driver, in which case the
+		ThinkPad volume mixer user interface semantics will be
+		enforced.
+
+	hotkey_poll_freq:
+		frequency in Hz for hot key polling. It must be between
+		0 and 25 Hz.  Polling is only carried out when strictly
+		needed.
+
+		Setting hotkey_poll_freq to zero disables polling, and
+		will cause hot key presses that require NVRAM polling
+		to never be reported.
+
+		Setting hotkey_poll_freq too low will cause repeated
+		pressings of the same hot key to be misreported as a
+		single key press, or to not even be detected at all.
+		The recommended polling frequency is 10Hz.
+
 	hotkey_radio_sw:
 		if the ThinkPad has a hardware radio switch, this
 		attribute will read 0 if the switch is in the "radios
 		disabled" postition, and 1 if the switch is in the
 		"radios enabled" position.
 
+		This attribute has poll()/select() support.
+
 	hotkey_report_mode:
 		Returns the state of the procfs ACPI event report mode
 		filter for hot keys.  If it is set to 1 (the default),
@@ -277,6 +327,25 @@ sysfs notes:
 		May return -EPERM (write access locked out by module
 		parameter) or -EACCES (read-only).
 
+	wakeup_reason:
+		Set to 1 if the system is waking up because the user
+		requested a bay ejection.  Set to 2 if the system is
+		waking up because the user requested the system to
+		undock.  Set to zero for normal wake-ups or wake-ups
+		due to unknown reasons.
+
+		This attribute has poll()/select() support.
+
+	wakeup_hotunplug_complete:
+		Set to 1 if the system was woken up because of an
+		undock or bay ejection request, and that request
+		was successfully completed.  At this point, it might
+		be useful to send the system back to sleep, at the
+		user's choice.  Refer to HKEY events 0x4003 and
+		0x3003, below.
+
+		This attribute has poll()/select() support.
+
 input layer notes:
 
 A Hot key is mapped to a single input layer EV_KEY event, possibly
@@ -427,6 +496,23 @@ Non hot-key ACPI HKEY event map:
 The above events are not propagated by the driver, except for legacy
 compatibility purposes when hotkey_report_mode is set to 1.
 
+0x2304		System is waking up from suspend to undock
+0x2305		System is waking up from suspend to eject bay
+0x2404		System is waking up from hibernation to undock
+0x2405		System is waking up from hibernation to eject bay
+
+The above events are never propagated by the driver.
+
+0x3003		Bay ejection (see 0x2x05) complete, can sleep again
+0x4003		Undocked (see 0x2x04), can sleep again
+0x5009		Tablet swivel: switched to tablet mode
+0x500A		Tablet swivel: switched to normal mode
+0x500B		Tablet pen inserted into its storage bay
+0x500C		Tablet pen removed from its storage bay
+0x5010		Brightness level changed (newer Lenovo BIOSes)
+
+The above events are propagated by the driver.
+
 Compatibility notes:
 
 ibm-acpi and thinkpad-acpi 0.15 (mainline kernels before 2.6.23) never
@@ -1263,3 +1349,17 @@ Sysfs interface changelog:
 		and the hwmon class for libsensors4 (lm-sensors 3)
 		compatibility.  Moved all hwmon attributes to this
 		new platform device.
+
+0x020100:	Marker for thinkpad-acpi with hot key NVRAM polling
+		support.  If you must, use it to know you should not
+		start a userspace NVRAM poller (allows detecting when
+		NVRAM is compiled out by the user because it is
+		unneeded/undesired in the first place).
+0x020101:	Marker for thinkpad-acpi with hot key NVRAM polling
+		and proper hotkey_mask semantics (version 8 of the
+		NVRAM polling patch).  Some development snapshots of
+		0.18 had an earlier version that did strange things
+		to hotkey_mask.
+
+0x020200:	Add poll()/select() support to the following attributes:
+		hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason

+ 23 - 6
Documentation/leds-class.txt

@@ -39,12 +39,33 @@ LED Device Naming
 
 Is currently of the form:
 
-"devicename:colour"
+"devicename:colour:function"
 
 There have been calls for LED properties such as colour to be exported as
 individual led class attributes. As a solution which doesn't incur as much
 overhead, I suggest these become part of the device name. The naming scheme
-above leaves scope for further attributes should they be needed.
+above leaves scope for further attributes should they be needed. If sections
+of the name don't apply, just leave that section blank.
+
+
+Hardware accelerated blink of LEDs
+==================================
+
+Some LEDs can be programmed to blink without any CPU interaction. To
+support this feature, a LED driver can optionally implement the
+blink_set() function (see <linux/leds.h>). If implemented, triggers can
+attempt to use it before falling back to software timers. The blink_set()
+function should return 0 if the blink setting is supported, or -EINVAL
+otherwise, which means that LED blinking will be handled by software.
+
+The blink_set() function should choose a user friendly blinking
+value if it is called with *delay_on==0 && *delay_off==0 parameters. In
+this case the driver should give back the chosen value through delay_on
+and delay_off parameters to the leds subsystem.
+
+Any call to the brightness_set() callback function should cancel the
+previously programmed hardware blinking function so setting the brightness
+to 0 can also cancel the blinking of the LED.
 
 
 Known Issues
@@ -55,10 +76,6 @@ would cause nightmare dependency issues. I see this as a minor issue
 compared to the benefits the simple trigger functionality brings. The
 rest of the LED subsystem can be modular.
 
-Some leds can be programmed to flash in hardware. As this isn't a generic
-LED device property, this should be exported as a device specific sysfs
-attribute rather than part of the class if this functionality is required.
-
 
 Future Development
 ==================

+ 10 - 0
Documentation/md.txt

@@ -416,6 +416,16 @@ also have
      sectors in total that could need to be processed.  The two
      numbers are separated by a '/'  thus effectively showing one
      value, a fraction of the process that is complete.
+     A 'select' on this attribute will return when resync completes,
+     when it reaches the current sync_max (below) and possibly at
+     other times.
+
+   sync_max
+     This is a number of sectors at which point a resync/recovery
+     process will pause.  When a resync is active, the value can
+     only ever be increased, never decreased.  The value of 'max'
+     effectively disables the limit.
+
 
    sync_speed
      This shows the current actual speed, in K/sec, of the current

+ 149 - 0
Documentation/mn10300/ABI.txt

@@ -0,0 +1,149 @@
+			   =========================
+			   MN10300 FUNCTION CALL ABI
+			   =========================
+
+=======
+GENERAL
+=======
+
+The MN10300/AM33 kernel runs in little-endian mode; big-endian mode is not
+supported.
+
+The stack grows downwards, and should always be 32-bit aligned. There are
+separate stack pointer registers for userspace and the kernel.
+
+
+================
+ARGUMENT PASSING
+================
+
+The first two arguments (assuming up to 32-bits per argument) to a function are
+passed in the D0 and D1 registers respectively; all other arguments are passed
+on the stack.
+
+If 64-bit arguments are being passed, then they are never split between
+registers and the stack. If the first argument is a 64-bit value, it will be
+passed in D0:D1. If the first argument is not a 64-bit value, but the second
+is, the second will be passed entirely on the stack and D1 will be unused.
+
+Arguments smaller than 32-bits are not coalesced within a register or a stack
+word. For example, two byte-sized arguments will always be passed in separate
+registers or word-sized stack slots.
+
+
+=================
+CALLING FUNCTIONS
+=================
+
+The caller must allocate twelve bytes on the stack for the callee's use before
+it inserts a CALL instruction. The CALL instruction will write into the TOS
+word, but won't actually modify the stack pointer; similarly, the RET
+instruction reads from the TOS word of the stack, but doesn't move the stack
+pointer beyond it.
+
+
+	Stack:
+	|		|
+	|		|
+	|---------------| SP+20
+	| 4th Arg	|
+	|---------------| SP+16
+	| 3rd Arg	|
+	|---------------| SP+12
+	| D1 Save Slot	|
+	|---------------| SP+8
+	| D0 Save Slot	|
+	|---------------| SP+4
+	| Return Addr	|
+	|---------------| SP
+	|		|
+	|		|
+
+
+The caller must leave space on the stack (hence an allocation of twelve bytes)
+in which the callee may store the first two arguments.
+
+
+============
+RETURN VALUE
+============
+
+The return value is passed in D0 for an integer (or D0:D1 for a 64-bit value),
+or A0 for a pointer.
+
+If the return value is a value larger than 64-bits, or is a structure or an
+array, then a hidden first argument will be passed to the callee by the caller:
+this will point to a piece of memory large enough to hold the result of the
+function. In this case, the callee will return the value in that piece of
+memory, and no value will be returned in D0 or A0.
+
+
+===================
+REGISTER CLOBBERING
+===================
+
+The values in certain registers may be clobbered by the callee, and other
+values must be saved:
+
+	Clobber:	D0-D1, A0-A1, E0-E3
+	Save:		D2-D3, A2-A3, E4-E7, SP
+
+All other non-supervisor-only registers are clobberable (such as MDR, MCRL,
+MCRH).
+
+
+=================
+SPECIAL REGISTERS
+=================
+
+Certain ordinary registers may carry special usage for the compiler:
+
+	A3:	Frame pointer
+	E2:	TLS pointer
+
+
+==========
+KERNEL ABI
+==========
+
+The kernel may use a slightly different ABI internally.
+
+ (*) E2
+
+     If CONFIG_MN10300_CURRENT_IN_E2 is defined, then the current task pointer
+     will be kept in the E2 register, and that register will be marked
+     unavailable for the compiler to use as a scratch register.
+
+     Normally the kernel uses something like:
+
+	MOV	SP,An
+	AND	0xFFFFE000,An
+	MOV	(An),Rm		// Rm holds current
+	MOV	(yyy,Rm)	// Access current->yyy
+
+     To find the address of current; but since this option permits current to
+     be carried globally in a register, it can use:
+
+	MOV	(yyy,E2)	// Access current->yyy
+
+     instead.
+
+
+===============
+SYSTEM CALL ABI
+===============
+
+System calls are called with the following convention:
+
+	REGISTER	ENTRY			EXIT
+	===============	=======================	=======================
+	D0		Syscall number		Return value
+	A0		1st syscall argument	Saved
+	D1		2nd syscall argument	Saved
+	A3		3rd syscall argument	Saved
+	A2		4th syscall argument	Saved
+	D3		5th syscall argument	Saved
+	D2		6th syscall argument	Saved
+
+All other registers are saved.  The layout is a consequence of the way the MOVM
+instruction stores registers onto the stack.

+ 60 - 0
Documentation/mn10300/compartmentalisation.txt

@@ -0,0 +1,60 @@
+		   =========================================
+		   PART-SPECIFIC SOURCE COMPARTMENTALISATION
+		   =========================================
+
+The sources for various parts are compartmentalised at two different levels:
+
+ (1) Processor level
+
+     The "processor level" is a CPU core plus the other on-silicon
+     peripherals.
+
+     Processor-specific header files are divided among directories in a similar
+     way to the CPU level:
+
+	(*) include/asm-mn10300/proc-mn103e010/
+
+	    Support for the AM33v2 CPU core.
+
+     The appropriate processor is selected by a CONFIG_MN10300_PROC_YYYY option
+     from the "Processor support" choice menu in the arch/mn10300/Kconfig file.
+
+
+ (2) Unit level
+
+     The "unit level" is a processor plus all the external peripherals
+     controlled by that processor.
+
+     Unit-specific header files are divided among directories in a similar way
+     to the CPU level; not only that, but specific sources may also be
+     segregated into separate directories under the arch directory:
+
+	(*) include/asm-mn10300/unit-asb2303/
+	(*) arch/mn10300/unit-asb2303/
+
+	    Support for the ASB2303 board with an ASB2308 daughter board.
+
+	(*) include/asm-mn10300/unit-asb2305/
+	(*) arch/mn10300/unit-asb2305/
+
+	    Support for the ASB2305 board.
+
+     The appropriate unit is selected by a CONFIG_MN10300_UNIT_ZZZZ option
+     from the "Unit type" choice menu in the arch/mn10300/Kconfig file.
+
+
+============
+COMPILE TIME
+============
+
+When the kernel is compiled, symbolic links will be made in the asm header file
+directory for this arch:
+
+	include/asm-mn10300/proc => include/asm-mn10300/proc-YYYY/
+	include/asm-mn10300/unit => include/asm-mn10300/unit-ZZZZ/
+
+So that the header files contained in those directories can be accessed without
+lots of #ifdef-age.
+
+The appropriate arch/mn10300/unit-ZZZZ directory will also be entered by the
+compilation process; all other unit-specific directories will be ignored.

+ 2 - 2
Documentation/pcmcia/driver-changes.txt

@@ -33,8 +33,8 @@ This file details changes in 2.6 which affect PCMCIA card driver authors:
    and can be used (e.g. for SET_NETDEV_DEV) by using
    handle_to_dev(client_handle_t * handle).
 
-* Convert internal I/O port addresses to unsigned long (as of 2.6.11)
-   ioaddr_t should be replaced by kio_addr_t in PCMCIA card drivers.
+* Convert internal I/O port addresses to unsigned int (as of 2.6.11)
+   ioaddr_t should be replaced by unsigned int in PCMCIA card drivers.
 
 * irq_mask and irq_list parameters (as of 2.6.11)
    The irq_mask and irq_list parameters should no longer be used in

+ 59 - 0
Documentation/pm_qos_interface.txt

@@ -0,0 +1,59 @@
+PM quality of Service interface.
+
+This interface provides a kernel and user mode interface for registering
+performance expectations by drivers, subsystems and user space applications on
+one of the parameters.
+
+Currently we have {cpu_dma_latency, network_latency, network_throughput} as the
+initial set of pm_qos parameters.
+
+The infrastructure exposes multiple misc device nodes one per implemented
+parameter.  The set of parameters implemented is defined by pm_qos_power_init()
+and pm_qos_params.h.  This is done because having the available parameters
+being runtime configurable or changeable from a driver was seen as too easy to
+abuse.
+
+For each parameter a list of performance requirements is maintained along with
+an aggregated target value.  The aggregated target value is updated with
+changes to the requirement list or elements of the list.  Typically the
+aggregated target value is simply the max or min of the requirement values held
+in the parameter list elements.
+
+From kernel mode the use of this interface is simple:
+pm_qos_add_requirement(param_id, name, target_value):
+Will insert a named element in the list for that identified PM_QOS parameter
+with the target value.  Upon change to this list the new target is recomputed
+and any registered notifiers are called only if the target value is now
+different.
+
+pm_qos_update_requirement(param_id, name, new_target_value):
+Will search the list identified by the param_id for the named list element and
+then update its target value, calling the notification tree if the aggregated
+target is changed.
+
+pm_qos_remove_requirement(param_id, name):
+Will search the identified list for the named element and remove it, after
+removal it will update the aggregate target and call the notification tree if
+the target was changed as a result of removing the named requirement.
+
+
+From user mode:
+Only processes can register a pm_qos requirement.  To provide for automatic
+cleanup of the process, the interface requires the process to register its
+parameter requirements in the following way:
+
+To register the default pm_qos target for the specific parameter, the process
+must open one of /dev/[cpu_dma_latency, network_latency, network_throughput]
+
+As long as the device node is held open that process has a registered
+requirement on the parameter.  The name of the requirement is "process_<PID>"
+derived from the current->pid from within the open system call.
+
+To change the requested target value the process needs to write a s32 value to
+the open device node.  This translates to a pm_qos_update_requirement call.
+
+To remove the user mode request for a target value simply close the device
+node.
+
+
+

+ 5 - 0
Documentation/power/swsusp.txt

@@ -386,6 +386,11 @@ before suspending; then remount them after resuming.
 There is a work-around for this problem.  For more information, see
 Documentation/usb/persist.txt.
 
+Q: Can I suspend-to-disk using a swap partition under LVM?
+
+A: No. You can suspend successfully, but you'll not be able to
+resume. uswsusp should be able to work with LVM. See suspend.sf.net.
+
 Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were
 compiled with the similar configuration files. Anyway I found that
 suspend to disk (and resume) is much slower on 2.6.16 compared to

+ 42 - 0
Documentation/powerpc/booting-without-of.txt

@@ -57,6 +57,7 @@ Table of Contents
       n) 4xx/Axon EMAC ethernet nodes
       o) Xilinx IP cores
       p) Freescale Synchronous Serial Interface
+	  q) USB EHCI controllers
 
   VII - Specifying interrupt information for devices
     1) interrupts property
@@ -2577,6 +2578,20 @@ platforms are moved over to use the flattened-device-tree model.
       Requred properties:
        - current-speed : Baud rate of uartlite
 
+      v) Xilinx hwicap
+
+		Xilinx hwicap devices provide access to the configuration logic
+		of the FPGA through the Internal Configuration Access Port
+		(ICAP).  The ICAP enables partial reconfiguration of the FPGA,
+		readback of the configuration information, and some control over
+		'warm boots' of the FPGA fabric.
+
+		Required properties:
+		- xlnx,family : The family of the FPGA, necessary since the
+                      capabilities of the underlying ICAP hardware
+                      differ between different families.  May be
+                      'virtex2p', 'virtex4', or 'virtex5'.
+
     p) Freescale Synchronous Serial Interface
 
        The SSI is a serial device that communicates with audio codecs.  It can
@@ -2775,6 +2790,33 @@ platforms are moved over to use the flattened-device-tree model.
 		interrupt-parent = < &ipic >;
         };
 
+    q) USB EHCI controllers
+
+    Required properties:
+      - compatible : should be "usb-ehci".
+      - reg : should contain at least address and length of the standard EHCI
+        register set for the device. Optional platform-dependent registers
+        (debug-port or other) can be also specified here, but only after
+        definition of standard EHCI registers.
+      - interrupts : one EHCI interrupt should be described here.
+    If device registers are implemented in big endian mode, the device
+    node should have "big-endian-regs" property.
+    If controller implementation operates with big endian descriptors,
+    "big-endian-desc" property should be specified.
+    If both big endian registers and descriptors are used by the controller
+    implementation, "big-endian" property can be specified instead of having
+    both "big-endian-regs" and "big-endian-desc".
+
+     Example (Sequoia 440EPx):
+	    ehci@e0000300 {
+		   compatible = "ibm,usb-ehci-440epx", "usb-ehci";
+		   interrupt-parent = <&UIC0>;
+		   interrupts = <1a 4>;
+		   reg = <0 e0000300 90 0 e0000390 70>;
+		   big-endian;
+	   };
+
+
    More devices will be defined as this spec matures.
 
 VII - Specifying interrupt information for devices

+ 21 - 21
Documentation/rtc.txt

@@ -182,8 +182,8 @@ driver returns ENOIOCTLCMD.  Some common examples:
 	since the frequency is stored in the irq_freq member of the rtc_device
 	structure.  Your driver needs to initialize the irq_freq member during
 	init.  Make sure you check the requested frequency is in range of your
-	hardware in the irq_set_freq function.  If you cannot actually change
-	the frequency, just return -ENOTTY.
+	hardware in the irq_set_freq function.  If it isn't, return -EINVAL.  If
+	you cannot actually change the frequency, do not define irq_set_freq.
 
 If all else fails, check out the rtc-test.c driver!
 
@@ -268,8 +268,8 @@ int main(int argc, char **argv)
 		/* This read will block */
 		retval = read(fd, &data, sizeof(unsigned long));
 		if (retval == -1) {
-		        perror("read");
-		        exit(errno);
+			perror("read");
+			exit(errno);
 		}
 		fprintf(stderr, " %d",i);
 		fflush(stderr);
@@ -326,11 +326,11 @@ test_READ:
 		rtc_tm.tm_sec %= 60;
 		rtc_tm.tm_min++;
 	}
-	if  (rtc_tm.tm_min == 60) {
+	if (rtc_tm.tm_min == 60) {
 		rtc_tm.tm_min = 0;
 		rtc_tm.tm_hour++;
 	}
-	if  (rtc_tm.tm_hour == 24)
+	if (rtc_tm.tm_hour == 24)
 		rtc_tm.tm_hour = 0;
 
 	retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
@@ -407,8 +407,8 @@ test_PIE:
 					"\n...Periodic IRQ rate is fixed\n");
 				goto done;
 			}
-		        perror("RTC_IRQP_SET ioctl");
-		        exit(errno);
+			perror("RTC_IRQP_SET ioctl");
+			exit(errno);
 		}
 
 		fprintf(stderr, "\n%ldHz:\t", tmp);
@@ -417,27 +417,27 @@ test_PIE:
 		/* Enable periodic interrupts */
 		retval = ioctl(fd, RTC_PIE_ON, 0);
 		if (retval == -1) {
-		        perror("RTC_PIE_ON ioctl");
-		        exit(errno);
+			perror("RTC_PIE_ON ioctl");
+			exit(errno);
 		}
 
 		for (i=1; i<21; i++) {
-		        /* This blocks */
-		        retval = read(fd, &data, sizeof(unsigned long));
-		        if (retval == -1) {
-				       perror("read");
-				       exit(errno);
-		        }
-		        fprintf(stderr, " %d",i);
-		        fflush(stderr);
-		        irqcount++;
+			/* This blocks */
+			retval = read(fd, &data, sizeof(unsigned long));
+			if (retval == -1) {
+				perror("read");
+				exit(errno);
+			}
+			fprintf(stderr, " %d",i);
+			fflush(stderr);
+			irqcount++;
 		}
 
 		/* Disable periodic interrupts */
 		retval = ioctl(fd, RTC_PIE_OFF, 0);
 		if (retval == -1) {
-		        perror("RTC_PIE_OFF ioctl");
-		        exit(errno);
+			perror("RTC_PIE_OFF ioctl");
+			exit(errno);
 		}
 	}
 

+ 59 - 0
Documentation/sched-rt-group.txt

@@ -0,0 +1,59 @@
+
+
+Real-Time group scheduling.
+
+The problem space:
+
+In order to schedule multiple groups of realtime tasks each group must
+be assigned a fixed portion of the CPU time available. Without a minimum
+guarantee a realtime group can obviously fall short. A fuzzy upper limit
+is of no use since it cannot be relied upon. Which leaves us with just
+the single fixed portion.
+
+CPU time is divided by means of specifying how much time can be spent
+running in a given period. Say a frame fixed realtime renderer must
+deliver 25 frames a second, which yields a period of 0.04s. Now say
+it will also have to play some music and respond to input, leaving it
+with around 80% for the graphics. We can then give this group a runtime
+of 0.8 * 0.04s = 0.032s.
+
+This way the graphics group will have a 0.04s period with a 0.032s runtime
+limit.
+
+Now if the audio thread needs to refill the DMA buffer every 0.005s, but
+needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s
+= 0.00015s.
+
+
+The Interface:
+
+system wide:
+
+/proc/sys/kernel/sched_rt_period_us
+/proc/sys/kernel/sched_rt_runtime_us
+
+CONFIG_FAIR_USER_SCHED
+
+/sys/kernel/uids/<uid>/cpu_rt_runtime_us
+
+or
+
+CONFIG_FAIR_CGROUP_SCHED
+
+/cgroup/<cgroup>/cpu.rt_runtime_us
+
+[ time is specified in us because the interface is s32; this gives an
+  operating range of ~35m to 1us ]
+
+The period takes values in [ 1, INT_MAX ], runtime in [ -1, INT_MAX - 1 ].
+
+A runtime of -1 specifies runtime == period, ie. no limit.
+
+New groups get the period from /proc/sys/kernel/sched_rt_period_us and
+a runtime of 0.
+
+Settings are constrained to:
+
+   \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period
+
+in order to keep the configuration schedulable.

+ 16 - 0
Documentation/scheduler/00-INDEX

@@ -0,0 +1,16 @@
+00-INDEX
+	- this file.
+sched-arch.txt
+	- CPU Scheduler implementation hints for architecture specific code.
+sched-coding.txt
+	- reference for various scheduler-related methods in the O(1) scheduler.
+sched-design.txt
+	- goals, design and implementation of the Linux O(1) scheduler.
+sched-design-CFS.txt
+	- goals, design and implementation of the Complete Fair Scheduler.
+sched-domains.txt
+	- information on scheduling domains.
+sched-nice-design.txt
+	- How and why the scheduler's nice levels are implemented.
+sched-stats.txt
+	- information on schedstats (Linux Scheduler Statistics).

+ 0 - 0
Documentation/sched-arch.txt → Documentation/scheduler/sched-arch.txt


+ 0 - 0
Documentation/sched-coding.txt → Documentation/scheduler/sched-coding.txt


+ 0 - 0
Documentation/sched-design-CFS.txt → Documentation/scheduler/sched-design-CFS.txt


+ 0 - 0
Documentation/sched-design.txt → Documentation/scheduler/sched-design.txt


+ 0 - 0
Documentation/sched-domains.txt → Documentation/scheduler/sched-domains.txt


+ 0 - 0
Documentation/sched-nice-design.txt → Documentation/scheduler/sched-nice-design.txt


+ 0 - 0
Documentation/sched-stats.txt → Documentation/scheduler/sched-stats.txt


+ 41 - 0
Documentation/scsi/ChangeLog.arcmsr

@@ -68,4 +68,45 @@
 **						2. modify the arcmsr_pci_slot_reset function
 **						3. modify the arcmsr_pci_ers_disconnect_forepart function
 **						4. modify the arcmsr_pci_ers_need_reset_forepart function
+** 1.20.00.15   09/27/2007	 Erich Chen & Nick Cheng
+**						1. add arcmsr_enable_eoi_mode() on adapter Type B
+** 						2. add readl(reg->iop2drv_doorbell_reg) in arcmsr_handle_hbb_isr()
+**						in case of the doorbell interrupt clearance is cached
+** 1.20.00.15   10/01/2007	 Erich Chen & Nick Cheng
+**						1. modify acb->devstate[i][j]
+**						as ARECA_RAID_GOOD instead of
+**						ARECA_RAID_GONE in arcmsr_alloc_ccb_pool
+** 1.20.00.15   11/06/2007       Erich Chen & Nick Cheng
+**						1. add conditional declaration for
+** 						arcmsr_pci_error_detected() and
+**						arcmsr_pci_slot_reset
+** 1.20.00.15	11/23/2007       Erich Chen & Nick Cheng
+**						1.check if the sg list member number
+**						exceeds arcmsr default limit in arcmsr_build_ccb()
+**						2.change the returned value type of arcmsr_build_ccb()
+**						from "void" to "int"
+**						3.add the conditional check if arcmsr_build_ccb()
+**						returns FAILED
+** 1.20.00.15	12/04/2007	 Erich Chen & Nick Cheng
+**						1. modify arcmsr_drain_donequeue() to ignore unknown
+**						command and let kernel process command timeout.
+**						This could handle IO request violating max. segments
+**						while Linux XFS over DM-CRYPT.
+**						Thanks to Milan Broz's comments <mbroz@redhat.com>
+** 1.20.00.15	12/24/2007	 Erich Chen & Nick Cheng
+**						1.fix the portability problems
+**						2.fix type B where we should _not_ iounmap() acb->pmu;
+**						it's not ioremapped.
+**						3.add return -ENOMEM if ioremap() fails
+**						4.transfer IS_SG64_ADDR w/ cpu_to_le32()
+**						in arcmsr_build_ccb
+**						5. modify acb->devstate[i][j] as ARECA_RAID_GONE instead of
+**						ARECA_RAID_GOOD in arcmsr_alloc_ccb_pool()
+**						6.fix arcmsr_cdb->Context as (unsigned long)arcmsr_cdb
+**						7.add the checking state of
+**						(outbound_intstatus & ARCMSR_MU_OUTBOUND_HANDLE_INT) == 0
+**						in arcmsr_handle_hba_isr
+**						8.replace pci_alloc_consistent()/pci_free_consistent() with kmalloc()/kfree() in arcmsr_iop_message_xfer()
+**						9. fix the release of dma memory for type B in arcmsr_free_ccb_pool()
+**						10.fix the arcmsr_polling_hbb_ccbdone()
 **************************************************************************

+ 1 - 1
Documentation/scsi/scsi_mid_low_api.txt

@@ -1407,7 +1407,7 @@ Credits
 =======
 The following people have contributed to this document:
         Mike Anderson <andmike at us dot ibm dot com>
-        James Bottomley <James dot Bottomley at steeleye dot com> 
+        James Bottomley <James dot Bottomley at hansenpartnership dot com>
         Patrick Mansfield <patmans at us dot ibm dot com> 
         Christoph Hellwig <hch at infradead dot org>
         Doug Ledford <dledford at redhat dot com>

+ 10 - 0
Documentation/sysctl/fs.txt

@@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs:
 - inode-max
 - inode-nr
 - inode-state
+- nr_open
 - overflowuid
 - overflowgid
 - suid_dumpable
@@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the maximum.
 
 ==============================================================
 
+nr_open:
+
+This denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
+==============================================================
+
 inode-max, inode-nr & inode-state:
 
 As with file handles, the kernel allocates the inode structures

+ 30 - 1
Documentation/sysctl/kernel.txt

@@ -29,7 +29,7 @@ show up in /proc/sys/kernel:
 - java-interpreter            [ binfmt_java, obsolete ]
 - kstack_depth_to_print       [ X86 only ]
 - l2cr                        [ PPC only ]
-- modprobe                    ==> Documentation/kmod.txt
+- modprobe                    ==> Documentation/debugging-modules.txt
 - msgmax
 - msgmnb
 - msgmni
@@ -41,6 +41,7 @@ show up in /proc/sys/kernel:
 - pid_max
 - powersave-nap               [ PPC only ]
 - printk
+- randomize_va_space
 - real-root-dev               ==> Documentation/initrd.txt
 - reboot-cmd                  [ SPARC only ]
 - rtsig-max
@@ -280,6 +281,34 @@ send before ratelimiting kicks in.
 
 ==============================================================
 
+randomize_va_space:
+
+This option can be used to select the type of process address
+space randomization that is used in the system, for architectures
+that support this feature.
+
+0 - Turn the process address space randomization off by default.
+
+1 - Make the addresses of mmap base, stack and VDSO page randomized.
+    This, among other things, implies that shared libraries will be
+    loaded to random addresses. Also for PIE-linked binaries, the location
+    of code start is randomized.
+
+    With heap randomization, the situation is a little bit more
+    complicated.
+    There are a few legacy applications out there (such as some ancient
+    versions of libc.so.5 from 1996) that assume that brk area starts
+    just after the end of the code+bss. These applications break when
+    start of the brk area is randomized. There are however no known
+    non-legacy applications that would be broken this way, so for most
+    systems it is safe to choose full randomization. However there is
+    a CONFIG_COMPAT_BRK option for systems with ancient and/or broken
+    binaries, that makes heap non-randomized, but keeps all other
+    parts of process address space randomized if randomize_va_space
+    sysctl is turned on.
+
+==============================================================
+
 reboot-cmd: (Sparc only)
 
 ??? This seems to be a way to give an argument to the Sparc

+ 26 - 3
Documentation/sysctl/vm.txt

@@ -22,6 +22,7 @@ Currently, these files are in /proc/sys/vm:
 - dirty_background_ratio
 - dirty_expire_centisecs
 - dirty_writeback_centisecs
+- highmem_is_dirtyable   (only if CONFIG_HIGHMEM set)
 - max_map_count
 - min_free_kbytes
 - laptop_mode
@@ -31,6 +32,7 @@ Currently, these files are in /proc/sys/vm:
 - min_unmapped_ratio
 - min_slab_ratio
 - panic_on_oom
+- oom_dump_tasks
 - oom_kill_allocating_task
 - mmap_min_address
 - numa_zonelist_order
@@ -40,9 +42,9 @@ Currently, these files are in /proc/sys/vm:
 ==============================================================
 
 dirty_ratio, dirty_background_ratio, dirty_expire_centisecs,
-dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode,
-block_dump, swap_token_timeout, drop-caches,
-hugepages_treat_as_movable:
+dirty_writeback_centisecs, highmem_is_dirtyable,
+vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout,
+drop-caches, hugepages_treat_as_movable:
 
 See Documentation/filesystems/proc.txt
 
@@ -231,6 +233,27 @@ according to your policy of failover.
 
 =============================================================
 
+oom_dump_tasks
+
+Enables a system-wide task dump (excluding kernel threads) to be
+produced when the kernel performs an OOM-killing and includes such
+information as pid, uid, tgid, vm size, rss, cpu, oom_adj score, and
+name.  This is helpful to determine why the OOM killer was invoked
+and to identify the rogue task that caused it.
+
+If this is set to zero, this information is suppressed.  On very
+large systems with thousands of tasks it may not be feasible to dump
+the memory state information for each one.  Such systems should not
+be forced to incur a performance penalty in OOM conditions when the
+information may not be desired.
+
+If this is set to non-zero, this information is shown whenever the
+OOM killer actually kills a memory-hogging task.
+
+The default value is 0.
+
+=============================================================
+
 oom_kill_allocating_task
 
 This enables or disables killing the OOM-triggering task in

+ 245 - 0
Documentation/thermal/sysfs-api.txt

@@ -0,0 +1,245 @@
+Generic Thermal Sysfs driver How To
+=========================
+
+Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com>
+
+Updated: 2 January 2008
+
+Copyright (c)  2008 Intel Corporation
+
+
+0. Introduction
+
+The generic thermal sysfs provides a set of interfaces for thermal zone devices (sensors)
+and thermal cooling devices (fan, processor...) to register with the thermal management
+solution and to be a part of it.
+
+This how-to focuses on enabling new thermal zone and cooling devices to participate
+in thermal management.
+This solution is platform independent and any type of thermal zone devices and
+cooling devices should be able to make use of the infrastructure.
+
+The main task of the thermal sysfs driver is to expose thermal zone attributes as well
+as cooling device attributes to the user space.
+An intelligent thermal management application can make decisions based on inputs
+from thermal zone attributes (the current temperature and trip point temperature)
+and throttle appropriate devices.
+
+[0-*]	denotes any positive number starting from 0
+[1-*]	denotes any positive number starting from 1
+
+1. thermal sysfs driver interface functions
+
+1.1 thermal zone device interface
+1.1.1 struct thermal_zone_device *thermal_zone_device_register(char *name, int trips,
+				void *devdata, struct thermal_zone_device_ops *ops)
+
+	This interface function adds a new thermal zone device (sensor) to
+	/sys/class/thermal folder as thermal_zone[0-*].
+	It tries to bind all the thermal cooling devices registered at the same time.
+
+	name: the thermal zone name.
+	trips: the total number of trip points this thermal zone supports.
+	devdata: device private data
+	ops: thermal zone device call-backs.
+		.bind: bind the thermal zone device with a thermal cooling device.
+		.unbind: unbind the thermal zone device from a thermal cooling device.
+		.get_temp: get the current temperature of the thermal zone.
+		.get_mode: get the current mode (user/kernel) of the thermal zone.
+			   "kernel" means thermal management is done in kernel.
+			   "user" will prevent kernel thermal driver actions upon trip points
+			   so that user applications can take charge of thermal management.
+		.set_mode: set the mode (user/kernel) of the thermal zone.
+		.get_trip_type: get the type of certain trip point.
+		.get_trip_temp: get the temperature above which the certain trip point
+				will be fired.
+
+1.1.2 void thermal_zone_device_unregister(struct thermal_zone_device *tz)
+
+	This interface function removes the thermal zone device.
+	It deletes the corresponding entry from /sys/class/thermal folder and unbinds all
+	the thermal cooling devices it uses.
+
+1.2 thermal cooling device interface
+1.2.1 struct thermal_cooling_device *thermal_cooling_device_register(char *name,
+					void *devdata, struct thermal_cooling_device_ops *)
+
+	This interface function adds a new thermal cooling device (fan/processor/...) to
+	/sys/class/thermal/ folder as cooling_device[0-*].
+	It tries to bind itself to all the thermal zone devices registered at the same time.
+	name: the cooling device name.
+	devdata: device private data.
+	ops: thermal cooling devices call-backs.
+		.get_max_state: get the Maximum throttle state of the cooling device.
+		.get_cur_state: get the Current throttle state of the cooling device.
+		.set_cur_state: set the Current throttle state of the cooling device.
+
+1.2.2 void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
+
+	This interface function removes the thermal cooling device.
+	It deletes the corresponding entry from /sys/class/thermal folder and unbinds
+	itself from all the thermal zone devices using it.
+
+1.3 interface for binding a thermal zone device with a thermal cooling device
+1.3.1 int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
+			int trip, struct thermal_cooling_device *cdev);
+
+	This interface function binds a thermal cooling device to a certain trip point
+	of a thermal zone device.
+	This function is usually called in the thermal zone device .bind callback.
+	tz: the thermal zone device
+	cdev: thermal cooling device
+	trip: indicates which trip point the cooling device is associated with
+		 in this thermal zone.
+
+1.3.2 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz,
+				int trip, struct thermal_cooling_device *cdev);
+
+	This interface function unbinds a thermal cooling device from a certain trip point
+	of a thermal zone device.
+	This function is usually called in the thermal zone device .unbind callback.
+	tz: the thermal zone device
+	cdev: thermal cooling device
+	trip: indicates which trip point the cooling device is associated with
+		in this thermal zone.
+
+2. sysfs attributes structure
+
+RO	read only value
+RW	read/write value
+
+All thermal sysfs attributes will be represented under /sys/class/thermal
+
+Thermal zone device sys I/F, created once it's registered:
+|thermal_zone[0-*]:
+	|-----type:			Type of the thermal zone
+	|-----temp:			Current temperature
+	|-----mode:			Working mode of the thermal zone
+	|-----trip_point_[0-*]_temp:	Trip point temperature
+	|-----trip_point_[0-*]_type:	Trip point type
+
+Thermal cooling device sys I/F, created once it's registered:
+|cooling_device[0-*]:
+	|-----type :			Type of the cooling device(processor/fan/...)
+	|-----max_state:		Maximum cooling state of the cooling device
+	|-----cur_state:		Current cooling state of the cooling device
+
+
+These two dynamic attributes are created/removed in pairs.
+They represent the relationship between a thermal zone and its associated cooling device.
+They are created/removed for each
+thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device successful execution.
+
+|thermal_zone[0-*]
+	|-----cdev[0-*]:		The [0-*]th cooling device in the current thermal zone
+	|-----cdev[0-*]_trip_point:	Trip point that cdev[0-*] is associated with
+
+
+***************************
+* Thermal zone attributes *
+***************************
+
+type				Strings which represent the thermal zone type.
+				This is given by thermal zone driver as part of registration.
+				Eg: "ACPI thermal zone" indicates it's an ACPI thermal device
+				RO
+				Optional
+
+temp				Current temperature as reported by thermal zone (sensor)
+				Unit: degree Celsius
+				RO
+				Required
+
+mode				One of the predefined values in [kernel, user]
+				This file gives information about the algorithm
+				that is currently managing the thermal zone.
+				It can be either default kernel based algorithm
+				or user space application.
+				RW
+				Optional
+				kernel	= Thermal management in kernel thermal zone driver.
+				user	= Preventing kernel thermal zone driver actions upon
+					  trip points so that user application can take full
+					  charge of the thermal management.
+
+trip_point_[0-*]_temp		The temperature above which trip point will be fired
+				Unit: degree Celsius
+				RO
+				Optional
+
+trip_point_[0-*]_type 		Strings which indicate the type of the trip point
+				E.g. it can be one of critical, hot, passive,
+				    active[0-*] for ACPI thermal zone.
+				RO
+				Optional
+
+cdev[0-*]			Sysfs link to the thermal cooling device node where the sys I/F
+				for cooling device throttling control is represented.
+				RO
+				Optional
+
+cdev[0-*]_trip_point		The trip point with which cdev[0-*] is associated in this thermal zone
+				-1 means the cooling device is not associated with any trip point.
+				RO
+				Optional
+
+******************************
+* Cooling device  attributes *
+******************************
+
+type				String which represents the type of device
+				eg: For generic ACPI: this should be "Fan",
+				"Processor" or "LCD"
+				eg. For memory controller device on intel_menlow platform:
+				this should be "Memory controller"
+				RO
+				Optional
+
+max_state			The maximum permissible cooling state of this cooling device.
+				RO
+				Required
+
+cur_state			The current cooling state of this cooling device.
+				The value can be any integer between 0 and max_state,
+				cur_state == 0 means no cooling
+				cur_state == max_state means the maximum cooling.
+				RW
+				Required
+
+3. A simple implementation
+
+ACPI thermal zone may support multiple trip points like critical/hot/passive/active.
+If an ACPI thermal zone supports critical, passive, active[0] and active[1] at the same time,
+it may register itself as a thermal_zone_device (thermal_zone1) with 4 trip points in all.
+It has one processor and one fan, which are both registered as thermal_cooling_device.
+If the processor is listed in _PSL method, and the fan is listed in _AL0 method,
+the sys I/F structure will be built like this:
+
+/sys/class/thermal:
+
+|thermal_zone1:
+	|-----type:			ACPI thermal zone
+	|-----temp:			37
+	|-----mode:			kernel
+	|-----trip_point_0_temp:	100
+	|-----trip_point_0_type:	critical
+	|-----trip_point_1_temp:	80
+	|-----trip_point_1_type:	passive
+	|-----trip_point_2_temp:	70
+	|-----trip_point_2_type:	active[0]
+	|-----trip_point_3_temp:	60
+	|-----trip_point_3_type:	active[1]
+	|-----cdev0:			--->/sys/class/thermal/cooling_device0
+	|-----cdev0_trip_point:		1	/* cdev0 can be used for passive */
+	|-----cdev1:			--->/sys/class/thermal/cooling_device3
+	|-----cdev1_trip_point:		2	/* cdev1 can be used for active[0]*/
+
+|cooling_device0:
+	|-----type:			Processor
+	|-----max_state:		8
+	|-----cur_state:		0
+
+|cooling_device3:
+	|-----type:			Fan
+	|-----max_state:		2
+	|-----cur_state:		0

+ 226 - 0
Documentation/unaligned-memory-access.txt

@@ -0,0 +1,226 @@
+UNALIGNED MEMORY ACCESSES
+=========================
+
+Linux runs on a wide variety of architectures which have varying behaviour
+when it comes to memory access. This document presents some details about
+unaligned accesses, why you need to write code that doesn't cause them,
+and how to write such code!
+
+
+The definition of an unaligned access
+=====================================
+
+Unaligned memory accesses occur when you try to read N bytes of data starting
+from an address that is not evenly divisible by N (i.e. addr % N != 0).
+For example, reading 4 bytes of data from address 0x10004 is fine, but
+reading 4 bytes of data from address 0x10005 would be an unaligned memory
+access.
+
+The above may seem a little vague, as memory access can happen in different
+ways. The context here is at the machine code level: certain instructions read
+or write a number of bytes to or from memory (e.g. movb, movw, movl in x86
+assembly). As will become clear, it is relatively easy to spot C statements
+which will compile to multiple-byte memory access instructions, namely when
+dealing with types such as u16, u32 and u64.
+
+
+Natural alignment
+=================
+
+The rule mentioned above forms what we refer to as natural alignment:
+When accessing N bytes of memory, the base memory address must be evenly
+divisible by N, i.e. addr % N == 0.
+
+When writing code, assume the target architecture has natural alignment
+requirements.
+
+In reality, only a few architectures require natural alignment on all sizes
+of memory access. However, we must consider ALL supported architectures;
+writing code that satisfies natural alignment requirements is the easiest way
+to achieve full portability.
+
+
+Why unaligned access is bad
+===========================
+
+The effects of performing an unaligned memory access vary from architecture
+to architecture. It would be easy to write a whole document on the differences
+here; a summary of the common scenarios is presented below:
+
+ - Some architectures are able to perform unaligned memory accesses
+   transparently, but there is usually a significant performance cost.
+ - Some architectures raise processor exceptions when unaligned accesses
+   happen. The exception handler is able to correct the unaligned access,
+   at significant cost to performance.
+ - Some architectures raise processor exceptions when unaligned accesses
+   happen, but the exceptions do not contain enough information for the
+   unaligned access to be corrected.
+ - Some architectures are not capable of unaligned memory access, but will
+   silently perform a different memory access to the one that was requested,
+   resulting in a subtle code bug that is hard to detect!
+
+It should be obvious from the above that if your code causes unaligned
+memory accesses to happen, your code will not work correctly on certain
+platforms and will cause performance problems on others.
+
+
+Code that does not cause unaligned access
+=========================================
+
+At first, the concepts above may seem a little hard to relate to actual
+coding practice. After all, you don't have a great deal of control over
+memory addresses of certain variables, etc.
+
+Fortunately things are not too complex, as in most cases, the compiler
+ensures that things will work for you. For example, take the following
+structure:
+
+	struct foo {
+		u16 field1;
+		u32 field2;
+		u8 field3;
+	};
+
+Let us assume that an instance of the above structure resides in memory
+starting at address 0x10000. With a basic level of understanding, it would
+not be unreasonable to expect that accessing field2 would cause an unaligned
+access. You'd be expecting field2 to be located at offset 2 bytes into the
+structure, i.e. address 0x10002, but that address is not evenly divisible
+by 4 (remember, we're reading a 4 byte value here).
+
+Fortunately, the compiler understands the alignment constraints, so in the
+above case it would insert 2 bytes of padding in between field1 and field2.
+Therefore, for standard structure types you can always rely on the compiler
+to pad structures so that accesses to fields are suitably aligned (assuming
+you do not cast the field to a type of different length).
+
+Similarly, you can also rely on the compiler to align variables and function
+parameters to a naturally aligned scheme, based on the size of the type of
+the variable.
+
+At this point, it should be clear that accessing a single byte (u8 or char)
+will never cause an unaligned access, because all memory addresses are evenly
+divisible by one.
+
+On a related topic, with the above considerations in mind you may observe
+that you could reorder the fields in the structure in order to place fields
+where padding would otherwise be inserted, and hence reduce the overall
+resident memory size of structure instances. The optimal layout of the
+above example is:
+
+	struct foo {
+		u32 field2;
+		u16 field1;
+		u8 field3;
+	};
+
+For a natural alignment scheme, the compiler would only have to add a single
+byte of padding at the end of the structure. This padding is added in order
+to satisfy alignment constraints for arrays of these structures.
+
+Another point worth mentioning is the use of __attribute__((packed)) on a
+structure type. This GCC-specific attribute tells the compiler never to
+insert any padding within structures, useful when you want to use a C struct
+to represent some data that comes in a fixed arrangement 'off the wire'.
+
+You might be inclined to believe that usage of this attribute can easily
+lead to unaligned accesses when accessing fields that do not satisfy
+architectural alignment requirements. However, again, the compiler is aware
+of the alignment constraints and will generate extra instructions to perform
+the memory access in a way that does not cause unaligned access. Of course,
+the extra instructions obviously cause a loss in performance compared to the
+non-packed case, so the packed attribute should only be used when avoiding
+structure padding is of importance.
+
+
+Code that causes unaligned access
+=================================
+
+With the above in mind, let's move onto a real life example of a function
+that can cause an unaligned memory access. The following function adapted
+from include/linux/etherdevice.h is an optimized routine to compare two
+ethernet MAC addresses for equality.
+
+unsigned int compare_ether_addr(const u8 *addr1, const u8 *addr2)
+{
+	const u16 *a = (const u16 *) addr1;
+	const u16 *b = (const u16 *) addr2;
+	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
+}
+
+In the above function, the reference to a[0] causes 2 bytes (16 bits) to
+be read from memory starting at address addr1. Think about what would happen
+if addr1 was an odd address such as 0x10003. (Hint: it'd be an unaligned
+access.)
+
+Despite the potential unaligned access problems with the above function, it
+is included in the kernel anyway but is understood to only work on
+16-bit-aligned addresses. It is up to the caller to ensure this alignment or
+not use this function at all. This alignment-unsafe function is still useful
+as it is a decent optimization for the cases when you can ensure alignment,
+which is true almost all of the time in ethernet networking context.
+
+
+Here is another example of some code that could cause unaligned accesses:
+	void myfunc(u8 *data, u32 value)
+	{
+		[...]
+		*((u32 *) data) = cpu_to_le32(value);
+		[...]
+	}
+
+This code will cause unaligned accesses every time the data parameter points
+to an address that is not evenly divisible by 4.
+
+In summary, the 2 main scenarios where you may run into unaligned access
+problems involve:
+ 1. Casting variables to types of different lengths
+ 2. Pointer arithmetic followed by access to at least 2 bytes of data
+
+
+Avoiding unaligned accesses
+===========================
+
+The easiest way to avoid unaligned access is to use the get_unaligned() and
+put_unaligned() macros provided by the <asm/unaligned.h> header file.
+
+Going back to an earlier example of code that potentially causes unaligned
+access:
+
+	void myfunc(u8 *data, u32 value)
+	{
+		[...]
+		*((u32 *) data) = cpu_to_le32(value);
+		[...]
+	}
+
+To avoid the unaligned memory access, you would rewrite it as follows:
+
+	void myfunc(u8 *data, u32 value)
+	{
+		[...]
+		value = cpu_to_le32(value);
+		put_unaligned(value, (u32 *) data);
+		[...]
+	}
+
+The get_unaligned() macro works similarly. Assuming 'data' is a pointer to
+memory and you wish to avoid unaligned access, its usage is as follows:
+
+	u32 value = get_unaligned((u32 *) data);
+
+These macros work for memory accesses of any length (not just 32 bits as
+in the examples above). Be aware that when compared to standard access of
+aligned memory, using these macros to access unaligned memory can be costly in
+terms of performance.
+
+If use of such macros is not convenient, another option is to use memcpy(),
+where the source or destination (or both) are of type u8* or unsigned char*.
+Due to the byte-wise nature of this operation, unaligned accesses are avoided.
+
+--
+Author: Daniel Drake <dsd@gentoo.org>
+With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt,
+Johannes Berg, Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock,
+Uli Kunitz, Vadim Lobanov
+

+ 138 - 11
Documentation/vm/slabinfo.c

@@ -32,6 +32,13 @@ struct slabinfo {
 	int sanity_checks, slab_size, store_user, trace;
 	int order, poison, reclaim_account, red_zone;
 	unsigned long partial, objects, slabs;
+	unsigned long alloc_fastpath, alloc_slowpath;
+	unsigned long free_fastpath, free_slowpath;
+	unsigned long free_frozen, free_add_partial, free_remove_partial;
+	unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill;
+	unsigned long cpuslab_flush, deactivate_full, deactivate_empty;
+	unsigned long deactivate_to_head, deactivate_to_tail;
+	unsigned long deactivate_remote_frees;
 	int numa[MAX_NODES];
 	int numa_partial[MAX_NODES];
 } slabinfo[MAX_SLABS];
@@ -64,8 +71,10 @@ int show_inverted = 0;
 int show_single_ref = 0;
 int show_totals = 0;
 int sort_size = 0;
+int sort_active = 0;
 int set_debug = 0;
 int show_ops = 0;
+int show_activity = 0;
 
 /* Debug options */
 int sanity = 0;
@@ -93,8 +102,10 @@ void usage(void)
 	printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n"
 		"slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n"
 		"-a|--aliases           Show aliases\n"
+		"-A|--activity          Most active slabs first\n"
 		"-d<options>|--debug=<options> Set/Clear Debug options\n"
-		"-e|--empty		Show empty slabs\n"
+		"-D|--display-active    Switch line format to activity\n"
+		"-e|--empty             Show empty slabs\n"
 		"-f|--first-alias       Show first alias\n"
 		"-h|--help              Show usage information\n"
 		"-i|--inverted          Inverted list\n"
@@ -281,8 +292,11 @@ int line = 0;
 
 void first_line(void)
 {
-	printf("Name                   Objects Objsize    Space "
-		"Slabs/Part/Cpu  O/S O %%Fr %%Ef Flg\n");
+	if (show_activity)
+		printf("Name                   Objects    Alloc     Free   %%Fast\n");
+	else
+		printf("Name                   Objects Objsize    Space "
+			"Slabs/Part/Cpu  O/S O %%Fr %%Ef Flg\n");
 }
 
 /*
@@ -309,6 +323,12 @@ unsigned long slab_size(struct slabinfo *s)
 	return 	s->slabs * (page_size << s->order);
 }
 
+unsigned long slab_activity(struct slabinfo *s)
+{
+	return 	s->alloc_fastpath + s->free_fastpath +
+		s->alloc_slowpath + s->free_slowpath;
+}
+
 void slab_numa(struct slabinfo *s, int mode)
 {
 	int node;
@@ -392,6 +412,71 @@ const char *onoff(int x)
 	return "Off";
 }
 
+void slab_stats(struct slabinfo *s)
+{
+	unsigned long total_alloc;
+	unsigned long total_free;
+	unsigned long total;
+
+	if (!s->alloc_slab)
+		return;
+
+	total_alloc = s->alloc_fastpath + s->alloc_slowpath;
+	total_free = s->free_fastpath + s->free_slowpath;
+
+	if (!total_alloc)
+		return;
+
+	printf("\n");
+	printf("Slab Perf Counter       Alloc     Free %%Al %%Fr\n");
+	printf("--------------------------------------------------\n");
+	printf("Fastpath             %8lu %8lu %3lu %3lu\n",
+		s->alloc_fastpath, s->free_fastpath,
+		s->alloc_fastpath * 100 / total_alloc,
+		s->free_fastpath * 100 / total_free);
+	printf("Slowpath             %8lu %8lu %3lu %3lu\n",
+		total_alloc - s->alloc_fastpath, s->free_slowpath,
+		(total_alloc - s->alloc_fastpath) * 100 / total_alloc,
+		s->free_slowpath * 100 / total_free);
+	printf("Page Alloc           %8lu %8lu %3lu %3lu\n",
+		s->alloc_slab, s->free_slab,
+		s->alloc_slab * 100 / total_alloc,
+		s->free_slab * 100 / total_free);
+	printf("Add partial          %8lu %8lu %3lu %3lu\n",
+		s->deactivate_to_head + s->deactivate_to_tail,
+		s->free_add_partial,
+		(s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc,
+		s->free_add_partial * 100 / total_free);
+	printf("Remove partial       %8lu %8lu %3lu %3lu\n",
+		s->alloc_from_partial, s->free_remove_partial,
+		s->alloc_from_partial * 100 / total_alloc,
+		s->free_remove_partial * 100 / total_free);
+
+	printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n",
+		s->deactivate_remote_frees, s->free_frozen,
+		s->deactivate_remote_frees * 100 / total_alloc,
+		s->free_frozen * 100 / total_free);
+
+	printf("Total                %8lu %8lu\n\n", total_alloc, total_free);
+
+	if (s->cpuslab_flush)
+		printf("Flushes %8lu\n", s->cpuslab_flush);
+
+	if (s->alloc_refill)
+		printf("Refill %8lu\n", s->alloc_refill);
+
+	total = s->deactivate_full + s->deactivate_empty +
+			s->deactivate_to_head + s->deactivate_to_tail;
+
+	if (total)
+		printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) "
+			"ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n",
+			s->deactivate_full, (s->deactivate_full * 100) / total,
+			s->deactivate_empty, (s->deactivate_empty * 100) / total,
+			s->deactivate_to_head, (s->deactivate_to_head * 100) / total,
+			s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total);
+}
+
 void report(struct slabinfo *s)
 {
 	if (strcmp(s->name, "*") == 0)
@@ -430,6 +515,7 @@ void report(struct slabinfo *s)
 	ops(s);
 	show_tracking(s);
 	slab_numa(s, 1);
+	slab_stats(s);
 }
 
 void slabcache(struct slabinfo *s)
@@ -479,13 +565,27 @@ void slabcache(struct slabinfo *s)
 		*p++ = 'T';
 
 	*p = 0;
-	printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
-		s->name, s->objects, s->object_size, size_str, dist_str,
-		s->objs_per_slab, s->order,
-		s->slabs ? (s->partial * 100) / s->slabs : 100,
-		s->slabs ? (s->objects * s->object_size * 100) /
-			(s->slabs * (page_size << s->order)) : 100,
-		flags);
+	if (show_activity) {
+		unsigned long total_alloc;
+		unsigned long total_free;
+
+		total_alloc = s->alloc_fastpath + s->alloc_slowpath;
+		total_free = s->free_fastpath + s->free_slowpath;
+
+		printf("%-21s %8ld %8ld %8ld %3ld %3ld \n",
+			s->name, s->objects,
+			total_alloc, total_free,
+			total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0,
+			total_free ? (s->free_fastpath * 100 / total_free) : 0);
+	}
+	else
+		printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
+			s->name, s->objects, s->object_size, size_str, dist_str,
+			s->objs_per_slab, s->order,
+			s->slabs ? (s->partial * 100) / s->slabs : 100,
+			s->slabs ? (s->objects * s->object_size * 100) /
+				(s->slabs * (page_size << s->order)) : 100,
+			flags);
 }
 
 /*
@@ -892,6 +992,8 @@ void sort_slabs(void)
 
 			if (sort_size)
 				result = slab_size(s1) < slab_size(s2);
+			else if (sort_active)
+				result = slab_activity(s1) < slab_activity(s2);
 			else
 				result = strcasecmp(s1->name, s2->name);
 
@@ -1074,6 +1176,23 @@ void read_slab_dir(void)
 			free(t);
 			slab->store_user = get_obj("store_user");
 			slab->trace = get_obj("trace");
+			slab->alloc_fastpath = get_obj("alloc_fastpath");
+			slab->alloc_slowpath = get_obj("alloc_slowpath");
+			slab->free_fastpath = get_obj("free_fastpath");
+			slab->free_slowpath = get_obj("free_slowpath");
+			slab->free_frozen= get_obj("free_frozen");
+			slab->free_add_partial = get_obj("free_add_partial");
+			slab->free_remove_partial = get_obj("free_remove_partial");
+			slab->alloc_from_partial = get_obj("alloc_from_partial");
+			slab->alloc_slab = get_obj("alloc_slab");
+			slab->alloc_refill = get_obj("alloc_refill");
+			slab->free_slab = get_obj("free_slab");
+			slab->cpuslab_flush = get_obj("cpuslab_flush");
+			slab->deactivate_full = get_obj("deactivate_full");
+			slab->deactivate_empty = get_obj("deactivate_empty");
+			slab->deactivate_to_head = get_obj("deactivate_to_head");
+			slab->deactivate_to_tail = get_obj("deactivate_to_tail");
+			slab->deactivate_remote_frees = get_obj("deactivate_remote_frees");
 			chdir("..");
 			if (slab->name[0] == ':')
 				alias_targets++;
@@ -1124,7 +1243,9 @@ void output_slabs(void)
 
 struct option opts[] = {
 	{ "aliases", 0, NULL, 'a' },
+	{ "activity", 0, NULL, 'A' },
 	{ "debug", 2, NULL, 'd' },
+	{ "display-activity", 0, NULL, 'D' },
 	{ "empty", 0, NULL, 'e' },
 	{ "first-alias", 0, NULL, 'f' },
 	{ "help", 0, NULL, 'h' },
@@ -1149,7 +1270,7 @@ int main(int argc, char *argv[])
 
 	page_size = getpagesize();
 
-	while ((c = getopt_long(argc, argv, "ad::efhil1noprstvzTS",
+	while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTS",
 						opts, NULL)) != -1)
 		switch (c) {
 		case '1':
@@ -1158,11 +1279,17 @@ int main(int argc, char *argv[])
 		case 'a':
 			show_alias = 1;
 			break;
+		case 'A':
+			sort_active = 1;
+			break;
 		case 'd':
 			set_debug = 1;
 			if (!debug_opt_scan(optarg))
 				fatal("Invalid debug option '%s'\n", optarg);
 			break;
+		case 'D':
+			show_activity = 1;
+			break;
 		case 'e':
 			show_empty = 1;
 			break;

+ 2 - 0
Documentation/w1/masters/00-INDEX

@@ -4,3 +4,5 @@ ds2482
 	- The Maxim/Dallas Semiconductor DS2482 provides 1-wire busses.
 ds2490
 	- The Maxim/Dallas Semiconductor DS2490 builds USB <-> W1 bridges.
+w1-gpio
+	- GPIO 1-wire bus master driver.

+ 33 - 0
Documentation/w1/masters/w1-gpio

@@ -0,0 +1,33 @@
+Kernel driver w1-gpio
+=====================
+
+Author: Ville Syrjala <syrjala@sci.fi>
+
+
+Description
+-----------
+
+GPIO 1-wire bus master driver. The driver uses the GPIO API to control the
+wire and the GPIO pin can be specified using platform data.
+
+
+Example (mach-at91)
+-------------------
+
+#include <linux/w1-gpio.h>
+
+static struct w1_gpio_platform_data foo_w1_gpio_pdata = {
+	.pin		= AT91_PIN_PB20,
+	.is_open_drain	= 1,
+};
+
+static struct platform_device foo_w1_device = {
+	.name			= "w1-gpio",
+	.id			= -1,
+	.dev.platform_data	= &foo_w1_gpio_pdata,
+};
+
+...
+	at91_set_GPIO_periph(foo_w1_gpio_pdata.pin, 1);
+	at91_set_multi_drive(foo_w1_gpio_pdata.pin, 1);
+	platform_device_register(&foo_w1_device);

Some files were not shown because too many files changed in this diff