瀏覽代碼

/spare/repo/netdev-2.6 branch 'master'

Jeff Garzik 19 年之前
父節點
當前提交
e3ee3b78f8
共有 100 個文件被更改,包括 2859 次插入1946 次删除
  1. 12 0
      Documentation/feature-removal-schedule.txt
  2. 352 0
      Documentation/networking/cxgb.txt
  3. 288 0
      Documentation/networking/phy.txt
  4. 1 0
      Documentation/sound/alsa/ALSA-Configuration.txt
  5. 11 4
      Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
  6. 6 0
      MAINTAINERS
  7. 2 2
      Makefile
  8. 5 6
      arch/alpha/kernel/signal.c
  9. 2 37
      arch/arm/Kconfig
  10. 3 0
      arch/arm/common/Kconfig
  11. 1 0
      arch/arm/common/Makefile
  12. 166 0
      arch/arm/common/gic.c
  13. 3 2
      arch/arm/kernel/signal.c
  14. 1 1
      arch/arm/mach-ixp4xx/coyote-setup.c
  15. 1 1
      arch/arm/mach-ixp4xx/gtwx5715-setup.c
  16. 1 1
      arch/arm/mach-ixp4xx/ixdp425-setup.c
  17. 7 0
      arch/arm/mach-sa1100/assabet.c
  18. 7 0
      arch/arm/mach-sa1100/cerf.c
  19. 5 0
      arch/arm/mach-sa1100/generic.c
  20. 3 0
      arch/arm/mach-sa1100/generic.h
  21. 12 0
      arch/arm/mach-sa1100/lart.c
  22. 7 0
      arch/arm/mach-sa1100/shannon.c
  23. 7 0
      arch/arm/mach-sa1100/simpad.c
  24. 6 7
      arch/arm26/kernel/signal.c
  25. 5 6
      arch/cris/arch-v10/kernel/signal.c
  26. 5 6
      arch/cris/arch-v32/kernel/signal.c
  27. 5 6
      arch/frv/kernel/signal.c
  28. 5 6
      arch/h8300/kernel/signal.c
  29. 3 2
      arch/i386/kernel/signal.c
  30. 0 1
      arch/i386/pci/common.c
  31. 16 33
      arch/i386/pci/i386.c
  32. 6 9
      arch/ia64/kernel/signal.c
  33. 1 0
      arch/ia64/pci/pci.c
  34. 5 6
      arch/m32r/kernel/signal.c
  35. 5 6
      arch/m68knommu/kernel/signal.c
  36. 5 6
      arch/mips/kernel/irixsig.c
  37. 5 6
      arch/mips/kernel/signal.c
  38. 5 6
      arch/mips/kernel/signal32.c
  39. 5 6
      arch/parisc/kernel/signal.c
  40. 10 1
      arch/ppc/Makefile
  41. 0 203
      arch/ppc/boot/utils/addRamDisk.c
  42. 5 6
      arch/ppc/kernel/signal.c
  43. 4 4
      arch/ppc/syslib/m8xx_setup.c
  44. 34 40
      arch/ppc64/Kconfig
  45. 9 0
      arch/ppc64/Makefile
  46. 2 2
      arch/ppc64/boot/Makefile
  47. 2 2
      arch/ppc64/boot/addnote.c
  48. 1 1
      arch/ppc64/boot/crt0.S
  49. 1 1
      arch/ppc64/boot/div64.S
  50. 149 0
      arch/ppc64/boot/elf.h
  51. 20 35
      arch/ppc64/boot/main.c
  52. 34 0
      arch/ppc64/boot/page.h
  53. 0 36
      arch/ppc64/boot/ppc32-types.h
  54. 62 0
      arch/ppc64/boot/ppc_asm.h
  55. 27 169
      arch/ppc64/boot/prom.c
  56. 18 0
      arch/ppc64/boot/prom.h
  57. 16 0
      arch/ppc64/boot/stdio.h
  58. 1 1
      arch/ppc64/boot/string.S
  59. 16 0
      arch/ppc64/boot/string.h
  60. 1 1
      arch/ppc64/boot/zlib.c
  61. 3 3
      arch/ppc64/configs/g5_defconfig
  62. 3 4
      arch/ppc64/configs/iSeries_defconfig
  63. 3 3
      arch/ppc64/configs/maple_defconfig
  64. 3 3
      arch/ppc64/configs/pSeries_defconfig
  65. 3 3
      arch/ppc64/defconfig
  66. 11 26
      arch/ppc64/kernel/LparData.c
  67. 5 2
      arch/ppc64/kernel/Makefile
  68. 2 1
      arch/ppc64/kernel/asm-offsets.c
  69. 1 39
      arch/ppc64/kernel/cputable.c
  70. 47 0
      arch/ppc64/kernel/firmware.c
  71. 206 317
      arch/ppc64/kernel/head.S
  72. 4 1
      arch/ppc64/kernel/iSeries_htab.c
  73. 26 4
      arch/ppc64/kernel/iSeries_setup.c
  74. 155 0
      arch/ppc64/kernel/iSeries_vio.c
  75. 39 112
      arch/ppc64/kernel/lmb.c
  76. 4 3
      arch/ppc64/kernel/lparcfg.c
  77. 98 0
      arch/ppc64/kernel/misc.S
  78. 0 2
      arch/ppc64/kernel/of_device.c
  79. 2 1
      arch/ppc64/kernel/pSeries_iommu.c
  80. 1 3
      arch/ppc64/kernel/pSeries_lpar.c
  81. 29 10
      arch/ppc64/kernel/pSeries_setup.c
  82. 2 1
      arch/ppc64/kernel/pSeries_smp.c
  83. 273 0
      arch/ppc64/kernel/pSeries_vio.c
  84. 2 2
      arch/ppc64/kernel/pacaData.c
  85. 2 0
      arch/ppc64/kernel/pmac_setup.c
  86. 21 0
      arch/ppc64/kernel/pmc.c
  87. 5 7
      arch/ppc64/kernel/process.c
  88. 152 32
      arch/ppc64/kernel/prom.c
  89. 59 34
      arch/ppc64/kernel/prom_init.c
  90. 17 2
      arch/ppc64/kernel/rtas_pci.c
  91. 19 11
      arch/ppc64/kernel/setup.c
  92. 3 2
      arch/ppc64/kernel/signal.c
  93. 3 2
      arch/ppc64/kernel/signal32.c
  94. 5 52
      arch/ppc64/kernel/sysfs.c
  95. 3 4
      arch/ppc64/kernel/time.c
  96. 35 409
      arch/ppc64/kernel/vio.c
  97. 1 3
      arch/ppc64/mm/hash_low.S
  98. 1 2
      arch/ppc64/mm/hash_native.c
  99. 2 2
      arch/ppc64/mm/hash_utils.c
  100. 212 176
      arch/ppc64/mm/hugetlbpage.c

+ 12 - 0
Documentation/feature-removal-schedule.txt

@@ -135,3 +135,15 @@ Why:	With the 16-bit PCMCIA subsystem now behaving (almost) like a
 	pcmciautils package available at
 	http://kernel.org/pub/linux/utils/kernel/pcmcia/
 Who:	Dominik Brodowski <linux@brodo.de>
+
+---------------------------
+
+What:	ip_queue and ip6_queue (old ipv4-only and ipv6-only netfilter queue)
+When:	December 2005
+Why:	This interface has been obsoleted by the new layer3-independent
+	"nfnetlink_queue".  The Kernel interface is compatible, so the old
+	ip[6]tables "QUEUE" targets still work and will transparently handle
+	all packets into nfnetlink queue number 0.  Userspace users will have
+	to link against API-compatible library on top of libnfnetlink_queue 
+	instead of the current 'libipq'.
+Who:	Harald Welte <laforge@netfilter.org>

+ 352 - 0
Documentation/networking/cxgb.txt

@@ -0,0 +1,352 @@
+                 Chelsio N210 10Gb Ethernet Network Controller
+
+                         Driver Release Notes for Linux
+
+                                 Version 2.1.1
+
+                                 June 20, 2005
+
+CONTENTS
+========
+ INTRODUCTION
+ FEATURES
+ PERFORMANCE
+ DRIVER MESSAGES
+ KNOWN ISSUES
+ SUPPORT
+
+
+INTRODUCTION
+============
+
+ This document describes the Linux driver for Chelsio 10Gb Ethernet Network
+ Controller. This driver supports the Chelsio N210 NIC and is backward
+ compatible with the Chelsio N110 model 10Gb NICs.
+
+
+FEATURES
+========
+
+ Adaptive Interrupts (adaptive-rx)
+ ---------------------------------
+
+  This feature provides an adaptive algorithm that adjusts the interrupt
+  coalescing parameters, allowing the driver to dynamically adapt the latency
+  settings to achieve the highest performance during various types of network
+  load.
+
+  The interface used to control this feature is ethtool. Please see the
+  ethtool manpage for additional usage information.
+
+  By default, adaptive-rx is disabled.
+  To enable adaptive-rx:
+
+      ethtool -C <interface> adaptive-rx on
+
+  To disable adaptive-rx, use ethtool:
+
+      ethtool -C <interface> adaptive-rx off
+
+  After disabling adaptive-rx, the timer latency value will be set to 50us.
+  You may set the timer latency after disabling adaptive-rx:
+
+      ethtool -C <interface> rx-usecs <microseconds>
+
+  An example to set the timer latency value to 100us on eth0:
+
+      ethtool -C eth0 rx-usecs 100
+
+  You may also provide a timer latency value while disabling adpative-rx:
+
+      ethtool -C <interface> adaptive-rx off rx-usecs <microseconds>
+
+  If adaptive-rx is disabled and a timer latency value is specified, the timer
+  will be set to the specified value until changed by the user or until
+  adaptive-rx is enabled.
+
+  To view the status of the adaptive-rx and timer latency values:
+
+      ethtool -c <interface>
+
+
+ TCP Segmentation Offloading (TSO) Support
+ -----------------------------------------
+
+  This feature, also known as "large send", enables a system's protocol stack
+  to offload portions of outbound TCP processing to a network interface card
+  thereby reducing system CPU utilization and enhancing performance.
+
+  The interface used to control this feature is ethtool version 1.8 or higher.
+  Please see the ethtool manpage for additional usage information.
+
+  By default, TSO is enabled.
+  To disable TSO:
+
+      ethtool -K <interface> tso off
+
+  To enable TSO:
+
+      ethtool -K <interface> tso on
+
+  To view the status of TSO:
+
+      ethtool -k <interface>
+
+
+PERFORMANCE
+===========
+
+ The following information is provided as an example of how to change system
+ parameters for "performance tuning" an what value to use. You may or may not
+ want to change these system parameters, depending on your server/workstation
+ application. Doing so is not warranted in any way by Chelsio Communications,
+ and is done at "YOUR OWN RISK". Chelsio will not be held responsible for loss
+ of data or damage to equipment.
+
+ Your distribution may have a different way of doing things, or you may prefer
+ a different method. These commands are shown only to provide an example of
+ what to do and are by no means definitive.
+
+ Making any of the following system changes will only last until you reboot
+ your system. You may want to write a script that runs at boot-up which
+ includes the optimal settings for your system.
+
+  Setting PCI Latency Timer:
+      setpci -d 1425:* 0x0c.l=0x0000F800
+
+  Disabling TCP timestamp:
+      sysctl -w net.ipv4.tcp_timestamps=0
+
+  Disabling SACK:
+      sysctl -w net.ipv4.tcp_sack=0
+
+  Setting large number of incoming connection requests:
+      sysctl -w net.ipv4.tcp_max_syn_backlog=3000
+
+  Setting maximum receive socket buffer size:
+      sysctl -w net.core.rmem_max=1024000
+
+  Setting maximum send socket buffer size:
+      sysctl -w net.core.wmem_max=1024000
+
+  Set smp_affinity (on a multiprocessor system) to a single CPU:
+      echo 1 > /proc/irq/<interrupt_number>/smp_affinity
+
+  Setting default receive socket buffer size:
+      sysctl -w net.core.rmem_default=524287
+
+  Setting default send socket buffer size:
+      sysctl -w net.core.wmem_default=524287
+
+  Setting maximum option memory buffers:
+      sysctl -w net.core.optmem_max=524287
+
+  Setting maximum backlog (# of unprocessed packets before kernel drops):
+      sysctl -w net.core.netdev_max_backlog=300000
+
+  Setting TCP read buffers (min/default/max):
+      sysctl -w net.ipv4.tcp_rmem="10000000 10000000 10000000"
+
+  Setting TCP write buffers (min/pressure/max):
+      sysctl -w net.ipv4.tcp_wmem="10000000 10000000 10000000"
+
+  Setting TCP buffer space (min/pressure/max):
+      sysctl -w net.ipv4.tcp_mem="10000000 10000000 10000000"
+
+  TCP window size for single connections:
+   The receive buffer (RX_WINDOW) size must be at least as large as the
+   Bandwidth-Delay Product of the communication link between the sender and
+   receiver. Due to the variations of RTT, you may want to increase the buffer
+   size up to 2 times the Bandwidth-Delay Product. Reference page 289 of
+   "TCP/IP Illustrated, Volume 1, The Protocols" by W. Richard Stevens.
+   At 10Gb speeds, use the following formula:
+       RX_WINDOW >= 1.25MBytes * RTT(in milliseconds)
+       Example for RTT with 100us: RX_WINDOW = (1,250,000 * 0.1) = 125,000
+   RX_WINDOW sizes of 256KB - 512KB should be sufficient.
+   Setting the min, max, and default receive buffer (RX_WINDOW) size:
+       sysctl -w net.ipv4.tcp_rmem="<min> <default> <max>"
+
+  TCP window size for multiple connections:
+   The receive buffer (RX_WINDOW) size may be calculated the same as single
+   connections, but should be divided by the number of connections. The
+   smaller window prevents congestion and facilitates better pacing,
+   especially if/when MAC level flow control does not work well or when it is
+   not supported on the machine. Experimentation may be necessary to attain
+   the correct value. This method is provided as a starting point fot the
+   correct receive buffer size.
+   Setting the min, max, and default receive buffer (RX_WINDOW) size is
+   performed in the same manner as single connection.
+
+
+DRIVER MESSAGES
+===============
+
+ The following messages are the most common messages logged by syslog. These
+ may be found in /var/log/messages.
+
+  Driver up:
+     Chelsio Network Driver - version 2.1.1
+
+  NIC detected:
+     eth#: Chelsio N210 1x10GBaseX NIC (rev #), PCIX 133MHz/64-bit
+
+  Link up:
+     eth#: link is up at 10 Gbps, full duplex
+
+  Link down:
+     eth#: link is down
+
+
+KNOWN ISSUES
+============
+
+ These issues have been identified during testing. The following information
+ is provided as a workaround to the problem. In some cases, this problem is
+ inherent to Linux or to a particular Linux Distribution and/or hardware
+ platform.
+
+  1. Large number of TCP retransmits on a multiprocessor (SMP) system.
+
+      On a system with multiple CPUs, the interrupt (IRQ) for the network
+      controller may be bound to more than one CPU. This will cause TCP
+      retransmits if the packet data were to be split across different CPUs
+      and re-assembled in a different order than expected.
+
+      To eliminate the TCP retransmits, set smp_affinity on the particular
+      interrupt to a single CPU. You can locate the interrupt (IRQ) used on
+      the N110/N210 by using ifconfig:
+          ifconfig <dev_name> | grep Interrupt
+      Set the smp_affinity to a single CPU:
+          echo 1 > /proc/irq/<interrupt_number>/smp_affinity
+
+      It is highly suggested that you do not run the irqbalance daemon on your
+      system, as this will change any smp_affinity setting you have applied.
+      The irqbalance daemon runs on a 10 second interval and binds interrupts
+      to the least loaded CPU determined by the daemon. To disable this daemon:
+          chkconfig --level 2345 irqbalance off
+
+      By default, some Linux distributions enable the kernel feature,
+      irqbalance, which performs the same function as the daemon. To disable
+      this feature, add the following line to your bootloader:
+          noirqbalance
+
+          Example using the Grub bootloader:
+              title Red Hat Enterprise Linux AS (2.4.21-27.ELsmp)
+              root (hd0,0)
+              kernel /vmlinuz-2.4.21-27.ELsmp ro root=/dev/hda3 noirqbalance
+              initrd /initrd-2.4.21-27.ELsmp.img
+
+  2. After running insmod, the driver is loaded and the incorrect network
+     interface is brought up without running ifup.
+
+      When using 2.4.x kernels, including RHEL kernels, the Linux kernel
+      invokes a script named "hotplug". This script is primarily used to
+      automatically bring up USB devices when they are plugged in, however,
+      the script also attempts to automatically bring up a network interface
+      after loading the kernel module. The hotplug script does this by scanning
+      the ifcfg-eth# config files in /etc/sysconfig/network-scripts, looking
+      for HWADDR=<mac_address>.
+
+      If the hotplug script does not find the HWADDRR within any of the
+      ifcfg-eth# files, it will bring up the device with the next available
+      interface name. If this interface is already configured for a different
+      network card, your new interface will have incorrect IP address and
+      network settings.
+
+      To solve this issue, you can add the HWADDR=<mac_address> key to the
+      interface config file of your network controller.
+
+      To disable this "hotplug" feature, you may add the driver (module name)
+      to the "blacklist" file located in /etc/hotplug. It has been noted that
+      this does not work for network devices because the net.agent script
+      does not use the blacklist file. Simply remove, or rename, the net.agent
+      script located in /etc/hotplug to disable this feature.
+
+  3. Transport Protocol (TP) hangs when running heavy multi-connection traffic
+     on an AMD Opteron system with HyperTransport PCI-X Tunnel chipset.
+
+      If your AMD Opteron system uses the AMD-8131 HyperTransport PCI-X Tunnel
+      chipset, you may experience the "133-Mhz Mode Split Completion Data
+      Corruption" bug identified by AMD while using a 133Mhz PCI-X card on the
+      bus PCI-X bus.
+
+      AMD states, "Under highly specific conditions, the AMD-8131 PCI-X Tunnel
+      can provide stale data via split completion cycles to a PCI-X card that
+      is operating at 133 Mhz", causing data corruption.
+
+      AMD's provides three workarounds for this problem, however, Chelsio
+      recommends the first option for best performance with this bug:
+
+        For 133Mhz secondary bus operation, limit the transaction length and
+        the number of outstanding transactions, via BIOS configuration
+        programming of the PCI-X card, to the following:
+
+           Data Length (bytes): 1k
+           Total allowed outstanding transactions: 2
+
+      Please refer to AMD 8131-HT/PCI-X Errata 26310 Rev 3.08 August 2004,
+      section 56, "133-MHz Mode Split Completion Data Corruption" for more
+      details with this bug and workarounds suggested by AMD.
+
+      It may be possible to work outside AMD's recommended PCI-X settings, try
+      increasing the Data Length to 2k bytes for increased performance. If you
+      have issues with these settings, please revert to the "safe" settings
+      and duplicate the problem before submitting a bug or asking for support.
+
+      NOTE: The default setting on most systems is 8 outstanding transactions
+            and 2k bytes data length.
+
+  4. On multiprocessor systems, it has been noted that an application which
+     is handling 10Gb networking can switch between CPUs causing degraded
+     and/or unstable performance.
+
+      If running on an SMP system and taking performance measurements, it
+      is suggested you either run the latest netperf-2.4.0+ or use a binding
+      tool such as Tim Hockin's procstate utilities (runon)
+      <http://www.hockin.org/~thockin/procstate/>.
+
+      Binding netserver and netperf (or other applications) to particular
+      CPUs will have a significant difference in performance measurements.
+      You may need to experiment which CPU to bind the application to in
+      order to achieve the best performance for your system.
+
+      If you are developing an application designed for 10Gb networking,
+      please keep in mind you may want to look at kernel functions
+      sched_setaffinity & sched_getaffinity to bind your application.
+
+      If you are just running user-space applications such as ftp, telnet,
+      etc., you may want to try the runon tool provided by Tim Hockin's
+      procstate utility. You could also try binding the interface to a
+      particular CPU: runon 0 ifup eth0
+
+
+SUPPORT
+=======
+
+ If you have problems with the software or hardware, please contact our
+ customer support team via email at support@chelsio.com or check our website
+ at http://www.chelsio.com
+
+===============================================================================
+
+ Chelsio Communications
+ 370 San Aleso Ave.
+ Suite 100
+ Sunnyvale, CA 94085
+ http://www.chelsio.com
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License, version 2, as
+published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+
+ Copyright (c) 2003-2005 Chelsio Communications. All rights reserved.
+
+===============================================================================

+ 288 - 0
Documentation/networking/phy.txt

@@ -0,0 +1,288 @@
+
+-------
+PHY Abstraction Layer
+(Updated 2005-07-21)
+
+Purpose
+
+ Most network devices consist of set of registers which provide an interface
+ to a MAC layer, which communicates with the physical connection through a
+ PHY.  The PHY concerns itself with negotiating link parameters with the link
+ partner on the other side of the network connection (typically, an ethernet
+ cable), and provides a register interface to allow drivers to determine what
+ settings were chosen, and to configure what settings are allowed.
+
+ While these devices are distinct from the network devices, and conform to a
+ standard layout for the registers, it has been common practice to integrate
+ the PHY management code with the network driver.  This has resulted in large
+ amounts of redundant code.  Also, on embedded systems with multiple (and
+ sometimes quite different) ethernet controllers connected to the same 
+ management bus, it is difficult to ensure safe use of the bus.
+
+ Since the PHYs are devices, and the management busses through which they are
+ accessed are, in fact, busses, the PHY Abstraction Layer treats them as such.
+ In doing so, it has these goals:
+
+   1) Increase code-reuse
+   2) Increase overall code-maintainability
+   3) Speed development time for new network drivers, and for new systems
+ 
+ Basically, this layer is meant to provide an interface to PHY devices which
+ allows network driver writers to write as little code as possible, while
+ still providing a full feature set.
+
+The MDIO bus
+
+ Most network devices are connected to a PHY by means of a management bus.
+ Different devices use different busses (though some share common interfaces).
+ In order to take advantage of the PAL, each bus interface needs to be
+ registered as a distinct device.
+
+ 1) read and write functions must be implemented.  Their prototypes are:
+
+     int write(struct mii_bus *bus, int mii_id, int regnum, u16 value);
+     int read(struct mii_bus *bus, int mii_id, int regnum);
+
+   mii_id is the address on the bus for the PHY, and regnum is the register
+   number.  These functions are guaranteed not to be called from interrupt
+   time, so it is safe for them to block, waiting for an interrupt to signal
+   the operation is complete
+ 
+ 2) A reset function is necessary.  This is used to return the bus to an
+   initialized state.
+
+ 3) A probe function is needed.  This function should set up anything the bus
+   driver needs, setup the mii_bus structure, and register with the PAL using
+   mdiobus_register.  Similarly, there's a remove function to undo all of
+   that (use mdiobus_unregister).
+ 
+ 4) Like any driver, the device_driver structure must be configured, and init
+   exit functions are used to register the driver.
+
+ 5) The bus must also be declared somewhere as a device, and registered.
+
+ As an example for how one driver implemented an mdio bus driver, see
+ drivers/net/gianfar_mii.c and arch/ppc/syslib/mpc85xx_devices.c
+
+Connecting to a PHY
+
+ Sometime during startup, the network driver needs to establish a connection
+ between the PHY device, and the network device.  At this time, the PHY's bus
+ and drivers need to all have been loaded, so it is ready for the connection.
+ At this point, there are several ways to connect to the PHY:
+
+ 1) The PAL handles everything, and only calls the network driver when
+   the link state changes, so it can react.
+
+ 2) The PAL handles everything except interrupts (usually because the
+   controller has the interrupt registers).
+
+ 3) The PAL handles everything, but checks in with the driver every second,
+   allowing the network driver to react first to any changes before the PAL
+   does.
+ 
+ 4) The PAL serves only as a library of functions, with the network device
+   manually calling functions to update status, and configure the PHY
+
+
+Letting the PHY Abstraction Layer do Everything
+
+ If you choose option 1 (The hope is that every driver can, but to still be
+ useful to drivers that can't), connecting to the PHY is simple:
+
+ First, you need a function to react to changes in the link state.  This
+ function follows this protocol:
+
+   static void adjust_link(struct net_device *dev);
+ 
+ Next, you need to know the device name of the PHY connected to this device. 
+ The name will look something like, "phy0:0", where the first number is the
+ bus id, and the second is the PHY's address on that bus.
+ 
+ Now, to connect, just call this function:
+ 
+   phydev = phy_connect(dev, phy_name, &adjust_link, flags);
+
+ phydev is a pointer to the phy_device structure which represents the PHY.  If
+ phy_connect is successful, it will return the pointer.  dev, here, is the
+ pointer to your net_device.  Once done, this function will have started the
+ PHY's software state machine, and registered for the PHY's interrupt, if it
+ has one.  The phydev structure will be populated with information about the
+ current state, though the PHY will not yet be truly operational at this
+ point.
+
+ flags is a u32 which can optionally contain phy-specific flags.
+ This is useful if the system has put hardware restrictions on
+ the PHY/controller, of which the PHY needs to be aware.
+
+ Now just make sure that phydev->supported and phydev->advertising have any
+ values pruned from them which don't make sense for your controller (a 10/100
+ controller may be connected to a gigabit capable PHY, so you would need to
+ mask off SUPPORTED_1000baseT*).  See include/linux/ethtool.h for definitions
+ for these bitfields. Note that you should not SET any bits, or the PHY may
+ get put into an unsupported state.
+
+ Lastly, once the controller is ready to handle network traffic, you call
+ phy_start(phydev).  This tells the PAL that you are ready, and configures the
+ PHY to connect to the network.  If you want to handle your own interrupts,
+ just set phydev->irq to PHY_IGNORE_INTERRUPT before you call phy_start.
+ Similarly, if you don't want to use interrupts, set phydev->irq to PHY_POLL.
+
+ When you want to disconnect from the network (even if just briefly), you call
+ phy_stop(phydev).
+
+Keeping Close Tabs on the PAL
+
+ It is possible that the PAL's built-in state machine needs a little help to
+ keep your network device and the PHY properly in sync.  If so, you can
+ register a helper function when connecting to the PHY, which will be called
+ every second before the state machine reacts to any changes.  To do this, you
+ need to manually call phy_attach() and phy_prepare_link(), and then call
+ phy_start_machine() with the second argument set to point to your special
+ handler.
+
+ Currently there are no examples of how to use this functionality, and testing
+ on it has been limited because the author does not have any drivers which use
+ it (they all use option 1).  So Caveat Emptor.
+
+Doing it all yourself
+
+ There's a remote chance that the PAL's built-in state machine cannot track
+ the complex interactions between the PHY and your network device.  If this is
+ so, you can simply call phy_attach(), and not call phy_start_machine or
+ phy_prepare_link().  This will mean that phydev->state is entirely yours to
+ handle (phy_start and phy_stop toggle between some of the states, so you
+ might need to avoid them).
+
+ An effort has been made to make sure that useful functionality can be
+ accessed without the state-machine running, and most of these functions are
+ descended from functions which did not interact with a complex state-machine.
+ However, again, no effort has been made so far to test running without the
+ state machine, so tryer beware.
+
+ Here is a brief rundown of the functions:
+
+ int phy_read(struct phy_device *phydev, u16 regnum);
+ int phy_write(struct phy_device *phydev, u16 regnum, u16 val);
+
+   Simple read/write primitives.  They invoke the bus's read/write function
+   pointers.
+
+ void phy_print_status(struct phy_device *phydev);
+ 
+   A convenience function to print out the PHY status neatly.
+
+ int phy_clear_interrupt(struct phy_device *phydev);
+ int phy_config_interrupt(struct phy_device *phydev, u32 interrupts);
+   
+   Clear the PHY's interrupt, and configure which ones are allowed,
+   respectively.  Currently only supports all on, or all off.
+ 
+ int phy_enable_interrupts(struct phy_device *phydev);
+ int phy_disable_interrupts(struct phy_device *phydev);
+
+   Functions which enable/disable PHY interrupts, clearing them
+   before and after, respectively.
+
+ int phy_start_interrupts(struct phy_device *phydev);
+ int phy_stop_interrupts(struct phy_device *phydev);
+
+   Requests the IRQ for the PHY interrupts, then enables them for
+   start, or disables then frees them for stop.
+
+ struct phy_device * phy_attach(struct net_device *dev, const char *phy_id,
+		 u32 flags);
+
+   Attaches a network device to a particular PHY, binding the PHY to a generic
+   driver if none was found during bus initialization.  Passes in
+   any phy-specific flags as needed.
+
+ int phy_start_aneg(struct phy_device *phydev);
+   
+   Using variables inside the phydev structure, either configures advertising
+   and resets autonegotiation, or disables autonegotiation, and configures
+   forced settings.
+
+ static inline int phy_read_status(struct phy_device *phydev);
+
+   Fills the phydev structure with up-to-date information about the current
+   settings in the PHY.
+
+ void phy_sanitize_settings(struct phy_device *phydev)
+   
+   Resolves differences between currently desired settings, and
+   supported settings for the given PHY device.  Does not make
+   the changes in the hardware, though.
+
+ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
+ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd);
+
+   Ethtool convenience functions.
+
+ int phy_mii_ioctl(struct phy_device *phydev,
+                 struct mii_ioctl_data *mii_data, int cmd);
+
+   The MII ioctl.  Note that this function will completely screw up the state
+   machine if you write registers like BMCR, BMSR, ADVERTISE, etc.  Best to
+   use this only to write registers which are not standard, and don't set off
+   a renegotiation.
+
+
+PHY Device Drivers
+
+ With the PHY Abstraction Layer, adding support for new PHYs is
+ quite easy.  In some cases, no work is required at all!  However,
+ many PHYs require a little hand-holding to get up-and-running.
+
+Generic PHY driver
+
+ If the desired PHY doesn't have any errata, quirks, or special
+ features you want to support, then it may be best to not add
+ support, and let the PHY Abstraction Layer's Generic PHY Driver
+ do all of the work.  
+
+Writing a PHY driver
+
+ If you do need to write a PHY driver, the first thing to do is
+ make sure it can be matched with an appropriate PHY device.
+ This is done during bus initialization by reading the device's
+ UID (stored in registers 2 and 3), then comparing it to each
+ driver's phy_id field by ANDing it with each driver's
+ phy_id_mask field.  Also, it needs a name.  Here's an example:
+
+   static struct phy_driver dm9161_driver = {
+         .phy_id         = 0x0181b880,
+	 .name           = "Davicom DM9161E",
+	 .phy_id_mask    = 0x0ffffff0,
+	 ...
+   }
+
+ Next, you need to specify what features (speed, duplex, autoneg,
+ etc) your PHY device and driver support.  Most PHYs support
+ PHY_BASIC_FEATURES, but you can look in include/mii.h for other
+ features.
+
+ Each driver consists of a number of function pointers:
+
+   config_init: configures PHY into a sane state after a reset.
+     For instance, a Davicom PHY requires descrambling disabled.
+   probe: Does any setup needed by the driver
+   suspend/resume: power management
+   config_aneg: Changes the speed/duplex/negotiation settings
+   read_status: Reads the current speed/duplex/negotiation settings
+   ack_interrupt: Clear a pending interrupt
+   config_intr: Enable or disable interrupts
+   remove: Does any driver take-down
+
+ Of these, only config_aneg and read_status are required to be
+ assigned by the driver code.  The rest are optional.  Also, it is
+ preferred to use the generic phy driver's versions of these two
+ functions if at all possible: genphy_read_status and
+ genphy_config_aneg.  If this is not possible, it is likely that
+ you only need to perform some actions before and after invoking
+ these functions, and so your functions will wrap the generic
+ ones.
+
+ Feel free to look at the Marvell, Cicada, and Davicom drivers in
+ drivers/net/phy/ for examples (the lxt and qsemi drivers have
+ not been tested as of this writing)

+ 1 - 0
Documentation/sound/alsa/ALSA-Configuration.txt

@@ -132,6 +132,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
     mpu_irq	- IRQ # for MPU-401 UART (PnP setup)
     dma1	- first DMA # for AD1816A chip (PnP setup)
     dma2	- second DMA # for AD1816A chip (PnP setup)
+    clockfreq   - Clock frequency for AD1816A chip (default = 0, 33000Hz)
     
     Module supports up to 8 cards, autoprobe and PnP.
     

+ 11 - 4
Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl

@@ -3422,10 +3422,17 @@ struct _snd_pcm_runtime {
 
       <para>
         The <structfield>iface</structfield> field specifies the type of
-      the control,
-      <constant>SNDRV_CTL_ELEM_IFACE_XXX</constant>. There are
-      <constant>MIXER</constant>, <constant>PCM</constant>,
-      <constant>CARD</constant>, etc.
+      the control, <constant>SNDRV_CTL_ELEM_IFACE_XXX</constant>, which
+      is usually <constant>MIXER</constant>.
+      Use <constant>CARD</constant> for global controls that are not
+      logically part of the mixer.
+      If the control is closely associated with some specific device on
+      the sound card, use <constant>HWDEP</constant>,
+      <constant>PCM</constant>, <constant>RAWMIDI</constant>,
+      <constant>TIMER</constant>, or <constant>SEQUENCER</constant>, and
+      specify the device number with the
+      <structfield>device</structfield> and
+      <structfield>subdevice</structfield> fields.
       </para>
 
       <para>

+ 6 - 0
MAINTAINERS

@@ -2092,6 +2092,12 @@ M:	support@simtec.co.uk
 W:	http://www.simtec.co.uk/products/EB2410ITX/
 S:	Supported
 
+SIS 190 ETHERNET DRIVER
+P:	Francois Romieu
+M:	romieu@fr.zoreil.com
+L:	netdev@vger.kernel.org
+S:	Maintained
+
 SIS 5513 IDE CONTROLLER DRIVER
 P:	Lionel Bouton
 M:	Lionel.Bouton@inet6.fr

+ 2 - 2
Makefile

@@ -1,8 +1,8 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 13
-EXTRAVERSION =-rc7
-NAME=Woozy Numbat
+EXTRAVERSION =
+NAME=Affluent Albatross
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"

+ 5 - 6
arch/alpha/kernel/signal.c

@@ -566,13 +566,12 @@ handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info,
 	if (ka->sa.sa_flags & SA_RESETHAND)
 		ka->sa.sa_handler = SIG_DFL;
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER)) 
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 static inline void

+ 2 - 37
arch/arm/Kconfig

@@ -635,10 +635,6 @@ config PM
 	  and the Battery Powered Linux mini-HOWTO, available from
 	  <http://www.tldp.org/docs.html#howto>.
 
-	  Note that, even if you say N here, Linux on the x86 architecture
-	  will issue the hlt instruction if nothing is to be done, thereby
-	  sending the processor to sleep and saving power.
-
 config APM
 	tristate "Advanced Power Management Emulation"
 	depends on PM
@@ -650,12 +646,6 @@ config APM
 	  battery status information, and user-space programs will receive
 	  notification of APM "events" (e.g. battery status change).
 
-	  If you select "Y" here, you can disable actual use of the APM
-	  BIOS by passing the "apm=off" option to the kernel at boot time.
-
-	  Note that the APM support is almost completely disabled for
-	  machines with more than one CPU.
-
 	  In order to use APM, you will need supporting software. For location
 	  and more information, read <file:Documentation/pm.txt> and the
 	  Battery Powered Linux mini-HOWTO, available from
@@ -665,39 +655,12 @@ config APM
 	  manpage ("man 8 hdparm") for that), and it doesn't turn off
 	  VESA-compliant "green" monitors.
 
-	  This driver does not support the TI 4000M TravelMate and the ACER
-	  486/DX4/75 because they don't have compliant BIOSes. Many "green"
-	  desktop machines also don't have compliant BIOSes, and this driver
-	  may cause those machines to panic during the boot phase.
-
 	  Generally, if you don't have a battery in your machine, there isn't
 	  much point in using this driver and you should say N. If you get
 	  random kernel OOPSes or reboots that don't seem to be related to
 	  anything, try disabling/enabling this option (or disabling/enabling
 	  APM in your BIOS).
 
-	  Some other things you should try when experiencing seemingly random,
-	  "weird" problems:
-
-	  1) make sure that you have enough swap space and that it is
-	  enabled.
-	  2) pass the "no-hlt" option to the kernel
-	  3) switch on floating point emulation in the kernel and pass
-	  the "no387" option to the kernel
-	  4) pass the "floppy=nodma" option to the kernel
-	  5) pass the "mem=4M" option to the kernel (thereby disabling
-	  all but the first 4 MB of RAM)
-	  6) make sure that the CPU is not over clocked.
-	  7) read the sig11 FAQ at <http://www.bitwizard.nl/sig11/>
-	  8) disable the cache from your BIOS settings
-	  9) install a fan for the video card or exchange video RAM
-	  10) install a better fan for the CPU
-	  11) exchange RAM chips
-	  12) exchange the motherboard.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called apm.
-
 endmenu
 
 source "net/Kconfig"
@@ -752,6 +715,8 @@ source "drivers/hwmon/Kconfig"
 
 source "drivers/misc/Kconfig"
 
+source "drivers/mfd/Kconfig"
+
 source "drivers/media/Kconfig"
 
 source "drivers/video/Kconfig"

+ 3 - 0
arch/arm/common/Kconfig

@@ -1,6 +1,9 @@
 config ICST525
 	bool
 
+config ARM_GIC
+	bool
+
 config ICST307
 	bool
 

+ 1 - 0
arch/arm/common/Makefile

@@ -4,6 +4,7 @@
 
 obj-y				+= rtctime.o
 obj-$(CONFIG_ARM_AMBA)		+= amba.o
+obj-$(CONFIG_ARM_GIC)		+= gic.o
 obj-$(CONFIG_ICST525)		+= icst525.o
 obj-$(CONFIG_ICST307)		+= icst307.o
 obj-$(CONFIG_SA1111)		+= sa1111.o

+ 166 - 0
arch/arm/common/gic.c

@@ -0,0 +1,166 @@
+/*
+ *  linux/arch/arm/common/gic.c
+ *
+ *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Interrupt architecture for the GIC:
+ *
+ * o There is one Interrupt Distributor, which receives interrupts
+ *   from system devices and sends them to the Interrupt Controllers.
+ *
+ * o There is one CPU Interface per CPU, which sends interrupts sent
+ *   by the Distributor, and interrupts generated locally, to the
+ *   associated CPU.
+ *
+ * Note that IRQs 0-31 are special - they are local to each CPU.
+ * As such, the enable set/clear, pending set/clear and active bit
+ * registers are banked per-cpu for these sources.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/mach/irq.h>
+#include <asm/hardware/gic.h>
+
+static void __iomem *gic_dist_base;
+static void __iomem *gic_cpu_base;
+
+/*
+ * Routines to acknowledge, disable and enable interrupts
+ *
+ * Linux assumes that when we're done with an interrupt we need to
+ * unmask it, in the same way we need to unmask an interrupt when
+ * we first enable it.
+ *
+ * The GIC has a seperate notion of "end of interrupt" to re-enable
+ * an interrupt after handling, in order to support hardware
+ * prioritisation.
+ *
+ * We can make the GIC behave in the way that Linux expects by making
+ * our "acknowledge" routine disable the interrupt, then mark it as
+ * complete.
+ */
+static void gic_ack_irq(unsigned int irq)
+{
+	u32 mask = 1 << (irq % 32);
+	writel(mask, gic_dist_base + GIC_DIST_ENABLE_CLEAR + (irq / 32) * 4);
+	writel(irq, gic_cpu_base + GIC_CPU_EOI);
+}
+
+static void gic_mask_irq(unsigned int irq)
+{
+	u32 mask = 1 << (irq % 32);
+	writel(mask, gic_dist_base + GIC_DIST_ENABLE_CLEAR + (irq / 32) * 4);
+}
+
+static void gic_unmask_irq(unsigned int irq)
+{
+	u32 mask = 1 << (irq % 32);
+	writel(mask, gic_dist_base + GIC_DIST_ENABLE_SET + (irq / 32) * 4);
+}
+
+static void gic_set_cpu(struct irqdesc *desc, unsigned int irq, unsigned int cpu)
+{
+	void __iomem *reg = gic_dist_base + GIC_DIST_TARGET + (irq & ~3);
+	unsigned int shift = (irq % 4) * 8;
+	u32 val;
+
+	val = readl(reg) & ~(0xff << shift);
+	val |= 1 << (cpu + shift);
+	writel(val, reg);
+}
+
+static struct irqchip gic_chip = {
+	.ack		= gic_ack_irq,
+	.mask		= gic_mask_irq,
+	.unmask		= gic_unmask_irq,
+#ifdef CONFIG_SMP
+	.set_cpu	= gic_set_cpu,
+#endif
+};
+
+void __init gic_dist_init(void __iomem *base)
+{
+	unsigned int max_irq, i;
+	u32 cpumask = 1 << smp_processor_id();
+
+	cpumask |= cpumask << 8;
+	cpumask |= cpumask << 16;
+
+	gic_dist_base = base;
+
+	writel(0, base + GIC_DIST_CTRL);
+
+	/*
+	 * Find out how many interrupts are supported.
+	 */
+	max_irq = readl(base + GIC_DIST_CTR) & 0x1f;
+	max_irq = (max_irq + 1) * 32;
+
+	/*
+	 * The GIC only supports up to 1020 interrupt sources.
+	 * Limit this to either the architected maximum, or the
+	 * platform maximum.
+	 */
+	if (max_irq > max(1020, NR_IRQS))
+		max_irq = max(1020, NR_IRQS);
+
+	/*
+	 * Set all global interrupts to be level triggered, active low.
+	 */
+	for (i = 32; i < max_irq; i += 16)
+		writel(0, base + GIC_DIST_CONFIG + i * 4 / 16);
+
+	/*
+	 * Set all global interrupts to this CPU only.
+	 */
+	for (i = 32; i < max_irq; i += 4)
+		writel(cpumask, base + GIC_DIST_TARGET + i * 4 / 4);
+
+	/*
+	 * Set priority on all interrupts.
+	 */
+	for (i = 0; i < max_irq; i += 4)
+		writel(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4);
+
+	/*
+	 * Disable all interrupts.
+	 */
+	for (i = 0; i < max_irq; i += 32)
+		writel(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i * 4 / 32);
+
+	/*
+	 * Setup the Linux IRQ subsystem.
+	 */
+	for (i = 29; i < max_irq; i++) {
+		set_irq_chip(i, &gic_chip);
+		set_irq_handler(i, do_level_IRQ);
+		set_irq_flags(i, IRQF_VALID | IRQF_PROBE);
+	}
+
+	writel(1, base + GIC_DIST_CTRL);
+}
+
+void __cpuinit gic_cpu_init(void __iomem *base)
+{
+	gic_cpu_base = base;
+	writel(0xf0, base + GIC_CPU_PRIMASK);
+	writel(1, base + GIC_CPU_CTRL);
+}
+
+#ifdef CONFIG_SMP
+void gic_raise_softirq(cpumask_t cpumask, unsigned int irq)
+{
+	unsigned long map = *cpus_addr(cpumask);
+
+	writel(map << 16 | irq, gic_dist_base + GIC_DIST_SOFTINT);
+}
+#endif

+ 3 - 2
arch/arm/kernel/signal.c

@@ -658,11 +658,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
 	/*
 	 * Block the signal if we were unsuccessful.
 	 */
-	if (ret != 0 || !(ka->sa.sa_flags & SA_NODEFER)) {
+	if (ret != 0) {
 		spin_lock_irq(&tsk->sighand->siglock);
 		sigorsets(&tsk->blocked, &tsk->blocked,
 			  &ka->sa.sa_mask);
-		sigaddset(&tsk->blocked, sig);
+		if (!(ka->sa.sa_flags & SA_NODEFER))
+			sigaddset(&tsk->blocked, sig);
 		recalc_sigpending();
 		spin_unlock_irq(&tsk->sighand->siglock);
 	}

+ 1 - 1
arch/arm/mach-ixp4xx/coyote-setup.c

@@ -36,7 +36,7 @@ static struct flash_platform_data coyote_flash_data = {
 
 static struct resource coyote_flash_resource = {
 	.start		= COYOTE_FLASH_BASE,
-	.end		= COYOTE_FLASH_BASE + COYOTE_FLASH_SIZE,
+	.end		= COYOTE_FLASH_BASE + COYOTE_FLASH_SIZE - 1,
 	.flags		= IORESOURCE_MEM,
 };
 

+ 1 - 1
arch/arm/mach-ixp4xx/gtwx5715-setup.c

@@ -114,7 +114,7 @@ static struct flash_platform_data gtwx5715_flash_data = {
 
 static struct resource gtwx5715_flash_resource = {
 	.start		= GTWX5715_FLASH_BASE,
-	.end		= GTWX5715_FLASH_BASE + GTWX5715_FLASH_SIZE,
+	.end		= GTWX5715_FLASH_BASE + GTWX5715_FLASH_SIZE - 1,
 	.flags		= IORESOURCE_MEM,
 };
 

+ 1 - 1
arch/arm/mach-ixp4xx/ixdp425-setup.c

@@ -36,7 +36,7 @@ static struct flash_platform_data ixdp425_flash_data = {
 
 static struct resource ixdp425_flash_resource = {
 	.start		= IXDP425_FLASH_BASE,
-	.end		= IXDP425_FLASH_BASE + IXDP425_FLASH_SIZE,
+	.end		= IXDP425_FLASH_BASE + IXDP425_FLASH_SIZE - 1,
 	.flags		= IORESOURCE_MEM,
 };
 

+ 7 - 0
arch/arm/mach-sa1100/assabet.c

@@ -35,6 +35,7 @@
 #include <asm/mach/map.h>
 #include <asm/mach/serial_sa1100.h>
 #include <asm/arch/assabet.h>
+#include <asm/arch/mcp.h>
 
 #include "generic.h"
 
@@ -198,6 +199,11 @@ static struct irda_platform_data assabet_irda_data = {
 	.set_speed	= assabet_irda_set_speed,
 };
 
+static struct mcp_plat_data assabet_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
 static void __init assabet_init(void)
 {
 	/*
@@ -246,6 +252,7 @@ static void __init assabet_init(void)
 	sa11x0_set_flash_data(&assabet_flash_data, assabet_flash_resources,
 			      ARRAY_SIZE(assabet_flash_resources));
 	sa11x0_set_irda_data(&assabet_irda_data);
+	sa11x0_set_mcp_data(&assabet_mcp_data);
 }
 
 /*

+ 7 - 0
arch/arm/mach-sa1100/cerf.c

@@ -29,6 +29,7 @@
 #include <asm/mach/serial_sa1100.h>
 
 #include <asm/arch/cerf.h>
+#include <asm/arch/mcp.h>
 #include "generic.h"
 
 static struct resource cerfuart2_resources[] = {
@@ -116,10 +117,16 @@ static void __init cerf_map_io(void)
 	GPDR |= CERF_GPIO_CF_RESET;
 }
 
+static struct mcp_plat_data cerf_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
 static void __init cerf_init(void)
 {
 	platform_add_devices(cerf_devices, ARRAY_SIZE(cerf_devices));
 	sa11x0_set_flash_data(&cerf_flash_data, &cerf_flash_resource, 1);
+	sa11x0_set_mcp_data(&cerf_mcp_data);
 }
 
 MACHINE_START(CERF, "Intrinsyc CerfBoard/CerfCube")

+ 5 - 0
arch/arm/mach-sa1100/generic.c

@@ -221,6 +221,11 @@ static struct platform_device sa11x0mcp_device = {
 	.resource	= sa11x0mcp_resources,
 };
 
+void sa11x0_set_mcp_data(struct mcp_plat_data *data)
+{
+	sa11x0mcp_device.dev.platform_data = data;
+}
+
 static struct resource sa11x0ssp_resources[] = {
 	[0] = {
 		.start	= 0x80070000,

+ 3 - 0
arch/arm/mach-sa1100/generic.h

@@ -34,5 +34,8 @@ struct resource;
 extern void sa11x0_set_flash_data(struct flash_platform_data *flash,
 				  struct resource *res, int nr);
 
+struct sa11x0_ssp_plat_ops;
+extern void sa11x0_set_ssp_data(struct sa11x0_ssp_plat_ops *ops);
+
 struct irda_platform_data;
 void sa11x0_set_irda_data(struct irda_platform_data *irda);

+ 12 - 0
arch/arm/mach-sa1100/lart.c

@@ -13,12 +13,23 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <asm/mach/serial_sa1100.h>
+#include <asm/arch/mcp.h>
 
 #include "generic.h"
 
 
 #warning "include/asm/arch-sa1100/ide.h needs fixing for lart"
 
+static struct mcp_plat_data lart_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
+static void __init lart_init(void)
+{
+	sa11x0_set_mcp_data(&lart_mcp_data);
+}
+
 static struct map_desc lart_io_desc[] __initdata = {
  /* virtual     physical    length      type */
   { 0xe8000000, 0x00000000, 0x00400000, MT_DEVICE }, /* main flash memory */
@@ -47,5 +58,6 @@ MACHINE_START(LART, "LART")
 	.boot_params	= 0xc0000100,
 	.map_io		= lart_map_io,
 	.init_irq	= sa1100_init_irq,
+	.init_machine	= lart_init,
 	.timer		= &sa1100_timer,
 MACHINE_END

+ 7 - 0
arch/arm/mach-sa1100/shannon.c

@@ -18,6 +18,7 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
 #include <asm/mach/serial_sa1100.h>
+#include <asm/arch/mcp.h>
 #include <asm/arch/shannon.h>
 
 #include "generic.h"
@@ -52,9 +53,15 @@ static struct resource shannon_flash_resource = {
 	.flags		= IORESOURCE_MEM,
 };
 
+static struct mcp_plat_data shannon_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
 static void __init shannon_init(void)
 {
 	sa11x0_set_flash_data(&shannon_flash_data, &shannon_flash_resource, 1);
+	sa11x0_set_mcp_data(&shannon_mcp_data);
 }
 
 static void __init shannon_map_io(void)

+ 7 - 0
arch/arm/mach-sa1100/simpad.c

@@ -23,6 +23,7 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
 #include <asm/mach/serial_sa1100.h>
+#include <asm/arch/mcp.h>
 #include <asm/arch/simpad.h>
 
 #include <linux/serial_core.h>
@@ -123,6 +124,11 @@ static struct resource simpad_flash_resources [] = {
 	}
 };
 
+static struct mcp_plat_data simpad_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
 
 
 static void __init simpad_map_io(void)
@@ -157,6 +163,7 @@ static void __init simpad_map_io(void)
 
 	sa11x0_set_flash_data(&simpad_flash_data, simpad_flash_resources,
 			      ARRAY_SIZE(simpad_flash_resources));
+	sa11x0_set_mcp_data(&simpad_mcp_data);
 }
 
 static void simpad_power_off(void)

+ 6 - 7
arch/arm26/kernel/signal.c

@@ -454,14 +454,13 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset,
 		if (ka->sa.sa_flags & SA_ONESHOT)
 			ka->sa.sa_handler = SIG_DFL;
 
-		if (!(ka->sa.sa_flags & SA_NODEFER)) {
-			spin_lock_irq(&tsk->sighand->siglock);
-			sigorsets(&tsk->blocked, &tsk->blocked,
-				  &ka->sa.sa_mask);
+		spin_lock_irq(&tsk->sighand->siglock);
+		sigorsets(&tsk->blocked, &tsk->blocked,
+			  &ka->sa.sa_mask);
+		if (!(ka->sa.sa_flags & SA_NODEFER))
 			sigaddset(&tsk->blocked, sig);
-			recalc_sigpending();
-			spin_unlock_irq(&tsk->sighand->siglock);
-		}
+		recalc_sigpending();
+		spin_unlock_irq(&tsk->sighand->siglock);
 		return;
 	}
 

+ 5 - 6
arch/cris/arch-v10/kernel/signal.c

@@ -517,13 +517,12 @@ handle_signal(int canrestart, unsigned long sig,
 	if (ka->sa.sa_flags & SA_ONESHOT)
 		ka->sa.sa_handler = SIG_DFL;
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 /*

+ 5 - 6
arch/cris/arch-v32/kernel/signal.c

@@ -568,13 +568,12 @@ handle_signal(int canrestart, unsigned long sig,
 	if (ka->sa.sa_flags & SA_ONESHOT)
 		ka->sa.sa_handler = SIG_DFL;
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 /*

+ 5 - 6
arch/frv/kernel/signal.c

@@ -506,13 +506,12 @@ static void handle_signal(unsigned long sig, siginfo_t *info,
 	else
 		setup_frame(sig, ka, oldset, regs);
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked, sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 } /* end handle_signal() */
 
 /*****************************************************************************/

+ 5 - 6
arch/h8300/kernel/signal.c

@@ -488,13 +488,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	else
 		setup_frame(sig, ka, oldset, regs);
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 /*

+ 3 - 2
arch/i386/kernel/signal.c

@@ -577,10 +577,11 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	else
 		ret = setup_frame(sig, ka, oldset, regs);
 
-	if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
+	if (ret) {
 		spin_lock_irq(&current->sighand->siglock);
 		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
-		sigaddset(&current->blocked,sig);
+		if (!(ka->sa.sa_flags & SA_NODEFER))
+			sigaddset(&current->blocked,sig);
 		recalc_sigpending();
 		spin_unlock_irq(&current->sighand->siglock);
 	}

+ 0 - 1
arch/i386/pci/common.c

@@ -165,7 +165,6 @@ static int __init pcibios_init(void)
 	if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
 		pcibios_sort();
 #endif
-	pci_assign_unassigned_resources();
 	return 0;
 }
 

+ 16 - 33
arch/i386/pci/i386.c

@@ -170,43 +170,26 @@ static void __init pcibios_allocate_resources(int pass)
 static int __init pcibios_assign_resources(void)
 {
 	struct pci_dev *dev = NULL;
-	int idx;
-	struct resource *r;
-
-	for_each_pci_dev(dev) {
-		int class = dev->class >> 8;
-
-		/* Don't touch classless devices and host bridges */
-		if (!class || class == PCI_CLASS_BRIDGE_HOST)
-			continue;
-
-		for(idx=0; idx<6; idx++) {
-			r = &dev->resource[idx];
-
-			/*
-			 *  Don't touch IDE controllers and I/O ports of video cards!
-			 */
-			if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) ||
-			    (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO)))
-				continue;
-
-			/*
-			 *  We shall assign a new address to this resource, either because
-			 *  the BIOS forgot to do so or because we have decided the old
-			 *  address was unusable for some reason.
-			 */
-			if (!r->start && r->end)
-				pci_assign_resource(dev, idx);
-		}
+	struct resource *r, *pr;
 
-		if (pci_probe & PCI_ASSIGN_ROMS) {
+	if (!(pci_probe & PCI_ASSIGN_ROMS)) {
+		/* Try to use BIOS settings for ROMs, otherwise let
+		   pci_assign_unassigned_resources() allocate the new
+		   addresses. */
+		for_each_pci_dev(dev) {
 			r = &dev->resource[PCI_ROM_RESOURCE];
-			r->end -= r->start;
-			r->start = 0;
-			if (r->end)
-				pci_assign_resource(dev, PCI_ROM_RESOURCE);
+			if (!r->flags || !r->start)
+				continue;
+			pr = pci_find_parent_resource(dev, r);
+			if (!pr || request_resource(pr, r) < 0) {
+				r->end -= r->start;
+				r->start = 0;
+			}
 		}
 	}
+
+	pci_assign_unassigned_resources();
+
 	return 0;
 }
 

+ 6 - 9
arch/ia64/kernel/signal.c

@@ -467,15 +467,12 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigse
 		if (!setup_frame(sig, ka, info, oldset, scr))
 			return 0;
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		{
-			sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
-			sigaddset(&current->blocked, sig);
-			recalc_sigpending();
-		}
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
+		sigaddset(&current->blocked, sig);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 	return 1;
 }
 

+ 1 - 0
arch/ia64/pci/pci.c

@@ -380,6 +380,7 @@ void pcibios_bus_to_resource(struct pci_dev *dev,
 	res->start = region->start + offset;
 	res->end = region->end + offset;
 }
+EXPORT_SYMBOL(pcibios_bus_to_resource);
 
 static int __devinit is_valid_resource(struct pci_dev *dev, int idx)
 {

+ 5 - 6
arch/m32r/kernel/signal.c

@@ -341,13 +341,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 	/* Set up the stack frame */
 	setup_rt_frame(sig, ka, info, oldset, regs);
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 /*

+ 5 - 6
arch/m68knommu/kernel/signal.c

@@ -732,13 +732,12 @@ handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info,
 	if (ka->sa.sa_flags & SA_ONESHOT)
 		ka->sa.sa_handler = SIG_DFL;
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 /*

+ 5 - 6
arch/mips/kernel/irixsig.c

@@ -155,13 +155,12 @@ static inline void handle_signal(unsigned long sig, siginfo_t *info,
 	else
 		setup_irix_frame(ka, regs, sig, oldset);
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 asmlinkage int do_irix_signal(sigset_t *oldset, struct pt_regs *regs)

+ 5 - 6
arch/mips/kernel/signal.c

@@ -425,13 +425,12 @@ static inline void handle_signal(unsigned long sig, siginfo_t *info,
 		setup_frame(ka, regs, sig, oldset);
 #endif
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 extern int do_signal32(sigset_t *oldset, struct pt_regs *regs);

+ 5 - 6
arch/mips/kernel/signal32.c

@@ -751,13 +751,12 @@ static inline void handle_signal(unsigned long sig, siginfo_t *info,
 	else
 		setup_frame(ka, regs, sig, oldset);
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 int do_signal32(sigset_t *oldset, struct pt_regs *regs)

+ 5 - 6
arch/parisc/kernel/signal.c

@@ -517,13 +517,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	if (!setup_rt_frame(sig, ka, info, oldset, regs, in_syscall))
 		return 0;
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 	return 1;
 }
 

+ 10 - 1
arch/ppc/Makefile

@@ -21,11 +21,13 @@ CC		:= $(CC) -m32
 endif
 
 LDFLAGS_vmlinux	:= -Ttext $(KERNELLOAD) -Bstatic
-CPPFLAGS	+= -Iarch/$(ARCH)
+CPPFLAGS	+= -Iarch/$(ARCH) -Iinclude3
 AFLAGS		+= -Iarch/$(ARCH)
 CFLAGS		+= -Iarch/$(ARCH) -msoft-float -pipe \
 		-ffixed-r2 -mmultiple
 CPP		= $(CC) -E $(CFLAGS)
+# Temporary hack until we have migrated to asm-powerpc
+LINUXINCLUDE    += -Iinclude3
 
 CHECKFLAGS	+= -D__powerpc__
 
@@ -101,6 +103,7 @@ endef
 
 archclean:
 	$(Q)$(MAKE) $(clean)=arch/ppc/boot
+	$(Q)rm -rf include3
 
 prepare: include/asm-$(ARCH)/offsets.h checkbin
 
@@ -110,6 +113,12 @@ arch/$(ARCH)/kernel/asm-offsets.s: include/asm include/linux/version.h \
 include/asm-$(ARCH)/offsets.h: arch/$(ARCH)/kernel/asm-offsets.s
 	$(call filechk,gen-asm-offsets)
 
+# Temporary hack until we have migrated to asm-powerpc
+include/asm: include3/asm
+include3/asm:
+	$(Q)if [ ! -d include3 ]; then mkdir -p include3; fi
+	$(Q)ln -fsn $(srctree)/include/asm-powerpc include3/asm
+
 # Use the file '.tmp_gas_check' for binutils tests, as gas won't output
 # to stdout and these checks are run even on install targets.
 TOUT	:= .tmp_gas_check

+ 0 - 203
arch/ppc/boot/utils/addRamDisk.c

@@ -1,203 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <netinet/in.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#define ElfHeaderSize  (64 * 1024)
-#define ElfPages  (ElfHeaderSize / 4096)
-#define KERNELBASE (0xc0000000)
-
-void get4k(FILE *file, char *buf )
-{
-    unsigned j;
-    unsigned num = fread(buf, 1, 4096, file);
-    for (  j=num; j<4096; ++j )
-	buf[j] = 0;
-}
-
-void put4k(FILE *file, char *buf )
-{
-    fwrite(buf, 1, 4096, file);
-}
-
-void death(const char *msg, FILE *fdesc, const char *fname)
-{
-    printf(msg);
-    fclose(fdesc);
-    unlink(fname);
-    exit(1);
-}
-
-int main(int argc, char **argv)
-{
-    char inbuf[4096];
-    FILE *ramDisk = NULL;
-    FILE *inputVmlinux = NULL;
-    FILE *outputVmlinux = NULL;
-    unsigned i = 0;
-    u_int32_t ramFileLen = 0;
-    u_int32_t ramLen = 0;
-    u_int32_t roundR = 0;
-    u_int32_t kernelLen = 0;
-    u_int32_t actualKernelLen = 0;
-    u_int32_t round = 0;
-    u_int32_t roundedKernelLen = 0;
-    u_int32_t ramStartOffs = 0;
-    u_int32_t ramPages = 0;
-    u_int32_t roundedKernelPages = 0;
-    u_int32_t hvReleaseData = 0;
-    u_int32_t eyeCatcher = 0xc8a5d9c4;
-    u_int32_t naca = 0;
-    u_int32_t xRamDisk = 0;
-    u_int32_t xRamDiskSize = 0;
-    if ( argc < 2 ) {
-	printf("Name of RAM disk file missing.\n");
-	exit(1);
-    }
-
-    if ( argc < 3 ) {
-	printf("Name of vmlinux file missing.\n");
-	exit(1);
-    }
-
-    if ( argc < 4 ) {
-	printf("Name of vmlinux output file missing.\n");
-	exit(1);
-    }
-
-    ramDisk = fopen(argv[1], "r");
-    if ( ! ramDisk ) {
-	printf("RAM disk file \"%s\" failed to open.\n", argv[1]);
-	exit(1);
-    }
-    inputVmlinux = fopen(argv[2], "r");
-    if ( ! inputVmlinux ) {
-	printf("vmlinux file \"%s\" failed to open.\n", argv[2]);
-	exit(1);
-    }
-    outputVmlinux = fopen(argv[3], "w+");
-    if ( ! outputVmlinux ) {
-	printf("output vmlinux file \"%s\" failed to open.\n", argv[3]);
-	exit(1);
-    }
-    fseek(ramDisk, 0, SEEK_END);
-    ramFileLen = ftell(ramDisk);
-    fseek(ramDisk, 0, SEEK_SET);
-    printf("%s file size = %d\n", argv[1], ramFileLen);
-
-    ramLen = ramFileLen;
-
-    roundR = 4096 - (ramLen % 4096);
-    if ( roundR ) {
-	printf("Rounding RAM disk file up to a multiple of 4096, adding %d\n", roundR);
-	ramLen += roundR;
-    }
-
-    printf("Rounded RAM disk size is %d\n", ramLen);
-    fseek(inputVmlinux, 0, SEEK_END);
-    kernelLen = ftell(inputVmlinux);
-    fseek(inputVmlinux, 0, SEEK_SET);
-    printf("kernel file size = %d\n", kernelLen);
-    if ( kernelLen == 0 ) {
-	printf("You must have a linux kernel specified as argv[2]\n");
-	exit(1);
-    }
-
-    actualKernelLen = kernelLen - ElfHeaderSize;
-
-    printf("actual kernel length (minus ELF header) = %d\n", actualKernelLen);
-
-    round = actualKernelLen % 4096;
-    roundedKernelLen = actualKernelLen;
-    if ( round )
-	roundedKernelLen += (4096 - round);
-
-    printf("actual kernel length rounded up to a 4k multiple = %d\n", roundedKernelLen);
-
-    ramStartOffs = roundedKernelLen;
-    ramPages = ramLen / 4096;
-
-    printf("RAM disk pages to copy = %d\n", ramPages);
-
-    // Copy 64K ELF header
-      for (i=0; i<(ElfPages); ++i) {
-	  get4k( inputVmlinux, inbuf );
-	  put4k( outputVmlinux, inbuf );
-      }
-
-    roundedKernelPages = roundedKernelLen / 4096;
-
-    fseek(inputVmlinux, ElfHeaderSize, SEEK_SET);
-
-    for ( i=0; i<roundedKernelPages; ++i ) {
-	get4k( inputVmlinux, inbuf );
-	put4k( outputVmlinux, inbuf );
-    }
-
-    for ( i=0; i<ramPages; ++i ) {
-	get4k( ramDisk, inbuf );
-	put4k( outputVmlinux, inbuf );
-    }
-
-    /* Close the input files */
-    fclose(ramDisk);
-    fclose(inputVmlinux);
-    /* And flush the written output file */
-    fflush(outputVmlinux);
-
-    /* fseek to the hvReleaseData pointer */
-    fseek(outputVmlinux, ElfHeaderSize + 0x24, SEEK_SET);
-    if (fread(&hvReleaseData, 4, 1, outputVmlinux) != 1) {
-        death("Could not read hvReleaseData pointer\n", outputVmlinux, argv[3]);
-    }
-    hvReleaseData = ntohl(hvReleaseData); /* Convert to native int */
-    printf("hvReleaseData is at %08x\n", hvReleaseData);
-
-    /* fseek to the hvReleaseData */
-    fseek(outputVmlinux, ElfHeaderSize + hvReleaseData, SEEK_SET);
-    if (fread(inbuf, 0x40, 1, outputVmlinux) != 1) {
-        death("Could not read hvReleaseData\n", outputVmlinux, argv[3]);
-    }
-    /* Check hvReleaseData sanity */
-    if (memcmp(inbuf, &eyeCatcher, 4) != 0) {
-        death("hvReleaseData is invalid\n", outputVmlinux, argv[3]);
-    }
-    /* Get the naca pointer */
-    naca = ntohl(*((u_int32_t *) &inbuf[0x0c])) - KERNELBASE;
-    printf("naca is at %08x\n", naca);
-
-    /* fseek to the naca */
-    fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET);
-    if (fread(inbuf, 0x18, 1, outputVmlinux) != 1) {
-        death("Could not read naca\n", outputVmlinux, argv[3]);
-    }
-    xRamDisk = ntohl(*((u_int32_t *) &inbuf[0x0c]));
-    xRamDiskSize = ntohl(*((u_int32_t *) &inbuf[0x14]));
-    /* Make sure a RAM disk isn't already present */
-    if ((xRamDisk != 0) || (xRamDiskSize != 0)) {
-        death("RAM disk is already attached to this kernel\n", outputVmlinux, argv[3]);
-    }
-    /* Fill in the values */
-    *((u_int32_t *) &inbuf[0x0c]) = htonl(ramStartOffs);
-    *((u_int32_t *) &inbuf[0x14]) = htonl(ramPages);
-
-    /* Write out the new naca */
-    fflush(outputVmlinux);
-    fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET);
-    if (fwrite(inbuf, 0x18, 1, outputVmlinux) != 1) {
-        death("Could not write naca\n", outputVmlinux, argv[3]);
-    }
-    printf("RAM Disk of 0x%x pages size is attached to the kernel at offset 0x%08x\n",
-            ramPages, ramStartOffs);
-
-    /* Done */
-    fclose(outputVmlinux);
-    /* Set permission to executable */
-    chmod(argv[3], S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH);
-
-    return 0;
-}
-

+ 5 - 6
arch/ppc/kernel/signal.c

@@ -759,13 +759,12 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs)
 	else
 		handle_signal(signr, &ka, &info, oldset, regs, newsp);
 
-	if (!(ka.sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka.sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka.sa.sa_mask);
+	if (!(ka.sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked, signr);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 
 	return 1;
 }

+ 4 - 4
arch/ppc/syslib/m8xx_setup.c

@@ -57,7 +57,7 @@ unsigned char __res[sizeof(bd_t)];
 extern void m8xx_ide_init(void);
 
 extern unsigned long find_available_memory(void);
-extern void m8xx_cpm_reset();
+extern void m8xx_cpm_reset(void);
 extern void m8xx_wdt_handler_install(bd_t *bp);
 extern void rpxfb_alloc_pages(void);
 extern void cpm_interrupt_init(void);
@@ -266,8 +266,8 @@ m8xx_show_percpuinfo(struct seq_file *m, int i)
 
 	bp = (bd_t *)__res;
 
-	seq_printf(m, "clock\t\t: %ldMHz\n"
-		   "bus clock\t: %ldMHz\n",
+	seq_printf(m, "clock\t\t: %uMHz\n"
+		   "bus clock\t: %uMHz\n",
 		   bp->bi_intfreq / 1000000,
 		   bp->bi_busfreq / 1000000);
 
@@ -423,7 +423,7 @@ platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
 	ppc_md.find_end_of_memory	= m8xx_find_end_of_memory;
 	ppc_md.setup_io_mappings	= m8xx_map_io;
 
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+#if defined(CONFIG_BLK_DEV_MPC8xx_IDE)
 	m8xx_ide_init();
 #endif
 }

+ 34 - 40
arch/ppc64/Kconfig

@@ -302,12 +302,6 @@ config GENERIC_HARDIRQS
 	bool
 	default y
 
-config MSCHUNKS
-	bool
-	depends on PPC_ISERIES
-	default y
-
-
 config PPC_RTAS
 	bool
 	depends on PPC_PSERIES || PPC_BPA
@@ -350,13 +344,46 @@ config SECCOMP
 
 	  If unsure, say Y. Only embedded should say N here.
 
+source "fs/Kconfig.binfmt"
+
+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC)
+	select HOTPLUG
+	---help---
+	  Say Y here to be able to turn CPUs off and on.
+
+	  Say N if you are unsure.
+
+config PROC_DEVICETREE
+	bool "Support for Open Firmware device tree in /proc"
+	depends on !PPC_ISERIES
+	help
+	  This option adds a device-tree directory under /proc which contains
+	  an image of the device tree that the kernel copies from Open
+	  Firmware. If unsure, say Y here.
+
+config CMDLINE_BOOL
+	bool "Default bootloader kernel arguments"
+	depends on !PPC_ISERIES
+
+config CMDLINE
+	string "Initial kernel command string"
+	depends on CMDLINE_BOOL
+	default "console=ttyS0,9600 console=tty0 root=/dev/sda2"
+	help
+	  On some platforms, there is currently no way for the boot loader to
+	  pass arguments to the kernel. For these platforms, you can supply
+	  some command-line options at build time by entering them here.  In
+	  most cases you will need to specify the root device here.
+
 endmenu
 
 config ISA_DMA_API
 	bool
 	default y
 
-menu "General setup"
+menu "Bus Options"
 
 config ISA
 	bool
@@ -389,45 +416,12 @@ config PCI_DOMAINS
 	bool
 	default PCI
 
-source "fs/Kconfig.binfmt"
-
 source "drivers/pci/Kconfig"
 
-config HOTPLUG_CPU
-	bool "Support for hot-pluggable CPUs"
-	depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC)
-	select HOTPLUG
-	---help---
-	  Say Y here to be able to turn CPUs off and on.
-
-	  Say N if you are unsure.
-
 source "drivers/pcmcia/Kconfig"
 
 source "drivers/pci/hotplug/Kconfig"
 
-config PROC_DEVICETREE
-	bool "Support for Open Firmware device tree in /proc"
-	depends on !PPC_ISERIES
-	help
-	  This option adds a device-tree directory under /proc which contains
-	  an image of the device tree that the kernel copies from Open
-	  Firmware. If unsure, say Y here.
-
-config CMDLINE_BOOL
-	bool "Default bootloader kernel arguments"
-	depends on !PPC_ISERIES
-
-config CMDLINE
-	string "Initial kernel command string"
-	depends on CMDLINE_BOOL
-	default "console=ttyS0,9600 console=tty0 root=/dev/sda2"
-	help
-	  On some platforms, there is currently no way for the boot loader to
-	  pass arguments to the kernel. For these platforms, you can supply
-	  some command-line options at build time by entering them here.  In
-	  most cases you will need to specify the root device here.
-
 endmenu
 
 source "net/Kconfig"

+ 9 - 0
arch/ppc64/Makefile

@@ -55,6 +55,8 @@ LDFLAGS		:= -m elf64ppc
 LDFLAGS_vmlinux	:= -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD)
 CFLAGS		+= -msoft-float -pipe -mminimal-toc -mtraceback=none \
 		   -mcall-aixdesc
+# Temporary hack until we have migrated to asm-powerpc
+CPPFLAGS	+= -Iinclude3
 
 GCC_VERSION     := $(call cc-version)
 GCC_BROKEN_VEC	:= $(shell if [ $(GCC_VERSION) -lt 0400 ] ; then echo "y"; fi ;)
@@ -112,6 +114,7 @@ all: $(KBUILD_IMAGE)
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
+	$(Q)rm -rf include3
 
 prepare: include/asm-ppc64/offsets.h
 
@@ -121,6 +124,12 @@ arch/ppc64/kernel/asm-offsets.s: include/asm include/linux/version.h \
 include/asm-ppc64/offsets.h: arch/ppc64/kernel/asm-offsets.s
 	$(call filechk,gen-asm-offsets)
 
+# Temporary hack until we have migrated to asm-powerpc
+include/asm: include3/asm
+include3/asm:
+	$(Q)if [ ! -d include3 ]; then mkdir -p include3; fi;
+	$(Q)ln -fsn $(srctree)/include/asm-powerpc include3/asm
+
 define archhelp
   echo  '* zImage       - Compressed kernel image (arch/$(ARCH)/boot/zImage)'
   echo  '  zImage.initrd- Compressed kernel image with initrd attached,'

+ 2 - 2
arch/ppc64/boot/Makefile

@@ -22,8 +22,8 @@
 
 
 HOSTCC		:= gcc
-BOOTCFLAGS	:= $(HOSTCFLAGS) $(LINUXINCLUDE) -fno-builtin 
-BOOTAFLAGS	:= -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional
+BOOTCFLAGS	:= $(HOSTCFLAGS) -fno-builtin -nostdinc -isystem $(shell $(CROSS32CC) -print-file-name=include)
+BOOTAFLAGS	:= -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
 BOOTLFLAGS	:= -Ttext 0x00400000 -e _start -T $(srctree)/$(src)/zImage.lds
 OBJCOPYFLAGS    := contents,alloc,load,readonly,data
 

+ 2 - 2
arch/ppc64/boot/addnote.c

@@ -157,7 +157,7 @@ main(int ac, char **av)
 	PUT_32BE(ns, strlen(arch) + 1);
 	PUT_32BE(ns + 4, N_DESCR * 4);
 	PUT_32BE(ns + 8, 0x1275);
-	strcpy(&buf[ns + 12], arch);
+	strcpy((char *) &buf[ns + 12], arch);
 	ns += 12 + strlen(arch) + 1;
 	for (i = 0; i < N_DESCR; ++i, ns += 4)
 		PUT_32BE(ns, descr[i]);
@@ -172,7 +172,7 @@ main(int ac, char **av)
 	PUT_32BE(ns, strlen(rpaname) + 1);
 	PUT_32BE(ns + 4, sizeof(rpanote));
 	PUT_32BE(ns + 8, 0x12759999);
-	strcpy(&buf[ns + 12], rpaname);
+	strcpy((char *) &buf[ns + 12], rpaname);
 	ns += 12 + ROUNDUP(strlen(rpaname) + 1);
 	for (i = 0; i < N_RPA_DESCR; ++i, ns += 4)
 		PUT_32BE(ns, rpanote[i]);

+ 1 - 1
arch/ppc64/boot/crt0.S

@@ -9,7 +9,7 @@
  * NOTE: this code runs in 32 bit mode and is packaged as ELF32.
  */
 
-#include <asm/ppc_asm.h>
+#include "ppc_asm.h"
 
 	.text
 	.globl	_start

+ 1 - 1
arch/ppc64/boot/div64.S

@@ -13,7 +13,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
-#include <asm/ppc_asm.h>
+#include "ppc_asm.h"
 
 	.globl __div64_32
 __div64_32:

+ 149 - 0
arch/ppc64/boot/elf.h

@@ -0,0 +1,149 @@
+#ifndef _PPC_BOOT_ELF_H_
+#define _PPC_BOOT_ELF_H_
+
+/* 32-bit ELF base types. */
+typedef unsigned int Elf32_Addr;
+typedef unsigned short Elf32_Half;
+typedef unsigned int Elf32_Off;
+typedef signed int Elf32_Sword;
+typedef unsigned int Elf32_Word;
+
+/* 64-bit ELF base types. */
+typedef unsigned long long Elf64_Addr;
+typedef unsigned short Elf64_Half;
+typedef signed short Elf64_SHalf;
+typedef unsigned long long Elf64_Off;
+typedef signed int Elf64_Sword;
+typedef unsigned int Elf64_Word;
+typedef unsigned long long Elf64_Xword;
+typedef signed long long Elf64_Sxword;
+
+/* These constants are for the segment types stored in the image headers */
+#define PT_NULL    0
+#define PT_LOAD    1
+#define PT_DYNAMIC 2
+#define PT_INTERP  3
+#define PT_NOTE    4
+#define PT_SHLIB   5
+#define PT_PHDR    6
+#define PT_TLS     7		/* Thread local storage segment */
+#define PT_LOOS    0x60000000	/* OS-specific */
+#define PT_HIOS    0x6fffffff	/* OS-specific */
+#define PT_LOPROC  0x70000000
+#define PT_HIPROC  0x7fffffff
+#define PT_GNU_EH_FRAME		0x6474e550
+
+#define PT_GNU_STACK	(PT_LOOS + 0x474e551)
+
+/* These constants define the different elf file types */
+#define ET_NONE   0
+#define ET_REL    1
+#define ET_EXEC   2
+#define ET_DYN    3
+#define ET_CORE   4
+#define ET_LOPROC 0xff00
+#define ET_HIPROC 0xffff
+
+/* These constants define the various ELF target machines */
+#define EM_NONE  0
+#define EM_PPC	       20	/* PowerPC */
+#define EM_PPC64       21	/* PowerPC64 */
+
+#define EI_NIDENT	16
+
+typedef struct elf32_hdr {
+	unsigned char e_ident[EI_NIDENT];
+	Elf32_Half e_type;
+	Elf32_Half e_machine;
+	Elf32_Word e_version;
+	Elf32_Addr e_entry;	/* Entry point */
+	Elf32_Off e_phoff;
+	Elf32_Off e_shoff;
+	Elf32_Word e_flags;
+	Elf32_Half e_ehsize;
+	Elf32_Half e_phentsize;
+	Elf32_Half e_phnum;
+	Elf32_Half e_shentsize;
+	Elf32_Half e_shnum;
+	Elf32_Half e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct elf64_hdr {
+	unsigned char e_ident[16];	/* ELF "magic number" */
+	Elf64_Half e_type;
+	Elf64_Half e_machine;
+	Elf64_Word e_version;
+	Elf64_Addr e_entry;	/* Entry point virtual address */
+	Elf64_Off e_phoff;	/* Program header table file offset */
+	Elf64_Off e_shoff;	/* Section header table file offset */
+	Elf64_Word e_flags;
+	Elf64_Half e_ehsize;
+	Elf64_Half e_phentsize;
+	Elf64_Half e_phnum;
+	Elf64_Half e_shentsize;
+	Elf64_Half e_shnum;
+	Elf64_Half e_shstrndx;
+} Elf64_Ehdr;
+
+/* These constants define the permissions on sections in the program
+   header, p_flags. */
+#define PF_R		0x4
+#define PF_W		0x2
+#define PF_X		0x1
+
+typedef struct elf32_phdr {
+	Elf32_Word p_type;
+	Elf32_Off p_offset;
+	Elf32_Addr p_vaddr;
+	Elf32_Addr p_paddr;
+	Elf32_Word p_filesz;
+	Elf32_Word p_memsz;
+	Elf32_Word p_flags;
+	Elf32_Word p_align;
+} Elf32_Phdr;
+
+typedef struct elf64_phdr {
+	Elf64_Word p_type;
+	Elf64_Word p_flags;
+	Elf64_Off p_offset;	/* Segment file offset */
+	Elf64_Addr p_vaddr;	/* Segment virtual address */
+	Elf64_Addr p_paddr;	/* Segment physical address */
+	Elf64_Xword p_filesz;	/* Segment size in file */
+	Elf64_Xword p_memsz;	/* Segment size in memory */
+	Elf64_Xword p_align;	/* Segment alignment, file & memory */
+} Elf64_Phdr;
+
+#define	EI_MAG0		0	/* e_ident[] indexes */
+#define	EI_MAG1		1
+#define	EI_MAG2		2
+#define	EI_MAG3		3
+#define	EI_CLASS	4
+#define	EI_DATA		5
+#define	EI_VERSION	6
+#define	EI_OSABI	7
+#define	EI_PAD		8
+
+#define	ELFMAG0		0x7f	/* EI_MAG */
+#define	ELFMAG1		'E'
+#define	ELFMAG2		'L'
+#define	ELFMAG3		'F'
+#define	ELFMAG		"\177ELF"
+#define	SELFMAG		4
+
+#define	ELFCLASSNONE	0	/* EI_CLASS */
+#define	ELFCLASS32	1
+#define	ELFCLASS64	2
+#define	ELFCLASSNUM	3
+
+#define ELFDATANONE	0	/* e_ident[EI_DATA] */
+#define ELFDATA2LSB	1
+#define ELFDATA2MSB	2
+
+#define EV_NONE		0	/* e_version, EI_VERSION */
+#define EV_CURRENT	1
+#define EV_NUM		2
+
+#define ELFOSABI_NONE	0
+#define ELFOSABI_LINUX	3
+
+#endif				/* _PPC_BOOT_ELF_H_ */

+ 20 - 35
arch/ppc64/boot/main.c

@@ -8,36 +8,28 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
-#include "ppc32-types.h"
+#include <stdarg.h>
+#include <stddef.h>
+#include "elf.h"
+#include "page.h"
+#include "string.h"
+#include "stdio.h"
+#include "prom.h"
 #include "zlib.h"
-#include <linux/elf.h>
-#include <linux/string.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-
-extern void *finddevice(const char *);
-extern int getprop(void *, const char *, void *, int);
-extern void printf(const char *fmt, ...);
-extern int sprintf(char *buf, const char *fmt, ...);
-void gunzip(void *, int, unsigned char *, int *);
-void *claim(unsigned int, unsigned int, unsigned int);
-void flush_cache(void *, unsigned long);
-void pause(void);
-extern void exit(void);
-
-unsigned long strlen(const char *s);
-void *memmove(void *dest, const void *src, unsigned long n);
-void *memcpy(void *dest, const void *src, unsigned long n);
+
+static void gunzip(void *, int, unsigned char *, int *);
+extern void flush_cache(void *, unsigned long);
+
 
 /* Value picked to match that used by yaboot */
 #define PROG_START	0x01400000
 #define RAM_END		(256<<20) // Fixme: use OF */
 
-char *avail_ram;
-char *begin_avail, *end_avail;
-char *avail_high;
-unsigned int heap_use;
-unsigned int heap_max;
+static char *avail_ram;
+static char *begin_avail, *end_avail;
+static char *avail_high;
+static unsigned int heap_use;
+static unsigned int heap_max;
 
 extern char _start[];
 extern char _vmlinux_start[];
@@ -52,9 +44,9 @@ struct addr_range {
 	unsigned long size;
 	unsigned long memsize;
 };
-struct addr_range vmlinux = {0, 0, 0};
-struct addr_range vmlinuz = {0, 0, 0};
-struct addr_range initrd  = {0, 0, 0};
+static struct addr_range vmlinux = {0, 0, 0};
+static struct addr_range vmlinuz = {0, 0, 0};
+static struct addr_range initrd  = {0, 0, 0};
 
 static char scratch[128<<10];	/* 128kB of scratch space for gunzip */
 
@@ -64,13 +56,6 @@ typedef void (*kernel_entry_t)( unsigned long,
 				void *);
 
 
-int (*prom)(void *);
-
-void *chosen_handle;
-void *stdin;
-void *stdout;
-void *stderr;
-
 #undef DEBUG
 
 static unsigned long claim_base = PROG_START;
@@ -277,7 +262,7 @@ void zfree(void *x, void *addr, unsigned nb)
 
 #define DEFLATED	8
 
-void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp)
+static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp)
 {
 	z_stream s;
 	int r, i, flags;

+ 34 - 0
arch/ppc64/boot/page.h

@@ -0,0 +1,34 @@
+#ifndef _PPC_BOOT_PAGE_H
+#define _PPC_BOOT_PAGE_H
+/*
+ * Copyright (C) 2001 PPC64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifdef __ASSEMBLY__
+#define ASM_CONST(x) x
+#else
+#define __ASM_CONST(x) x##UL
+#define ASM_CONST(x) __ASM_CONST(x)
+#endif
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT	12
+#define PAGE_SIZE	(ASM_CONST(1) << PAGE_SHIFT)
+#define PAGE_MASK	(~(PAGE_SIZE-1))
+
+/* align addr on a size boundary - adjust address up/down if needed */
+#define _ALIGN_UP(addr,size)	(((addr)+((size)-1))&(~((size)-1)))
+#define _ALIGN_DOWN(addr,size)	((addr)&(~((size)-1)))
+
+/* align addr on a size boundary - adjust address up if needed */
+#define _ALIGN(addr,size)     _ALIGN_UP(addr,size)
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr)	_ALIGN(addr, PAGE_SIZE)
+
+#endif				/* _PPC_BOOT_PAGE_H */

+ 0 - 36
arch/ppc64/boot/ppc32-types.h

@@ -1,36 +0,0 @@
-#ifndef _PPC64_TYPES_H
-#define _PPC64_TYPES_H
-
-typedef __signed__ char __s8;
-typedef unsigned char __u8;
-
-typedef __signed__ short __s16;
-typedef unsigned short __u16;
-
-typedef __signed__ int __s32;
-typedef unsigned int __u32;
-
-typedef __signed__ long long __s64;
-typedef unsigned long long __u64;
-
-typedef signed char s8;
-typedef unsigned char u8;
-
-typedef signed short s16;
-typedef unsigned short u16;
-
-typedef signed int s32;
-typedef unsigned int u32;
-
-typedef signed long long s64;
-typedef unsigned long long u64;
-
-typedef struct {
-	__u32 u[4];
-} __attribute((aligned(16))) __vector128;
-
-#define BITS_PER_LONG 32
-
-typedef __vector128 vector128;
-
-#endif /* _PPC64_TYPES_H */

+ 62 - 0
arch/ppc64/boot/ppc_asm.h

@@ -0,0 +1,62 @@
+#ifndef _PPC64_PPC_ASM_H
+#define _PPC64_PPC_ASM_H
+/*
+ *
+ * Definitions used by various bits of low-level assembly code on PowerPC.
+ *
+ * Copyright (C) 1995-1999 Gary Thomas, Paul Mackerras, Cort Dougan.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+/* Condition Register Bit Fields */
+
+#define	cr0	0
+#define	cr1	1
+#define	cr2	2
+#define	cr3	3
+#define	cr4	4
+#define	cr5	5
+#define	cr6	6
+#define	cr7	7
+
+
+/* General Purpose Registers (GPRs) */
+
+#define	r0	0
+#define	r1	1
+#define	r2	2
+#define	r3	3
+#define	r4	4
+#define	r5	5
+#define	r6	6
+#define	r7	7
+#define	r8	8
+#define	r9	9
+#define	r10	10
+#define	r11	11
+#define	r12	12
+#define	r13	13
+#define	r14	14
+#define	r15	15
+#define	r16	16
+#define	r17	17
+#define	r18	18
+#define	r19	19
+#define	r20	20
+#define	r21	21
+#define	r22	22
+#define	r23	23
+#define	r24	24
+#define	r25	25
+#define	r26	26
+#define	r27	27
+#define	r28	28
+#define	r29	29
+#define	r30	30
+#define	r31	31
+
+#endif /* _PPC64_PPC_ASM_H */

+ 27 - 169
arch/ppc64/boot/prom.c

@@ -7,43 +7,19 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <stdarg.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-
-extern __u32 __div64_32(unsigned long long *dividend, __u32 divisor);
-
-/* The unnecessary pointer compare is there
- * to check for type safety (n must be 64bit)
- */
-# define do_div(n,base) ({				\
-	__u32 __base = (base);			\
-	__u32 __rem;					\
-	(void)(((typeof((n)) *)0) == ((unsigned long long *)0));	\
-	if (((n) >> 32) == 0) {			\
-		__rem = (__u32)(n) % __base;		\
-		(n) = (__u32)(n) / __base;		\
-	} else 						\
-		__rem = __div64_32(&(n), __base);	\
-	__rem;						\
- })
+#include <stddef.h>
+#include "string.h"
+#include "stdio.h"
+#include "prom.h"
 
 int (*prom)(void *);
 
 void *chosen_handle;
+
 void *stdin;
 void *stdout;
 void *stderr;
 
-void exit(void);
-void *finddevice(const char *name);
-int getprop(void *phandle, const char *name, void *buf, int buflen);
-void chrpboot(int a1, int a2, void *prom);	/* in main.c */
-
-int printf(char *fmt, ...);
-
-/* there is no convenient header to get this from...  -- paulus */
-extern unsigned long strlen(const char *);
 
 int
 write(void *handle, void *ptr, int nb)
@@ -210,107 +186,6 @@ fputs(char *str, void *f)
 	return write(f, str, n) == n? 0: -1;
 }
 
-int
-readchar(void)
-{
-	char ch;
-
-	for (;;) {
-		switch (read(stdin, &ch, 1)) {
-		case 1:
-			return ch;
-		case -1:
-			printf("read(stdin) returned -1\r\n");
-			return -1;
-		}
-	}
-}
-
-static char line[256];
-static char *lineptr;
-static int lineleft;
-
-int
-getchar(void)
-{
-	int c;
-
-	if (lineleft == 0) {
-		lineptr = line;
-		for (;;) {
-			c = readchar();
-			if (c == -1 || c == 4)
-				break;
-			if (c == '\r' || c == '\n') {
-				*lineptr++ = '\n';
-				putchar('\n');
-				break;
-			}
-			switch (c) {
-			case 0177:
-			case '\b':
-				if (lineptr > line) {
-					putchar('\b');
-					putchar(' ');
-					putchar('\b');
-					--lineptr;
-				}
-				break;
-			case 'U' & 0x1F:
-				while (lineptr > line) {
-					putchar('\b');
-					putchar(' ');
-					putchar('\b');
-					--lineptr;
-				}
-				break;
-			default:
-				if (lineptr >= &line[sizeof(line) - 1])
-					putchar('\a');
-				else {
-					putchar(c);
-					*lineptr++ = c;
-				}
-			}
-		}
-		lineleft = lineptr - line;
-		lineptr = line;
-	}
-	if (lineleft == 0)
-		return -1;
-	--lineleft;
-	return *lineptr++;
-}
-
-
-
-/* String functions lifted from lib/vsprintf.c and lib/ctype.c */
-unsigned char _ctype[] = {
-_C,_C,_C,_C,_C,_C,_C,_C,			/* 0-7 */
-_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C,		/* 8-15 */
-_C,_C,_C,_C,_C,_C,_C,_C,			/* 16-23 */
-_C,_C,_C,_C,_C,_C,_C,_C,			/* 24-31 */
-_S|_SP,_P,_P,_P,_P,_P,_P,_P,			/* 32-39 */
-_P,_P,_P,_P,_P,_P,_P,_P,			/* 40-47 */
-_D,_D,_D,_D,_D,_D,_D,_D,			/* 48-55 */
-_D,_D,_P,_P,_P,_P,_P,_P,			/* 56-63 */
-_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,	/* 64-71 */
-_U,_U,_U,_U,_U,_U,_U,_U,			/* 72-79 */
-_U,_U,_U,_U,_U,_U,_U,_U,			/* 80-87 */
-_U,_U,_U,_P,_P,_P,_P,_P,			/* 88-95 */
-_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,	/* 96-103 */
-_L,_L,_L,_L,_L,_L,_L,_L,			/* 104-111 */
-_L,_L,_L,_L,_L,_L,_L,_L,			/* 112-119 */
-_L,_L,_L,_P,_P,_P,_P,_C,			/* 120-127 */
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,		/* 128-143 */
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,		/* 144-159 */
-_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,   /* 160-175 */
-_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,       /* 176-191 */
-_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,       /* 192-207 */
-_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,       /* 208-223 */
-_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,       /* 224-239 */
-_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};      /* 240-255 */
-
 size_t strnlen(const char * s, size_t count)
 {
 	const char *sc;
@@ -320,44 +195,30 @@ size_t strnlen(const char * s, size_t count)
 	return sc - s;
 }
 
-unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
-{
-	unsigned long result = 0,value;
+extern unsigned int __div64_32(unsigned long long *dividend,
+			       unsigned int divisor);
 
-	if (!base) {
-		base = 10;
-		if (*cp == '0') {
-			base = 8;
-			cp++;
-			if ((*cp == 'x') && isxdigit(cp[1])) {
-				cp++;
-				base = 16;
-			}
-		}
-	}
-	while (isxdigit(*cp) &&
-	       (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
-		result = result*base + value;
-		cp++;
-	}
-	if (endp)
-		*endp = (char *)cp;
-	return result;
-}
-
-long simple_strtol(const char *cp,char **endp,unsigned int base)
-{
-	if(*cp=='-')
-		return -simple_strtoul(cp+1,endp,base);
-	return simple_strtoul(cp,endp,base);
-}
+/* The unnecessary pointer compare is there
+ * to check for type safety (n must be 64bit)
+ */
+# define do_div(n,base) ({						\
+	unsigned int __base = (base);					\
+	unsigned int __rem;						\
+	(void)(((typeof((n)) *)0) == ((unsigned long long *)0));	\
+	if (((n) >> 32) == 0) {						\
+		__rem = (unsigned int)(n) % __base;			\
+		(n) = (unsigned int)(n) / __base;			\
+	} else								\
+		__rem = __div64_32(&(n), __base);			\
+	__rem;								\
+ })
 
 static int skip_atoi(const char **s)
 {
-	int i=0;
+	int i, c;
 
-	while (isdigit(**s))
-		i = i*10 + *((*s)++) - '0';
+	for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s)
+		i = i*10 + c - '0';
 	return i;
 }
 
@@ -436,9 +297,6 @@ static char * number(char * str, unsigned long long num, int base, int size, int
 	return str;
 }
 
-/* Forward decl. needed for IP address printing stuff... */
-int sprintf(char * buf, const char *fmt, ...);
-
 int vsprintf(char *buf, const char *fmt, va_list args)
 {
 	int len;
@@ -477,7 +335,7 @@ int vsprintf(char *buf, const char *fmt, va_list args)
 		
 		/* get field width */
 		field_width = -1;
-		if (isdigit(*fmt))
+		if ('0' <= *fmt && *fmt <= '9')
 			field_width = skip_atoi(&fmt);
 		else if (*fmt == '*') {
 			++fmt;
@@ -493,7 +351,7 @@ int vsprintf(char *buf, const char *fmt, va_list args)
 		precision = -1;
 		if (*fmt == '.') {
 			++fmt;	
-			if (isdigit(*fmt))
+			if ('0' <= *fmt && *fmt <= '9')
 				precision = skip_atoi(&fmt);
 			else if (*fmt == '*') {
 				++fmt;
@@ -628,7 +486,7 @@ int sprintf(char * buf, const char *fmt, ...)
 static char sprint_buf[1024];
 
 int
-printf(char *fmt, ...)
+printf(const char *fmt, ...)
 {
 	va_list args;
 	int n;

+ 18 - 0
arch/ppc64/boot/prom.h

@@ -0,0 +1,18 @@
+#ifndef _PPC_BOOT_PROM_H_
+#define _PPC_BOOT_PROM_H_
+
+extern int (*prom) (void *);
+extern void *chosen_handle;
+
+extern void *stdin;
+extern void *stdout;
+extern void *stderr;
+
+extern int write(void *handle, void *ptr, int nb);
+extern int read(void *handle, void *ptr, int nb);
+extern void exit(void);
+extern void pause(void);
+extern void *finddevice(const char *);
+extern void *claim(unsigned long virt, unsigned long size, unsigned long align);
+extern int getprop(void *phandle, const char *name, void *buf, int buflen);
+#endif				/* _PPC_BOOT_PROM_H_ */

+ 16 - 0
arch/ppc64/boot/stdio.h

@@ -0,0 +1,16 @@
+#ifndef _PPC_BOOT_STDIO_H_
+#define _PPC_BOOT_STDIO_H_
+
+extern int printf(const char *fmt, ...);
+
+extern int sprintf(char *buf, const char *fmt, ...);
+
+extern int vsprintf(char *buf, const char *fmt, va_list args);
+
+extern int putc(int c, void *f);
+extern int putchar(int c);
+extern int getchar(void);
+
+extern int fputs(char *str, void *f);
+
+#endif				/* _PPC_BOOT_STDIO_H_ */

+ 1 - 1
arch/ppc64/boot/string.S

@@ -9,7 +9,7 @@
  * NOTE: this code runs in 32 bit mode and is packaged as ELF32.
  */
 
-#include <asm/ppc_asm.h>
+#include "ppc_asm.h"
 
 	.text
 	.globl	strcpy

+ 16 - 0
arch/ppc64/boot/string.h

@@ -0,0 +1,16 @@
+#ifndef _PPC_BOOT_STRING_H_
+#define _PPC_BOOT_STRING_H_
+
+extern char *strcpy(char *dest, const char *src);
+extern char *strncpy(char *dest, const char *src, size_t n);
+extern char *strcat(char *dest, const char *src);
+extern int strcmp(const char *s1, const char *s2);
+extern size_t strlen(const char *s);
+extern size_t strnlen(const char *s, size_t count);
+
+extern void *memset(void *s, int c, size_t n);
+extern void *memmove(void *dest, const void *src, unsigned long n);
+extern void *memcpy(void *dest, const void *src, unsigned long n);
+extern int memcmp(const void *s1, const void *s2, size_t n);
+
+#endif	/* _PPC_BOOT_STRING_H_ */

+ 1 - 1
arch/ppc64/boot/zlib.c

@@ -107,7 +107,7 @@ extern void *memcpy(void *, const void *, unsigned long);
 
 /* Diagnostic functions */
 #ifdef DEBUG_ZLIB
-#  include <stdio.h>
+#  include "stdio.h"
 #  ifndef verbose
 #    define verbose 0
 #  endif

+ 3 - 3
arch/ppc64/configs/g5_defconfig

@@ -103,10 +103,10 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_SECCOMP=y
 CONFIG_ISA_DMA_API=y

+ 3 - 4
arch/ppc64/configs/iSeries_defconfig

@@ -94,12 +94,11 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
 CONFIG_GENERIC_HARDIRQS=y
-CONFIG_MSCHUNKS=y
 CONFIG_LPARCFG=y
 CONFIG_SECCOMP=y
 CONFIG_ISA_DMA_API=y

+ 3 - 3
arch/ppc64/configs/maple_defconfig

@@ -103,10 +103,10 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_SECCOMP=y
 CONFIG_ISA_DMA_API=y

+ 3 - 3
arch/ppc64/configs/pSeries_defconfig

@@ -112,10 +112,10 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
 CONFIG_EEH=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_PPC_RTAS=y

+ 3 - 3
arch/ppc64/defconfig

@@ -114,10 +114,10 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
 CONFIG_EEH=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_PPC_RTAS=y

+ 11 - 26
arch/ppc64/kernel/LparData.c

@@ -51,6 +51,17 @@ struct HvReleaseData hvReleaseData = {
 		0xf4, 0x4b, 0xf6, 0xf4 },
 };
 
+/*
+ * The NACA.  The first dword of the naca is required by the iSeries
+ * hypervisor to point to itVpdAreas.  The hypervisor finds the NACA
+ * through the pointer in hvReleaseData.
+ */
+struct naca_struct naca = {
+	.xItVpdAreas = &itVpdAreas,
+	.xRamDisk = 0,
+	.xRamDiskSize = 0,
+};
+
 extern void system_reset_iSeries(void);
 extern void machine_check_iSeries(void);
 extern void data_access_iSeries(void);
@@ -214,29 +225,3 @@ struct ItVpdAreas itVpdAreas = {
 		0,0
 	}
 };
-
-struct msChunks msChunks;
-EXPORT_SYMBOL(msChunks);
-
-/* Depending on whether this is called from iSeries or pSeries setup
- * code, the location of the msChunks struct may or may not have
- * to be reloc'd, so we force the caller to do that for us by passing
- * in a pointer to the structure.
- */
-unsigned long
-msChunks_alloc(unsigned long mem, unsigned long num_chunks, unsigned long chunk_size)
-{
-	unsigned long offset = reloc_offset();
-	struct msChunks *_msChunks = PTRRELOC(&msChunks);
-
-	_msChunks->num_chunks  = num_chunks;
-	_msChunks->chunk_size  = chunk_size;
-	_msChunks->chunk_shift = __ilog2(chunk_size);
-	_msChunks->chunk_mask  = (1UL<<_msChunks->chunk_shift)-1;
-
-	mem = _ALIGN(mem, sizeof(msChunks_entry));
-	_msChunks->abs = (msChunks_entry *)(mem + offset);
-	mem += num_chunks * sizeof(msChunks_entry);
-
-	return mem;
-}

+ 5 - 2
arch/ppc64/kernel/Makefile

@@ -11,7 +11,7 @@ obj-y               :=	setup.o entry.o traps.o irq.o idle.o dma.o \
 			udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \
 			ptrace32.o signal32.o rtc.o init_task.o \
 			lmb.o cputable.o cpu_setup_power4.o idle_power4.o \
-			iommu.o sysfs.o vdso.o pmc.o
+			iommu.o sysfs.o vdso.o pmc.o firmware.o
 obj-y += vdso32/ vdso64/
 
 obj-$(CONFIG_PPC_OF) +=	of_device.o
@@ -50,7 +50,10 @@ obj-$(CONFIG_LPARCFG)		+= lparcfg.o
 obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
 obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
 obj-$(CONFIG_HVCS)		+= hvcserver.o
-obj-$(CONFIG_IBMVIO)		+= vio.o
+
+vio-obj-$(CONFIG_PPC_PSERIES)	+= pSeries_vio.o
+vio-obj-$(CONFIG_PPC_ISERIES)	+= iSeries_vio.o
+obj-$(CONFIG_IBMVIO)		+= vio.o $(vio-obj-y)
 obj-$(CONFIG_XICS)		+= xics.o
 obj-$(CONFIG_MPIC)		+= mpic.o
 

+ 2 - 1
arch/ppc64/kernel/asm-offsets.c

@@ -94,7 +94,8 @@ int main(void)
 	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
 #ifdef CONFIG_HUGETLB_PAGE
-	DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs));
+	DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
+	DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
 #endif /* CONFIG_HUGETLB_PAGE */
 	DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr));
         DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));

+ 1 - 39
arch/ppc64/kernel/cputable.c

@@ -5,7 +5,7 @@
  *
  *  Modifications for ppc64:
  *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
- * 
+ *
  *  This program is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU General Public License
  *  as published by the Free Software Foundation; either version
@@ -60,7 +60,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power3,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* Power3+ */
 		.pvr_mask		= 0xffff0000,
@@ -73,7 +72,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power3,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* Northstar */
 		.pvr_mask		= 0xffff0000,
@@ -86,7 +84,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power3,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* Pulsar */
 		.pvr_mask		= 0xffff0000,
@@ -99,7 +96,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power3,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* I-star */
 		.pvr_mask		= 0xffff0000,
@@ -112,7 +108,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power3,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* S-star */
 		.pvr_mask		= 0xffff0000,
@@ -125,7 +120,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power3,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* Power4 */
 		.pvr_mask		= 0xffff0000,
@@ -138,7 +132,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power4,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* Power4+ */
 		.pvr_mask		= 0xffff0000,
@@ -151,7 +144,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power4,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* PPC970 */
 		.pvr_mask		= 0xffff0000,
@@ -166,7 +158,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_ppc970,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* PPC970FX */
 		.pvr_mask		= 0xffff0000,
@@ -181,7 +172,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_ppc970,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* PPC970MP */
 		.pvr_mask		= 0xffff0000,
@@ -196,7 +186,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_ppc970,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* Power5 */
 		.pvr_mask		= 0xffff0000,
@@ -211,7 +200,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power4,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* Power5 */
 		.pvr_mask		= 0xffff0000,
@@ -226,7 +214,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power4,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* BE DD1.x */
 		.pvr_mask		= 0xffff0000,
@@ -241,7 +228,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_be,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* default match */
 		.pvr_mask		= 0x00000000,
@@ -254,29 +240,5 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power4,
-		.firmware_features	= COMMON_PPC64_FW,
 	}
 };
-
-firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
-	{FW_FEATURE_PFT,		"hcall-pft"},
-	{FW_FEATURE_TCE,		"hcall-tce"},
-	{FW_FEATURE_SPRG0,		"hcall-sprg0"},
-	{FW_FEATURE_DABR,		"hcall-dabr"},
-	{FW_FEATURE_COPY,		"hcall-copy"},
-	{FW_FEATURE_ASR,		"hcall-asr"},
-	{FW_FEATURE_DEBUG,		"hcall-debug"},
-	{FW_FEATURE_PERF,		"hcall-perf"},
-	{FW_FEATURE_DUMP,		"hcall-dump"},
-	{FW_FEATURE_INTERRUPT,		"hcall-interrupt"},
-	{FW_FEATURE_MIGRATE,		"hcall-migrate"},
-	{FW_FEATURE_PERFMON,		"hcall-perfmon"},
-	{FW_FEATURE_CRQ,		"hcall-crq"},
-	{FW_FEATURE_VIO,		"hcall-vio"},
-	{FW_FEATURE_RDMA,		"hcall-rdma"},
-	{FW_FEATURE_LLAN,		"hcall-lLAN"},
-	{FW_FEATURE_BULK,		"hcall-bulk"},
-	{FW_FEATURE_XDABR,		"hcall-xdabr"},
-	{FW_FEATURE_MULTITCE,		"hcall-multi-tce"},
-	{FW_FEATURE_SPLPAR,		"hcall-splpar"},
-};

+ 47 - 0
arch/ppc64/kernel/firmware.c

@@ -0,0 +1,47 @@
+/*
+ *  arch/ppc64/kernel/firmware.c
+ *
+ *  Extracted from cputable.c
+ *
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  Modifications for ppc64:
+ *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ *  Copyright (C) 2005 Stephen Rothwell, IBM Corporation
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+
+#include <asm/firmware.h>
+
+unsigned long ppc64_firmware_features;
+
+#ifdef CONFIG_PPC_PSERIES
+firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
+	{FW_FEATURE_PFT,		"hcall-pft"},
+	{FW_FEATURE_TCE,		"hcall-tce"},
+	{FW_FEATURE_SPRG0,		"hcall-sprg0"},
+	{FW_FEATURE_DABR,		"hcall-dabr"},
+	{FW_FEATURE_COPY,		"hcall-copy"},
+	{FW_FEATURE_ASR,		"hcall-asr"},
+	{FW_FEATURE_DEBUG,		"hcall-debug"},
+	{FW_FEATURE_PERF,		"hcall-perf"},
+	{FW_FEATURE_DUMP,		"hcall-dump"},
+	{FW_FEATURE_INTERRUPT,		"hcall-interrupt"},
+	{FW_FEATURE_MIGRATE,		"hcall-migrate"},
+	{FW_FEATURE_PERFMON,		"hcall-perfmon"},
+	{FW_FEATURE_CRQ,		"hcall-crq"},
+	{FW_FEATURE_VIO,		"hcall-vio"},
+	{FW_FEATURE_RDMA,		"hcall-rdma"},
+	{FW_FEATURE_LLAN,		"hcall-lLAN"},
+	{FW_FEATURE_BULK,		"hcall-bulk"},
+	{FW_FEATURE_XDABR,		"hcall-xdabr"},
+	{FW_FEATURE_MULTITCE,		"hcall-multi-tce"},
+	{FW_FEATURE_SPLPAR,		"hcall-splpar"},
+};
+#endif

+ 206 - 317
arch/ppc64/kernel/head.S

@@ -23,14 +23,11 @@
  *  2 of the License, or (at your option) any later version.
  */
 
-#define SECONDARY_PROCESSORS
-
 #include <linux/config.h>
 #include <linux/threads.h>
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
-#include <asm/naca.h>
 #include <asm/systemcfg.h>
 #include <asm/ppc_asm.h>
 #include <asm/offsets.h>
@@ -44,19 +41,14 @@
 #define DO_SOFT_DISABLE
 #endif
 
-/*
- * hcall interface to pSeries LPAR
- */
-#define H_SET_ASR	0x30
-
 /*
  * We layout physical memory as follows:
  * 0x0000 - 0x00ff : Secondary processor spin code
  * 0x0100 - 0x2fff : pSeries Interrupt prologs
- * 0x3000 - 0x3fff : Interrupt support
- * 0x4000 - 0x4fff : NACA
- * 0x6000	   : iSeries and common interrupt prologs
- * 0x9000 - 0x9fff : Initial segment table
+ * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs
+ * 0x6000 - 0x6fff : Initial (CPU0) segment table
+ * 0x7000 - 0x7fff : FWNMI data area
+ * 0x8000 -        : Early init and support code
  */
 
 /*
@@ -94,6 +86,7 @@ END_FTR_SECTION(0, 1)
 
 	/* Catch branch to 0 in real mode */
 	trap
+
 #ifdef CONFIG_PPC_ISERIES
 	/*
 	 * At offset 0x20, there is a pointer to iSeries LPAR data.
@@ -103,12 +96,12 @@ END_FTR_SECTION(0, 1)
 	.llong hvReleaseData-KERNELBASE
 
 	/*
-	 * At offset 0x28 and 0x30 are offsets to the msChunks
+	 * At offset 0x28 and 0x30 are offsets to the mschunks_map
 	 * array (used by the iSeries LPAR debugger to do translation
 	 * between physical addresses and absolute addresses) and
 	 * to the pidhash table (also used by the debugger)
 	 */
-	.llong msChunks-KERNELBASE
+	.llong mschunks_map-KERNELBASE
 	.llong 0	/* pidhash-KERNELBASE SFRXXX */
 
 	/* Offset 0x38 - Pointer to start of embedded System.map */
@@ -120,7 +113,7 @@ embedded_sysmap_start:
 embedded_sysmap_end:
 	.llong	0
 
-#else /* CONFIG_PPC_ISERIES */
+#endif /* CONFIG_PPC_ISERIES */
 
 	/* Secondary processors spin on this value until it goes to 1. */
 	.globl  __secondary_hold_spinloop
@@ -155,7 +148,7 @@ _GLOBAL(__secondary_hold)
 	std	r24,__secondary_hold_acknowledge@l(0)
 	sync
 
-	/* All secondary cpu's wait here until told to start. */
+	/* All secondary cpus wait here until told to start. */
 100:	ld	r4,__secondary_hold_spinloop@l(0)
 	cmpdi	0,r4,1
 	bne	100b
@@ -170,7 +163,6 @@ _GLOBAL(__secondary_hold)
 	BUG_OPCODE
 #endif
 #endif
-#endif
 
 /* This value is used to mark exception frames on the stack. */
 	.section ".toc","aw"
@@ -502,33 +494,37 @@ system_call_pSeries:
 	STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
 	STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
 
+	. = 0x3000
+
+/*** pSeries interrupt support ***/
+
 	/* moved from 0xf00 */
-	STD_EXCEPTION_PSERIES(0x3000, performance_monitor)
+	STD_EXCEPTION_PSERIES(., performance_monitor)
 
-	. = 0x3100
+	.align	7
 _GLOBAL(do_stab_bolted_pSeries)
 	mtcrf	0x80,r12
 	mfspr	r12,SPRG2
 	EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
 
-	
-	/* Space for the naca.  Architected to be located at real address
-	 * NACA_PHYS_ADDR.  Various tools rely on this location being fixed.
-	 * The first dword of the naca is required by iSeries LPAR to
-	 * point to itVpdAreas.  On pSeries native, this value is not used.
-	 */
-	. = NACA_PHYS_ADDR
-	.globl __end_interrupts
-__end_interrupts:
-#ifdef CONFIG_PPC_ISERIES
-	.globl naca
-naca:
-	.llong	itVpdAreas
-	.llong	0		/* xRamDisk */
-	.llong	0		/* xRamDiskSize */
+/*
+ * Vectors for the FWNMI option.  Share common code.
+ */
+      .globl system_reset_fwnmi
+system_reset_fwnmi:
+      HMT_MEDIUM
+      mtspr   SPRG1,r13               /* save r13 */
+      RUNLATCH_ON(r13)
+      EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
 
-	. = 0x6100
+      .globl machine_check_fwnmi
+machine_check_fwnmi:
+      HMT_MEDIUM
+      mtspr   SPRG1,r13               /* save r13 */
+      RUNLATCH_ON(r13)
+      EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
 
+#ifdef CONFIG_PPC_ISERIES
 /***  ISeries-LPAR interrupt handlers ***/
 
 	STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC)
@@ -626,9 +622,7 @@ system_reset_iSeries:
 
 	cmpwi	0,r23,0
 	beq	iSeries_secondary_smp_loop	/* Loop until told to go */
-#ifdef SECONDARY_PROCESSORS
 	bne	.__secondary_start		/* Loop until told to go */
-#endif
 iSeries_secondary_smp_loop:
 	/* Let the Hypervisor know we are alive */
 	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
@@ -671,51 +665,8 @@ hardware_interrupt_iSeries_masked:
 	ld	r13,PACA_EXGEN+EX_R13(r13)
 	rfid
 	b	.	/* prevent speculative execution */
-#endif
-
-/*
- * Data area reserved for FWNMI option.
- */
-	.= 0x7000
-	.globl fwnmi_data_area
-fwnmi_data_area:
-
-#ifdef CONFIG_PPC_ISERIES
-	. = LPARMAP_PHYS
-#include "lparmap.s"
 #endif /* CONFIG_PPC_ISERIES */
 
-/*
- * Vectors for the FWNMI option.  Share common code.
- */
-	. = 0x8000
-	.globl system_reset_fwnmi
-system_reset_fwnmi:
-	HMT_MEDIUM
-	mtspr	SPRG1,r13		/* save r13 */
-	RUNLATCH_ON(r13)
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
-	.globl machine_check_fwnmi
-machine_check_fwnmi:
-	HMT_MEDIUM
-	mtspr	SPRG1,r13		/* save r13 */
-	RUNLATCH_ON(r13)
-	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
-
-	/*
-	 * Space for the initial segment table
-	 * For LPAR, the hypervisor must fill in at least one entry
-	 * before we get control (with relocate on)
-	 */
-	. = STAB0_PHYS_ADDR
-	.globl __start_stab
-__start_stab:
-
-	. = (STAB0_PHYS_ADDR + PAGE_SIZE)
-	.globl __end_stab
-__end_stab:
-
-
 /*** Common interrupt handlers ***/
 
 	STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
@@ -752,8 +703,8 @@ machine_check_common:
  * R9 contains the saved CR, r13 points to the paca,
  * r10 contains the (bad) kernel stack pointer,
  * r11 and r12 contain the saved SRR0 and SRR1.
- * We switch to using the paca guard page as an emergency stack,
- * save the registers there, and call kernel_bad_stack(), which panics.
+ * We switch to using an emergency stack, save the registers there,
+ * and call kernel_bad_stack(), which panics.
  */
 bad_stack:
 	ld	r1,PACAEMERGSP(r13)
@@ -906,6 +857,62 @@ fp_unavailable_common:
 	bl	.kernel_fp_unavailable_exception
 	BUG_OPCODE
 
+/*
+ * load_up_fpu(unused, unused, tsk)
+ * Disable FP for the task which had the FPU previously,
+ * and save its floating-point registers in its thread_struct.
+ * Enables the FPU for use in the kernel on return.
+ * On SMP we know the fpu is free, since we give it up every
+ * switch (ie, no lazy save of the FP registers).
+ * On entry: r13 == 'current' && last_task_used_math != 'current'
+ */
+_STATIC(load_up_fpu)
+	mfmsr	r5			/* grab the current MSR */
+	ori	r5,r5,MSR_FP
+	mtmsrd	r5			/* enable use of fpu now */
+	isync
+/*
+ * For SMP, we don't do lazy FPU switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another.  Instead we call giveup_fpu in switch_to.
+ *
+ */
+#ifndef CONFIG_SMP
+	ld	r3,last_task_used_math@got(r2)
+	ld	r4,0(r3)
+	cmpdi	0,r4,0
+	beq	1f
+	/* Save FP state to last_task_used_math's THREAD struct */
+	addi	r4,r4,THREAD
+	SAVE_32FPRS(0, r4)
+	mffs	fr0
+	stfd	fr0,THREAD_FPSCR(r4)
+	/* Disable FP for last_task_used_math */
+	ld	r5,PT_REGS(r4)
+	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	li	r6,MSR_FP|MSR_FE0|MSR_FE1
+	andc	r4,r4,r6
+	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+	/* enable use of FP after return */
+	ld	r4,PACACURRENT(r13)
+	addi	r5,r4,THREAD		/* Get THREAD */
+	ld	r4,THREAD_FPEXC_MODE(r5)
+	ori	r12,r12,MSR_FP
+	or	r12,r12,r4
+	std	r12,_MSR(r1)
+	lfd	fr0,THREAD_FPSCR(r5)
+	mtfsf	0xff,fr0
+	REST_32FPRS(0, r5)
+#ifndef CONFIG_SMP
+	/* Update last_task_used_math to 'current' */
+	subi	r4,r5,THREAD		/* Back to 'current' */
+	std	r4,0(r3)
+#endif /* CONFIG_SMP */
+	/* restore registers and return */
+	b	fast_exception_return
+
 	.align	7
 	.globl altivec_unavailable_common
 altivec_unavailable_common:
@@ -921,6 +928,80 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	bl	.altivec_unavailable_exception
 	b	.ret_from_except
 
+#ifdef CONFIG_ALTIVEC
+/*
+ * load_up_altivec(unused, unused, tsk)
+ * Disable VMX for the task which had it previously,
+ * and save its vector registers in its thread_struct.
+ * Enables the VMX for use in the kernel on return.
+ * On SMP we know the VMX is free, since we give it up every
+ * switch (ie, no lazy save of the vector registers).
+ * On entry: r13 == 'current' && last_task_used_altivec != 'current'
+ */
+_STATIC(load_up_altivec)
+	mfmsr	r5			/* grab the current MSR */
+	oris	r5,r5,MSR_VEC@h
+	mtmsrd	r5			/* enable use of VMX now */
+	isync
+
+/*
+ * For SMP, we don't do lazy VMX switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another.  Instead we call giveup_altvec in switch_to.
+ * VRSAVE isn't dealt with here, that is done in the normal context
+ * switch code. Note that we could rely on vrsave value to eventually
+ * avoid saving all of the VREGs here...
+ */
+#ifndef CONFIG_SMP
+	ld	r3,last_task_used_altivec@got(r2)
+	ld	r4,0(r3)
+	cmpdi	0,r4,0
+	beq	1f
+	/* Save VMX state to last_task_used_altivec's THREAD struct */
+	addi	r4,r4,THREAD
+	SAVE_32VRS(0,r5,r4)
+	mfvscr	vr0
+	li	r10,THREAD_VSCR
+	stvx	vr0,r10,r4
+	/* Disable VMX for last_task_used_altivec */
+	ld	r5,PT_REGS(r4)
+	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	lis	r6,MSR_VEC@h
+	andc	r4,r4,r6
+	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+	/* Hack: if we get an altivec unavailable trap with VRSAVE
+	 * set to all zeros, we assume this is a broken application
+	 * that fails to set it properly, and thus we switch it to
+	 * all 1's
+	 */
+	mfspr	r4,SPRN_VRSAVE
+	cmpdi	0,r4,0
+	bne+	1f
+	li	r4,-1
+	mtspr	SPRN_VRSAVE,r4
+1:
+	/* enable use of VMX after return */
+	ld	r4,PACACURRENT(r13)
+	addi	r5,r4,THREAD		/* Get THREAD */
+	oris	r12,r12,MSR_VEC@h
+	std	r12,_MSR(r1)
+	li	r4,1
+	li	r10,THREAD_VSCR
+	stw	r4,THREAD_USED_VR(r5)
+	lvx	vr0,r10,r5
+	mtvscr	vr0
+	REST_32VRS(0,r4,r5)
+#ifndef CONFIG_SMP
+	/* Update last_task_used_math to 'current' */
+	subi	r4,r5,THREAD		/* Back to 'current' */
+	std	r4,0(r3)
+#endif /* CONFIG_SMP */
+	/* restore registers and return */
+	b	fast_exception_return
+#endif /* CONFIG_ALTIVEC */
+
 /*
  * Hash table stuff
  */
@@ -1167,6 +1248,42 @@ unrecov_slb:
 	bl	.unrecoverable_exception
 	b	1b
 
+/*
+ * Space for CPU0's segment table.
+ *
+ * On iSeries, the hypervisor must fill in at least one entry before
+ * we get control (with relocate on).  The address is give to the hv
+ * as a page number (see xLparMap in LparData.c), so this must be at a
+ * fixed address (the linker can't compute (u64)&initial_stab >>
+ * PAGE_SHIFT).
+ */
+	. = STAB0_PHYS_ADDR	/* 0x6000 */
+	.globl initial_stab
+initial_stab:
+	.space	4096
+
+/*
+ * Data area reserved for FWNMI option.
+ * This address (0x7000) is fixed by the RPA.
+ */
+	.= 0x7000
+	.globl fwnmi_data_area
+fwnmi_data_area:
+
+	/* iSeries does not use the FWNMI stuff, so it is safe to put
+	 * this here, even if we later allow kernels that will boot on
+	 * both pSeries and iSeries */
+#ifdef CONFIG_PPC_ISERIES
+        . = LPARMAP_PHYS
+#include "lparmap.s"
+/*
+ * This ".text" is here for old compilers that generate a trailing
+ * .note section when compiling .c files to .s
+ */
+	.text
+#endif /* CONFIG_PPC_ISERIES */
+
+        . = 0x8000
 
 /*
  * On pSeries, secondary processors spin in the following code.
@@ -1200,7 +1317,7 @@ _GLOBAL(pSeries_secondary_smp_init)
 	b	.kexec_wait		/* next kernel might do better	 */
 
 2:	mtspr	SPRG3,r13		/* Save vaddr of paca in SPRG3	 */
-	/* From now on, r24 is expected to be logica cpuid */
+	/* From now on, r24 is expected to be logical cpuid */
 	mr	r24,r5
 3:	HMT_LOW
 	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor should */
@@ -1213,9 +1330,7 @@ _GLOBAL(pSeries_secondary_smp_init)
 
 	cmpwi	0,r23,0
 #ifdef CONFIG_SMP
-#ifdef SECONDARY_PROCESSORS
 	bne	.__secondary_start
-#endif
 #endif
 	b 	3b			/* Loop until told to go	 */
 
@@ -1430,228 +1545,6 @@ _GLOBAL(copy_and_flush)
 .align 8
 copy_to_here:
 
-/*
- * load_up_fpu(unused, unused, tsk)
- * Disable FP for the task which had the FPU previously,
- * and save its floating-point registers in its thread_struct.
- * Enables the FPU for use in the kernel on return.
- * On SMP we know the fpu is free, since we give it up every
- * switch (ie, no lazy save of the FP registers).
- * On entry: r13 == 'current' && last_task_used_math != 'current'
- */
-_STATIC(load_up_fpu)
-	mfmsr	r5			/* grab the current MSR */
-	ori	r5,r5,MSR_FP
-	mtmsrd	r5			/* enable use of fpu now */
-	isync
-/*
- * For SMP, we don't do lazy FPU switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another.  Instead we call giveup_fpu in switch_to.
- *
- */
-#ifndef CONFIG_SMP
-	ld	r3,last_task_used_math@got(r2)
-	ld	r4,0(r3)
-	cmpdi	0,r4,0
-	beq	1f
-	/* Save FP state to last_task_used_math's THREAD struct */
-	addi	r4,r4,THREAD
-	SAVE_32FPRS(0, r4)
-	mffs	fr0
-	stfd	fr0,THREAD_FPSCR(r4)
-	/* Disable FP for last_task_used_math */
-	ld	r5,PT_REGS(r4)
-	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-	li	r6,MSR_FP|MSR_FE0|MSR_FE1
-	andc	r4,r4,r6
-	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
-	/* enable use of FP after return */
-	ld	r4,PACACURRENT(r13)
-	addi	r5,r4,THREAD		/* Get THREAD */
-	ld	r4,THREAD_FPEXC_MODE(r5)
-	ori	r12,r12,MSR_FP
-	or	r12,r12,r4
-	std	r12,_MSR(r1)
-	lfd	fr0,THREAD_FPSCR(r5)
-	mtfsf	0xff,fr0
-	REST_32FPRS(0, r5)
-#ifndef CONFIG_SMP
-	/* Update last_task_used_math to 'current' */
-	subi	r4,r5,THREAD		/* Back to 'current' */
-	std	r4,0(r3)
-#endif /* CONFIG_SMP */
-	/* restore registers and return */
-	b	fast_exception_return
-
-/*
- * disable_kernel_fp()
- * Disable the FPU.
- */
-_GLOBAL(disable_kernel_fp)
-	mfmsr	r3
-	rldicl	r0,r3,(63-MSR_FP_LG),1
-	rldicl	r3,r0,(MSR_FP_LG+1),0
-	mtmsrd	r3			/* disable use of fpu now */
-	isync
-	blr
-
-/*
- * giveup_fpu(tsk)
- * Disable FP for the task given as the argument,
- * and save the floating-point registers in its thread_struct.
- * Enables the FPU for use in the kernel on return.
- */
-_GLOBAL(giveup_fpu)
-	mfmsr	r5
-	ori	r5,r5,MSR_FP
-	mtmsrd	r5			/* enable use of fpu now */
-	isync
-	cmpdi	0,r3,0
-	beqlr-				/* if no previous owner, done */
-	addi	r3,r3,THREAD		/* want THREAD of task */
-	ld	r5,PT_REGS(r3)
-	cmpdi	0,r5,0
-	SAVE_32FPRS(0, r3)
-	mffs	fr0
-	stfd	fr0,THREAD_FPSCR(r3)
-	beq	1f
-	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-	li	r3,MSR_FP|MSR_FE0|MSR_FE1
-	andc	r4,r4,r3		/* disable FP for previous task */
-	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#ifndef CONFIG_SMP
-	li	r5,0
-	ld	r4,last_task_used_math@got(r2)
-	std	r5,0(r4)
-#endif /* CONFIG_SMP */
-	blr
-
-
-#ifdef CONFIG_ALTIVEC
-		
-/*
- * load_up_altivec(unused, unused, tsk)
- * Disable VMX for the task which had it previously,
- * and save its vector registers in its thread_struct.
- * Enables the VMX for use in the kernel on return.
- * On SMP we know the VMX is free, since we give it up every
- * switch (ie, no lazy save of the vector registers).
- * On entry: r13 == 'current' && last_task_used_altivec != 'current'
- */
-_STATIC(load_up_altivec)
-	mfmsr	r5			/* grab the current MSR */
-	oris	r5,r5,MSR_VEC@h
-	mtmsrd	r5			/* enable use of VMX now */
-	isync
-	
-/*
- * For SMP, we don't do lazy VMX switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another.  Instead we call giveup_altvec in switch_to.
- * VRSAVE isn't dealt with here, that is done in the normal context
- * switch code. Note that we could rely on vrsave value to eventually
- * avoid saving all of the VREGs here...
- */
-#ifndef CONFIG_SMP
-	ld	r3,last_task_used_altivec@got(r2)
-	ld	r4,0(r3)
-	cmpdi	0,r4,0
-	beq	1f
-	/* Save VMX state to last_task_used_altivec's THREAD struct */
-	addi	r4,r4,THREAD
-	SAVE_32VRS(0,r5,r4)
-	mfvscr	vr0
-	li	r10,THREAD_VSCR
-	stvx	vr0,r10,r4
-	/* Disable VMX for last_task_used_altivec */
-	ld	r5,PT_REGS(r4)
-	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-	lis	r6,MSR_VEC@h
-	andc	r4,r4,r6
-	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
-	/* Hack: if we get an altivec unavailable trap with VRSAVE
-	 * set to all zeros, we assume this is a broken application
-	 * that fails to set it properly, and thus we switch it to
-	 * all 1's
-	 */
-	mfspr	r4,SPRN_VRSAVE
-	cmpdi	0,r4,0
-	bne+	1f
-	li	r4,-1
-	mtspr	SPRN_VRSAVE,r4
-1:
-	/* enable use of VMX after return */
-	ld	r4,PACACURRENT(r13)
-	addi	r5,r4,THREAD		/* Get THREAD */
-	oris	r12,r12,MSR_VEC@h
-	std	r12,_MSR(r1)
-	li	r4,1
-	li	r10,THREAD_VSCR
-	stw	r4,THREAD_USED_VR(r5)
-	lvx	vr0,r10,r5
-	mtvscr	vr0
-	REST_32VRS(0,r4,r5)
-#ifndef CONFIG_SMP
-	/* Update last_task_used_math to 'current' */
-	subi	r4,r5,THREAD		/* Back to 'current' */
-	std	r4,0(r3)
-#endif /* CONFIG_SMP */
-	/* restore registers and return */
-	b	fast_exception_return
-
-/*
- * disable_kernel_altivec()
- * Disable the VMX.
- */
-_GLOBAL(disable_kernel_altivec)
-	mfmsr	r3
-	rldicl	r0,r3,(63-MSR_VEC_LG),1
-	rldicl	r3,r0,(MSR_VEC_LG+1),0
-	mtmsrd	r3			/* disable use of VMX now */
-	isync
-	blr
-
-/*
- * giveup_altivec(tsk)
- * Disable VMX for the task given as the argument,
- * and save the vector registers in its thread_struct.
- * Enables the VMX for use in the kernel on return.
- */
-_GLOBAL(giveup_altivec)
-	mfmsr	r5
-	oris	r5,r5,MSR_VEC@h
-	mtmsrd	r5			/* enable use of VMX now */
-	isync
-	cmpdi	0,r3,0
-	beqlr-				/* if no previous owner, done */
-	addi	r3,r3,THREAD		/* want THREAD of task */
-	ld	r5,PT_REGS(r3)
-	cmpdi	0,r5,0
-	SAVE_32VRS(0,r4,r3)
-	mfvscr	vr0
-	li	r4,THREAD_VSCR
-	stvx	vr0,r4,r3
-	beq	1f
-	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-	lis	r3,MSR_VEC@h
-	andc	r4,r4,r3		/* disable FP for previous task */
-	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#ifndef CONFIG_SMP
-	li	r5,0
-	ld	r4,last_task_used_altivec@got(r2)
-	std	r5,0(r4)
-#endif /* CONFIG_SMP */
-	blr
-
-#endif /* CONFIG_ALTIVEC */
-
 #ifdef CONFIG_SMP
 #ifdef CONFIG_PPC_PMAC
 /*
@@ -2002,9 +1895,6 @@ _STATIC(start_here_common)
 
 	bl .start_kernel
 
-_GLOBAL(__setup_cpu_power3)
-	blr
-
 _GLOBAL(hmt_init)
 #ifdef CONFIG_HMT
 	LOADADDR(r5, hmt_thread_data)
@@ -2095,20 +1985,19 @@ _GLOBAL(smp_release_cpus)
 
 /*
  * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
+ * This stuff goes at the beginning of the bss, which is page-aligned.
  */
-	.data
+	.section ".bss"
+
 	.align	12
-	.globl	sdata
-sdata:
+
 	.globl	empty_zero_page
 empty_zero_page:
-	.space	4096
+	.space	PAGE_SIZE
 
 	.globl	swapper_pg_dir
 swapper_pg_dir:
-	.space	4096
+	.space	PAGE_SIZE
 
 /*
  * This space gets a copy of optional info passed to us by the bootstrap

+ 4 - 1
arch/ppc64/kernel/iSeries_htab.c

@@ -41,6 +41,7 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
 				unsigned long prpn, unsigned long vflags,
 				unsigned long rflags)
 {
+	unsigned long arpn;
 	long slot;
 	hpte_t lhpte;
 	int secondary = 0;
@@ -70,8 +71,10 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
 		slot &= 0x7fffffffffffffff;
 	}
 
+	arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT;
+
 	lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
-	lhpte.r = (physRpn_to_absRpn(prpn) << HPTE_R_RPN_SHIFT) | rflags;
+	lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
 
 	/* Now fill in the actual HPTE */
 	HvCallHpt_addValidate(slot, secondary, &lhpte);

+ 26 - 4
arch/ppc64/kernel/iSeries_setup.c

@@ -39,6 +39,7 @@
 #include <asm/cputable.h>
 #include <asm/sections.h>
 #include <asm/iommu.h>
+#include <asm/firmware.h>
 
 #include <asm/time.h>
 #include "iSeries_setup.h"
@@ -314,6 +315,8 @@ static void __init iSeries_init_early(void)
 
 	DBG(" -> iSeries_init_early()\n");
 
+	ppc64_firmware_features = FW_FEATURE_ISERIES;
+
 	ppcdbg_initialize();
 
 #if defined(CONFIG_BLK_DEV_INITRD)
@@ -412,6 +415,22 @@ static void __init iSeries_init_early(void)
 	DBG(" <- iSeries_init_early()\n");
 }
 
+struct mschunks_map mschunks_map = {
+	/* XXX We don't use these, but Piranha might need them. */
+	.chunk_size  = MSCHUNKS_CHUNK_SIZE,
+	.chunk_shift = MSCHUNKS_CHUNK_SHIFT,
+	.chunk_mask  = MSCHUNKS_OFFSET_MASK,
+};
+EXPORT_SYMBOL(mschunks_map);
+
+void mschunks_alloc(unsigned long num_chunks)
+{
+	klimit = _ALIGN(klimit, sizeof(u32));
+	mschunks_map.mapping = (u32 *)klimit;
+	klimit += num_chunks * sizeof(u32);
+	mschunks_map.num_chunks = num_chunks;
+}
+
 /*
  * The iSeries may have very large memories ( > 128 GB ) and a partition
  * may get memory in "chunks" that may be anywhere in the 2**52 real
@@ -449,7 +468,7 @@ static void __init build_iSeries_Memory_Map(void)
 
 	/* Chunk size on iSeries is 256K bytes */
 	totalChunks = (u32)HvLpConfig_getMsChunks();
-	klimit = msChunks_alloc(klimit, totalChunks, 1UL << 18);
+	mschunks_alloc(totalChunks);
 
 	/*
 	 * Get absolute address of our load area
@@ -486,7 +505,7 @@ static void __init build_iSeries_Memory_Map(void)
 	printk("Load area size %dK\n", loadAreaSize * 256);
 
 	for (nextPhysChunk = 0; nextPhysChunk < loadAreaSize; ++nextPhysChunk)
-		msChunks.abs[nextPhysChunk] =
+		mschunks_map.mapping[nextPhysChunk] =
 			loadAreaFirstChunk + nextPhysChunk;
 
 	/*
@@ -495,7 +514,7 @@ static void __init build_iSeries_Memory_Map(void)
 	 */
 	hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress());
 	hptSizePages = (u32)HvCallHpt_getHptPages();
-	hptSizeChunks = hptSizePages >> (msChunks.chunk_shift - PAGE_SHIFT);
+	hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT);
 	hptLastChunk = hptFirstChunk + hptSizeChunks - 1;
 
 	printk("HPT absolute addr = %016lx, size = %dK\n",
@@ -552,7 +571,8 @@ static void __init build_iSeries_Memory_Map(void)
 				     (absChunk > hptLastChunk)) &&
 				    ((absChunk < loadAreaFirstChunk) ||
 				     (absChunk > loadAreaLastChunk))) {
-					msChunks.abs[nextPhysChunk] = absChunk;
+					mschunks_map.mapping[nextPhysChunk] =
+						absChunk;
 					++nextPhysChunk;
 				}
 			}
@@ -944,6 +964,8 @@ void __init iSeries_early_setup(void)
 	ppc_md.calibrate_decr = iSeries_calibrate_decr;
 	ppc_md.progress = iSeries_progress;
 
+	/* XXX Implement enable_pmcs for iSeries */
+
 	if (get_paca()->lppaca.shared_proc) {
 		ppc_md.idle_loop = iseries_shared_idle;
 		printk(KERN_INFO "Using shared processor idle loop\n");

+ 155 - 0
arch/ppc64/kernel/iSeries_vio.c

@@ -0,0 +1,155 @@
+/*
+ * IBM PowerPC iSeries Virtual I/O Infrastructure Support.
+ *
+ *    Copyright (c) 2005 Stephen Rothwell, IBM Corp.
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/init.h>
+
+#include <asm/vio.h>
+#include <asm/iommu.h>
+#include <asm/abs_addr.h>
+#include <asm/page.h>
+#include <asm/iSeries/vio.h>
+#include <asm/iSeries/HvTypes.h>
+#include <asm/iSeries/HvLpConfig.h>
+#include <asm/iSeries/HvCallXm.h>
+
+struct device *iSeries_vio_dev = &vio_bus_device.dev;
+EXPORT_SYMBOL(iSeries_vio_dev);
+
+static struct iommu_table veth_iommu_table;
+static struct iommu_table vio_iommu_table;
+
+static void __init iommu_vio_init(void)
+{
+	struct iommu_table *t;
+	struct iommu_table_cb cb;
+	unsigned long cbp;
+	unsigned long itc_entries;
+
+	cb.itc_busno = 255;    /* Bus 255 is the virtual bus */
+	cb.itc_virtbus = 0xff; /* Ask for virtual bus */
+
+	cbp = virt_to_abs(&cb);
+	HvCallXm_getTceTableParms(cbp);
+
+	itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
+	veth_iommu_table.it_size        = itc_entries / 2;
+	veth_iommu_table.it_busno       = cb.itc_busno;
+	veth_iommu_table.it_offset      = cb.itc_offset;
+	veth_iommu_table.it_index       = cb.itc_index;
+	veth_iommu_table.it_type        = TCE_VB;
+	veth_iommu_table.it_blocksize	= 1;
+
+	t = iommu_init_table(&veth_iommu_table);
+
+	if (!t)
+		printk("Virtual Bus VETH TCE table failed.\n");
+
+	vio_iommu_table.it_size         = itc_entries - veth_iommu_table.it_size;
+	vio_iommu_table.it_busno        = cb.itc_busno;
+	vio_iommu_table.it_offset       = cb.itc_offset +
+					  veth_iommu_table.it_size;
+	vio_iommu_table.it_index        = cb.itc_index;
+	vio_iommu_table.it_type         = TCE_VB;
+	vio_iommu_table.it_blocksize	= 1;
+
+	t = iommu_init_table(&vio_iommu_table);
+
+	if (!t)
+		printk("Virtual Bus VIO TCE table failed.\n");
+}
+
+/**
+ * vio_register_device_iseries: - Register a new iSeries vio device.
+ * @voidev:	The device to register.
+ */
+static struct vio_dev *__init vio_register_device_iseries(char *type,
+		uint32_t unit_num)
+{
+	struct vio_dev *viodev;
+
+	/* allocate a vio_dev for this device */
+	viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
+	if (!viodev)
+		return NULL;
+	memset(viodev, 0, sizeof(struct vio_dev));
+
+	snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num);
+
+	viodev->name = viodev->dev.bus_id;
+	viodev->type = type;
+	viodev->unit_address = unit_num;
+	viodev->iommu_table = &vio_iommu_table;
+	if (vio_register_device(viodev) == NULL) {
+		kfree(viodev);
+		return NULL;
+	}
+	return viodev;
+}
+
+void __init probe_bus_iseries(void)
+{
+	HvLpIndexMap vlan_map;
+	struct vio_dev *viodev;
+	int i;
+
+	/* there is only one of each of these */
+	vio_register_device_iseries("viocons", 0);
+	vio_register_device_iseries("vscsi", 0);
+
+	vlan_map = HvLpConfig_getVirtualLanIndexMap();
+	for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
+		if ((vlan_map & (0x8000 >> i)) == 0)
+			continue;
+		viodev = vio_register_device_iseries("vlan", i);
+		/* veth is special and has it own iommu_table */
+		viodev->iommu_table = &veth_iommu_table;
+	}
+	for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++)
+		vio_register_device_iseries("viodasd", i);
+	for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++)
+		vio_register_device_iseries("viocd", i);
+	for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++)
+		vio_register_device_iseries("viotape", i);
+}
+
+/**
+ * vio_match_device_iseries: - Tell if a iSeries VIO device matches a
+ *	vio_device_id
+ */
+static int vio_match_device_iseries(const struct vio_device_id *id,
+		const struct vio_dev *dev)
+{
+	return strncmp(dev->type, id->type, strlen(id->type)) == 0;
+}
+
+static struct vio_bus_ops vio_bus_ops_iseries = {
+	.match = vio_match_device_iseries,
+};
+
+/**
+ * vio_bus_init_iseries: - Initialize the iSeries virtual IO bus
+ */
+static int __init vio_bus_init_iseries(void)
+{
+	int err;
+
+	err = vio_bus_init(&vio_bus_ops_iseries);
+	if (err == 0) {
+		iommu_vio_init();
+		vio_bus_device.iommu_table = &vio_iommu_table;
+		iSeries_vio_dev = &vio_bus_device.dev;
+		probe_bus_iseries();
+	}
+	return err;
+}
+
+__initcall(vio_bus_init_iseries);

+ 39 - 112
arch/ppc64/kernel/lmb.c

@@ -28,33 +28,28 @@ void lmb_dump_all(void)
 {
 #ifdef DEBUG
 	unsigned long i;
-	struct lmb *_lmb  = &lmb;
 
 	udbg_printf("lmb_dump_all:\n");
 	udbg_printf("    memory.cnt		  = 0x%lx\n",
-		    _lmb->memory.cnt);
+		    lmb.memory.cnt);
 	udbg_printf("    memory.size		  = 0x%lx\n",
-		    _lmb->memory.size);
-	for (i=0; i < _lmb->memory.cnt ;i++) {
+		    lmb.memory.size);
+	for (i=0; i < lmb.memory.cnt ;i++) {
 		udbg_printf("    memory.region[0x%x].base       = 0x%lx\n",
-			    i, _lmb->memory.region[i].base);
-		udbg_printf("		      .physbase = 0x%lx\n",
-			    _lmb->memory.region[i].physbase);
+			    i, lmb.memory.region[i].base);
 		udbg_printf("		      .size     = 0x%lx\n",
-			    _lmb->memory.region[i].size);
+			    lmb.memory.region[i].size);
 	}
 
 	udbg_printf("\n    reserved.cnt	  = 0x%lx\n",
-		    _lmb->reserved.cnt);
+		    lmb.reserved.cnt);
 	udbg_printf("    reserved.size	  = 0x%lx\n",
-		    _lmb->reserved.size);
-	for (i=0; i < _lmb->reserved.cnt ;i++) {
+		    lmb.reserved.size);
+	for (i=0; i < lmb.reserved.cnt ;i++) {
 		udbg_printf("    reserved.region[0x%x].base       = 0x%lx\n",
-			    i, _lmb->reserved.region[i].base);
-		udbg_printf("		      .physbase = 0x%lx\n",
-			    _lmb->reserved.region[i].physbase);
+			    i, lmb.reserved.region[i].base);
 		udbg_printf("		      .size     = 0x%lx\n",
-			    _lmb->reserved.region[i].size);
+			    lmb.reserved.region[i].size);
 	}
 #endif /* DEBUG */
 }
@@ -98,7 +93,6 @@ lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2)
 	rgn->region[r1].size += rgn->region[r2].size;
 	for (i=r2; i < rgn->cnt-1; i++) {
 		rgn->region[i].base = rgn->region[i+1].base;
-		rgn->region[i].physbase = rgn->region[i+1].physbase;
 		rgn->region[i].size = rgn->region[i+1].size;
 	}
 	rgn->cnt--;
@@ -108,49 +102,29 @@ lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2)
 void __init
 lmb_init(void)
 {
-	struct lmb *_lmb = &lmb;
-
 	/* Create a dummy zero size LMB which will get coalesced away later.
 	 * This simplifies the lmb_add() code below...
 	 */
-	_lmb->memory.region[0].base = 0;
-	_lmb->memory.region[0].size = 0;
-	_lmb->memory.cnt = 1;
+	lmb.memory.region[0].base = 0;
+	lmb.memory.region[0].size = 0;
+	lmb.memory.cnt = 1;
 
 	/* Ditto. */
-	_lmb->reserved.region[0].base = 0;
-	_lmb->reserved.region[0].size = 0;
-	_lmb->reserved.cnt = 1;
+	lmb.reserved.region[0].base = 0;
+	lmb.reserved.region[0].size = 0;
+	lmb.reserved.cnt = 1;
 }
 
 /* This routine called with relocation disabled. */
 void __init
 lmb_analyze(void)
 {
-	unsigned long i;
-	unsigned long mem_size = 0;
-	unsigned long size_mask = 0;
-	struct lmb *_lmb = &lmb;
-#ifdef CONFIG_MSCHUNKS
-	unsigned long physbase = 0;
-#endif
-
-	for (i=0; i < _lmb->memory.cnt; i++) {
-		unsigned long lmb_size;
-
-		lmb_size = _lmb->memory.region[i].size;
-
-#ifdef CONFIG_MSCHUNKS
-		_lmb->memory.region[i].physbase = physbase;
-		physbase += lmb_size;
-#else
-		_lmb->memory.region[i].physbase = _lmb->memory.region[i].base;
-#endif
-		mem_size += lmb_size;
-		size_mask |= lmb_size;
-	}
+	int i;
+
+	lmb.memory.size = 0;
 
-	_lmb->memory.size = mem_size;
+	for (i = 0; i < lmb.memory.cnt; i++)
+		lmb.memory.size += lmb.memory.region[i].size;
 }
 
 /* This routine called with relocation disabled. */
@@ -168,7 +142,6 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size)
 		adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize);
 		if ( adjacent > 0 ) {
 			rgn->region[i].base -= size;
-			rgn->region[i].physbase -= size;
 			rgn->region[i].size += size;
 			coalesced++;
 			break;
@@ -195,11 +168,9 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size)
 	for (i=rgn->cnt-1; i >= 0; i--) {
 		if (base < rgn->region[i].base) {
 			rgn->region[i+1].base = rgn->region[i].base;
-			rgn->region[i+1].physbase = rgn->region[i].physbase;
 			rgn->region[i+1].size = rgn->region[i].size;
 		}  else {
 			rgn->region[i+1].base = base;
-			rgn->region[i+1].physbase = lmb_abs_to_phys(base);
 			rgn->region[i+1].size = size;
 			break;
 		}
@@ -213,12 +184,11 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size)
 long __init
 lmb_add(unsigned long base, unsigned long size)
 {
-	struct lmb *_lmb = &lmb;
-	struct lmb_region *_rgn = &(_lmb->memory);
+	struct lmb_region *_rgn = &(lmb.memory);
 
 	/* On pSeries LPAR systems, the first LMB is our RMO region. */
 	if ( base == 0 )
-		_lmb->rmo_size = size;
+		lmb.rmo_size = size;
 
 	return lmb_add_region(_rgn, base, size);
 
@@ -227,8 +197,7 @@ lmb_add(unsigned long base, unsigned long size)
 long __init
 lmb_reserve(unsigned long base, unsigned long size)
 {
-	struct lmb *_lmb = &lmb;
-	struct lmb_region *_rgn = &(_lmb->reserved);
+	struct lmb_region *_rgn = &(lmb.reserved);
 
 	return lmb_add_region(_rgn, base, size);
 }
@@ -260,13 +229,10 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr)
 {
 	long i, j;
 	unsigned long base = 0;
-	struct lmb *_lmb = &lmb;
-	struct lmb_region *_mem = &(_lmb->memory);
-	struct lmb_region *_rsv = &(_lmb->reserved);
 
-	for (i=_mem->cnt-1; i >= 0; i--) {
-		unsigned long lmbbase = _mem->region[i].base;
-		unsigned long lmbsize = _mem->region[i].size;
+	for (i=lmb.memory.cnt-1; i >= 0; i--) {
+		unsigned long lmbbase = lmb.memory.region[i].base;
+		unsigned long lmbsize = lmb.memory.region[i].size;
 
 		if ( max_addr == LMB_ALLOC_ANYWHERE )
 			base = _ALIGN_DOWN(lmbbase+lmbsize-size, align);
@@ -276,8 +242,8 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr)
 			continue;
 
 		while ( (lmbbase <= base) &&
-			((j = lmb_overlaps_region(_rsv,base,size)) >= 0) ) {
-			base = _ALIGN_DOWN(_rsv->region[j].base-size, align);
+			((j = lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) {
+			base = _ALIGN_DOWN(lmb.reserved.region[j].base-size, align);
 		}
 
 		if ( (base != 0) && (lmbbase <= base) )
@@ -287,62 +253,24 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr)
 	if ( i < 0 )
 		return 0;
 
-	lmb_add_region(_rsv, base, size);
+	lmb_add_region(&lmb.reserved, base, size);
 
 	return base;
 }
 
+/* You must call lmb_analyze() before this. */
 unsigned long __init
 lmb_phys_mem_size(void)
 {
-	struct lmb *_lmb = &lmb;
-#ifdef CONFIG_MSCHUNKS
-	return _lmb->memory.size;
-#else
-	struct lmb_region *_mem = &(_lmb->memory);
-	unsigned long total = 0;
-	int i;
-
-	/* add all physical memory to the bootmem map */
-	for (i=0; i < _mem->cnt; i++)
-		total += _mem->region[i].size;
-	return total;
-#endif /* CONFIG_MSCHUNKS */
+	return lmb.memory.size;
 }
 
 unsigned long __init
 lmb_end_of_DRAM(void)
 {
-	struct lmb *_lmb = &lmb;
-	struct lmb_region *_mem = &(_lmb->memory);
-	int idx = _mem->cnt - 1;
-
-#ifdef CONFIG_MSCHUNKS
-	return (_mem->region[idx].physbase + _mem->region[idx].size);
-#else
-	return (_mem->region[idx].base + _mem->region[idx].size);
-#endif /* CONFIG_MSCHUNKS */
-
-	return 0;
-}
-
-unsigned long __init
-lmb_abs_to_phys(unsigned long aa)
-{
-	unsigned long i, pa = aa;
-	struct lmb *_lmb = &lmb;
-	struct lmb_region *_mem = &(_lmb->memory);
-
-	for (i=0; i < _mem->cnt; i++) {
-		unsigned long lmbbase = _mem->region[i].base;
-		unsigned long lmbsize = _mem->region[i].size;
-		if ( lmb_addrs_overlap(aa,1,lmbbase,lmbsize) ) {
-			pa = _mem->region[i].physbase + (aa - lmbbase);
-			break;
-		}
-	}
+	int idx = lmb.memory.cnt - 1;
 
-	return pa;
+	return (lmb.memory.region[idx].base + lmb.memory.region[idx].size);
 }
 
 /*
@@ -353,20 +281,19 @@ void __init lmb_enforce_memory_limit(void)
 {
 	extern unsigned long memory_limit;
 	unsigned long i, limit;
-	struct lmb_region *mem = &(lmb.memory);
 
 	if (! memory_limit)
 		return;
 
 	limit = memory_limit;
-	for (i = 0; i < mem->cnt; i++) {
-		if (limit > mem->region[i].size) {
-			limit -= mem->region[i].size;
+	for (i = 0; i < lmb.memory.cnt; i++) {
+		if (limit > lmb.memory.region[i].size) {
+			limit -= lmb.memory.region[i].size;
 			continue;
 		}
 
-		mem->region[i].size = limit;
-		mem->cnt = i + 1;
+		lmb.memory.region[i].size = limit;
+		lmb.memory.cnt = i + 1;
 		break;
 	}
 }

+ 4 - 3
arch/ppc64/kernel/lparcfg.c

@@ -29,7 +29,7 @@
 #include <asm/iSeries/HvLpConfig.h>
 #include <asm/lppaca.h>
 #include <asm/hvcall.h>
-#include <asm/cputable.h>
+#include <asm/firmware.h>
 #include <asm/rtas.h>
 #include <asm/system.h>
 #include <asm/time.h>
@@ -273,6 +273,7 @@ static void parse_system_parameter_string(struct seq_file *m)
 		if (!workbuffer) {
 			printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
 			       __FILE__, __FUNCTION__, __LINE__);
+			kfree(local_buffer);			
 			return;
 		}
 #ifdef LPARCFG_DEBUG
@@ -377,7 +378,7 @@ static int lparcfg_data(struct seq_file *m, void *v)
 
 	partition_active_processors = lparcfg_count_active_processors();
 
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
 		unsigned long h_entitled, h_unallocated;
 		unsigned long h_aggregation, h_resource;
 		unsigned long pool_idle_time, pool_procs;
@@ -571,7 +572,7 @@ int __init lparcfg_init(void)
 	mode_t mode = S_IRUSR;
 
 	/* Allow writing if we have FW_FEATURE_SPLPAR */
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
 		lparcfg_fops.write = lparcfg_write;
 		mode |= S_IWUSR;
 	}

+ 98 - 0
arch/ppc64/kernel/misc.S

@@ -680,6 +680,104 @@ _GLOBAL(kernel_thread)
 	ld	r30,-16(r1)
 	blr
 
+/*
+ * disable_kernel_fp()
+ * Disable the FPU.
+ */
+_GLOBAL(disable_kernel_fp)
+	mfmsr	r3
+	rldicl	r0,r3,(63-MSR_FP_LG),1
+	rldicl	r3,r0,(MSR_FP_LG+1),0
+	mtmsrd	r3			/* disable use of fpu now */
+	isync
+	blr
+
+/*
+ * giveup_fpu(tsk)
+ * Disable FP for the task given as the argument,
+ * and save the floating-point registers in its thread_struct.
+ * Enables the FPU for use in the kernel on return.
+ */
+_GLOBAL(giveup_fpu)
+	mfmsr	r5
+	ori	r5,r5,MSR_FP
+	mtmsrd	r5			/* enable use of fpu now */
+	isync
+	cmpdi	0,r3,0
+	beqlr-				/* if no previous owner, done */
+	addi	r3,r3,THREAD		/* want THREAD of task */
+	ld	r5,PT_REGS(r3)
+	cmpdi	0,r5,0
+	SAVE_32FPRS(0, r3)
+	mffs	fr0
+	stfd	fr0,THREAD_FPSCR(r3)
+	beq	1f
+	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	li	r3,MSR_FP|MSR_FE0|MSR_FE1
+	andc	r4,r4,r3		/* disable FP for previous task */
+	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+	li	r5,0
+	ld	r4,last_task_used_math@got(r2)
+	std	r5,0(r4)
+#endif /* CONFIG_SMP */
+	blr
+
+#ifdef CONFIG_ALTIVEC
+
+#if 0 /* this has no callers for now */
+/*
+ * disable_kernel_altivec()
+ * Disable the VMX.
+ */
+_GLOBAL(disable_kernel_altivec)
+	mfmsr	r3
+	rldicl	r0,r3,(63-MSR_VEC_LG),1
+	rldicl	r3,r0,(MSR_VEC_LG+1),0
+	mtmsrd	r3			/* disable use of VMX now */
+	isync
+	blr
+#endif /* 0 */
+
+/*
+ * giveup_altivec(tsk)
+ * Disable VMX for the task given as the argument,
+ * and save the vector registers in its thread_struct.
+ * Enables the VMX for use in the kernel on return.
+ */
+_GLOBAL(giveup_altivec)
+	mfmsr	r5
+	oris	r5,r5,MSR_VEC@h
+	mtmsrd	r5			/* enable use of VMX now */
+	isync
+	cmpdi	0,r3,0
+	beqlr-				/* if no previous owner, done */
+	addi	r3,r3,THREAD		/* want THREAD of task */
+	ld	r5,PT_REGS(r3)
+	cmpdi	0,r5,0
+	SAVE_32VRS(0,r4,r3)
+	mfvscr	vr0
+	li	r4,THREAD_VSCR
+	stvx	vr0,r4,r3
+	beq	1f
+	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	lis	r3,MSR_VEC@h
+	andc	r4,r4,r3		/* disable FP for previous task */
+	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+	li	r5,0
+	ld	r4,last_task_used_altivec@got(r2)
+	std	r5,0(r4)
+#endif /* CONFIG_SMP */
+	blr
+
+#endif /* CONFIG_ALTIVEC */
+
+_GLOBAL(__setup_cpu_power3)
+	blr
+
 /* kexec_wait(phys_cpu)
  *
  * wait for the flag to change, indicating this kernel is going away but

+ 0 - 2
arch/ppc64/kernel/of_device.c

@@ -236,7 +236,6 @@ void of_device_unregister(struct of_device *ofdev)
 struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id)
 {
 	struct of_device *dev;
-	u32 *reg;
 
 	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
 	if (!dev)
@@ -250,7 +249,6 @@ struct of_device* of_platform_device_create(struct device_node *np, const char *
 	dev->dev.bus = &of_platform_bus_type;
 	dev->dev.release = of_release_dev;
 
-	reg = (u32 *)get_property(np, "reg", NULL);
 	strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE);
 
 	if (of_device_register(dev) != 0) {

+ 2 - 1
arch/ppc64/kernel/pSeries_iommu.c

@@ -45,6 +45,7 @@
 #include <asm/plpar_wrappers.h>
 #include <asm/pSeries_reconfig.h>
 #include <asm/systemcfg.h>
+#include <asm/firmware.h>
 #include "pci.h"
 
 #define DBG(fmt...)
@@ -546,7 +547,7 @@ void iommu_init_early_pSeries(void)
 	}
 
 	if (systemcfg->platform & PLATFORM_LPAR) {
-		if (cur_cpu_spec->firmware_features & FW_FEATURE_MULTITCE) {
+		if (firmware_has_feature(FW_FEATURE_MULTITCE)) {
 			ppc_md.tce_build = tce_buildmulti_pSeriesLP;
 			ppc_md.tce_free	 = tce_freemulti_pSeriesLP;
 		} else {

+ 1 - 3
arch/ppc64/kernel/pSeries_lpar.c

@@ -52,7 +52,6 @@ EXPORT_SYMBOL(plpar_hcall_4out);
 EXPORT_SYMBOL(plpar_hcall_norets);
 EXPORT_SYMBOL(plpar_hcall_8arg_2ret);
 
-extern void fw_feature_init(void);
 extern void pSeries_find_serial_port(void);
 
 
@@ -279,7 +278,6 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 			      unsigned long va, unsigned long prpn,
 			      unsigned long vflags, unsigned long rflags)
 {
-	unsigned long arpn = physRpn_to_absRpn(prpn);
 	unsigned long lpar_rc;
 	unsigned long flags;
 	unsigned long slot;
@@ -290,7 +288,7 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	if (vflags & HPTE_V_LARGE)
 		hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT);
 
-	hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
+	hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
 
 	/* Now fill in the actual HPTE */
 	/* Set CEC cookie to 0         */

+ 29 - 10
arch/ppc64/kernel/pSeries_setup.c

@@ -60,7 +60,8 @@
 #include <asm/nvram.h>
 #include <asm/plpar_wrappers.h>
 #include <asm/xics.h>
-#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/pmc.h>
 
 #include "i8259.h"
 #include "mpic.h"
@@ -187,6 +188,21 @@ static void __init pSeries_setup_mpic(void)
 				  " MPIC     ");
 }
 
+static void pseries_lpar_enable_pmcs(void)
+{
+	unsigned long set, reset;
+
+	power4_enable_pmcs();
+
+	set = 1UL << 63;
+	reset = 0;
+	plpar_hcall_norets(H_PERFMON, set, reset);
+
+	/* instruct hypervisor to maintain PMCs */
+	if (firmware_has_feature(FW_FEATURE_SPLPAR))
+		get_paca()->lppaca.pmcregs_in_use = 1;
+}
+
 static void __init pSeries_setup_arch(void)
 {
 	/* Fixup ppc_md depending on the type of interrupt controller */
@@ -231,11 +247,9 @@ static void __init pSeries_setup_arch(void)
 
 	pSeries_nvram_init();
 
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
-		vpa_init(boot_cpuid);
-
 	/* Choose an idle loop */
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		vpa_init(boot_cpuid);
 		if (get_paca()->lppaca.shared_proc) {
 			printk(KERN_INFO "Using shared processor idle loop\n");
 			ppc_md.idle_loop = pseries_shared_idle;
@@ -247,6 +261,11 @@ static void __init pSeries_setup_arch(void)
 		printk(KERN_INFO "Using default idle loop\n");
 		ppc_md.idle_loop = default_idle;
 	}
+
+	if (systemcfg->platform & PLATFORM_LPAR)
+		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
+	else
+		ppc_md.enable_pmcs = power4_enable_pmcs;
 }
 
 static int __init pSeries_init_panel(void)
@@ -260,11 +279,11 @@ static int __init pSeries_init_panel(void)
 arch_initcall(pSeries_init_panel);
 
 
-/* Build up the firmware_features bitmask field
+/* Build up the ppc64_firmware_features bitmask field
  * using contents of device-tree/ibm,hypertas-functions.
  * Ultimately this functionality may be moved into prom.c prom_init().
  */
-void __init fw_feature_init(void)
+static void __init fw_feature_init(void)
 {
 	struct device_node * dn;
 	char * hypertas;
@@ -272,7 +291,7 @@ void __init fw_feature_init(void)
 
 	DBG(" -> fw_feature_init()\n");
 
-	cur_cpu_spec->firmware_features = 0;
+	ppc64_firmware_features = 0;
 	dn = of_find_node_by_path("/rtas");
 	if (dn == NULL) {
 		printk(KERN_ERR "WARNING ! Cannot find RTAS in device-tree !\n");
@@ -288,7 +307,7 @@ void __init fw_feature_init(void)
 				if ((firmware_features_table[i].name) &&
 				    (strcmp(firmware_features_table[i].name,hypertas))==0) {
 					/* we have a match */
-					cur_cpu_spec->firmware_features |= 
+					ppc64_firmware_features |= 
 						(firmware_features_table[i].val);
 					break;
 				} 
@@ -302,7 +321,7 @@ void __init fw_feature_init(void)
 	of_node_put(dn);
  no_rtas:
 	printk(KERN_INFO "firmware_features = 0x%lx\n", 
-	       cur_cpu_spec->firmware_features);
+	       ppc64_firmware_features);
 
 	DBG(" <- fw_feature_init()\n");
 }

+ 2 - 1
arch/ppc64/kernel/pSeries_smp.c

@@ -41,6 +41,7 @@
 #include <asm/machdep.h>
 #include <asm/xics.h>
 #include <asm/cputable.h>
+#include <asm/firmware.h>
 #include <asm/system.h>
 #include <asm/rtas.h>
 #include <asm/plpar_wrappers.h>
@@ -326,7 +327,7 @@ static void __devinit smp_xics_setup_cpu(int cpu)
 	if (cpu != boot_cpuid)
 		xics_setup_cpu();
 
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
+	if (firmware_has_feature(FW_FEATURE_SPLPAR))
 		vpa_init(cpu);
 
 	cpu_clear(cpu, of_spin_map);

+ 273 - 0
arch/ppc64/kernel/pSeries_vio.c

@@ -0,0 +1,273 @@
+/*
+ * IBM PowerPC pSeries Virtual I/O Infrastructure Support.
+ *
+ *    Copyright (c) 2003-2005 IBM Corp.
+ *     Dave Engebretsen engebret@us.ibm.com
+ *     Santiago Leon santil@us.ibm.com
+ *     Hollis Blanchard <hollisb@us.ibm.com>
+ *     Stephen Rothwell
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/kobject.h>
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/prom.h>
+#include <asm/vio.h>
+#include <asm/hvcall.h>
+
+extern struct subsystem devices_subsys; /* needed for vio_find_name() */
+
+static void probe_bus_pseries(void)
+{
+	struct device_node *node_vroot, *of_node;
+
+	node_vroot = find_devices("vdevice");
+	if ((node_vroot == NULL) || (node_vroot->child == NULL))
+		/* this machine doesn't do virtual IO, and that's ok */
+		return;
+
+	/*
+	 * Create struct vio_devices for each virtual device in the device tree.
+	 * Drivers will associate with them later.
+	 */
+	for (of_node = node_vroot->child; of_node != NULL;
+			of_node = of_node->sibling) {
+		printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node);
+		vio_register_device_node(of_node);
+	}
+}
+
+/**
+ * vio_match_device_pseries: - Tell if a pSeries VIO device matches a
+ *	vio_device_id
+ */
+static int vio_match_device_pseries(const struct vio_device_id *id,
+		const struct vio_dev *dev)
+{
+	return (strncmp(dev->type, id->type, strlen(id->type)) == 0) &&
+			device_is_compatible(dev->dev.platform_data, id->compat);
+}
+
+static void vio_release_device_pseries(struct device *dev)
+{
+	/* XXX free TCE table */
+	of_node_put(dev->platform_data);
+}
+
+static ssize_t viodev_show_devspec(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct device_node *of_node = dev->platform_data;
+
+	return sprintf(buf, "%s\n", of_node->full_name);
+}
+DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL);
+
+static void vio_unregister_device_pseries(struct vio_dev *viodev)
+{
+	device_remove_file(&viodev->dev, &dev_attr_devspec);
+}
+
+static struct vio_bus_ops vio_bus_ops_pseries = {
+	.match = vio_match_device_pseries,
+	.unregister_device = vio_unregister_device_pseries,
+	.release_device = vio_release_device_pseries,
+};
+
+/**
+ * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus
+ */
+static int __init vio_bus_init_pseries(void)
+{
+	int err;
+
+	err = vio_bus_init(&vio_bus_ops_pseries);
+	if (err == 0)
+		probe_bus_pseries();
+	return err;
+}
+
+__initcall(vio_bus_init_pseries);
+
+/**
+ * vio_build_iommu_table: - gets the dma information from OF and
+ *	builds the TCE tree.
+ * @dev: the virtual device.
+ *
+ * Returns a pointer to the built tce tree, or NULL if it can't
+ * find property.
+*/
+static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
+{
+	unsigned int *dma_window;
+	struct iommu_table *newTceTable;
+	unsigned long offset;
+	int dma_window_property_size;
+
+	dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size);
+	if(!dma_window) {
+		return NULL;
+	}
+
+	newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
+
+	/*  There should be some code to extract the phys-encoded offset
+		using prom_n_addr_cells(). However, according to a comment
+		on earlier versions, it's always zero, so we don't bother */
+	offset = dma_window[1] >>  PAGE_SHIFT;
+
+	/* TCE table size - measured in tce entries */
+	newTceTable->it_size		= dma_window[4] >> PAGE_SHIFT;
+	/* offset for VIO should always be 0 */
+	newTceTable->it_offset		= offset;
+	newTceTable->it_busno		= 0;
+	newTceTable->it_index		= (unsigned long)dma_window[0];
+	newTceTable->it_type		= TCE_VB;
+
+	return iommu_init_table(newTceTable);
+}
+
+/**
+ * vio_register_device_node: - Register a new vio device.
+ * @of_node:	The OF node for this device.
+ *
+ * Creates and initializes a vio_dev structure from the data in
+ * of_node (dev.platform_data) and adds it to the list of virtual devices.
+ * Returns a pointer to the created vio_dev or NULL if node has
+ * NULL device_type or compatible fields.
+ */
+struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
+{
+	struct vio_dev *viodev;
+	unsigned int *unit_address;
+	unsigned int *irq_p;
+
+	/* we need the 'device_type' property, in order to match with drivers */
+	if ((NULL == of_node->type)) {
+		printk(KERN_WARNING
+			"%s: node %s missing 'device_type'\n", __FUNCTION__,
+			of_node->name ? of_node->name : "<unknown>");
+		return NULL;
+	}
+
+	unit_address = (unsigned int *)get_property(of_node, "reg", NULL);
+	if (!unit_address) {
+		printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__,
+			of_node->name ? of_node->name : "<unknown>");
+		return NULL;
+	}
+
+	/* allocate a vio_dev for this node */
+	viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
+	if (!viodev) {
+		return NULL;
+	}
+	memset(viodev, 0, sizeof(struct vio_dev));
+
+	viodev->dev.platform_data = of_node_get(of_node);
+
+	viodev->irq = NO_IRQ;
+	irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL);
+	if (irq_p) {
+		int virq = virt_irq_create_mapping(*irq_p);
+		if (virq == NO_IRQ) {
+			printk(KERN_ERR "Unable to allocate interrupt "
+			       "number for %s\n", of_node->full_name);
+		} else
+			viodev->irq = irq_offset_up(virq);
+	}
+
+	snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address);
+	viodev->name = of_node->name;
+	viodev->type = of_node->type;
+	viodev->unit_address = *unit_address;
+	viodev->iommu_table = vio_build_iommu_table(viodev);
+
+	/* register with generic device framework */
+	if (vio_register_device(viodev) == NULL) {
+		/* XXX free TCE table */
+		kfree(viodev);
+		return NULL;
+	}
+	device_create_file(&viodev->dev, &dev_attr_devspec);
+
+	return viodev;
+}
+EXPORT_SYMBOL(vio_register_device_node);
+
+/**
+ * vio_get_attribute: - get attribute for virtual device
+ * @vdev:	The vio device to get property.
+ * @which:	The property/attribute to be extracted.
+ * @length:	Pointer to length of returned data size (unused if NULL).
+ *
+ * Calls prom.c's get_property() to return the value of the
+ * attribute specified by the preprocessor constant @which
+*/
+const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length)
+{
+	return get_property(vdev->dev.platform_data, (char*)which, length);
+}
+EXPORT_SYMBOL(vio_get_attribute);
+
+/* vio_find_name() - internal because only vio.c knows how we formatted the
+ * kobject name
+ * XXX once vio_bus_type.devices is actually used as a kset in
+ * drivers/base/bus.c, this function should be removed in favor of
+ * "device_find(kobj_name, &vio_bus_type)"
+ */
+static struct vio_dev *vio_find_name(const char *kobj_name)
+{
+	struct kobject *found;
+
+	found = kset_find_obj(&devices_subsys.kset, kobj_name);
+	if (!found)
+		return NULL;
+
+	return to_vio_dev(container_of(found, struct device, kobj));
+}
+
+/**
+ * vio_find_node - find an already-registered vio_dev
+ * @vnode: device_node of the virtual device we're looking for
+ */
+struct vio_dev *vio_find_node(struct device_node *vnode)
+{
+	uint32_t *unit_address;
+	char kobj_name[BUS_ID_SIZE];
+
+	/* construct the kobject name from the device node */
+	unit_address = (uint32_t *)get_property(vnode, "reg", NULL);
+	if (!unit_address)
+		return NULL;
+	snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address);
+
+	return vio_find_name(kobj_name);
+}
+EXPORT_SYMBOL(vio_find_node);
+
+int vio_enable_interrupts(struct vio_dev *dev)
+{
+	int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
+	if (rc != H_Success)
+		printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
+	return rc;
+}
+EXPORT_SYMBOL(vio_enable_interrupts);
+
+int vio_disable_interrupts(struct vio_dev *dev)
+{
+	int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
+	if (rc != H_Success)
+		printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
+	return rc;
+}
+EXPORT_SYMBOL(vio_disable_interrupts);

+ 2 - 2
arch/ppc64/kernel/pacaData.c

@@ -78,7 +78,7 @@ extern unsigned long __toc_start;
 
 #define BOOTCPU_PACA_INIT(number)					    \
 {									    \
-	PACA_INIT_COMMON(number, 1, 0, STAB0_VIRT_ADDR)			    \
+	PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab)		    \
 	PACA_INIT_ISERIES(number)					    \
 }
 
@@ -90,7 +90,7 @@ extern unsigned long __toc_start;
 
 #define BOOTCPU_PACA_INIT(number)					    \
 {									    \
-	PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR)	    \
+	PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab)    \
 }
 #endif
 

+ 2 - 0
arch/ppc64/kernel/pmac_setup.c

@@ -71,6 +71,7 @@
 #include <asm/of_device.h>
 #include <asm/lmb.h>
 #include <asm/smu.h>
+#include <asm/pmc.h>
 
 #include "pmac.h"
 #include "mpic.h"
@@ -511,4 +512,5 @@ struct machdep_calls __initdata pmac_md = {
 	.progress		= pmac_progress,
 	.check_legacy_ioport	= pmac_check_legacy_ioport,
 	.idle_loop		= native_idle,
+	.enable_pmcs		= power4_enable_pmcs,
 };

+ 21 - 0
arch/ppc64/kernel/pmc.c

@@ -65,3 +65,24 @@ void release_pmc_hardware(void)
 	spin_unlock(&pmc_owner_lock);
 }
 EXPORT_SYMBOL_GPL(release_pmc_hardware);
+
+void power4_enable_pmcs(void)
+{
+	unsigned long hid0;
+
+	hid0 = mfspr(HID0);
+	hid0 |= 1UL << (63 - 20);
+
+	/* POWER4 requires the following sequence */
+	asm volatile(
+		"sync\n"
+		"mtspr     %1, %0\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0):
+		"memory");
+}

+ 5 - 7
arch/ppc64/kernel/process.c

@@ -50,6 +50,7 @@
 #include <asm/machdep.h>
 #include <asm/iSeries/HvCallHpt.h>
 #include <asm/cputable.h>
+#include <asm/firmware.h>
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
 #include <asm/time.h>
@@ -202,11 +203,10 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	new_thread = &new->thread;
 	old_thread = &current->thread;
 
-/* Collect purr utilization data per process and per processor wise */
-/* purr is nothing but processor time base                          */
-
-#if defined(CONFIG_PPC_PSERIES)
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+	/* Collect purr utilization data per process and per processor
+	 * wise purr is nothing but processor time base
+	 */
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
 		struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
 		long unsigned start_tb, current_tb;
 		start_tb = old_thread->start_tb;
@@ -214,8 +214,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
 		old_thread->accum_tb += (current_tb - start_tb);
 		new_thread->start_tb = current_tb;
 	}
-#endif
-
 
 	local_irq_save(flags);
 	last = _switch(old_thread, new_thread);

+ 152 - 32
arch/ppc64/kernel/prom.c

@@ -625,8 +625,8 @@ void __init finish_device_tree(void)
 
 static inline char *find_flat_dt_string(u32 offset)
 {
-	return ((char *)initial_boot_params) + initial_boot_params->off_dt_strings
-		+ offset;
+	return ((char *)initial_boot_params) +
+		initial_boot_params->off_dt_strings + offset;
 }
 
 /**
@@ -635,26 +635,33 @@ static inline char *find_flat_dt_string(u32 offset)
  * unflatten the tree
  */
 static int __init scan_flat_dt(int (*it)(unsigned long node,
-					 const char *full_path, void *data),
+					 const char *uname, int depth,
+					 void *data),
 			       void *data)
 {
 	unsigned long p = ((unsigned long)initial_boot_params) +
 		initial_boot_params->off_dt_struct;
 	int rc = 0;
+	int depth = -1;
 
 	do {
 		u32 tag = *((u32 *)p);
 		char *pathp;
 		
 		p += 4;
-		if (tag == OF_DT_END_NODE)
+		if (tag == OF_DT_END_NODE) {
+			depth --;
+			continue;
+		}
+		if (tag == OF_DT_NOP)
 			continue;
 		if (tag == OF_DT_END)
 			break;
 		if (tag == OF_DT_PROP) {
 			u32 sz = *((u32 *)p);
 			p += 8;
-			p = _ALIGN(p, sz >= 8 ? 8 : 4);
+			if (initial_boot_params->version < 0x10)
+				p = _ALIGN(p, sz >= 8 ? 8 : 4);
 			p += sz;
 			p = _ALIGN(p, 4);
 			continue;
@@ -664,9 +671,18 @@ static int __init scan_flat_dt(int (*it)(unsigned long node,
 			       " device tree !\n", tag);
 			return -EINVAL;
 		}
+		depth++;
 		pathp = (char *)p;
 		p = _ALIGN(p + strlen(pathp) + 1, 4);
-		rc = it(p, pathp, data);
+		if ((*pathp) == '/') {
+			char *lp, *np;
+			for (lp = NULL, np = pathp; *np; np++)
+				if ((*np) == '/')
+					lp = np+1;
+			if (lp != NULL)
+				pathp = lp;
+		}
+		rc = it(p, pathp, depth, data);
 		if (rc != 0)
 			break;		
 	} while(1);
@@ -689,17 +705,21 @@ static void* __init get_flat_dt_prop(unsigned long node, const char *name,
 		const char *nstr;
 
 		p += 4;
+		if (tag == OF_DT_NOP)
+			continue;
 		if (tag != OF_DT_PROP)
 			return NULL;
 
 		sz = *((u32 *)p);
 		noff = *((u32 *)(p + 4));
 		p += 8;
-		p = _ALIGN(p, sz >= 8 ? 8 : 4);
+		if (initial_boot_params->version < 0x10)
+			p = _ALIGN(p, sz >= 8 ? 8 : 4);
 
 		nstr = find_flat_dt_string(noff);
 		if (nstr == NULL) {
-			printk(KERN_WARNING "Can't find property index name !\n");
+			printk(KERN_WARNING "Can't find property index"
+			       " name !\n");
 			return NULL;
 		}
 		if (strcmp(name, nstr) == 0) {
@@ -713,7 +733,7 @@ static void* __init get_flat_dt_prop(unsigned long node, const char *name,
 }
 
 static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size,
-					       unsigned long align)
+				       unsigned long align)
 {
 	void *res;
 
@@ -727,13 +747,16 @@ static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size,
 static unsigned long __init unflatten_dt_node(unsigned long mem,
 					      unsigned long *p,
 					      struct device_node *dad,
-					      struct device_node ***allnextpp)
+					      struct device_node ***allnextpp,
+					      unsigned long fpsize)
 {
 	struct device_node *np;
 	struct property *pp, **prev_pp = NULL;
 	char *pathp;
 	u32 tag;
-	unsigned int l;
+	unsigned int l, allocl;
+	int has_name = 0;
+	int new_format = 0;
 
 	tag = *((u32 *)(*p));
 	if (tag != OF_DT_BEGIN_NODE) {
@@ -742,21 +765,62 @@ static unsigned long __init unflatten_dt_node(unsigned long mem,
 	}
 	*p += 4;
 	pathp = (char *)*p;
-	l = strlen(pathp) + 1;
+	l = allocl = strlen(pathp) + 1;
 	*p = _ALIGN(*p + l, 4);
 
-	np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + l,
+	/* version 0x10 has a more compact unit name here instead of the full
+	 * path. we accumulate the full path size using "fpsize", we'll rebuild
+	 * it later. We detect this because the first character of the name is
+	 * not '/'.
+	 */
+	if ((*pathp) != '/') {
+		new_format = 1;
+		if (fpsize == 0) {
+			/* root node: special case. fpsize accounts for path
+			 * plus terminating zero. root node only has '/', so
+			 * fpsize should be 2, but we want to avoid the first
+			 * level nodes to have two '/' so we use fpsize 1 here
+			 */
+			fpsize = 1;
+			allocl = 2;
+		} else {
+			/* account for '/' and path size minus terminal 0
+			 * already in 'l'
+			 */
+			fpsize += l;
+			allocl = fpsize;
+		}
+	}
+
+
+	np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl,
 				__alignof__(struct device_node));
 	if (allnextpp) {
 		memset(np, 0, sizeof(*np));
 		np->full_name = ((char*)np) + sizeof(struct device_node);
-		memcpy(np->full_name, pathp, l);
+		if (new_format) {
+			char *p = np->full_name;
+			/* rebuild full path for new format */
+			if (dad && dad->parent) {
+				strcpy(p, dad->full_name);
+#ifdef DEBUG
+				if ((strlen(p) + l + 1) != allocl) {
+					DBG("%s: p: %d, l: %d, a: %d\n",
+					    pathp, strlen(p), l, allocl);
+				}
+#endif
+				p += strlen(p);
+			}
+			*(p++) = '/';
+			memcpy(p, pathp, l);
+		} else
+			memcpy(np->full_name, pathp, l);
 		prev_pp = &np->properties;
 		**allnextpp = np;
 		*allnextpp = &np->allnext;
 		if (dad != NULL) {
 			np->parent = dad;
-			/* we temporarily use the `next' field as `last_child'. */
+			/* we temporarily use the next field as `last_child'*/
 			if (dad->next == 0)
 				dad->child = np;
 			else
@@ -770,18 +834,26 @@ static unsigned long __init unflatten_dt_node(unsigned long mem,
 		char *pname;
 
 		tag = *((u32 *)(*p));
+		if (tag == OF_DT_NOP) {
+			*p += 4;
+			continue;
+		}
 		if (tag != OF_DT_PROP)
 			break;
 		*p += 4;
 		sz = *((u32 *)(*p));
 		noff = *((u32 *)((*p) + 4));
-		*p = _ALIGN((*p) + 8, sz >= 8 ? 8 : 4);
+		*p += 8;
+		if (initial_boot_params->version < 0x10)
+			*p = _ALIGN(*p, sz >= 8 ? 8 : 4);
 
 		pname = find_flat_dt_string(noff);
 		if (pname == NULL) {
 			printk("Can't find property name in list !\n");
 			break;
 		}
+		if (strcmp(pname, "name") == 0)
+			has_name = 1;
 		l = strlen(pname) + 1;
 		pp = unflatten_dt_alloc(&mem, sizeof(struct property),
 					__alignof__(struct property));
@@ -801,6 +873,36 @@ static unsigned long __init unflatten_dt_node(unsigned long mem,
 		}
 		*p = _ALIGN((*p) + sz, 4);
 	}
+	/* with version 0x10 we may not have the name property, recreate
+	 * it here from the unit name if absent
+	 */
+	if (!has_name) {
+		char *p = pathp, *ps = pathp, *pa = NULL;
+		int sz;
+
+		while (*p) {
+			if ((*p) == '@')
+				pa = p;
+			if ((*p) == '/')
+				ps = p + 1;
+			p++;
+		}
+		if (pa < ps)
+			pa = p;
+		sz = (pa - ps) + 1;
+		pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz,
+					__alignof__(struct property));
+		if (allnextpp) {
+			pp->name = "name";
+			pp->length = sz;
+			pp->value = (unsigned char *)(pp + 1);
+			*prev_pp = pp;
+			prev_pp = &pp->next;
+			memcpy(pp->value, ps, sz - 1);
+			((char *)pp->value)[sz - 1] = 0;
+			DBG("fixed up name for %s -> %s\n", pathp, pp->value);
+		}
+	}
 	if (allnextpp) {
 		*prev_pp = NULL;
 		np->name = get_property(np, "name", NULL);
@@ -812,11 +914,11 @@ static unsigned long __init unflatten_dt_node(unsigned long mem,
 			np->type = "<NULL>";
 	}
 	while (tag == OF_DT_BEGIN_NODE) {
-		mem = unflatten_dt_node(mem, p, np, allnextpp);
+		mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize);
 		tag = *((u32 *)(*p));
 	}
 	if (tag != OF_DT_END_NODE) {
-		printk("Weird tag at start of node: %x\n", tag);
+		printk("Weird tag at end of node: %x\n", tag);
 		return mem;
 	}
 	*p += 4;
@@ -842,21 +944,32 @@ void __init unflatten_device_tree(void)
 	/* First pass, scan for size */
 	start = ((unsigned long)initial_boot_params) +
 		initial_boot_params->off_dt_struct;
-	size = unflatten_dt_node(0, &start, NULL, NULL);
+	size = unflatten_dt_node(0, &start, NULL, NULL, 0);
+	size = (size | 3) + 1;
 
 	DBG("  size is %lx, allocating...\n", size);
 
 	/* Allocate memory for the expanded device tree */
-	mem = (unsigned long)abs_to_virt(lmb_alloc(size,
-						   __alignof__(struct device_node)));
+	mem = lmb_alloc(size + 4, __alignof__(struct device_node));
+	if (!mem) {
+		DBG("Couldn't allocate memory with lmb_alloc()!\n");
+		panic("Couldn't allocate memory with lmb_alloc()!\n");
+	}
+	mem = (unsigned long)abs_to_virt(mem);
+
+	((u32 *)mem)[size / 4] = 0xdeadbeef;
+
 	DBG("  unflattening...\n", mem);
 
 	/* Second pass, do actual unflattening */
 	start = ((unsigned long)initial_boot_params) +
 		initial_boot_params->off_dt_struct;
-	unflatten_dt_node(mem, &start, NULL, &allnextp);
+	unflatten_dt_node(mem, &start, NULL, &allnextp, 0);
 	if (*((u32 *)start) != OF_DT_END)
-		printk(KERN_WARNING "Weird tag at end of tree: %x\n", *((u32 *)start));
+		printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start));
+	if (((u32 *)mem)[size / 4] != 0xdeadbeef)
+		printk(KERN_WARNING "End of tree marker overwritten: %08x\n",
+		       ((u32 *)mem)[size / 4] );
 	*allnextp = NULL;
 
 	/* Get pointer to OF "/chosen" node for use everywhere */
@@ -880,7 +993,7 @@ void __init unflatten_device_tree(void)
 
 
 static int __init early_init_dt_scan_cpus(unsigned long node,
-					  const char *full_path, void *data)
+					  const char *uname, int depth, void *data)
 {
 	char *type = get_flat_dt_prop(node, "device_type", NULL);
 	u32 *prop;
@@ -947,13 +1060,15 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 }
 
 static int __init early_init_dt_scan_chosen(unsigned long node,
-					    const char *full_path, void *data)
+					    const char *uname, int depth, void *data)
 {
 	u32 *prop;
 	u64 *prop64;
 	extern unsigned long memory_limit, tce_alloc_start, tce_alloc_end;
 
-	if (strcmp(full_path, "/chosen") != 0)
+	DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
+
+	if (depth != 1 || strcmp(uname, "chosen") != 0)
 		return 0;
 
 	/* get platform type */
@@ -1003,18 +1118,20 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
 }
 
 static int __init early_init_dt_scan_root(unsigned long node,
-					  const char *full_path, void *data)
+					  const char *uname, int depth, void *data)
 {
 	u32 *prop;
 
-	if (strcmp(full_path, "/") != 0)
+	if (depth != 0)
 		return 0;
 
 	prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL);
 	dt_root_size_cells = (prop == NULL) ? 1 : *prop;
-		
+	DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
+
 	prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL);
 	dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
+	DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
 	
 	/* break now */
 	return 1;
@@ -1042,7 +1159,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
 
 
 static int __init early_init_dt_scan_memory(unsigned long node,
-					    const char *full_path, void *data)
+					    const char *uname, int depth, void *data)
 {
 	char *type = get_flat_dt_prop(node, "device_type", NULL);
 	cell_t *reg, *endp;
@@ -1058,7 +1175,9 @@ static int __init early_init_dt_scan_memory(unsigned long node,
 
 	endp = reg + (l / sizeof(cell_t));
 
-	DBG("memory scan node %s ...\n", full_path);
+	DBG("memory scan node %s ..., reg size %ld, data: %x %x %x %x, ...\n",
+	    uname, l, reg[0], reg[1], reg[2], reg[3]);
+
 	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
 		unsigned long base, size;
 
@@ -1469,10 +1588,11 @@ struct device_node *of_find_node_by_path(const char *path)
 	struct device_node *np = allnodes;
 
 	read_lock(&devtree_lock);
-	for (; np != 0; np = np->allnext)
+	for (; np != 0; np = np->allnext) {
 		if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0
 		    && of_node_get(np))
 			break;
+	}
 	read_unlock(&devtree_lock);
 	return np;
 }

+ 59 - 34
arch/ppc64/kernel/prom_init.c

@@ -892,7 +892,10 @@ static void __init prom_init_mem(void)
 	if ( RELOC(of_platform) == PLATFORM_PSERIES_LPAR )
 		RELOC(alloc_top) = RELOC(rmo_top);
 	else
-		RELOC(alloc_top) = RELOC(rmo_top) = min(0x40000000ul, RELOC(ram_top));
+		/* Some RS64 machines have buggy firmware where claims up at 1GB
+		 * fails. Cap at 768MB as a workaround. Still plenty of room.
+		 */
+		RELOC(alloc_top) = RELOC(rmo_top) = min(0x30000000ul, RELOC(ram_top));
 
 	prom_printf("memory layout at init:\n");
 	prom_printf("  memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit));
@@ -1534,7 +1537,8 @@ static unsigned long __init dt_find_string(char *str)
  */
 #define MAX_PROPERTY_NAME 64
 
-static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start,
+static void __init scan_dt_build_strings(phandle node,
+					 unsigned long *mem_start,
 					 unsigned long *mem_end)
 {
 	unsigned long offset = reloc_offset();
@@ -1547,16 +1551,21 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start,
 	/* get and store all property names */
 	prev_name = RELOC("");
 	for (;;) {
-		int rc;
-
 		/* 64 is max len of name including nul. */
 		namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1);
-		rc = call_prom("nextprop", 3, 1, node, prev_name, namep);
-		if (rc != 1) {
+		if (call_prom("nextprop", 3, 1, node, prev_name, namep) != 1) {
 			/* No more nodes: unwind alloc */
 			*mem_start = (unsigned long)namep;
 			break;
 		}
+
+ 		/* skip "name" */
+ 		if (strcmp(namep, RELOC("name")) == 0) {
+ 			*mem_start = (unsigned long)namep;
+ 			prev_name = RELOC("name");
+ 			continue;
+ 		}
+		/* get/create string entry */
 		soff = dt_find_string(namep);
 		if (soff != 0) {
 			*mem_start = (unsigned long)namep;
@@ -1571,7 +1580,7 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start,
 
 	/* do all our children */
 	child = call_prom("child", 1, 1, node);
-	while (child != (phandle)0) {
+	while (child != 0) {
 		scan_dt_build_strings(child, mem_start, mem_end);
 		child = call_prom("peer", 1, 1, child);
 	}
@@ -1580,16 +1589,13 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start,
 static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
 					unsigned long *mem_end)
 {
-	int l, align;
 	phandle child;
-	char *namep, *prev_name, *sstart, *p, *ep;
+	char *namep, *prev_name, *sstart, *p, *ep, *lp, *path;
 	unsigned long soff;
 	unsigned char *valp;
 	unsigned long offset = reloc_offset();
-	char pname[MAX_PROPERTY_NAME];
-	char *path;
-
-	path = RELOC(prom_scratch);
+	static char pname[MAX_PROPERTY_NAME];
+	int l;
 
 	dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end);
 
@@ -1599,23 +1605,33 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
 		      namep, *mem_end - *mem_start);
 	if (l >= 0) {
 		/* Didn't fit?  Get more room. */
-		if (l+1 > *mem_end - *mem_start) {
+		if ((l+1) > (*mem_end - *mem_start)) {
 			namep = make_room(mem_start, mem_end, l+1, 1);
 			call_prom("package-to-path", 3, 1, node, namep, l);
 		}
 		namep[l] = '\0';
+
 		/* Fixup an Apple bug where they have bogus \0 chars in the
 		 * middle of the path in some properties
 		 */
 		for (p = namep, ep = namep + l; p < ep; p++)
 			if (*p == '\0') {
 				memmove(p, p+1, ep - p);
-				ep--; l--;
+				ep--; l--; p--;
 			}
-		*mem_start = _ALIGN(((unsigned long) namep) + strlen(namep) + 1, 4);
+
+		/* now try to extract the unit name in that mess */
+		for (p = namep, lp = NULL; *p; p++)
+			if (*p == '/')
+				lp = p + 1;
+		if (lp != NULL)
+			memmove(namep, lp, strlen(lp) + 1);
+		*mem_start = _ALIGN(((unsigned long) namep) +
+				    strlen(namep) + 1, 4);
 	}
 
 	/* get it again for debugging */
+	path = RELOC(prom_scratch);
 	memset(path, 0, PROM_SCRATCH_SIZE);
 	call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
 
@@ -1623,23 +1639,27 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
 	prev_name = RELOC("");
 	sstart = (char *)RELOC(dt_string_start);
 	for (;;) {
-		int rc;
-
-		rc = call_prom("nextprop", 3, 1, node, prev_name, pname);
-		if (rc != 1)
+		if (call_prom("nextprop", 3, 1, node, prev_name,
+			      RELOC(pname)) != 1)
 			break;
 
+ 		/* skip "name" */
+ 		if (strcmp(RELOC(pname), RELOC("name")) == 0) {
+ 			prev_name = RELOC("name");
+ 			continue;
+ 		}
+
 		/* find string offset */
-		soff = dt_find_string(pname);
+		soff = dt_find_string(RELOC(pname));
 		if (soff == 0) {
-			prom_printf("WARNING: Can't find string index for <%s>, node %s\n",
-				    pname, path);
+			prom_printf("WARNING: Can't find string index for"
+				    " <%s>, node %s\n", RELOC(pname), path);
 			break;
 		}
 		prev_name = sstart + soff;
 
 		/* get length */
-		l = call_prom("getproplen", 2, 1, node, pname);
+		l = call_prom("getproplen", 2, 1, node, RELOC(pname));
 
 		/* sanity checks */
 		if (l == PROM_ERROR)
@@ -1648,7 +1668,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
 			prom_printf("WARNING: ignoring large property ");
 			/* It seems OF doesn't null-terminate the path :-( */
 			prom_printf("[%s] ", path);
-			prom_printf("%s length 0x%x\n", pname, l);
+			prom_printf("%s length 0x%x\n", RELOC(pname), l);
 			continue;
 		}
 
@@ -1658,17 +1678,16 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
 		dt_push_token(soff, mem_start, mem_end);
 
 		/* push property content */
-		align = (l >= 8) ? 8 : 4;
-		valp = make_room(mem_start, mem_end, l, align);
-		call_prom("getprop", 4, 1, node, pname, valp, l);
+		valp = make_room(mem_start, mem_end, l, 4);
+		call_prom("getprop", 4, 1, node, RELOC(pname), valp, l);
 		*mem_start = _ALIGN(*mem_start, 4);
 	}
 
 	/* Add a "linux,phandle" property. */
 	soff = dt_find_string(RELOC("linux,phandle"));
 	if (soff == 0)
-		prom_printf("WARNING: Can't find string index for <linux-phandle>"
-			    " node %s\n", path);
+		prom_printf("WARNING: Can't find string index for"
+			    " <linux-phandle> node %s\n", path);
 	else {
 		dt_push_token(OF_DT_PROP, mem_start, mem_end);
 		dt_push_token(4, mem_start, mem_end);
@@ -1679,7 +1698,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
 
 	/* do all our children */
 	child = call_prom("child", 1, 1, node);
-	while (child != (phandle)0) {
+	while (child != 0) {
 		scan_dt_build_struct(child, mem_start, mem_end);
 		child = call_prom("peer", 1, 1, child);
 	}
@@ -1718,7 +1737,8 @@ static void __init flatten_device_tree(void)
 
 	/* Build header and make room for mem rsv map */ 
 	mem_start = _ALIGN(mem_start, 4);
-	hdr = make_room(&mem_start, &mem_end, sizeof(struct boot_param_header), 4);
+	hdr = make_room(&mem_start, &mem_end,
+			sizeof(struct boot_param_header), 4);
 	RELOC(dt_header_start) = (unsigned long)hdr;
 	rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8);
 
@@ -1731,11 +1751,11 @@ static void __init flatten_device_tree(void)
 	namep = make_room(&mem_start, &mem_end, 16, 1);
 	strcpy(namep, RELOC("linux,phandle"));
 	mem_start = (unsigned long)namep + strlen(namep) + 1;
-	RELOC(dt_string_end) = mem_start;
 
 	/* Build string array */
 	prom_printf("Building dt strings...\n"); 
 	scan_dt_build_strings(root, &mem_start, &mem_end);
+	RELOC(dt_string_end) = mem_start;
 
 	/* Build structure */
 	mem_start = PAGE_ALIGN(mem_start);
@@ -1750,9 +1770,11 @@ static void __init flatten_device_tree(void)
 	hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start);
 	hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start);
 	hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start);
+	hdr->dt_strings_size = RELOC(dt_string_end) - RELOC(dt_string_start);
 	hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start);
 	hdr->version = OF_DT_VERSION;
-	hdr->last_comp_version = 1;
+	/* Version 16 is not backward compatible */
+	hdr->last_comp_version = 0x10;
 
 	/* Reserve the whole thing and copy the reserve map in, we
 	 * also bump mem_reserve_cnt to cause further reservations to
@@ -1808,6 +1830,9 @@ static void __init fixup_device_tree(void)
 	/* does it need fixup ? */
 	if (prom_getproplen(i2c, "interrupts") > 0)
 		return;
+
+	prom_printf("fixing up bogus interrupts for u3 i2c...\n");
+
 	/* interrupt on this revision of u3 is number 0 and level */
 	interrupts[0] = 0;
 	interrupts[1] = 1;

+ 17 - 2
arch/ppc64/kernel/rtas_pci.c

@@ -58,6 +58,21 @@ static int config_access_valid(struct device_node *dn, int where)
 	return 0;
 }
 
+static int of_device_available(struct device_node * dn)
+{
+        char * status;
+
+        status = get_property(dn, "status", NULL);
+
+        if (!status)
+                return 1;
+
+        if (!strcmp(status, "okay"))
+                return 1;
+
+        return 0;
+}
+
 static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val)
 {
 	int returnval = -1;
@@ -103,7 +118,7 @@ static int rtas_pci_read_config(struct pci_bus *bus,
 
 	/* Search only direct children of the bus */
 	for (dn = busdn->child; dn; dn = dn->sibling)
-		if (dn->devfn == devfn)
+		if (dn->devfn == devfn && of_device_available(dn))
 			return rtas_read_config(dn, where, size, val);
 	return PCIBIOS_DEVICE_NOT_FOUND;
 }
@@ -146,7 +161,7 @@ static int rtas_pci_write_config(struct pci_bus *bus,
 
 	/* Search only direct children of the bus */
 	for (dn = busdn->child; dn; dn = dn->sibling)
-		if (dn->devfn == devfn)
+		if (dn->devfn == devfn && of_device_available(dn))
 			return rtas_write_config(dn, where, size, val);
 	return PCIBIOS_DEVICE_NOT_FOUND;
 }

+ 19 - 11
arch/ppc64/kernel/setup.c

@@ -536,15 +536,19 @@ static void __init check_for_initrd(void)
 
 	DBG(" -> check_for_initrd()\n");
 
-	prop = (u64 *)get_property(of_chosen, "linux,initrd-start", NULL);
-	if (prop != NULL) {
-		initrd_start = (unsigned long)__va(*prop);
-		prop = (u64 *)get_property(of_chosen, "linux,initrd-end", NULL);
+	if (of_chosen) {
+		prop = (u64 *)get_property(of_chosen,
+				"linux,initrd-start", NULL);
 		if (prop != NULL) {
-			initrd_end = (unsigned long)__va(*prop);
-			initrd_below_start_ok = 1;
-		} else
-			initrd_start = 0;
+			initrd_start = (unsigned long)__va(*prop);
+			prop = (u64 *)get_property(of_chosen,
+					"linux,initrd-end", NULL);
+			if (prop != NULL) {
+				initrd_end = (unsigned long)__va(*prop);
+				initrd_below_start_ok = 1;
+			} else
+				initrd_start = 0;
+		}
 	}
 
 	/* If we were passed an initrd, set the ROOT_DEV properly if the values
@@ -627,7 +631,7 @@ void __init setup_system(void)
 	 * Initialize xmon
 	 */
 #ifdef CONFIG_XMON_DEFAULT
-	xmon_init();
+	xmon_init(1);
 #endif
 	/*
 	 * Register early console
@@ -706,6 +710,8 @@ void machine_power_off(void)
 	local_irq_disable();
 	while (1) ;
 }
+/* Used by the G5 thermal driver */
+EXPORT_SYMBOL_GPL(machine_power_off);
 
 void machine_halt(void)
 {
@@ -1341,11 +1347,13 @@ static int __init early_xmon(char *p)
 	/* ensure xmon is enabled */
 	if (p) {
 		if (strncmp(p, "on", 2) == 0)
-			xmon_init();
+			xmon_init(1);
+		if (strncmp(p, "off", 3) == 0)
+			xmon_init(0);
 		if (strncmp(p, "early", 5) != 0)
 			return 0;
 	}
-	xmon_init();
+	xmon_init(1);
 	debugger(NULL);
 
 	return 0;

+ 3 - 2
arch/ppc64/kernel/signal.c

@@ -481,10 +481,11 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
 	/* Set up Signal Frame */
 	ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
-	if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
+	if (ret) {
 		spin_lock_irq(&current->sighand->siglock);
 		sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
-		sigaddset(&current->blocked,sig);
+		if (!(ka->sa.sa_flags & SA_NODEFER))
+			sigaddset(&current->blocked,sig);
 		recalc_sigpending();
 		spin_unlock_irq(&current->sighand->siglock);
 	}

+ 3 - 2
arch/ppc64/kernel/signal32.c

@@ -976,11 +976,12 @@ int do_signal32(sigset_t *oldset, struct pt_regs *regs)
 	else
 		ret = handle_signal32(signr, &ka, &info, oldset, regs, newsp);
 
-	if (ret && !(ka.sa.sa_flags & SA_NODEFER)) {
+	if (ret) {
 		spin_lock_irq(&current->sighand->siglock);
 		sigorsets(&current->blocked, &current->blocked,
 			  &ka.sa.sa_mask);
-		sigaddset(&current->blocked, signr);
+		if (!(ka.sa.sa_flags & SA_NODEFER))
+			sigaddset(&current->blocked, signr);
 		recalc_sigpending();
 		spin_unlock_irq(&current->sighand->siglock);
 	}

+ 5 - 52
arch/ppc64/kernel/sysfs.c

@@ -13,6 +13,7 @@
 #include <asm/current.h>
 #include <asm/processor.h>
 #include <asm/cputable.h>
+#include <asm/firmware.h>
 #include <asm/hvcall.h>
 #include <asm/prom.h>
 #include <asm/systemcfg.h>
@@ -100,6 +101,8 @@ static int __init setup_smt_snooze_delay(char *str)
 }
 __setup("smt-snooze-delay=", setup_smt_snooze_delay);
 
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
 /*
  * Enabling PMCs will slow partition context switch times so we only do
  * it the first time we write to the PMCs.
@@ -109,65 +112,15 @@ static DEFINE_PER_CPU(char, pmcs_enabled);
 
 void ppc64_enable_pmcs(void)
 {
-	unsigned long hid0;
-#ifdef CONFIG_PPC_PSERIES
-	unsigned long set, reset;
-#endif /* CONFIG_PPC_PSERIES */
-
 	/* Only need to enable them once */
 	if (__get_cpu_var(pmcs_enabled))
 		return;
 
 	__get_cpu_var(pmcs_enabled) = 1;
 
-	switch (systemcfg->platform) {
-	case PLATFORM_PSERIES:
-	case PLATFORM_POWERMAC:
-		hid0 = mfspr(HID0);
-		hid0 |= 1UL << (63 - 20);
-
-		/* POWER4 requires the following sequence */
-		asm volatile(
-			     "sync\n"
-			     "mtspr	%1, %0\n"
-			     "mfspr	%0, %1\n"
-			     "mfspr	%0, %1\n"
-			     "mfspr	%0, %1\n"
-			     "mfspr	%0, %1\n"
-			     "mfspr	%0, %1\n"
-			     "mfspr	%0, %1\n"
-			     "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0):
-			     "memory");
-		break;
-
-#ifdef CONFIG_PPC_PSERIES
-	case PLATFORM_PSERIES_LPAR:
-		set = 1UL << 63;
-		reset = 0;
-		plpar_hcall_norets(H_PERFMON, set, reset);
-		break;
-#endif /* CONFIG_PPC_PSERIES */
-
-	default:
-		break;
-	}
-
-#ifdef CONFIG_PPC_PSERIES
-	/* instruct hypervisor to maintain PMCs */
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
-		get_paca()->lppaca.pmcregs_in_use = 1;
-#endif /* CONFIG_PPC_PSERIES */
+	if (ppc_md.enable_pmcs)
+		ppc_md.enable_pmcs();
 }
-
-#else
-
-/* PMC stuff */
-void ppc64_enable_pmcs(void)
-{
-	/* XXX Implement for iseries */
-}
-#endif /* CONFIG_PPC_MULTIPLATFORM */
-
 EXPORT_SYMBOL(ppc64_enable_pmcs);
 
 /* XXX convert to rusty's on_one_cpu */

+ 3 - 4
arch/ppc64/kernel/time.c

@@ -67,6 +67,7 @@
 #include <asm/prom.h>
 #include <asm/sections.h>
 #include <asm/systemcfg.h>
+#include <asm/firmware.h>
 
 u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
 
@@ -370,13 +371,11 @@ int timer_interrupt(struct pt_regs * regs)
 		process_hvlpevents(regs);
 #endif
 
-/* collect purr register values often, for accurate calculations */
-#if defined(CONFIG_PPC_PSERIES)
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+	/* collect purr register values often, for accurate calculations */
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
 		struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
 		cu->current_tb = mfspr(SPRN_PURR);
 	}
-#endif
 
 	irq_exit();
 

+ 35 - 409
arch/ppc64/kernel/vio.c

@@ -1,10 +1,11 @@
 /*
  * IBM PowerPC Virtual I/O Infrastructure Support.
  *
- *    Copyright (c) 2003 IBM Corp.
+ *    Copyright (c) 2003-2005 IBM Corp.
  *     Dave Engebretsen engebret@us.ibm.com
  *     Santiago Leon santil@us.ibm.com
  *     Hollis Blanchard <hollisb@us.ibm.com>
+ *     Stephen Rothwell
  *
  *      This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -14,61 +15,30 @@
 
 #include <linux/init.h>
 #include <linux/console.h>
-#include <linux/version.h>
 #include <linux/module.h>
-#include <linux/kobject.h>
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
-#include <asm/rtas.h>
 #include <asm/iommu.h>
 #include <asm/dma.h>
-#include <asm/ppcdebug.h>
 #include <asm/vio.h>
-#include <asm/hvcall.h>
-#include <asm/iSeries/vio.h>
-#include <asm/iSeries/HvTypes.h>
-#include <asm/iSeries/HvCallXm.h>
-#include <asm/iSeries/HvLpConfig.h>
-
-#define DBGENTER() pr_debug("%s entered\n", __FUNCTION__)
-
-extern struct subsystem devices_subsys; /* needed for vio_find_name() */
 
 static const struct vio_device_id *vio_match_device(
 		const struct vio_device_id *, const struct vio_dev *);
 
-#ifdef CONFIG_PPC_PSERIES
-static struct iommu_table *vio_build_iommu_table(struct vio_dev *);
-static int vio_num_address_cells;
-#endif
-#ifdef CONFIG_PPC_ISERIES
-static struct iommu_table veth_iommu_table;
-static struct iommu_table vio_iommu_table;
-#endif
-static struct vio_dev vio_bus_device  = { /* fake "parent" device */
+struct vio_dev vio_bus_device  = { /* fake "parent" device */
 	.name = vio_bus_device.dev.bus_id,
 	.type = "",
-#ifdef CONFIG_PPC_ISERIES
-	.iommu_table = &vio_iommu_table,
-#endif
 	.dev.bus_id = "vio",
 	.dev.bus = &vio_bus_type,
 };
 
-#ifdef CONFIG_PPC_ISERIES
-static struct vio_dev *__init vio_register_device_iseries(char *type,
-		uint32_t unit_num);
-
-struct device *iSeries_vio_dev = &vio_bus_device.dev;
-EXPORT_SYMBOL(iSeries_vio_dev);
-
-#define device_is_compatible(a, b)	1
+static struct vio_bus_ops vio_bus_ops;
 
-#endif
-
-/* convert from struct device to struct vio_dev and pass to driver.
+/*
+ * Convert from struct device to struct vio_dev and pass to driver.
  * dev->driver has already been set by generic code because vio_bus_match
- * succeeded. */
+ * succeeded.
+ */
 static int vio_bus_probe(struct device *dev)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
@@ -76,15 +46,12 @@ static int vio_bus_probe(struct device *dev)
 	const struct vio_device_id *id;
 	int error = -ENODEV;
 
-	DBGENTER();
-
 	if (!viodrv->probe)
 		return error;
 
 	id = vio_match_device(viodrv->id_table, viodev);
-	if (id) {
+	if (id)
 		error = viodrv->probe(viodev, id);
-	}
 
 	return error;
 }
@@ -95,11 +62,8 @@ static int vio_bus_remove(struct device *dev)
 	struct vio_dev *viodev = to_vio_dev(dev);
 	struct vio_driver *viodrv = to_vio_driver(dev->driver);
 
-	DBGENTER();
-
-	if (viodrv->remove) {
+	if (viodrv->remove)
 		return viodrv->remove(viodev);
-	}
 
 	/* driver can't remove */
 	return 1;
@@ -135,193 +99,72 @@ void vio_unregister_driver(struct vio_driver *viodrv)
 EXPORT_SYMBOL(vio_unregister_driver);
 
 /**
- * vio_match_device: - Tell if a VIO device has a matching VIO device id structure.
- * @ids: 	array of VIO device id structures to search in
- * @dev: 	the VIO device structure to match against
+ * vio_match_device: - Tell if a VIO device has a matching
+ *			VIO device id structure.
+ * @ids:	array of VIO device id structures to search in
+ * @dev:	the VIO device structure to match against
  *
  * Used by a driver to check whether a VIO device present in the
  * system is in its list of supported devices. Returns the matching
  * vio_device_id structure or NULL if there is no match.
  */
-static const struct vio_device_id * vio_match_device(const struct vio_device_id *ids,
-	const struct vio_dev *dev)
+static const struct vio_device_id *vio_match_device(
+		const struct vio_device_id *ids, const struct vio_dev *dev)
 {
-	DBGENTER();
-
-	while (ids->type) {
-		if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) &&
-			device_is_compatible(dev->dev.platform_data, ids->compat))
+	while (ids->type[0] != '\0') {
+		if (vio_bus_ops.match(ids, dev))
 			return ids;
 		ids++;
 	}
 	return NULL;
 }
 
-#ifdef CONFIG_PPC_ISERIES
-void __init iommu_vio_init(void)
-{
-	struct iommu_table *t;
-	struct iommu_table_cb cb;
-	unsigned long cbp;
-	unsigned long itc_entries;
-
-	cb.itc_busno = 255;    /* Bus 255 is the virtual bus */
-	cb.itc_virtbus = 0xff; /* Ask for virtual bus */
-
-	cbp = virt_to_abs(&cb);
-	HvCallXm_getTceTableParms(cbp);
-
-	itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
-	veth_iommu_table.it_size        = itc_entries / 2;
-	veth_iommu_table.it_busno       = cb.itc_busno;
-	veth_iommu_table.it_offset      = cb.itc_offset;
-	veth_iommu_table.it_index       = cb.itc_index;
-	veth_iommu_table.it_type        = TCE_VB;
-	veth_iommu_table.it_blocksize	= 1;
-
-	t = iommu_init_table(&veth_iommu_table);
-
-	if (!t)
-		printk("Virtual Bus VETH TCE table failed.\n");
-
-	vio_iommu_table.it_size         = itc_entries - veth_iommu_table.it_size;
-	vio_iommu_table.it_busno        = cb.itc_busno;
-	vio_iommu_table.it_offset       = cb.itc_offset +
-					  veth_iommu_table.it_size;
-	vio_iommu_table.it_index        = cb.itc_index;
-	vio_iommu_table.it_type         = TCE_VB;
-	vio_iommu_table.it_blocksize	= 1;
-
-	t = iommu_init_table(&vio_iommu_table);
-
-	if (!t)
-		printk("Virtual Bus VIO TCE table failed.\n");
-}
-#endif
-
-#ifdef CONFIG_PPC_PSERIES
-static void probe_bus_pseries(void)
-{
-	struct device_node *node_vroot, *of_node;
-
-	node_vroot = find_devices("vdevice");
-	if ((node_vroot == NULL) || (node_vroot->child == NULL))
-		/* this machine doesn't do virtual IO, and that's ok */
-		return;
-
-	vio_num_address_cells = prom_n_addr_cells(node_vroot->child);
-
-	/*
-	 * Create struct vio_devices for each virtual device in the device tree.
-	 * Drivers will associate with them later.
-	 */
-	for (of_node = node_vroot->child; of_node != NULL;
-			of_node = of_node->sibling) {
-		printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node);
-		vio_register_device_node(of_node);
-	}
-}
-#endif
-
-#ifdef CONFIG_PPC_ISERIES
-static void probe_bus_iseries(void)
-{
-	HvLpIndexMap vlan_map = HvLpConfig_getVirtualLanIndexMap();
-	struct vio_dev *viodev;
-	int i;
-
-	/* there is only one of each of these */
-	vio_register_device_iseries("viocons", 0);
-	vio_register_device_iseries("vscsi", 0);
-
-	vlan_map = HvLpConfig_getVirtualLanIndexMap();
-	for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
-		if ((vlan_map & (0x8000 >> i)) == 0)
-			continue;
-		viodev = vio_register_device_iseries("vlan", i);
-		/* veth is special and has it own iommu_table */
-		viodev->iommu_table = &veth_iommu_table;
-	}
-	for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++)
-		vio_register_device_iseries("viodasd", i);
-	for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++)
-		vio_register_device_iseries("viocd", i);
-	for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++)
-		vio_register_device_iseries("viotape", i);
-}
-#endif
-
 /**
  * vio_bus_init: - Initialize the virtual IO bus
  */
-static int __init vio_bus_init(void)
+int __init vio_bus_init(struct vio_bus_ops *ops)
 {
 	int err;
 
+	vio_bus_ops = *ops;
+
 	err = bus_register(&vio_bus_type);
 	if (err) {
 		printk(KERN_ERR "failed to register VIO bus\n");
 		return err;
 	}
 
-	/* the fake parent of all vio devices, just to give us a nice directory */
+	/*
+	 * The fake parent of all vio devices, just to give us
+	 * a nice directory
+	 */
 	err = device_register(&vio_bus_device.dev);
 	if (err) {
-		printk(KERN_WARNING "%s: device_register returned %i\n", __FUNCTION__,
-			err);
+		printk(KERN_WARNING "%s: device_register returned %i\n",
+				__FUNCTION__, err);
 		return err;
 	}
 
-#ifdef CONFIG_PPC_PSERIES
-	probe_bus_pseries();
-#endif
-#ifdef CONFIG_PPC_ISERIES
-	probe_bus_iseries();
-#endif
-
 	return 0;
 }
 
-__initcall(vio_bus_init);
-
 /* vio_dev refcount hit 0 */
 static void __devinit vio_dev_release(struct device *dev)
 {
-	DBGENTER();
-
-#ifdef CONFIG_PPC_PSERIES
-	/* XXX free TCE table */
-	of_node_put(dev->platform_data);
-#endif
+	if (vio_bus_ops.release_device)
+		vio_bus_ops.release_device(dev);
 	kfree(to_vio_dev(dev));
 }
 
-#ifdef CONFIG_PPC_PSERIES
-static ssize_t viodev_show_devspec(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct device_node *of_node = dev->platform_data;
-
-	return sprintf(buf, "%s\n", of_node->full_name);
-}
-DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL);
-#endif
-
-static ssize_t viodev_show_name(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t viodev_show_name(struct device *dev,
+		struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%s\n", to_vio_dev(dev)->name);
 }
 DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL);
 
-static struct vio_dev * __devinit vio_register_device_common(
-		struct vio_dev *viodev, char *name, char *type,
-		uint32_t unit_address, struct iommu_table *iommu_table)
+struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev)
 {
-	DBGENTER();
-
-	viodev->name = name;
-	viodev->type = type;
-	viodev->unit_address = unit_address;
-	viodev->iommu_table = iommu_table;
 	/* init generic 'struct device' fields: */
 	viodev->dev.parent = &vio_bus_device.dev;
 	viodev->dev.bus = &vio_bus_type;
@@ -338,222 +181,15 @@ static struct vio_dev * __devinit vio_register_device_common(
 	return viodev;
 }
 
-#ifdef CONFIG_PPC_PSERIES
-/**
- * vio_register_device_node: - Register a new vio device.
- * @of_node:	The OF node for this device.
- *
- * Creates and initializes a vio_dev structure from the data in
- * of_node (dev.platform_data) and adds it to the list of virtual devices.
- * Returns a pointer to the created vio_dev or NULL if node has
- * NULL device_type or compatible fields.
- */
-struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
-{
-	struct vio_dev *viodev;
-	unsigned int *unit_address;
-	unsigned int *irq_p;
-
-	DBGENTER();
-
-	/* we need the 'device_type' property, in order to match with drivers */
-	if ((NULL == of_node->type)) {
-		printk(KERN_WARNING
-			"%s: node %s missing 'device_type'\n", __FUNCTION__,
-			of_node->name ? of_node->name : "<unknown>");
-		return NULL;
-	}
-
-	unit_address = (unsigned int *)get_property(of_node, "reg", NULL);
-	if (!unit_address) {
-		printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__,
-			of_node->name ? of_node->name : "<unknown>");
-		return NULL;
-	}
-
-	/* allocate a vio_dev for this node */
-	viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
-	if (!viodev) {
-		return NULL;
-	}
-	memset(viodev, 0, sizeof(struct vio_dev));
-
-	viodev->dev.platform_data = of_node_get(of_node);
-
-	viodev->irq = NO_IRQ;
-	irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL);
-	if (irq_p) {
-		int virq = virt_irq_create_mapping(*irq_p);
-		if (virq == NO_IRQ) {
-			printk(KERN_ERR "Unable to allocate interrupt "
-			       "number for %s\n", of_node->full_name);
-		} else
-			viodev->irq = irq_offset_up(virq);
-	}
-
-	snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address);
-
-	/* register with generic device framework */
-	if (vio_register_device_common(viodev, of_node->name, of_node->type,
-				*unit_address, vio_build_iommu_table(viodev))
-			== NULL) {
-		/* XXX free TCE table */
-		kfree(viodev);
-		return NULL;
-	}
-	device_create_file(&viodev->dev, &dev_attr_devspec);
-
-	return viodev;
-}
-EXPORT_SYMBOL(vio_register_device_node);
-#endif
-
-#ifdef CONFIG_PPC_ISERIES
-/**
- * vio_register_device: - Register a new vio device.
- * @voidev:	The device to register.
- */
-static struct vio_dev *__init vio_register_device_iseries(char *type,
-		uint32_t unit_num)
-{
-	struct vio_dev *viodev;
-
-	DBGENTER();
-
-	/* allocate a vio_dev for this node */
-	viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
-	if (!viodev)
-		return NULL;
-	memset(viodev, 0, sizeof(struct vio_dev));
-
-	snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num);
-
-	return vio_register_device_common(viodev, viodev->dev.bus_id, type,
-			unit_num, &vio_iommu_table);
-}
-#endif
-
 void __devinit vio_unregister_device(struct vio_dev *viodev)
 {
-	DBGENTER();
-#ifdef CONFIG_PPC_PSERIES
-	device_remove_file(&viodev->dev, &dev_attr_devspec);
-#endif
+	if (vio_bus_ops.unregister_device)
+		vio_bus_ops.unregister_device(viodev);
 	device_remove_file(&viodev->dev, &dev_attr_name);
 	device_unregister(&viodev->dev);
 }
 EXPORT_SYMBOL(vio_unregister_device);
 
-#ifdef CONFIG_PPC_PSERIES
-/**
- * vio_get_attribute: - get attribute for virtual device
- * @vdev:	The vio device to get property.
- * @which:	The property/attribute to be extracted.
- * @length:	Pointer to length of returned data size (unused if NULL).
- *
- * Calls prom.c's get_property() to return the value of the
- * attribute specified by the preprocessor constant @which
-*/
-const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length)
-{
-	return get_property(vdev->dev.platform_data, (char*)which, length);
-}
-EXPORT_SYMBOL(vio_get_attribute);
-
-/* vio_find_name() - internal because only vio.c knows how we formatted the
- * kobject name
- * XXX once vio_bus_type.devices is actually used as a kset in
- * drivers/base/bus.c, this function should be removed in favor of
- * "device_find(kobj_name, &vio_bus_type)"
- */
-static struct vio_dev *vio_find_name(const char *kobj_name)
-{
-	struct kobject *found;
-
-	found = kset_find_obj(&devices_subsys.kset, kobj_name);
-	if (!found)
-		return NULL;
-
-	return to_vio_dev(container_of(found, struct device, kobj));
-}
-
-/**
- * vio_find_node - find an already-registered vio_dev
- * @vnode: device_node of the virtual device we're looking for
- */
-struct vio_dev *vio_find_node(struct device_node *vnode)
-{
-	uint32_t *unit_address;
-	char kobj_name[BUS_ID_SIZE];
-
-	/* construct the kobject name from the device node */
-	unit_address = (uint32_t *)get_property(vnode, "reg", NULL);
-	if (!unit_address)
-		return NULL;
-	snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address);
-
-	return vio_find_name(kobj_name);
-}
-EXPORT_SYMBOL(vio_find_node);
-
-/**
- * vio_build_iommu_table: - gets the dma information from OF and builds the TCE tree.
- * @dev: the virtual device.
- *
- * Returns a pointer to the built tce tree, or NULL if it can't
- * find property.
-*/
-static struct iommu_table * vio_build_iommu_table(struct vio_dev *dev)
-{
-	unsigned int *dma_window;
-	struct iommu_table *newTceTable;
-	unsigned long offset;
-	int dma_window_property_size;
-
-	dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size);
-	if(!dma_window) {
-		return NULL;
-	}
-
-	newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
-
-	/*  There should be some code to extract the phys-encoded offset
-		using prom_n_addr_cells(). However, according to a comment
-		on earlier versions, it's always zero, so we don't bother */
-	offset = dma_window[1] >>  PAGE_SHIFT;
-
-	/* TCE table size - measured in tce entries */
-	newTceTable->it_size		= dma_window[4] >> PAGE_SHIFT;
-	/* offset for VIO should always be 0 */
-	newTceTable->it_offset		= offset;
-	newTceTable->it_busno		= 0;
-	newTceTable->it_index		= (unsigned long)dma_window[0];
-	newTceTable->it_type		= TCE_VB;
-
-	return iommu_init_table(newTceTable);
-}
-
-int vio_enable_interrupts(struct vio_dev *dev)
-{
-	int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
-	if (rc != H_Success) {
-		printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
-	}
-	return rc;
-}
-EXPORT_SYMBOL(vio_enable_interrupts);
-
-int vio_disable_interrupts(struct vio_dev *dev)
-{
-	int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
-	if (rc != H_Success) {
-		printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
-	}
-	return rc;
-}
-EXPORT_SYMBOL(vio_disable_interrupts);
-#endif
-
 static dma_addr_t vio_map_single(struct device *dev, void *vaddr,
 			  size_t size, enum dma_data_direction direction)
 {
@@ -615,18 +251,8 @@ static int vio_bus_match(struct device *dev, struct device_driver *drv)
 	const struct vio_dev *vio_dev = to_vio_dev(dev);
 	struct vio_driver *vio_drv = to_vio_driver(drv);
 	const struct vio_device_id *ids = vio_drv->id_table;
-	const struct vio_device_id *found_id;
-
-	DBGENTER();
 
-	if (!ids)
-		return 0;
-
-	found_id = vio_match_device(ids, vio_dev);
-	if (found_id)
-		return 1;
-
-	return 0;
+	return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
 }
 
 struct bus_type vio_bus_type = {

+ 1 - 3
arch/ppc64/mm/hash_low.S

@@ -128,13 +128,11 @@ _GLOBAL(__hash_page)
 	/* We eventually do the icache sync here (maybe inline that
 	 * code rather than call a C function...) 
 	 */
-BEGIN_FTR_SECTION
 BEGIN_FTR_SECTION
 	mr	r4,r30
 	mr	r5,r7
 	bl	.hash_page_do_lazy_icache
-END_FTR_SECTION_IFSET(CPU_FTR_NOEXECUTE)
-END_FTR_SECTION_IFCLR(CPU_FTR_COHERENT_ICACHE)
+END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
 
 	/* At this point, r3 contains new PP bits, save them in
 	 * place of "access" in the param area (sic)

+ 1 - 2
arch/ppc64/mm/hash_native.c

@@ -51,7 +51,6 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
 			unsigned long prpn, unsigned long vflags,
 			unsigned long rflags)
 {
-	unsigned long arpn = physRpn_to_absRpn(prpn);
 	hpte_t *hptep = htab_address + hpte_group;
 	unsigned long hpte_v, hpte_r;
 	int i;
@@ -74,7 +73,7 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
 	hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
 	if (vflags & HPTE_V_LARGE)
 		va &= ~(1UL << HPTE_V_AVPN_SHIFT);
-	hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
+	hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
 
 	hptep->r = hpte_r;
 	/* Guarantee the second dword is visible before the valid bit */

+ 2 - 2
arch/ppc64/mm/hash_utils.c

@@ -210,7 +210,7 @@ void __init htab_initialize(void)
 
 	/* create bolted the linear mapping in the hash table */
 	for (i=0; i < lmb.memory.cnt; i++) {
-		base = lmb.memory.region[i].physbase + KERNELBASE;
+		base = lmb.memory.region[i].base + KERNELBASE;
 		size = lmb.memory.region[i].size;
 
 		DBG("creating mapping for region: %lx : %lx\n", base, size);
@@ -302,7 +302,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 	int local = 0;
 	cpumask_t tmp;
 
-	if ((ea & ~REGION_MASK) > EADDR_MASK)
+	if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
 		return 1;
 
  	switch (REGION_ID(ea)) {

+ 212 - 176
arch/ppc64/mm/hugetlbpage.c

@@ -27,124 +27,94 @@
 
 #include <linux/sysctl.h>
 
-#define	HUGEPGDIR_SHIFT		(HPAGE_SHIFT + PAGE_SHIFT - 3)
-#define HUGEPGDIR_SIZE		(1UL << HUGEPGDIR_SHIFT)
-#define HUGEPGDIR_MASK		(~(HUGEPGDIR_SIZE-1))
+#define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
+#define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
 
-#define HUGEPTE_INDEX_SIZE	9
-#define HUGEPGD_INDEX_SIZE	10
-
-#define PTRS_PER_HUGEPTE	(1 << HUGEPTE_INDEX_SIZE)
-#define PTRS_PER_HUGEPGD	(1 << HUGEPGD_INDEX_SIZE)
-
-static inline int hugepgd_index(unsigned long addr)
-{
-	return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT;
-}
-
-static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr)
+/* Modelled after find_linux_pte() */
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
-	int index;
+	pgd_t *pg;
+	pud_t *pu;
+	pmd_t *pm;
+	pte_t *pt;
 
-	if (! mm->context.huge_pgdir)
-		return NULL;
+	BUG_ON(! in_hugepage_area(mm->context, addr));
 
+	addr &= HPAGE_MASK;
+
+	pg = pgd_offset(mm, addr);
+	if (!pgd_none(*pg)) {
+		pu = pud_offset(pg, addr);
+		if (!pud_none(*pu)) {
+			pm = pmd_offset(pu, addr);
+			pt = (pte_t *)pm;
+			BUG_ON(!pmd_none(*pm)
+			       && !(pte_present(*pt) && pte_huge(*pt)));
+			return pt;
+		}
+	}
 
-	index = hugepgd_index(addr);
-	BUG_ON(index >= PTRS_PER_HUGEPGD);
-	return (pud_t *)(mm->context.huge_pgdir + index);
+	return NULL;
 }
 
-static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
-	int index;
-
-	if (pud_none(*dir))
-		return NULL;
+	pgd_t *pg;
+	pud_t *pu;
+	pmd_t *pm;
+	pte_t *pt;
 
-	index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE;
-	return (pte_t *)pud_page(*dir) + index;
-}
-
-static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr)
-{
 	BUG_ON(! in_hugepage_area(mm->context, addr));
 
-	if (! mm->context.huge_pgdir) {
-		pgd_t *new;
-		spin_unlock(&mm->page_table_lock);
-		/* Don't use pgd_alloc(), because we want __GFP_REPEAT */
-		new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
-		BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
-		spin_lock(&mm->page_table_lock);
+	addr &= HPAGE_MASK;
 
-		/*
-		 * Because we dropped the lock, we should re-check the
-		 * entry, as somebody else could have populated it..
-		 */
-		if (mm->context.huge_pgdir)
-			pgd_free(new);
-		else
-			mm->context.huge_pgdir = new;
-	}
-	return hugepgd_offset(mm, addr);
-}
+	pg = pgd_offset(mm, addr);
+	pu = pud_alloc(mm, pg, addr);
 
-static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr)
-{
-	if (! pud_present(*dir)) {
-		pte_t *new;
-
-		spin_unlock(&mm->page_table_lock);
-		new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
-		BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
-		spin_lock(&mm->page_table_lock);
-		/*
-		 * Because we dropped the lock, we should re-check the
-		 * entry, as somebody else could have populated it..
-		 */
-		if (pud_present(*dir)) {
-			if (new)
-				kmem_cache_free(zero_cache, new);
-		} else {
-			struct page *ptepage;
-
-			if (! new)
-				return NULL;
-			ptepage = virt_to_page(new);
-			ptepage->mapping = (void *) mm;
-			ptepage->index = addr & HUGEPGDIR_MASK;
-			pud_populate(mm, dir, new);
+	if (pu) {
+		pm = pmd_alloc(mm, pu, addr);
+		if (pm) {
+			pt = (pte_t *)pm;
+			BUG_ON(!pmd_none(*pm)
+			       && !(pte_present(*pt) && pte_huge(*pt)));
+			return pt;
 		}
 	}
 
-	return hugepte_offset(dir, addr);
+	return NULL;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
-	pud_t *pud;
+#define HUGEPTE_BATCH_SIZE	(HPAGE_SIZE / PMD_SIZE)
 
-	BUG_ON(! in_hugepage_area(mm->context, addr));
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte)
+{
+	int i;
 
-	pud = hugepgd_offset(mm, addr);
-	if (! pud)
-		return NULL;
+	if (pte_present(*ptep)) {
+		pte_clear(mm, addr, ptep);
+		flush_tlb_pending();
+	}
 
-	return hugepte_offset(pud, addr);
+	for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
+		*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+		ptep++;
+	}
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep)
 {
-	pud_t *pud;
+	unsigned long old = pte_update(ptep, ~0UL);
+	int i;
 
-	BUG_ON(! in_hugepage_area(mm->context, addr));
+	if (old & _PAGE_HASHPTE)
+		hpte_update(mm, addr, old, 0);
 
-	pud = hugepgd_alloc(mm, addr);
-	if (! pud)
-		return NULL;
+	for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
+		ptep[i] = __pte(0);
 
-	return hugepte_alloc(mm, pud, addr);
+	return __pte(old);
 }
 
 /*
@@ -162,15 +132,17 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
 	return 0;
 }
 
-static void flush_segments(void *parm)
+static void flush_low_segments(void *parm)
 {
-	u16 segs = (unsigned long) parm;
+	u16 areas = (unsigned long) parm;
 	unsigned long i;
 
 	asm volatile("isync" : : : "memory");
 
-	for (i = 0; i < 16; i++) {
-		if (! (segs & (1U << i)))
+	BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS);
+
+	for (i = 0; i < NUM_LOW_AREAS; i++) {
+		if (! (areas & (1U << i)))
 			continue;
 		asm volatile("slbie %0" : : "r" (i << SID_SHIFT));
 	}
@@ -178,13 +150,33 @@ static void flush_segments(void *parm)
 	asm volatile("isync" : : : "memory");
 }
 
-static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg)
+static void flush_high_segments(void *parm)
 {
-	unsigned long start = seg << SID_SHIFT;
-	unsigned long end = (seg+1) << SID_SHIFT;
+	u16 areas = (unsigned long) parm;
+	unsigned long i, j;
+
+	asm volatile("isync" : : : "memory");
+
+	BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS);
+
+	for (i = 0; i < NUM_HIGH_AREAS; i++) {
+		if (! (areas & (1U << i)))
+			continue;
+		for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
+			asm volatile("slbie %0"
+				     :: "r" ((i << HTLB_AREA_SHIFT) + (j << SID_SHIFT)));
+	}
+
+	asm volatile("isync" : : : "memory");
+}
+
+static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
+{
+	unsigned long start = area << SID_SHIFT;
+	unsigned long end = (area+1) << SID_SHIFT;
 	struct vm_area_struct *vma;
 
-	BUG_ON(seg >= 16);
+	BUG_ON(area >= NUM_LOW_AREAS);
 
 	/* Check no VMAs are in the region */
 	vma = find_vma(mm, start);
@@ -194,20 +186,39 @@ static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg)
 	return 0;
 }
 
-static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs)
+static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
+{
+	unsigned long start = area << HTLB_AREA_SHIFT;
+	unsigned long end = (area+1) << HTLB_AREA_SHIFT;
+	struct vm_area_struct *vma;
+
+	BUG_ON(area >= NUM_HIGH_AREAS);
+
+	/* Check no VMAs are in the region */
+	vma = find_vma(mm, start);
+	if (vma && (vma->vm_start < end))
+		return -EBUSY;
+
+	return 0;
+}
+
+static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
 {
 	unsigned long i;
 
-	newsegs &= ~(mm->context.htlb_segs);
-	if (! newsegs)
+	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
+	BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);
+
+	newareas &= ~(mm->context.low_htlb_areas);
+	if (! newareas)
 		return 0; /* The segments we want are already open */
 
-	for (i = 0; i < 16; i++)
-		if ((1 << i) & newsegs)
-			if (prepare_low_seg_for_htlb(mm, i) != 0)
+	for (i = 0; i < NUM_LOW_AREAS; i++)
+		if ((1 << i) & newareas)
+			if (prepare_low_area_for_htlb(mm, i) != 0)
 				return -EBUSY;
 
-	mm->context.htlb_segs |= newsegs;
+	mm->context.low_htlb_areas |= newareas;
 
 	/* update the paca copy of the context struct */
 	get_paca()->context = mm->context;
@@ -215,29 +226,63 @@ static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs)
 	/* the context change must make it to memory before the flush,
 	 * so that further SLB misses do the right thing. */
 	mb();
-	on_each_cpu(flush_segments, (void *)(unsigned long)newsegs, 0, 1);
+	on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1);
+
+	return 0;
+}
+
+static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
+{
+	unsigned long i;
+
+	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
+	BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
+		     != NUM_HIGH_AREAS);
+
+	newareas &= ~(mm->context.high_htlb_areas);
+	if (! newareas)
+		return 0; /* The areas we want are already open */
+
+	for (i = 0; i < NUM_HIGH_AREAS; i++)
+		if ((1 << i) & newareas)
+			if (prepare_high_area_for_htlb(mm, i) != 0)
+				return -EBUSY;
+
+	mm->context.high_htlb_areas |= newareas;
+
+	/* update the paca copy of the context struct */
+	get_paca()->context = mm->context;
+
+	/* the context change must make it to memory before the flush,
+	 * so that further SLB misses do the right thing. */
+	mb();
+	on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1);
 
 	return 0;
 }
 
 int prepare_hugepage_range(unsigned long addr, unsigned long len)
 {
-	if (within_hugepage_high_range(addr, len))
-		return 0;
-	else if ((addr < 0x100000000UL) && ((addr+len) < 0x100000000UL)) {
-		int err;
-		/* Yes, we need both tests, in case addr+len overflows
-		 * 64-bit arithmetic */
-		err = open_low_hpage_segs(current->mm,
+	int err;
+
+	if ( (addr+len) < addr )
+		return -EINVAL;
+
+	if ((addr + len) < 0x100000000UL)
+		err = open_low_hpage_areas(current->mm,
 					  LOW_ESID_MASK(addr, len));
-		if (err)
-			printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
-			       " failed (segs: 0x%04hx)\n", addr, len,
-			       LOW_ESID_MASK(addr, len));
+	else
+		err = open_high_hpage_areas(current->mm,
+					    HTLB_AREA_MASK(addr, len));
+	if (err) {
+		printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
+		       " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
+		       addr, len,
+		       LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
 		return err;
 	}
 
-	return -EINVAL;
+	return 0;
 }
 
 struct page *
@@ -309,8 +354,8 @@ full_search:
 			vma = find_vma(mm, addr);
 			continue;
 		}
-		if (touches_hugepage_high_range(addr, len)) {
-			addr = TASK_HPAGE_END;
+		if (touches_hugepage_high_range(mm, addr, len)) {
+			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
 			vma = find_vma(mm, addr);
 			continue;
 		}
@@ -389,8 +434,9 @@ hugepage_recheck:
 		if (touches_hugepage_low_range(mm, addr, len)) {
 			addr = (addr & ((~0) << SID_SHIFT)) - len;
 			goto hugepage_recheck;
-		} else if (touches_hugepage_high_range(addr, len)) {
-			addr = TASK_HPAGE_BASE - len;
+		} else if (touches_hugepage_high_range(mm, addr, len)) {
+			addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
+			goto hugepage_recheck;
 		}
 
 		/*
@@ -481,23 +527,28 @@ static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
 	return -ENOMEM;
 }
 
-static unsigned long htlb_get_high_area(unsigned long len)
+static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
 {
-	unsigned long addr = TASK_HPAGE_BASE;
+	unsigned long addr = 0x100000000UL;
 	struct vm_area_struct *vma;
 
 	vma = find_vma(current->mm, addr);
-	for (vma = find_vma(current->mm, addr);
-	     addr + len <= TASK_HPAGE_END;
-	     vma = vma->vm_next) {
+	while (addr + len <= TASK_SIZE_USER64) {
 		BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
-		BUG_ON(! within_hugepage_high_range(addr, len));
+
+		if (! __within_hugepage_high_range(addr, len, areamask)) {
+			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
+			vma = find_vma(current->mm, addr);
+			continue;
+		}
 
 		if (!vma || (addr + len) <= vma->vm_start)
 			return addr;
 		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
-		/* Because we're in a hugepage region, this alignment
-		 * should not skip us over any VMAs */
+		/* Depending on segmask this might not be a confirmed
+		 * hugepage region, so the ALIGN could have skipped
+		 * some VMAs */
+		vma = find_vma(current->mm, addr);
 	}
 
 	return -ENOMEM;
@@ -507,6 +558,9 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long len, unsigned long pgoff,
 					unsigned long flags)
 {
+	int lastshift;
+	u16 areamask, curareas;
+
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
 
@@ -514,67 +568,49 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		return -EINVAL;
 
 	if (test_thread_flag(TIF_32BIT)) {
-		int lastshift = 0;
-		u16 segmask, cursegs = current->mm->context.htlb_segs;
+		curareas = current->mm->context.low_htlb_areas;
 
 		/* First see if we can do the mapping in the existing
-		 * low hpage segments */
-		addr = htlb_get_low_area(len, cursegs);
+		 * low areas */
+		addr = htlb_get_low_area(len, curareas);
 		if (addr != -ENOMEM)
 			return addr;
 
-		for (segmask = LOW_ESID_MASK(0x100000000UL-len, len);
-		     ! lastshift; segmask >>=1) {
-			if (segmask & 1)
+		lastshift = 0;
+		for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
+		     ! lastshift; areamask >>=1) {
+			if (areamask & 1)
 				lastshift = 1;
 
-			addr = htlb_get_low_area(len, cursegs | segmask);
+			addr = htlb_get_low_area(len, curareas | areamask);
 			if ((addr != -ENOMEM)
-			    && open_low_hpage_segs(current->mm, segmask) == 0)
+			    && open_low_hpage_areas(current->mm, areamask) == 0)
 				return addr;
 		}
-		printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
-		       " enough segments\n");
-		return -ENOMEM;
 	} else {
-		return htlb_get_high_area(len);
-	}
-}
-
-void hugetlb_mm_free_pgd(struct mm_struct *mm)
-{
-	int i;
-	pgd_t *pgdir;
-
-	spin_lock(&mm->page_table_lock);
-
-	pgdir = mm->context.huge_pgdir;
-	if (! pgdir)
-		goto out;
-
-	mm->context.huge_pgdir = NULL;
+		curareas = current->mm->context.high_htlb_areas;
 
-	/* cleanup any hugepte pages leftover */
-	for (i = 0; i < PTRS_PER_HUGEPGD; i++) {
-		pud_t *pud = (pud_t *)(pgdir + i);
-
-		if (! pud_none(*pud)) {
-			pte_t *pte = (pte_t *)pud_page(*pud);
-			struct page *ptepage = virt_to_page(pte);
+		/* First see if we can do the mapping in the existing
+		 * high areas */
+		addr = htlb_get_high_area(len, curareas);
+		if (addr != -ENOMEM)
+			return addr;
 
-			ptepage->mapping = NULL;
+		lastshift = 0;
+		for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
+		     ! lastshift; areamask >>=1) {
+			if (areamask & 1)
+				lastshift = 1;
 
-			BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE));
-			kmem_cache_free(zero_cache, pte);
+			addr = htlb_get_high_area(len, curareas | areamask);
+			if ((addr != -ENOMEM)
+			    && open_high_hpage_areas(current->mm, areamask) == 0)
+				return addr;
 		}
-		pud_clear(pud);
 	}
-
-	BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE));
-	kmem_cache_free(zero_cache, pgdir);
-
- out:
-	spin_unlock(&mm->page_table_lock);
+	printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
+	       " enough areas\n");
+	return -ENOMEM;
 }
 
 int hash_huge_page(struct mm_struct *mm, unsigned long access,

Some files were not shown because too many files changed in this diff