Эх сурвалжийг харах

Auto-update from upstream

Tony Luck 20 жил өмнө
parent
commit
288ceb8f14
100 өөрчлөгдсөн 2622 нэмэгдсэн , 2024 устгасан
  1. 12 0
      Documentation/feature-removal-schedule.txt
  2. 288 0
      Documentation/networking/phy.txt
  3. 1 1
      Makefile
  4. 5 6
      arch/alpha/kernel/signal.c
  5. 2 37
      arch/arm/Kconfig
  6. 3 0
      arch/arm/common/Kconfig
  7. 1 0
      arch/arm/common/Makefile
  8. 166 0
      arch/arm/common/gic.c
  9. 3 2
      arch/arm/kernel/signal.c
  10. 7 0
      arch/arm/mach-sa1100/assabet.c
  11. 7 0
      arch/arm/mach-sa1100/cerf.c
  12. 5 0
      arch/arm/mach-sa1100/generic.c
  13. 3 0
      arch/arm/mach-sa1100/generic.h
  14. 12 0
      arch/arm/mach-sa1100/lart.c
  15. 7 0
      arch/arm/mach-sa1100/shannon.c
  16. 7 0
      arch/arm/mach-sa1100/simpad.c
  17. 6 7
      arch/arm26/kernel/signal.c
  18. 5 6
      arch/cris/arch-v10/kernel/signal.c
  19. 5 6
      arch/cris/arch-v32/kernel/signal.c
  20. 5 6
      arch/frv/kernel/signal.c
  21. 5 6
      arch/h8300/kernel/signal.c
  22. 3 2
      arch/i386/kernel/signal.c
  23. 6 9
      arch/ia64/kernel/signal.c
  24. 5 6
      arch/m32r/kernel/signal.c
  25. 5 6
      arch/m68knommu/kernel/signal.c
  26. 5 6
      arch/mips/kernel/irixsig.c
  27. 5 6
      arch/mips/kernel/signal.c
  28. 5 6
      arch/mips/kernel/signal32.c
  29. 5 6
      arch/parisc/kernel/signal.c
  30. 10 1
      arch/ppc/Makefile
  31. 0 203
      arch/ppc/boot/utils/addRamDisk.c
  32. 5 6
      arch/ppc/kernel/signal.c
  33. 34 40
      arch/ppc64/Kconfig
  34. 9 0
      arch/ppc64/Makefile
  35. 2 2
      arch/ppc64/boot/Makefile
  36. 2 2
      arch/ppc64/boot/addnote.c
  37. 1 1
      arch/ppc64/boot/crt0.S
  38. 1 1
      arch/ppc64/boot/div64.S
  39. 149 0
      arch/ppc64/boot/elf.h
  40. 20 35
      arch/ppc64/boot/main.c
  41. 34 0
      arch/ppc64/boot/page.h
  42. 0 36
      arch/ppc64/boot/ppc32-types.h
  43. 62 0
      arch/ppc64/boot/ppc_asm.h
  44. 27 169
      arch/ppc64/boot/prom.c
  45. 18 0
      arch/ppc64/boot/prom.h
  46. 16 0
      arch/ppc64/boot/stdio.h
  47. 1 1
      arch/ppc64/boot/string.S
  48. 16 0
      arch/ppc64/boot/string.h
  49. 1 1
      arch/ppc64/boot/zlib.c
  50. 3 3
      arch/ppc64/configs/g5_defconfig
  51. 3 4
      arch/ppc64/configs/iSeries_defconfig
  52. 3 3
      arch/ppc64/configs/maple_defconfig
  53. 3 3
      arch/ppc64/configs/pSeries_defconfig
  54. 3 3
      arch/ppc64/defconfig
  55. 11 26
      arch/ppc64/kernel/LparData.c
  56. 5 2
      arch/ppc64/kernel/Makefile
  57. 2 1
      arch/ppc64/kernel/asm-offsets.c
  58. 1 39
      arch/ppc64/kernel/cputable.c
  59. 47 0
      arch/ppc64/kernel/firmware.c
  60. 206 317
      arch/ppc64/kernel/head.S
  61. 4 1
      arch/ppc64/kernel/iSeries_htab.c
  62. 26 4
      arch/ppc64/kernel/iSeries_setup.c
  63. 155 0
      arch/ppc64/kernel/iSeries_vio.c
  64. 39 112
      arch/ppc64/kernel/lmb.c
  65. 4 3
      arch/ppc64/kernel/lparcfg.c
  66. 98 0
      arch/ppc64/kernel/misc.S
  67. 0 2
      arch/ppc64/kernel/of_device.c
  68. 2 1
      arch/ppc64/kernel/pSeries_iommu.c
  69. 1 3
      arch/ppc64/kernel/pSeries_lpar.c
  70. 29 10
      arch/ppc64/kernel/pSeries_setup.c
  71. 2 1
      arch/ppc64/kernel/pSeries_smp.c
  72. 273 0
      arch/ppc64/kernel/pSeries_vio.c
  73. 2 2
      arch/ppc64/kernel/pacaData.c
  74. 2 0
      arch/ppc64/kernel/pmac_setup.c
  75. 21 0
      arch/ppc64/kernel/pmc.c
  76. 5 7
      arch/ppc64/kernel/process.c
  77. 152 32
      arch/ppc64/kernel/prom.c
  78. 59 34
      arch/ppc64/kernel/prom_init.c
  79. 17 2
      arch/ppc64/kernel/rtas_pci.c
  80. 17 11
      arch/ppc64/kernel/setup.c
  81. 3 2
      arch/ppc64/kernel/signal.c
  82. 3 2
      arch/ppc64/kernel/signal32.c
  83. 5 52
      arch/ppc64/kernel/sysfs.c
  84. 3 4
      arch/ppc64/kernel/time.c
  85. 35 409
      arch/ppc64/kernel/vio.c
  86. 1 3
      arch/ppc64/mm/hash_low.S
  87. 1 2
      arch/ppc64/mm/hash_native.c
  88. 2 2
      arch/ppc64/mm/hash_utils.c
  89. 212 176
      arch/ppc64/mm/hugetlbpage.c
  90. 1 1
      arch/ppc64/mm/imalloc.c
  91. 54 42
      arch/ppc64/mm/init.c
  92. 1 1
      arch/ppc64/mm/numa.c
  93. 13 12
      arch/ppc64/mm/slb_low.S
  94. 55 40
      arch/ppc64/mm/tlb.c
  95. 1 0
      arch/ppc64/oprofile/common.c
  96. 1 1
      arch/ppc64/xmon/start.c
  97. 19 9
      arch/ppc64/xmon/xmon.c
  98. 5 6
      arch/s390/kernel/compat_signal.c
  99. 5 6
      arch/s390/kernel/signal.c
  100. 5 6
      arch/sh/kernel/signal.c

+ 12 - 0
Documentation/feature-removal-schedule.txt

@@ -135,3 +135,15 @@ Why:	With the 16-bit PCMCIA subsystem now behaving (almost) like a
 	pcmciautils package available at
 	http://kernel.org/pub/linux/utils/kernel/pcmcia/
 Who:	Dominik Brodowski <linux@brodo.de>
+
+---------------------------
+
+What:	ip_queue and ip6_queue (old ipv4-only and ipv6-only netfilter queue)
+When:	December 2005
+Why:	This interface has been obsoleted by the new layer3-independent
+	"nfnetlink_queue".  The Kernel interface is compatible, so the old
+	ip[6]tables "QUEUE" targets still work and will transparently handle
+	all packets into nfnetlink queue number 0.  Userspace users will have
+	to link against API-compatible library on top of libnfnetlink_queue 
+	instead of the current 'libipq'.
+Who:	Harald Welte <laforge@netfilter.org>

+ 288 - 0
Documentation/networking/phy.txt

@@ -0,0 +1,288 @@
+
+-------
+PHY Abstraction Layer
+(Updated 2005-07-21)
+
+Purpose
+
+ Most network devices consist of set of registers which provide an interface
+ to a MAC layer, which communicates with the physical connection through a
+ PHY.  The PHY concerns itself with negotiating link parameters with the link
+ partner on the other side of the network connection (typically, an ethernet
+ cable), and provides a register interface to allow drivers to determine what
+ settings were chosen, and to configure what settings are allowed.
+
+ While these devices are distinct from the network devices, and conform to a
+ standard layout for the registers, it has been common practice to integrate
+ the PHY management code with the network driver.  This has resulted in large
+ amounts of redundant code.  Also, on embedded systems with multiple (and
+ sometimes quite different) ethernet controllers connected to the same 
+ management bus, it is difficult to ensure safe use of the bus.
+
+ Since the PHYs are devices, and the management busses through which they are
+ accessed are, in fact, busses, the PHY Abstraction Layer treats them as such.
+ In doing so, it has these goals:
+
+   1) Increase code-reuse
+   2) Increase overall code-maintainability
+   3) Speed development time for new network drivers, and for new systems
+ 
+ Basically, this layer is meant to provide an interface to PHY devices which
+ allows network driver writers to write as little code as possible, while
+ still providing a full feature set.
+
+The MDIO bus
+
+ Most network devices are connected to a PHY by means of a management bus.
+ Different devices use different busses (though some share common interfaces).
+ In order to take advantage of the PAL, each bus interface needs to be
+ registered as a distinct device.
+
+ 1) read and write functions must be implemented.  Their prototypes are:
+
+     int write(struct mii_bus *bus, int mii_id, int regnum, u16 value);
+     int read(struct mii_bus *bus, int mii_id, int regnum);
+
+   mii_id is the address on the bus for the PHY, and regnum is the register
+   number.  These functions are guaranteed not to be called from interrupt
+   time, so it is safe for them to block, waiting for an interrupt to signal
+   the operation is complete
+ 
+ 2) A reset function is necessary.  This is used to return the bus to an
+   initialized state.
+
+ 3) A probe function is needed.  This function should set up anything the bus
+   driver needs, setup the mii_bus structure, and register with the PAL using
+   mdiobus_register.  Similarly, there's a remove function to undo all of
+   that (use mdiobus_unregister).
+ 
+ 4) Like any driver, the device_driver structure must be configured, and init
+   exit functions are used to register the driver.
+
+ 5) The bus must also be declared somewhere as a device, and registered.
+
+ As an example for how one driver implemented an mdio bus driver, see
+ drivers/net/gianfar_mii.c and arch/ppc/syslib/mpc85xx_devices.c
+
+Connecting to a PHY
+
+ Sometime during startup, the network driver needs to establish a connection
+ between the PHY device, and the network device.  At this time, the PHY's bus
+ and drivers need to all have been loaded, so it is ready for the connection.
+ At this point, there are several ways to connect to the PHY:
+
+ 1) The PAL handles everything, and only calls the network driver when
+   the link state changes, so it can react.
+
+ 2) The PAL handles everything except interrupts (usually because the
+   controller has the interrupt registers).
+
+ 3) The PAL handles everything, but checks in with the driver every second,
+   allowing the network driver to react first to any changes before the PAL
+   does.
+ 
+ 4) The PAL serves only as a library of functions, with the network device
+   manually calling functions to update status, and configure the PHY
+
+
+Letting the PHY Abstraction Layer do Everything
+
+ If you choose option 1 (The hope is that every driver can, but to still be
+ useful to drivers that can't), connecting to the PHY is simple:
+
+ First, you need a function to react to changes in the link state.  This
+ function follows this protocol:
+
+   static void adjust_link(struct net_device *dev);
+ 
+ Next, you need to know the device name of the PHY connected to this device. 
+ The name will look something like, "phy0:0", where the first number is the
+ bus id, and the second is the PHY's address on that bus.
+ 
+ Now, to connect, just call this function:
+ 
+   phydev = phy_connect(dev, phy_name, &adjust_link, flags);
+
+ phydev is a pointer to the phy_device structure which represents the PHY.  If
+ phy_connect is successful, it will return the pointer.  dev, here, is the
+ pointer to your net_device.  Once done, this function will have started the
+ PHY's software state machine, and registered for the PHY's interrupt, if it
+ has one.  The phydev structure will be populated with information about the
+ current state, though the PHY will not yet be truly operational at this
+ point.
+
+ flags is a u32 which can optionally contain phy-specific flags.
+ This is useful if the system has put hardware restrictions on
+ the PHY/controller, of which the PHY needs to be aware.
+
+ Now just make sure that phydev->supported and phydev->advertising have any
+ values pruned from them which don't make sense for your controller (a 10/100
+ controller may be connected to a gigabit capable PHY, so you would need to
+ mask off SUPPORTED_1000baseT*).  See include/linux/ethtool.h for definitions
+ for these bitfields. Note that you should not SET any bits, or the PHY may
+ get put into an unsupported state.
+
+ Lastly, once the controller is ready to handle network traffic, you call
+ phy_start(phydev).  This tells the PAL that you are ready, and configures the
+ PHY to connect to the network.  If you want to handle your own interrupts,
+ just set phydev->irq to PHY_IGNORE_INTERRUPT before you call phy_start.
+ Similarly, if you don't want to use interrupts, set phydev->irq to PHY_POLL.
+
+ When you want to disconnect from the network (even if just briefly), you call
+ phy_stop(phydev).
+
+Keeping Close Tabs on the PAL
+
+ It is possible that the PAL's built-in state machine needs a little help to
+ keep your network device and the PHY properly in sync.  If so, you can
+ register a helper function when connecting to the PHY, which will be called
+ every second before the state machine reacts to any changes.  To do this, you
+ need to manually call phy_attach() and phy_prepare_link(), and then call
+ phy_start_machine() with the second argument set to point to your special
+ handler.
+
+ Currently there are no examples of how to use this functionality, and testing
+ on it has been limited because the author does not have any drivers which use
+ it (they all use option 1).  So Caveat Emptor.
+
+Doing it all yourself
+
+ There's a remote chance that the PAL's built-in state machine cannot track
+ the complex interactions between the PHY and your network device.  If this is
+ so, you can simply call phy_attach(), and not call phy_start_machine or
+ phy_prepare_link().  This will mean that phydev->state is entirely yours to
+ handle (phy_start and phy_stop toggle between some of the states, so you
+ might need to avoid them).
+
+ An effort has been made to make sure that useful functionality can be
+ accessed without the state-machine running, and most of these functions are
+ descended from functions which did not interact with a complex state-machine.
+ However, again, no effort has been made so far to test running without the
+ state machine, so tryer beware.
+
+ Here is a brief rundown of the functions:
+
+ int phy_read(struct phy_device *phydev, u16 regnum);
+ int phy_write(struct phy_device *phydev, u16 regnum, u16 val);
+
+   Simple read/write primitives.  They invoke the bus's read/write function
+   pointers.
+
+ void phy_print_status(struct phy_device *phydev);
+ 
+   A convenience function to print out the PHY status neatly.
+
+ int phy_clear_interrupt(struct phy_device *phydev);
+ int phy_config_interrupt(struct phy_device *phydev, u32 interrupts);
+   
+   Clear the PHY's interrupt, and configure which ones are allowed,
+   respectively.  Currently only supports all on, or all off.
+ 
+ int phy_enable_interrupts(struct phy_device *phydev);
+ int phy_disable_interrupts(struct phy_device *phydev);
+
+   Functions which enable/disable PHY interrupts, clearing them
+   before and after, respectively.
+
+ int phy_start_interrupts(struct phy_device *phydev);
+ int phy_stop_interrupts(struct phy_device *phydev);
+
+   Requests the IRQ for the PHY interrupts, then enables them for
+   start, or disables then frees them for stop.
+
+ struct phy_device * phy_attach(struct net_device *dev, const char *phy_id,
+		 u32 flags);
+
+   Attaches a network device to a particular PHY, binding the PHY to a generic
+   driver if none was found during bus initialization.  Passes in
+   any phy-specific flags as needed.
+
+ int phy_start_aneg(struct phy_device *phydev);
+   
+   Using variables inside the phydev structure, either configures advertising
+   and resets autonegotiation, or disables autonegotiation, and configures
+   forced settings.
+
+ static inline int phy_read_status(struct phy_device *phydev);
+
+   Fills the phydev structure with up-to-date information about the current
+   settings in the PHY.
+
+ void phy_sanitize_settings(struct phy_device *phydev)
+   
+   Resolves differences between currently desired settings, and
+   supported settings for the given PHY device.  Does not make
+   the changes in the hardware, though.
+
+ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
+ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd);
+
+   Ethtool convenience functions.
+
+ int phy_mii_ioctl(struct phy_device *phydev,
+                 struct mii_ioctl_data *mii_data, int cmd);
+
+   The MII ioctl.  Note that this function will completely screw up the state
+   machine if you write registers like BMCR, BMSR, ADVERTISE, etc.  Best to
+   use this only to write registers which are not standard, and don't set off
+   a renegotiation.
+
+
+PHY Device Drivers
+
+ With the PHY Abstraction Layer, adding support for new PHYs is
+ quite easy.  In some cases, no work is required at all!  However,
+ many PHYs require a little hand-holding to get up-and-running.
+
+Generic PHY driver
+
+ If the desired PHY doesn't have any errata, quirks, or special
+ features you want to support, then it may be best to not add
+ support, and let the PHY Abstraction Layer's Generic PHY Driver
+ do all of the work.  
+
+Writing a PHY driver
+
+ If you do need to write a PHY driver, the first thing to do is
+ make sure it can be matched with an appropriate PHY device.
+ This is done during bus initialization by reading the device's
+ UID (stored in registers 2 and 3), then comparing it to each
+ driver's phy_id field by ANDing it with each driver's
+ phy_id_mask field.  Also, it needs a name.  Here's an example:
+
+   static struct phy_driver dm9161_driver = {
+         .phy_id         = 0x0181b880,
+	 .name           = "Davicom DM9161E",
+	 .phy_id_mask    = 0x0ffffff0,
+	 ...
+   }
+
+ Next, you need to specify what features (speed, duplex, autoneg,
+ etc) your PHY device and driver support.  Most PHYs support
+ PHY_BASIC_FEATURES, but you can look in include/mii.h for other
+ features.
+
+ Each driver consists of a number of function pointers:
+
+   config_init: configures PHY into a sane state after a reset.
+     For instance, a Davicom PHY requires descrambling disabled.
+   probe: Does any setup needed by the driver
+   suspend/resume: power management
+   config_aneg: Changes the speed/duplex/negotiation settings
+   read_status: Reads the current speed/duplex/negotiation settings
+   ack_interrupt: Clear a pending interrupt
+   config_intr: Enable or disable interrupts
+   remove: Does any driver take-down
+
+ Of these, only config_aneg and read_status are required to be
+ assigned by the driver code.  The rest are optional.  Also, it is
+ preferred to use the generic phy driver's versions of these two
+ functions if at all possible: genphy_read_status and
+ genphy_config_aneg.  If this is not possible, it is likely that
+ you only need to perform some actions before and after invoking
+ these functions, and so your functions will wrap the generic
+ ones.
+
+ Feel free to look at the Marvell, Cicada, and Davicom drivers in
+ drivers/net/phy/ for examples (the lxt and qsemi drivers have
+ not been tested as of this writing)

+ 1 - 1
Makefile

@@ -2,7 +2,7 @@ VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 13
 EXTRAVERSION =
-NAME=Woozy Numbat
+NAME=Affluent Albatross
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"

+ 5 - 6
arch/alpha/kernel/signal.c

@@ -566,13 +566,12 @@ handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info,
 	if (ka->sa.sa_flags & SA_RESETHAND)
 		ka->sa.sa_handler = SIG_DFL;
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER)) 
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 static inline void

+ 2 - 37
arch/arm/Kconfig

@@ -635,10 +635,6 @@ config PM
 	  and the Battery Powered Linux mini-HOWTO, available from
 	  <http://www.tldp.org/docs.html#howto>.
 
-	  Note that, even if you say N here, Linux on the x86 architecture
-	  will issue the hlt instruction if nothing is to be done, thereby
-	  sending the processor to sleep and saving power.
-
 config APM
 	tristate "Advanced Power Management Emulation"
 	depends on PM
@@ -650,12 +646,6 @@ config APM
 	  battery status information, and user-space programs will receive
 	  notification of APM "events" (e.g. battery status change).
 
-	  If you select "Y" here, you can disable actual use of the APM
-	  BIOS by passing the "apm=off" option to the kernel at boot time.
-
-	  Note that the APM support is almost completely disabled for
-	  machines with more than one CPU.
-
 	  In order to use APM, you will need supporting software. For location
 	  and more information, read <file:Documentation/pm.txt> and the
 	  Battery Powered Linux mini-HOWTO, available from
@@ -665,39 +655,12 @@ config APM
 	  manpage ("man 8 hdparm") for that), and it doesn't turn off
 	  VESA-compliant "green" monitors.
 
-	  This driver does not support the TI 4000M TravelMate and the ACER
-	  486/DX4/75 because they don't have compliant BIOSes. Many "green"
-	  desktop machines also don't have compliant BIOSes, and this driver
-	  may cause those machines to panic during the boot phase.
-
 	  Generally, if you don't have a battery in your machine, there isn't
 	  much point in using this driver and you should say N. If you get
 	  random kernel OOPSes or reboots that don't seem to be related to
 	  anything, try disabling/enabling this option (or disabling/enabling
 	  APM in your BIOS).
 
-	  Some other things you should try when experiencing seemingly random,
-	  "weird" problems:
-
-	  1) make sure that you have enough swap space and that it is
-	  enabled.
-	  2) pass the "no-hlt" option to the kernel
-	  3) switch on floating point emulation in the kernel and pass
-	  the "no387" option to the kernel
-	  4) pass the "floppy=nodma" option to the kernel
-	  5) pass the "mem=4M" option to the kernel (thereby disabling
-	  all but the first 4 MB of RAM)
-	  6) make sure that the CPU is not over clocked.
-	  7) read the sig11 FAQ at <http://www.bitwizard.nl/sig11/>
-	  8) disable the cache from your BIOS settings
-	  9) install a fan for the video card or exchange video RAM
-	  10) install a better fan for the CPU
-	  11) exchange RAM chips
-	  12) exchange the motherboard.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called apm.
-
 endmenu
 
 source "net/Kconfig"
@@ -752,6 +715,8 @@ source "drivers/hwmon/Kconfig"
 
 source "drivers/misc/Kconfig"
 
+source "drivers/mfd/Kconfig"
+
 source "drivers/media/Kconfig"
 
 source "drivers/video/Kconfig"

+ 3 - 0
arch/arm/common/Kconfig

@@ -1,6 +1,9 @@
 config ICST525
 	bool
 
+config ARM_GIC
+	bool
+
 config ICST307
 	bool
 

+ 1 - 0
arch/arm/common/Makefile

@@ -4,6 +4,7 @@
 
 obj-y				+= rtctime.o
 obj-$(CONFIG_ARM_AMBA)		+= amba.o
+obj-$(CONFIG_ARM_GIC)		+= gic.o
 obj-$(CONFIG_ICST525)		+= icst525.o
 obj-$(CONFIG_ICST307)		+= icst307.o
 obj-$(CONFIG_SA1111)		+= sa1111.o

+ 166 - 0
arch/arm/common/gic.c

@@ -0,0 +1,166 @@
+/*
+ *  linux/arch/arm/common/gic.c
+ *
+ *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Interrupt architecture for the GIC:
+ *
+ * o There is one Interrupt Distributor, which receives interrupts
+ *   from system devices and sends them to the Interrupt Controllers.
+ *
+ * o There is one CPU Interface per CPU, which sends interrupts sent
+ *   by the Distributor, and interrupts generated locally, to the
+ *   associated CPU.
+ *
+ * Note that IRQs 0-31 are special - they are local to each CPU.
+ * As such, the enable set/clear, pending set/clear and active bit
+ * registers are banked per-cpu for these sources.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/mach/irq.h>
+#include <asm/hardware/gic.h>
+
+static void __iomem *gic_dist_base;
+static void __iomem *gic_cpu_base;
+
+/*
+ * Routines to acknowledge, disable and enable interrupts
+ *
+ * Linux assumes that when we're done with an interrupt we need to
+ * unmask it, in the same way we need to unmask an interrupt when
+ * we first enable it.
+ *
+ * The GIC has a seperate notion of "end of interrupt" to re-enable
+ * an interrupt after handling, in order to support hardware
+ * prioritisation.
+ *
+ * We can make the GIC behave in the way that Linux expects by making
+ * our "acknowledge" routine disable the interrupt, then mark it as
+ * complete.
+ */
+static void gic_ack_irq(unsigned int irq)
+{
+	u32 mask = 1 << (irq % 32);
+	writel(mask, gic_dist_base + GIC_DIST_ENABLE_CLEAR + (irq / 32) * 4);
+	writel(irq, gic_cpu_base + GIC_CPU_EOI);
+}
+
+static void gic_mask_irq(unsigned int irq)
+{
+	u32 mask = 1 << (irq % 32);
+	writel(mask, gic_dist_base + GIC_DIST_ENABLE_CLEAR + (irq / 32) * 4);
+}
+
+static void gic_unmask_irq(unsigned int irq)
+{
+	u32 mask = 1 << (irq % 32);
+	writel(mask, gic_dist_base + GIC_DIST_ENABLE_SET + (irq / 32) * 4);
+}
+
+static void gic_set_cpu(struct irqdesc *desc, unsigned int irq, unsigned int cpu)
+{
+	void __iomem *reg = gic_dist_base + GIC_DIST_TARGET + (irq & ~3);
+	unsigned int shift = (irq % 4) * 8;
+	u32 val;
+
+	val = readl(reg) & ~(0xff << shift);
+	val |= 1 << (cpu + shift);
+	writel(val, reg);
+}
+
+static struct irqchip gic_chip = {
+	.ack		= gic_ack_irq,
+	.mask		= gic_mask_irq,
+	.unmask		= gic_unmask_irq,
+#ifdef CONFIG_SMP
+	.set_cpu	= gic_set_cpu,
+#endif
+};
+
+void __init gic_dist_init(void __iomem *base)
+{
+	unsigned int max_irq, i;
+	u32 cpumask = 1 << smp_processor_id();
+
+	cpumask |= cpumask << 8;
+	cpumask |= cpumask << 16;
+
+	gic_dist_base = base;
+
+	writel(0, base + GIC_DIST_CTRL);
+
+	/*
+	 * Find out how many interrupts are supported.
+	 */
+	max_irq = readl(base + GIC_DIST_CTR) & 0x1f;
+	max_irq = (max_irq + 1) * 32;
+
+	/*
+	 * The GIC only supports up to 1020 interrupt sources.
+	 * Limit this to either the architected maximum, or the
+	 * platform maximum.
+	 */
+	if (max_irq > max(1020, NR_IRQS))
+		max_irq = max(1020, NR_IRQS);
+
+	/*
+	 * Set all global interrupts to be level triggered, active low.
+	 */
+	for (i = 32; i < max_irq; i += 16)
+		writel(0, base + GIC_DIST_CONFIG + i * 4 / 16);
+
+	/*
+	 * Set all global interrupts to this CPU only.
+	 */
+	for (i = 32; i < max_irq; i += 4)
+		writel(cpumask, base + GIC_DIST_TARGET + i * 4 / 4);
+
+	/*
+	 * Set priority on all interrupts.
+	 */
+	for (i = 0; i < max_irq; i += 4)
+		writel(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4);
+
+	/*
+	 * Disable all interrupts.
+	 */
+	for (i = 0; i < max_irq; i += 32)
+		writel(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i * 4 / 32);
+
+	/*
+	 * Setup the Linux IRQ subsystem.
+	 */
+	for (i = 29; i < max_irq; i++) {
+		set_irq_chip(i, &gic_chip);
+		set_irq_handler(i, do_level_IRQ);
+		set_irq_flags(i, IRQF_VALID | IRQF_PROBE);
+	}
+
+	writel(1, base + GIC_DIST_CTRL);
+}
+
+void __cpuinit gic_cpu_init(void __iomem *base)
+{
+	gic_cpu_base = base;
+	writel(0xf0, base + GIC_CPU_PRIMASK);
+	writel(1, base + GIC_CPU_CTRL);
+}
+
+#ifdef CONFIG_SMP
+void gic_raise_softirq(cpumask_t cpumask, unsigned int irq)
+{
+	unsigned long map = *cpus_addr(cpumask);
+
+	writel(map << 16 | irq, gic_dist_base + GIC_DIST_SOFTINT);
+}
+#endif

+ 3 - 2
arch/arm/kernel/signal.c

@@ -658,11 +658,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
 	/*
 	 * Block the signal if we were unsuccessful.
 	 */
-	if (ret != 0 || !(ka->sa.sa_flags & SA_NODEFER)) {
+	if (ret != 0) {
 		spin_lock_irq(&tsk->sighand->siglock);
 		sigorsets(&tsk->blocked, &tsk->blocked,
 			  &ka->sa.sa_mask);
-		sigaddset(&tsk->blocked, sig);
+		if (!(ka->sa.sa_flags & SA_NODEFER))
+			sigaddset(&tsk->blocked, sig);
 		recalc_sigpending();
 		spin_unlock_irq(&tsk->sighand->siglock);
 	}

+ 7 - 0
arch/arm/mach-sa1100/assabet.c

@@ -35,6 +35,7 @@
 #include <asm/mach/map.h>
 #include <asm/mach/serial_sa1100.h>
 #include <asm/arch/assabet.h>
+#include <asm/arch/mcp.h>
 
 #include "generic.h"
 
@@ -198,6 +199,11 @@ static struct irda_platform_data assabet_irda_data = {
 	.set_speed	= assabet_irda_set_speed,
 };
 
+static struct mcp_plat_data assabet_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
 static void __init assabet_init(void)
 {
 	/*
@@ -246,6 +252,7 @@ static void __init assabet_init(void)
 	sa11x0_set_flash_data(&assabet_flash_data, assabet_flash_resources,
 			      ARRAY_SIZE(assabet_flash_resources));
 	sa11x0_set_irda_data(&assabet_irda_data);
+	sa11x0_set_mcp_data(&assabet_mcp_data);
 }
 
 /*

+ 7 - 0
arch/arm/mach-sa1100/cerf.c

@@ -29,6 +29,7 @@
 #include <asm/mach/serial_sa1100.h>
 
 #include <asm/arch/cerf.h>
+#include <asm/arch/mcp.h>
 #include "generic.h"
 
 static struct resource cerfuart2_resources[] = {
@@ -116,10 +117,16 @@ static void __init cerf_map_io(void)
 	GPDR |= CERF_GPIO_CF_RESET;
 }
 
+static struct mcp_plat_data cerf_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
 static void __init cerf_init(void)
 {
 	platform_add_devices(cerf_devices, ARRAY_SIZE(cerf_devices));
 	sa11x0_set_flash_data(&cerf_flash_data, &cerf_flash_resource, 1);
+	sa11x0_set_mcp_data(&cerf_mcp_data);
 }
 
 MACHINE_START(CERF, "Intrinsyc CerfBoard/CerfCube")

+ 5 - 0
arch/arm/mach-sa1100/generic.c

@@ -221,6 +221,11 @@ static struct platform_device sa11x0mcp_device = {
 	.resource	= sa11x0mcp_resources,
 };
 
+void sa11x0_set_mcp_data(struct mcp_plat_data *data)
+{
+	sa11x0mcp_device.dev.platform_data = data;
+}
+
 static struct resource sa11x0ssp_resources[] = {
 	[0] = {
 		.start	= 0x80070000,

+ 3 - 0
arch/arm/mach-sa1100/generic.h

@@ -34,5 +34,8 @@ struct resource;
 extern void sa11x0_set_flash_data(struct flash_platform_data *flash,
 				  struct resource *res, int nr);
 
+struct sa11x0_ssp_plat_ops;
+extern void sa11x0_set_ssp_data(struct sa11x0_ssp_plat_ops *ops);
+
 struct irda_platform_data;
 void sa11x0_set_irda_data(struct irda_platform_data *irda);

+ 12 - 0
arch/arm/mach-sa1100/lart.c

@@ -13,12 +13,23 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <asm/mach/serial_sa1100.h>
+#include <asm/arch/mcp.h>
 
 #include "generic.h"
 
 
 #warning "include/asm/arch-sa1100/ide.h needs fixing for lart"
 
+static struct mcp_plat_data lart_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
+static void __init lart_init(void)
+{
+	sa11x0_set_mcp_data(&lart_mcp_data);
+}
+
 static struct map_desc lart_io_desc[] __initdata = {
  /* virtual     physical    length      type */
   { 0xe8000000, 0x00000000, 0x00400000, MT_DEVICE }, /* main flash memory */
@@ -47,5 +58,6 @@ MACHINE_START(LART, "LART")
 	.boot_params	= 0xc0000100,
 	.map_io		= lart_map_io,
 	.init_irq	= sa1100_init_irq,
+	.init_machine	= lart_init,
 	.timer		= &sa1100_timer,
 MACHINE_END

+ 7 - 0
arch/arm/mach-sa1100/shannon.c

@@ -18,6 +18,7 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
 #include <asm/mach/serial_sa1100.h>
+#include <asm/arch/mcp.h>
 #include <asm/arch/shannon.h>
 
 #include "generic.h"
@@ -52,9 +53,15 @@ static struct resource shannon_flash_resource = {
 	.flags		= IORESOURCE_MEM,
 };
 
+static struct mcp_plat_data shannon_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
 static void __init shannon_init(void)
 {
 	sa11x0_set_flash_data(&shannon_flash_data, &shannon_flash_resource, 1);
+	sa11x0_set_mcp_data(&shannon_mcp_data);
 }
 
 static void __init shannon_map_io(void)

+ 7 - 0
arch/arm/mach-sa1100/simpad.c

@@ -23,6 +23,7 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
 #include <asm/mach/serial_sa1100.h>
+#include <asm/arch/mcp.h>
 #include <asm/arch/simpad.h>
 
 #include <linux/serial_core.h>
@@ -123,6 +124,11 @@ static struct resource simpad_flash_resources [] = {
 	}
 };
 
+static struct mcp_plat_data simpad_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
 
 
 static void __init simpad_map_io(void)
@@ -157,6 +163,7 @@ static void __init simpad_map_io(void)
 
 	sa11x0_set_flash_data(&simpad_flash_data, simpad_flash_resources,
 			      ARRAY_SIZE(simpad_flash_resources));
+	sa11x0_set_mcp_data(&simpad_mcp_data);
 }
 
 static void simpad_power_off(void)

+ 6 - 7
arch/arm26/kernel/signal.c

@@ -454,14 +454,13 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset,
 		if (ka->sa.sa_flags & SA_ONESHOT)
 			ka->sa.sa_handler = SIG_DFL;
 
-		if (!(ka->sa.sa_flags & SA_NODEFER)) {
-			spin_lock_irq(&tsk->sighand->siglock);
-			sigorsets(&tsk->blocked, &tsk->blocked,
-				  &ka->sa.sa_mask);
+		spin_lock_irq(&tsk->sighand->siglock);
+		sigorsets(&tsk->blocked, &tsk->blocked,
+			  &ka->sa.sa_mask);
+		if (!(ka->sa.sa_flags & SA_NODEFER))
 			sigaddset(&tsk->blocked, sig);
-			recalc_sigpending();
-			spin_unlock_irq(&tsk->sighand->siglock);
-		}
+		recalc_sigpending();
+		spin_unlock_irq(&tsk->sighand->siglock);
 		return;
 	}
 

+ 5 - 6
arch/cris/arch-v10/kernel/signal.c

@@ -517,13 +517,12 @@ handle_signal(int canrestart, unsigned long sig,
 	if (ka->sa.sa_flags & SA_ONESHOT)
 		ka->sa.sa_handler = SIG_DFL;
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 /*

+ 5 - 6
arch/cris/arch-v32/kernel/signal.c

@@ -568,13 +568,12 @@ handle_signal(int canrestart, unsigned long sig,
 	if (ka->sa.sa_flags & SA_ONESHOT)
 		ka->sa.sa_handler = SIG_DFL;
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 /*

+ 5 - 6
arch/frv/kernel/signal.c

@@ -506,13 +506,12 @@ static void handle_signal(unsigned long sig, siginfo_t *info,
 	else
 		setup_frame(sig, ka, oldset, regs);
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked, sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 } /* end handle_signal() */
 
 /*****************************************************************************/

+ 5 - 6
arch/h8300/kernel/signal.c

@@ -488,13 +488,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	else
 		setup_frame(sig, ka, oldset, regs);
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 /*

+ 3 - 2
arch/i386/kernel/signal.c

@@ -577,10 +577,11 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	else
 		ret = setup_frame(sig, ka, oldset, regs);
 
-	if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
+	if (ret) {
 		spin_lock_irq(&current->sighand->siglock);
 		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
-		sigaddset(&current->blocked,sig);
+		if (!(ka->sa.sa_flags & SA_NODEFER))
+			sigaddset(&current->blocked,sig);
 		recalc_sigpending();
 		spin_unlock_irq(&current->sighand->siglock);
 	}

+ 6 - 9
arch/ia64/kernel/signal.c

@@ -467,15 +467,12 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigse
 		if (!setup_frame(sig, ka, info, oldset, scr))
 			return 0;
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		{
-			sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
-			sigaddset(&current->blocked, sig);
-			recalc_sigpending();
-		}
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
+		sigaddset(&current->blocked, sig);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 	return 1;
 }
 

+ 5 - 6
arch/m32r/kernel/signal.c

@@ -341,13 +341,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 	/* Set up the stack frame */
 	setup_rt_frame(sig, ka, info, oldset, regs);
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 /*

+ 5 - 6
arch/m68knommu/kernel/signal.c

@@ -732,13 +732,12 @@ handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info,
 	if (ka->sa.sa_flags & SA_ONESHOT)
 		ka->sa.sa_handler = SIG_DFL;
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 /*

+ 5 - 6
arch/mips/kernel/irixsig.c

@@ -155,13 +155,12 @@ static inline void handle_signal(unsigned long sig, siginfo_t *info,
 	else
 		setup_irix_frame(ka, regs, sig, oldset);
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 asmlinkage int do_irix_signal(sigset_t *oldset, struct pt_regs *regs)

+ 5 - 6
arch/mips/kernel/signal.c

@@ -425,13 +425,12 @@ static inline void handle_signal(unsigned long sig, siginfo_t *info,
 		setup_frame(ka, regs, sig, oldset);
 #endif
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 extern int do_signal32(sigset_t *oldset, struct pt_regs *regs);

+ 5 - 6
arch/mips/kernel/signal32.c

@@ -751,13 +751,12 @@ static inline void handle_signal(unsigned long sig, siginfo_t *info,
 	else
 		setup_frame(ka, regs, sig, oldset);
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 int do_signal32(sigset_t *oldset, struct pt_regs *regs)

+ 5 - 6
arch/parisc/kernel/signal.c

@@ -517,13 +517,12 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	if (!setup_rt_frame(sig, ka, info, oldset, regs, in_syscall))
 		return 0;
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 	return 1;
 }
 

+ 10 - 1
arch/ppc/Makefile

@@ -21,11 +21,13 @@ CC		:= $(CC) -m32
 endif
 
 LDFLAGS_vmlinux	:= -Ttext $(KERNELLOAD) -Bstatic
-CPPFLAGS	+= -Iarch/$(ARCH)
+CPPFLAGS	+= -Iarch/$(ARCH) -Iinclude3
 AFLAGS		+= -Iarch/$(ARCH)
 CFLAGS		+= -Iarch/$(ARCH) -msoft-float -pipe \
 		-ffixed-r2 -mmultiple
 CPP		= $(CC) -E $(CFLAGS)
+# Temporary hack until we have migrated to asm-powerpc
+LINUXINCLUDE    += -Iinclude3
 
 CHECKFLAGS	+= -D__powerpc__
 
@@ -101,6 +103,7 @@ endef
 
 archclean:
 	$(Q)$(MAKE) $(clean)=arch/ppc/boot
+	$(Q)rm -rf include3
 
 prepare: include/asm-$(ARCH)/offsets.h checkbin
 
@@ -110,6 +113,12 @@ arch/$(ARCH)/kernel/asm-offsets.s: include/asm include/linux/version.h \
 include/asm-$(ARCH)/offsets.h: arch/$(ARCH)/kernel/asm-offsets.s
 	$(call filechk,gen-asm-offsets)
 
+# Temporary hack until we have migrated to asm-powerpc
+include/asm: include3/asm
+include3/asm:
+	$(Q)if [ ! -d include3 ]; then mkdir -p include3; fi
+	$(Q)ln -fsn $(srctree)/include/asm-powerpc include3/asm
+
 # Use the file '.tmp_gas_check' for binutils tests, as gas won't output
 # to stdout and these checks are run even on install targets.
 TOUT	:= .tmp_gas_check

+ 0 - 203
arch/ppc/boot/utils/addRamDisk.c

@@ -1,203 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <netinet/in.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-#define ElfHeaderSize  (64 * 1024)
-#define ElfPages  (ElfHeaderSize / 4096)
-#define KERNELBASE (0xc0000000)
-
-void get4k(FILE *file, char *buf )
-{
-    unsigned j;
-    unsigned num = fread(buf, 1, 4096, file);
-    for (  j=num; j<4096; ++j )
-	buf[j] = 0;
-}
-
-void put4k(FILE *file, char *buf )
-{
-    fwrite(buf, 1, 4096, file);
-}
-
-void death(const char *msg, FILE *fdesc, const char *fname)
-{
-    printf(msg);
-    fclose(fdesc);
-    unlink(fname);
-    exit(1);
-}
-
-int main(int argc, char **argv)
-{
-    char inbuf[4096];
-    FILE *ramDisk = NULL;
-    FILE *inputVmlinux = NULL;
-    FILE *outputVmlinux = NULL;
-    unsigned i = 0;
-    u_int32_t ramFileLen = 0;
-    u_int32_t ramLen = 0;
-    u_int32_t roundR = 0;
-    u_int32_t kernelLen = 0;
-    u_int32_t actualKernelLen = 0;
-    u_int32_t round = 0;
-    u_int32_t roundedKernelLen = 0;
-    u_int32_t ramStartOffs = 0;
-    u_int32_t ramPages = 0;
-    u_int32_t roundedKernelPages = 0;
-    u_int32_t hvReleaseData = 0;
-    u_int32_t eyeCatcher = 0xc8a5d9c4;
-    u_int32_t naca = 0;
-    u_int32_t xRamDisk = 0;
-    u_int32_t xRamDiskSize = 0;
-    if ( argc < 2 ) {
-	printf("Name of RAM disk file missing.\n");
-	exit(1);
-    }
-
-    if ( argc < 3 ) {
-	printf("Name of vmlinux file missing.\n");
-	exit(1);
-    }
-
-    if ( argc < 4 ) {
-	printf("Name of vmlinux output file missing.\n");
-	exit(1);
-    }
-
-    ramDisk = fopen(argv[1], "r");
-    if ( ! ramDisk ) {
-	printf("RAM disk file \"%s\" failed to open.\n", argv[1]);
-	exit(1);
-    }
-    inputVmlinux = fopen(argv[2], "r");
-    if ( ! inputVmlinux ) {
-	printf("vmlinux file \"%s\" failed to open.\n", argv[2]);
-	exit(1);
-    }
-    outputVmlinux = fopen(argv[3], "w+");
-    if ( ! outputVmlinux ) {
-	printf("output vmlinux file \"%s\" failed to open.\n", argv[3]);
-	exit(1);
-    }
-    fseek(ramDisk, 0, SEEK_END);
-    ramFileLen = ftell(ramDisk);
-    fseek(ramDisk, 0, SEEK_SET);
-    printf("%s file size = %d\n", argv[1], ramFileLen);
-
-    ramLen = ramFileLen;
-
-    roundR = 4096 - (ramLen % 4096);
-    if ( roundR ) {
-	printf("Rounding RAM disk file up to a multiple of 4096, adding %d\n", roundR);
-	ramLen += roundR;
-    }
-
-    printf("Rounded RAM disk size is %d\n", ramLen);
-    fseek(inputVmlinux, 0, SEEK_END);
-    kernelLen = ftell(inputVmlinux);
-    fseek(inputVmlinux, 0, SEEK_SET);
-    printf("kernel file size = %d\n", kernelLen);
-    if ( kernelLen == 0 ) {
-	printf("You must have a linux kernel specified as argv[2]\n");
-	exit(1);
-    }
-
-    actualKernelLen = kernelLen - ElfHeaderSize;
-
-    printf("actual kernel length (minus ELF header) = %d\n", actualKernelLen);
-
-    round = actualKernelLen % 4096;
-    roundedKernelLen = actualKernelLen;
-    if ( round )
-	roundedKernelLen += (4096 - round);
-
-    printf("actual kernel length rounded up to a 4k multiple = %d\n", roundedKernelLen);
-
-    ramStartOffs = roundedKernelLen;
-    ramPages = ramLen / 4096;
-
-    printf("RAM disk pages to copy = %d\n", ramPages);
-
-    // Copy 64K ELF header
-      for (i=0; i<(ElfPages); ++i) {
-	  get4k( inputVmlinux, inbuf );
-	  put4k( outputVmlinux, inbuf );
-      }
-
-    roundedKernelPages = roundedKernelLen / 4096;
-
-    fseek(inputVmlinux, ElfHeaderSize, SEEK_SET);
-
-    for ( i=0; i<roundedKernelPages; ++i ) {
-	get4k( inputVmlinux, inbuf );
-	put4k( outputVmlinux, inbuf );
-    }
-
-    for ( i=0; i<ramPages; ++i ) {
-	get4k( ramDisk, inbuf );
-	put4k( outputVmlinux, inbuf );
-    }
-
-    /* Close the input files */
-    fclose(ramDisk);
-    fclose(inputVmlinux);
-    /* And flush the written output file */
-    fflush(outputVmlinux);
-
-    /* fseek to the hvReleaseData pointer */
-    fseek(outputVmlinux, ElfHeaderSize + 0x24, SEEK_SET);
-    if (fread(&hvReleaseData, 4, 1, outputVmlinux) != 1) {
-        death("Could not read hvReleaseData pointer\n", outputVmlinux, argv[3]);
-    }
-    hvReleaseData = ntohl(hvReleaseData); /* Convert to native int */
-    printf("hvReleaseData is at %08x\n", hvReleaseData);
-
-    /* fseek to the hvReleaseData */
-    fseek(outputVmlinux, ElfHeaderSize + hvReleaseData, SEEK_SET);
-    if (fread(inbuf, 0x40, 1, outputVmlinux) != 1) {
-        death("Could not read hvReleaseData\n", outputVmlinux, argv[3]);
-    }
-    /* Check hvReleaseData sanity */
-    if (memcmp(inbuf, &eyeCatcher, 4) != 0) {
-        death("hvReleaseData is invalid\n", outputVmlinux, argv[3]);
-    }
-    /* Get the naca pointer */
-    naca = ntohl(*((u_int32_t *) &inbuf[0x0c])) - KERNELBASE;
-    printf("naca is at %08x\n", naca);
-
-    /* fseek to the naca */
-    fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET);
-    if (fread(inbuf, 0x18, 1, outputVmlinux) != 1) {
-        death("Could not read naca\n", outputVmlinux, argv[3]);
-    }
-    xRamDisk = ntohl(*((u_int32_t *) &inbuf[0x0c]));
-    xRamDiskSize = ntohl(*((u_int32_t *) &inbuf[0x14]));
-    /* Make sure a RAM disk isn't already present */
-    if ((xRamDisk != 0) || (xRamDiskSize != 0)) {
-        death("RAM disk is already attached to this kernel\n", outputVmlinux, argv[3]);
-    }
-    /* Fill in the values */
-    *((u_int32_t *) &inbuf[0x0c]) = htonl(ramStartOffs);
-    *((u_int32_t *) &inbuf[0x14]) = htonl(ramPages);
-
-    /* Write out the new naca */
-    fflush(outputVmlinux);
-    fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET);
-    if (fwrite(inbuf, 0x18, 1, outputVmlinux) != 1) {
-        death("Could not write naca\n", outputVmlinux, argv[3]);
-    }
-    printf("RAM Disk of 0x%x pages size is attached to the kernel at offset 0x%08x\n",
-            ramPages, ramStartOffs);
-
-    /* Done */
-    fclose(outputVmlinux);
-    /* Set permission to executable */
-    chmod(argv[3], S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH);
-
-    return 0;
-}
-

+ 5 - 6
arch/ppc/kernel/signal.c

@@ -759,13 +759,12 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs)
 	else
 		handle_signal(signr, &ka, &info, oldset, regs, newsp);
 
-	if (!(ka.sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka.sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka.sa.sa_mask);
+	if (!(ka.sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked, signr);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 
 	return 1;
 }

+ 34 - 40
arch/ppc64/Kconfig

@@ -302,12 +302,6 @@ config GENERIC_HARDIRQS
 	bool
 	default y
 
-config MSCHUNKS
-	bool
-	depends on PPC_ISERIES
-	default y
-
-
 config PPC_RTAS
 	bool
 	depends on PPC_PSERIES || PPC_BPA
@@ -350,13 +344,46 @@ config SECCOMP
 
 	  If unsure, say Y. Only embedded should say N here.
 
+source "fs/Kconfig.binfmt"
+
+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC)
+	select HOTPLUG
+	---help---
+	  Say Y here to be able to turn CPUs off and on.
+
+	  Say N if you are unsure.
+
+config PROC_DEVICETREE
+	bool "Support for Open Firmware device tree in /proc"
+	depends on !PPC_ISERIES
+	help
+	  This option adds a device-tree directory under /proc which contains
+	  an image of the device tree that the kernel copies from Open
+	  Firmware. If unsure, say Y here.
+
+config CMDLINE_BOOL
+	bool "Default bootloader kernel arguments"
+	depends on !PPC_ISERIES
+
+config CMDLINE
+	string "Initial kernel command string"
+	depends on CMDLINE_BOOL
+	default "console=ttyS0,9600 console=tty0 root=/dev/sda2"
+	help
+	  On some platforms, there is currently no way for the boot loader to
+	  pass arguments to the kernel. For these platforms, you can supply
+	  some command-line options at build time by entering them here.  In
+	  most cases you will need to specify the root device here.
+
 endmenu
 
 config ISA_DMA_API
 	bool
 	default y
 
-menu "General setup"
+menu "Bus Options"
 
 config ISA
 	bool
@@ -389,45 +416,12 @@ config PCI_DOMAINS
 	bool
 	default PCI
 
-source "fs/Kconfig.binfmt"
-
 source "drivers/pci/Kconfig"
 
-config HOTPLUG_CPU
-	bool "Support for hot-pluggable CPUs"
-	depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC)
-	select HOTPLUG
-	---help---
-	  Say Y here to be able to turn CPUs off and on.
-
-	  Say N if you are unsure.
-
 source "drivers/pcmcia/Kconfig"
 
 source "drivers/pci/hotplug/Kconfig"
 
-config PROC_DEVICETREE
-	bool "Support for Open Firmware device tree in /proc"
-	depends on !PPC_ISERIES
-	help
-	  This option adds a device-tree directory under /proc which contains
-	  an image of the device tree that the kernel copies from Open
-	  Firmware. If unsure, say Y here.
-
-config CMDLINE_BOOL
-	bool "Default bootloader kernel arguments"
-	depends on !PPC_ISERIES
-
-config CMDLINE
-	string "Initial kernel command string"
-	depends on CMDLINE_BOOL
-	default "console=ttyS0,9600 console=tty0 root=/dev/sda2"
-	help
-	  On some platforms, there is currently no way for the boot loader to
-	  pass arguments to the kernel. For these platforms, you can supply
-	  some command-line options at build time by entering them here.  In
-	  most cases you will need to specify the root device here.
-
 endmenu
 
 source "net/Kconfig"

+ 9 - 0
arch/ppc64/Makefile

@@ -55,6 +55,8 @@ LDFLAGS		:= -m elf64ppc
 LDFLAGS_vmlinux	:= -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD)
 CFLAGS		+= -msoft-float -pipe -mminimal-toc -mtraceback=none \
 		   -mcall-aixdesc
+# Temporary hack until we have migrated to asm-powerpc
+CPPFLAGS	+= -Iinclude3
 
 GCC_VERSION     := $(call cc-version)
 GCC_BROKEN_VEC	:= $(shell if [ $(GCC_VERSION) -lt 0400 ] ; then echo "y"; fi ;)
@@ -112,6 +114,7 @@ all: $(KBUILD_IMAGE)
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
+	$(Q)rm -rf include3
 
 prepare: include/asm-ppc64/offsets.h
 
@@ -121,6 +124,12 @@ arch/ppc64/kernel/asm-offsets.s: include/asm include/linux/version.h \
 include/asm-ppc64/offsets.h: arch/ppc64/kernel/asm-offsets.s
 	$(call filechk,gen-asm-offsets)
 
+# Temporary hack until we have migrated to asm-powerpc
+include/asm: include3/asm
+include3/asm:
+	$(Q)if [ ! -d include3 ]; then mkdir -p include3; fi;
+	$(Q)ln -fsn $(srctree)/include/asm-powerpc include3/asm
+
 define archhelp
   echo  '* zImage       - Compressed kernel image (arch/$(ARCH)/boot/zImage)'
   echo  '  zImage.initrd- Compressed kernel image with initrd attached,'

+ 2 - 2
arch/ppc64/boot/Makefile

@@ -22,8 +22,8 @@
 
 
 HOSTCC		:= gcc
-BOOTCFLAGS	:= $(HOSTCFLAGS) $(LINUXINCLUDE) -fno-builtin 
-BOOTAFLAGS	:= -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional
+BOOTCFLAGS	:= $(HOSTCFLAGS) -fno-builtin -nostdinc -isystem $(shell $(CROSS32CC) -print-file-name=include)
+BOOTAFLAGS	:= -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
 BOOTLFLAGS	:= -Ttext 0x00400000 -e _start -T $(srctree)/$(src)/zImage.lds
 OBJCOPYFLAGS    := contents,alloc,load,readonly,data
 

+ 2 - 2
arch/ppc64/boot/addnote.c

@@ -157,7 +157,7 @@ main(int ac, char **av)
 	PUT_32BE(ns, strlen(arch) + 1);
 	PUT_32BE(ns + 4, N_DESCR * 4);
 	PUT_32BE(ns + 8, 0x1275);
-	strcpy(&buf[ns + 12], arch);
+	strcpy((char *) &buf[ns + 12], arch);
 	ns += 12 + strlen(arch) + 1;
 	for (i = 0; i < N_DESCR; ++i, ns += 4)
 		PUT_32BE(ns, descr[i]);
@@ -172,7 +172,7 @@ main(int ac, char **av)
 	PUT_32BE(ns, strlen(rpaname) + 1);
 	PUT_32BE(ns + 4, sizeof(rpanote));
 	PUT_32BE(ns + 8, 0x12759999);
-	strcpy(&buf[ns + 12], rpaname);
+	strcpy((char *) &buf[ns + 12], rpaname);
 	ns += 12 + ROUNDUP(strlen(rpaname) + 1);
 	for (i = 0; i < N_RPA_DESCR; ++i, ns += 4)
 		PUT_32BE(ns, rpanote[i]);

+ 1 - 1
arch/ppc64/boot/crt0.S

@@ -9,7 +9,7 @@
  * NOTE: this code runs in 32 bit mode and is packaged as ELF32.
  */
 
-#include <asm/ppc_asm.h>
+#include "ppc_asm.h"
 
 	.text
 	.globl	_start

+ 1 - 1
arch/ppc64/boot/div64.S

@@ -13,7 +13,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
-#include <asm/ppc_asm.h>
+#include "ppc_asm.h"
 
 	.globl __div64_32
 __div64_32:

+ 149 - 0
arch/ppc64/boot/elf.h

@@ -0,0 +1,149 @@
+#ifndef _PPC_BOOT_ELF_H_
+#define _PPC_BOOT_ELF_H_
+
+/* 32-bit ELF base types. */
+typedef unsigned int Elf32_Addr;
+typedef unsigned short Elf32_Half;
+typedef unsigned int Elf32_Off;
+typedef signed int Elf32_Sword;
+typedef unsigned int Elf32_Word;
+
+/* 64-bit ELF base types. */
+typedef unsigned long long Elf64_Addr;
+typedef unsigned short Elf64_Half;
+typedef signed short Elf64_SHalf;
+typedef unsigned long long Elf64_Off;
+typedef signed int Elf64_Sword;
+typedef unsigned int Elf64_Word;
+typedef unsigned long long Elf64_Xword;
+typedef signed long long Elf64_Sxword;
+
+/* These constants are for the segment types stored in the image headers */
+#define PT_NULL    0
+#define PT_LOAD    1
+#define PT_DYNAMIC 2
+#define PT_INTERP  3
+#define PT_NOTE    4
+#define PT_SHLIB   5
+#define PT_PHDR    6
+#define PT_TLS     7		/* Thread local storage segment */
+#define PT_LOOS    0x60000000	/* OS-specific */
+#define PT_HIOS    0x6fffffff	/* OS-specific */
+#define PT_LOPROC  0x70000000
+#define PT_HIPROC  0x7fffffff
+#define PT_GNU_EH_FRAME		0x6474e550
+
+#define PT_GNU_STACK	(PT_LOOS + 0x474e551)
+
+/* These constants define the different elf file types */
+#define ET_NONE   0
+#define ET_REL    1
+#define ET_EXEC   2
+#define ET_DYN    3
+#define ET_CORE   4
+#define ET_LOPROC 0xff00
+#define ET_HIPROC 0xffff
+
+/* These constants define the various ELF target machines */
+#define EM_NONE  0
+#define EM_PPC	       20	/* PowerPC */
+#define EM_PPC64       21	/* PowerPC64 */
+
+#define EI_NIDENT	16
+
+typedef struct elf32_hdr {
+	unsigned char e_ident[EI_NIDENT];
+	Elf32_Half e_type;
+	Elf32_Half e_machine;
+	Elf32_Word e_version;
+	Elf32_Addr e_entry;	/* Entry point */
+	Elf32_Off e_phoff;
+	Elf32_Off e_shoff;
+	Elf32_Word e_flags;
+	Elf32_Half e_ehsize;
+	Elf32_Half e_phentsize;
+	Elf32_Half e_phnum;
+	Elf32_Half e_shentsize;
+	Elf32_Half e_shnum;
+	Elf32_Half e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct elf64_hdr {
+	unsigned char e_ident[16];	/* ELF "magic number" */
+	Elf64_Half e_type;
+	Elf64_Half e_machine;
+	Elf64_Word e_version;
+	Elf64_Addr e_entry;	/* Entry point virtual address */
+	Elf64_Off e_phoff;	/* Program header table file offset */
+	Elf64_Off e_shoff;	/* Section header table file offset */
+	Elf64_Word e_flags;
+	Elf64_Half e_ehsize;
+	Elf64_Half e_phentsize;
+	Elf64_Half e_phnum;
+	Elf64_Half e_shentsize;
+	Elf64_Half e_shnum;
+	Elf64_Half e_shstrndx;
+} Elf64_Ehdr;
+
+/* These constants define the permissions on sections in the program
+   header, p_flags. */
+#define PF_R		0x4
+#define PF_W		0x2
+#define PF_X		0x1
+
+typedef struct elf32_phdr {
+	Elf32_Word p_type;
+	Elf32_Off p_offset;
+	Elf32_Addr p_vaddr;
+	Elf32_Addr p_paddr;
+	Elf32_Word p_filesz;
+	Elf32_Word p_memsz;
+	Elf32_Word p_flags;
+	Elf32_Word p_align;
+} Elf32_Phdr;
+
+typedef struct elf64_phdr {
+	Elf64_Word p_type;
+	Elf64_Word p_flags;
+	Elf64_Off p_offset;	/* Segment file offset */
+	Elf64_Addr p_vaddr;	/* Segment virtual address */
+	Elf64_Addr p_paddr;	/* Segment physical address */
+	Elf64_Xword p_filesz;	/* Segment size in file */
+	Elf64_Xword p_memsz;	/* Segment size in memory */
+	Elf64_Xword p_align;	/* Segment alignment, file & memory */
+} Elf64_Phdr;
+
+#define	EI_MAG0		0	/* e_ident[] indexes */
+#define	EI_MAG1		1
+#define	EI_MAG2		2
+#define	EI_MAG3		3
+#define	EI_CLASS	4
+#define	EI_DATA		5
+#define	EI_VERSION	6
+#define	EI_OSABI	7
+#define	EI_PAD		8
+
+#define	ELFMAG0		0x7f	/* EI_MAG */
+#define	ELFMAG1		'E'
+#define	ELFMAG2		'L'
+#define	ELFMAG3		'F'
+#define	ELFMAG		"\177ELF"
+#define	SELFMAG		4
+
+#define	ELFCLASSNONE	0	/* EI_CLASS */
+#define	ELFCLASS32	1
+#define	ELFCLASS64	2
+#define	ELFCLASSNUM	3
+
+#define ELFDATANONE	0	/* e_ident[EI_DATA] */
+#define ELFDATA2LSB	1
+#define ELFDATA2MSB	2
+
+#define EV_NONE		0	/* e_version, EI_VERSION */
+#define EV_CURRENT	1
+#define EV_NUM		2
+
+#define ELFOSABI_NONE	0
+#define ELFOSABI_LINUX	3
+
+#endif				/* _PPC_BOOT_ELF_H_ */

+ 20 - 35
arch/ppc64/boot/main.c

@@ -8,36 +8,28 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
-#include "ppc32-types.h"
+#include <stdarg.h>
+#include <stddef.h>
+#include "elf.h"
+#include "page.h"
+#include "string.h"
+#include "stdio.h"
+#include "prom.h"
 #include "zlib.h"
-#include <linux/elf.h>
-#include <linux/string.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-
-extern void *finddevice(const char *);
-extern int getprop(void *, const char *, void *, int);
-extern void printf(const char *fmt, ...);
-extern int sprintf(char *buf, const char *fmt, ...);
-void gunzip(void *, int, unsigned char *, int *);
-void *claim(unsigned int, unsigned int, unsigned int);
-void flush_cache(void *, unsigned long);
-void pause(void);
-extern void exit(void);
-
-unsigned long strlen(const char *s);
-void *memmove(void *dest, const void *src, unsigned long n);
-void *memcpy(void *dest, const void *src, unsigned long n);
+
+static void gunzip(void *, int, unsigned char *, int *);
+extern void flush_cache(void *, unsigned long);
+
 
 /* Value picked to match that used by yaboot */
 #define PROG_START	0x01400000
 #define RAM_END		(256<<20) // Fixme: use OF */
 
-char *avail_ram;
-char *begin_avail, *end_avail;
-char *avail_high;
-unsigned int heap_use;
-unsigned int heap_max;
+static char *avail_ram;
+static char *begin_avail, *end_avail;
+static char *avail_high;
+static unsigned int heap_use;
+static unsigned int heap_max;
 
 extern char _start[];
 extern char _vmlinux_start[];
@@ -52,9 +44,9 @@ struct addr_range {
 	unsigned long size;
 	unsigned long memsize;
 };
-struct addr_range vmlinux = {0, 0, 0};
-struct addr_range vmlinuz = {0, 0, 0};
-struct addr_range initrd  = {0, 0, 0};
+static struct addr_range vmlinux = {0, 0, 0};
+static struct addr_range vmlinuz = {0, 0, 0};
+static struct addr_range initrd  = {0, 0, 0};
 
 static char scratch[128<<10];	/* 128kB of scratch space for gunzip */
 
@@ -64,13 +56,6 @@ typedef void (*kernel_entry_t)( unsigned long,
 				void *);
 
 
-int (*prom)(void *);
-
-void *chosen_handle;
-void *stdin;
-void *stdout;
-void *stderr;
-
 #undef DEBUG
 
 static unsigned long claim_base = PROG_START;
@@ -277,7 +262,7 @@ void zfree(void *x, void *addr, unsigned nb)
 
 #define DEFLATED	8
 
-void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp)
+static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp)
 {
 	z_stream s;
 	int r, i, flags;

+ 34 - 0
arch/ppc64/boot/page.h

@@ -0,0 +1,34 @@
+#ifndef _PPC_BOOT_PAGE_H
+#define _PPC_BOOT_PAGE_H
+/*
+ * Copyright (C) 2001 PPC64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifdef __ASSEMBLY__
+#define ASM_CONST(x) x
+#else
+#define __ASM_CONST(x) x##UL
+#define ASM_CONST(x) __ASM_CONST(x)
+#endif
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT	12
+#define PAGE_SIZE	(ASM_CONST(1) << PAGE_SHIFT)
+#define PAGE_MASK	(~(PAGE_SIZE-1))
+
+/* align addr on a size boundary - adjust address up/down if needed */
+#define _ALIGN_UP(addr,size)	(((addr)+((size)-1))&(~((size)-1)))
+#define _ALIGN_DOWN(addr,size)	((addr)&(~((size)-1)))
+
+/* align addr on a size boundary - adjust address up if needed */
+#define _ALIGN(addr,size)     _ALIGN_UP(addr,size)
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr)	_ALIGN(addr, PAGE_SIZE)
+
+#endif				/* _PPC_BOOT_PAGE_H */

+ 0 - 36
arch/ppc64/boot/ppc32-types.h

@@ -1,36 +0,0 @@
-#ifndef _PPC64_TYPES_H
-#define _PPC64_TYPES_H
-
-typedef __signed__ char __s8;
-typedef unsigned char __u8;
-
-typedef __signed__ short __s16;
-typedef unsigned short __u16;
-
-typedef __signed__ int __s32;
-typedef unsigned int __u32;
-
-typedef __signed__ long long __s64;
-typedef unsigned long long __u64;
-
-typedef signed char s8;
-typedef unsigned char u8;
-
-typedef signed short s16;
-typedef unsigned short u16;
-
-typedef signed int s32;
-typedef unsigned int u32;
-
-typedef signed long long s64;
-typedef unsigned long long u64;
-
-typedef struct {
-	__u32 u[4];
-} __attribute((aligned(16))) __vector128;
-
-#define BITS_PER_LONG 32
-
-typedef __vector128 vector128;
-
-#endif /* _PPC64_TYPES_H */

+ 62 - 0
arch/ppc64/boot/ppc_asm.h

@@ -0,0 +1,62 @@
+#ifndef _PPC64_PPC_ASM_H
+#define _PPC64_PPC_ASM_H
+/*
+ *
+ * Definitions used by various bits of low-level assembly code on PowerPC.
+ *
+ * Copyright (C) 1995-1999 Gary Thomas, Paul Mackerras, Cort Dougan.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+/* Condition Register Bit Fields */
+
+#define	cr0	0
+#define	cr1	1
+#define	cr2	2
+#define	cr3	3
+#define	cr4	4
+#define	cr5	5
+#define	cr6	6
+#define	cr7	7
+
+
+/* General Purpose Registers (GPRs) */
+
+#define	r0	0
+#define	r1	1
+#define	r2	2
+#define	r3	3
+#define	r4	4
+#define	r5	5
+#define	r6	6
+#define	r7	7
+#define	r8	8
+#define	r9	9
+#define	r10	10
+#define	r11	11
+#define	r12	12
+#define	r13	13
+#define	r14	14
+#define	r15	15
+#define	r16	16
+#define	r17	17
+#define	r18	18
+#define	r19	19
+#define	r20	20
+#define	r21	21
+#define	r22	22
+#define	r23	23
+#define	r24	24
+#define	r25	25
+#define	r26	26
+#define	r27	27
+#define	r28	28
+#define	r29	29
+#define	r30	30
+#define	r31	31
+
+#endif /* _PPC64_PPC_ASM_H */

+ 27 - 169
arch/ppc64/boot/prom.c

@@ -7,43 +7,19 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <stdarg.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-
-extern __u32 __div64_32(unsigned long long *dividend, __u32 divisor);
-
-/* The unnecessary pointer compare is there
- * to check for type safety (n must be 64bit)
- */
-# define do_div(n,base) ({				\
-	__u32 __base = (base);			\
-	__u32 __rem;					\
-	(void)(((typeof((n)) *)0) == ((unsigned long long *)0));	\
-	if (((n) >> 32) == 0) {			\
-		__rem = (__u32)(n) % __base;		\
-		(n) = (__u32)(n) / __base;		\
-	} else 						\
-		__rem = __div64_32(&(n), __base);	\
-	__rem;						\
- })
+#include <stddef.h>
+#include "string.h"
+#include "stdio.h"
+#include "prom.h"
 
 int (*prom)(void *);
 
 void *chosen_handle;
+
 void *stdin;
 void *stdout;
 void *stderr;
 
-void exit(void);
-void *finddevice(const char *name);
-int getprop(void *phandle, const char *name, void *buf, int buflen);
-void chrpboot(int a1, int a2, void *prom);	/* in main.c */
-
-int printf(char *fmt, ...);
-
-/* there is no convenient header to get this from...  -- paulus */
-extern unsigned long strlen(const char *);
 
 int
 write(void *handle, void *ptr, int nb)
@@ -210,107 +186,6 @@ fputs(char *str, void *f)
 	return write(f, str, n) == n? 0: -1;
 }
 
-int
-readchar(void)
-{
-	char ch;
-
-	for (;;) {
-		switch (read(stdin, &ch, 1)) {
-		case 1:
-			return ch;
-		case -1:
-			printf("read(stdin) returned -1\r\n");
-			return -1;
-		}
-	}
-}
-
-static char line[256];
-static char *lineptr;
-static int lineleft;
-
-int
-getchar(void)
-{
-	int c;
-
-	if (lineleft == 0) {
-		lineptr = line;
-		for (;;) {
-			c = readchar();
-			if (c == -1 || c == 4)
-				break;
-			if (c == '\r' || c == '\n') {
-				*lineptr++ = '\n';
-				putchar('\n');
-				break;
-			}
-			switch (c) {
-			case 0177:
-			case '\b':
-				if (lineptr > line) {
-					putchar('\b');
-					putchar(' ');
-					putchar('\b');
-					--lineptr;
-				}
-				break;
-			case 'U' & 0x1F:
-				while (lineptr > line) {
-					putchar('\b');
-					putchar(' ');
-					putchar('\b');
-					--lineptr;
-				}
-				break;
-			default:
-				if (lineptr >= &line[sizeof(line) - 1])
-					putchar('\a');
-				else {
-					putchar(c);
-					*lineptr++ = c;
-				}
-			}
-		}
-		lineleft = lineptr - line;
-		lineptr = line;
-	}
-	if (lineleft == 0)
-		return -1;
-	--lineleft;
-	return *lineptr++;
-}
-
-
-
-/* String functions lifted from lib/vsprintf.c and lib/ctype.c */
-unsigned char _ctype[] = {
-_C,_C,_C,_C,_C,_C,_C,_C,			/* 0-7 */
-_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C,		/* 8-15 */
-_C,_C,_C,_C,_C,_C,_C,_C,			/* 16-23 */
-_C,_C,_C,_C,_C,_C,_C,_C,			/* 24-31 */
-_S|_SP,_P,_P,_P,_P,_P,_P,_P,			/* 32-39 */
-_P,_P,_P,_P,_P,_P,_P,_P,			/* 40-47 */
-_D,_D,_D,_D,_D,_D,_D,_D,			/* 48-55 */
-_D,_D,_P,_P,_P,_P,_P,_P,			/* 56-63 */
-_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,	/* 64-71 */
-_U,_U,_U,_U,_U,_U,_U,_U,			/* 72-79 */
-_U,_U,_U,_U,_U,_U,_U,_U,			/* 80-87 */
-_U,_U,_U,_P,_P,_P,_P,_P,			/* 88-95 */
-_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,	/* 96-103 */
-_L,_L,_L,_L,_L,_L,_L,_L,			/* 104-111 */
-_L,_L,_L,_L,_L,_L,_L,_L,			/* 112-119 */
-_L,_L,_L,_P,_P,_P,_P,_C,			/* 120-127 */
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,		/* 128-143 */
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,		/* 144-159 */
-_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,   /* 160-175 */
-_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,       /* 176-191 */
-_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,       /* 192-207 */
-_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,       /* 208-223 */
-_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,       /* 224-239 */
-_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};      /* 240-255 */
-
 size_t strnlen(const char * s, size_t count)
 {
 	const char *sc;
@@ -320,44 +195,30 @@ size_t strnlen(const char * s, size_t count)
 	return sc - s;
 }
 
-unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
-{
-	unsigned long result = 0,value;
+extern unsigned int __div64_32(unsigned long long *dividend,
+			       unsigned int divisor);
 
-	if (!base) {
-		base = 10;
-		if (*cp == '0') {
-			base = 8;
-			cp++;
-			if ((*cp == 'x') && isxdigit(cp[1])) {
-				cp++;
-				base = 16;
-			}
-		}
-	}
-	while (isxdigit(*cp) &&
-	       (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
-		result = result*base + value;
-		cp++;
-	}
-	if (endp)
-		*endp = (char *)cp;
-	return result;
-}
-
-long simple_strtol(const char *cp,char **endp,unsigned int base)
-{
-	if(*cp=='-')
-		return -simple_strtoul(cp+1,endp,base);
-	return simple_strtoul(cp,endp,base);
-}
+/* The unnecessary pointer compare is there
+ * to check for type safety (n must be 64bit)
+ */
+# define do_div(n,base) ({						\
+	unsigned int __base = (base);					\
+	unsigned int __rem;						\
+	(void)(((typeof((n)) *)0) == ((unsigned long long *)0));	\
+	if (((n) >> 32) == 0) {						\
+		__rem = (unsigned int)(n) % __base;			\
+		(n) = (unsigned int)(n) / __base;			\
+	} else								\
+		__rem = __div64_32(&(n), __base);			\
+	__rem;								\
+ })
 
 static int skip_atoi(const char **s)
 {
-	int i=0;
+	int i, c;
 
-	while (isdigit(**s))
-		i = i*10 + *((*s)++) - '0';
+	for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s)
+		i = i*10 + c - '0';
 	return i;
 }
 
@@ -436,9 +297,6 @@ static char * number(char * str, unsigned long long num, int base, int size, int
 	return str;
 }
 
-/* Forward decl. needed for IP address printing stuff... */
-int sprintf(char * buf, const char *fmt, ...);
-
 int vsprintf(char *buf, const char *fmt, va_list args)
 {
 	int len;
@@ -477,7 +335,7 @@ int vsprintf(char *buf, const char *fmt, va_list args)
 		
 		/* get field width */
 		field_width = -1;
-		if (isdigit(*fmt))
+		if ('0' <= *fmt && *fmt <= '9')
 			field_width = skip_atoi(&fmt);
 		else if (*fmt == '*') {
 			++fmt;
@@ -493,7 +351,7 @@ int vsprintf(char *buf, const char *fmt, va_list args)
 		precision = -1;
 		if (*fmt == '.') {
 			++fmt;	
-			if (isdigit(*fmt))
+			if ('0' <= *fmt && *fmt <= '9')
 				precision = skip_atoi(&fmt);
 			else if (*fmt == '*') {
 				++fmt;
@@ -628,7 +486,7 @@ int sprintf(char * buf, const char *fmt, ...)
 static char sprint_buf[1024];
 
 int
-printf(char *fmt, ...)
+printf(const char *fmt, ...)
 {
 	va_list args;
 	int n;

+ 18 - 0
arch/ppc64/boot/prom.h

@@ -0,0 +1,18 @@
+#ifndef _PPC_BOOT_PROM_H_
+#define _PPC_BOOT_PROM_H_
+
+extern int (*prom) (void *);
+extern void *chosen_handle;
+
+extern void *stdin;
+extern void *stdout;
+extern void *stderr;
+
+extern int write(void *handle, void *ptr, int nb);
+extern int read(void *handle, void *ptr, int nb);
+extern void exit(void);
+extern void pause(void);
+extern void *finddevice(const char *);
+extern void *claim(unsigned long virt, unsigned long size, unsigned long align);
+extern int getprop(void *phandle, const char *name, void *buf, int buflen);
+#endif				/* _PPC_BOOT_PROM_H_ */

+ 16 - 0
arch/ppc64/boot/stdio.h

@@ -0,0 +1,16 @@
+#ifndef _PPC_BOOT_STDIO_H_
+#define _PPC_BOOT_STDIO_H_
+
+extern int printf(const char *fmt, ...);
+
+extern int sprintf(char *buf, const char *fmt, ...);
+
+extern int vsprintf(char *buf, const char *fmt, va_list args);
+
+extern int putc(int c, void *f);
+extern int putchar(int c);
+extern int getchar(void);
+
+extern int fputs(char *str, void *f);
+
+#endif				/* _PPC_BOOT_STDIO_H_ */

+ 1 - 1
arch/ppc64/boot/string.S

@@ -9,7 +9,7 @@
  * NOTE: this code runs in 32 bit mode and is packaged as ELF32.
  */
 
-#include <asm/ppc_asm.h>
+#include "ppc_asm.h"
 
 	.text
 	.globl	strcpy

+ 16 - 0
arch/ppc64/boot/string.h

@@ -0,0 +1,16 @@
+#ifndef _PPC_BOOT_STRING_H_
+#define _PPC_BOOT_STRING_H_
+
+extern char *strcpy(char *dest, const char *src);
+extern char *strncpy(char *dest, const char *src, size_t n);
+extern char *strcat(char *dest, const char *src);
+extern int strcmp(const char *s1, const char *s2);
+extern size_t strlen(const char *s);
+extern size_t strnlen(const char *s, size_t count);
+
+extern void *memset(void *s, int c, size_t n);
+extern void *memmove(void *dest, const void *src, unsigned long n);
+extern void *memcpy(void *dest, const void *src, unsigned long n);
+extern int memcmp(const void *s1, const void *s2, size_t n);
+
+#endif	/* _PPC_BOOT_STRING_H_ */

+ 1 - 1
arch/ppc64/boot/zlib.c

@@ -107,7 +107,7 @@ extern void *memcpy(void *, const void *, unsigned long);
 
 /* Diagnostic functions */
 #ifdef DEBUG_ZLIB
-#  include <stdio.h>
+#  include "stdio.h"
 #  ifndef verbose
 #    define verbose 0
 #  endif

+ 3 - 3
arch/ppc64/configs/g5_defconfig

@@ -103,10 +103,10 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_SECCOMP=y
 CONFIG_ISA_DMA_API=y

+ 3 - 4
arch/ppc64/configs/iSeries_defconfig

@@ -94,12 +94,11 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
 CONFIG_GENERIC_HARDIRQS=y
-CONFIG_MSCHUNKS=y
 CONFIG_LPARCFG=y
 CONFIG_SECCOMP=y
 CONFIG_ISA_DMA_API=y

+ 3 - 3
arch/ppc64/configs/maple_defconfig

@@ -103,10 +103,10 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_SECCOMP=y
 CONFIG_ISA_DMA_API=y

+ 3 - 3
arch/ppc64/configs/pSeries_defconfig

@@ -112,10 +112,10 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
 CONFIG_EEH=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_PPC_RTAS=y

+ 3 - 3
arch/ppc64/defconfig

@@ -114,10 +114,10 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 # CONFIG_PREEMPT_BKL is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
 CONFIG_EEH=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_PPC_RTAS=y

+ 11 - 26
arch/ppc64/kernel/LparData.c

@@ -51,6 +51,17 @@ struct HvReleaseData hvReleaseData = {
 		0xf4, 0x4b, 0xf6, 0xf4 },
 };
 
+/*
+ * The NACA.  The first dword of the naca is required by the iSeries
+ * hypervisor to point to itVpdAreas.  The hypervisor finds the NACA
+ * through the pointer in hvReleaseData.
+ */
+struct naca_struct naca = {
+	.xItVpdAreas = &itVpdAreas,
+	.xRamDisk = 0,
+	.xRamDiskSize = 0,
+};
+
 extern void system_reset_iSeries(void);
 extern void machine_check_iSeries(void);
 extern void data_access_iSeries(void);
@@ -214,29 +225,3 @@ struct ItVpdAreas itVpdAreas = {
 		0,0
 	}
 };
-
-struct msChunks msChunks;
-EXPORT_SYMBOL(msChunks);
-
-/* Depending on whether this is called from iSeries or pSeries setup
- * code, the location of the msChunks struct may or may not have
- * to be reloc'd, so we force the caller to do that for us by passing
- * in a pointer to the structure.
- */
-unsigned long
-msChunks_alloc(unsigned long mem, unsigned long num_chunks, unsigned long chunk_size)
-{
-	unsigned long offset = reloc_offset();
-	struct msChunks *_msChunks = PTRRELOC(&msChunks);
-
-	_msChunks->num_chunks  = num_chunks;
-	_msChunks->chunk_size  = chunk_size;
-	_msChunks->chunk_shift = __ilog2(chunk_size);
-	_msChunks->chunk_mask  = (1UL<<_msChunks->chunk_shift)-1;
-
-	mem = _ALIGN(mem, sizeof(msChunks_entry));
-	_msChunks->abs = (msChunks_entry *)(mem + offset);
-	mem += num_chunks * sizeof(msChunks_entry);
-
-	return mem;
-}

+ 5 - 2
arch/ppc64/kernel/Makefile

@@ -11,7 +11,7 @@ obj-y               :=	setup.o entry.o traps.o irq.o idle.o dma.o \
 			udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \
 			ptrace32.o signal32.o rtc.o init_task.o \
 			lmb.o cputable.o cpu_setup_power4.o idle_power4.o \
-			iommu.o sysfs.o vdso.o pmc.o
+			iommu.o sysfs.o vdso.o pmc.o firmware.o
 obj-y += vdso32/ vdso64/
 
 obj-$(CONFIG_PPC_OF) +=	of_device.o
@@ -50,7 +50,10 @@ obj-$(CONFIG_LPARCFG)		+= lparcfg.o
 obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
 obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
 obj-$(CONFIG_HVCS)		+= hvcserver.o
-obj-$(CONFIG_IBMVIO)		+= vio.o
+
+vio-obj-$(CONFIG_PPC_PSERIES)	+= pSeries_vio.o
+vio-obj-$(CONFIG_PPC_ISERIES)	+= iSeries_vio.o
+obj-$(CONFIG_IBMVIO)		+= vio.o $(vio-obj-y)
 obj-$(CONFIG_XICS)		+= xics.o
 obj-$(CONFIG_MPIC)		+= mpic.o
 

+ 2 - 1
arch/ppc64/kernel/asm-offsets.c

@@ -94,7 +94,8 @@ int main(void)
 	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
 #ifdef CONFIG_HUGETLB_PAGE
-	DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs));
+	DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
+	DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
 #endif /* CONFIG_HUGETLB_PAGE */
 	DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr));
         DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));

+ 1 - 39
arch/ppc64/kernel/cputable.c

@@ -5,7 +5,7 @@
  *
  *  Modifications for ppc64:
  *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
- * 
+ *
  *  This program is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU General Public License
  *  as published by the Free Software Foundation; either version
@@ -60,7 +60,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power3,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* Power3+ */
 		.pvr_mask		= 0xffff0000,
@@ -73,7 +72,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power3,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* Northstar */
 		.pvr_mask		= 0xffff0000,
@@ -86,7 +84,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power3,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* Pulsar */
 		.pvr_mask		= 0xffff0000,
@@ -99,7 +96,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power3,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* I-star */
 		.pvr_mask		= 0xffff0000,
@@ -112,7 +108,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power3,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* S-star */
 		.pvr_mask		= 0xffff0000,
@@ -125,7 +120,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power3,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* Power4 */
 		.pvr_mask		= 0xffff0000,
@@ -138,7 +132,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power4,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* Power4+ */
 		.pvr_mask		= 0xffff0000,
@@ -151,7 +144,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power4,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* PPC970 */
 		.pvr_mask		= 0xffff0000,
@@ -166,7 +158,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_ppc970,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* PPC970FX */
 		.pvr_mask		= 0xffff0000,
@@ -181,7 +172,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_ppc970,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* PPC970MP */
 		.pvr_mask		= 0xffff0000,
@@ -196,7 +186,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_ppc970,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* Power5 */
 		.pvr_mask		= 0xffff0000,
@@ -211,7 +200,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power4,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* Power5 */
 		.pvr_mask		= 0xffff0000,
@@ -226,7 +214,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power4,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* BE DD1.x */
 		.pvr_mask		= 0xffff0000,
@@ -241,7 +228,6 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_be,
-		.firmware_features	= COMMON_PPC64_FW,
 	},
 	{	/* default match */
 		.pvr_mask		= 0x00000000,
@@ -254,29 +240,5 @@ struct cpu_spec	cpu_specs[] = {
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.cpu_setup		= __setup_cpu_power4,
-		.firmware_features	= COMMON_PPC64_FW,
 	}
 };
-
-firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
-	{FW_FEATURE_PFT,		"hcall-pft"},
-	{FW_FEATURE_TCE,		"hcall-tce"},
-	{FW_FEATURE_SPRG0,		"hcall-sprg0"},
-	{FW_FEATURE_DABR,		"hcall-dabr"},
-	{FW_FEATURE_COPY,		"hcall-copy"},
-	{FW_FEATURE_ASR,		"hcall-asr"},
-	{FW_FEATURE_DEBUG,		"hcall-debug"},
-	{FW_FEATURE_PERF,		"hcall-perf"},
-	{FW_FEATURE_DUMP,		"hcall-dump"},
-	{FW_FEATURE_INTERRUPT,		"hcall-interrupt"},
-	{FW_FEATURE_MIGRATE,		"hcall-migrate"},
-	{FW_FEATURE_PERFMON,		"hcall-perfmon"},
-	{FW_FEATURE_CRQ,		"hcall-crq"},
-	{FW_FEATURE_VIO,		"hcall-vio"},
-	{FW_FEATURE_RDMA,		"hcall-rdma"},
-	{FW_FEATURE_LLAN,		"hcall-lLAN"},
-	{FW_FEATURE_BULK,		"hcall-bulk"},
-	{FW_FEATURE_XDABR,		"hcall-xdabr"},
-	{FW_FEATURE_MULTITCE,		"hcall-multi-tce"},
-	{FW_FEATURE_SPLPAR,		"hcall-splpar"},
-};

+ 47 - 0
arch/ppc64/kernel/firmware.c

@@ -0,0 +1,47 @@
+/*
+ *  arch/ppc64/kernel/firmware.c
+ *
+ *  Extracted from cputable.c
+ *
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  Modifications for ppc64:
+ *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ *  Copyright (C) 2005 Stephen Rothwell, IBM Corporation
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+
+#include <asm/firmware.h>
+
+unsigned long ppc64_firmware_features;
+
+#ifdef CONFIG_PPC_PSERIES
+firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = {
+	{FW_FEATURE_PFT,		"hcall-pft"},
+	{FW_FEATURE_TCE,		"hcall-tce"},
+	{FW_FEATURE_SPRG0,		"hcall-sprg0"},
+	{FW_FEATURE_DABR,		"hcall-dabr"},
+	{FW_FEATURE_COPY,		"hcall-copy"},
+	{FW_FEATURE_ASR,		"hcall-asr"},
+	{FW_FEATURE_DEBUG,		"hcall-debug"},
+	{FW_FEATURE_PERF,		"hcall-perf"},
+	{FW_FEATURE_DUMP,		"hcall-dump"},
+	{FW_FEATURE_INTERRUPT,		"hcall-interrupt"},
+	{FW_FEATURE_MIGRATE,		"hcall-migrate"},
+	{FW_FEATURE_PERFMON,		"hcall-perfmon"},
+	{FW_FEATURE_CRQ,		"hcall-crq"},
+	{FW_FEATURE_VIO,		"hcall-vio"},
+	{FW_FEATURE_RDMA,		"hcall-rdma"},
+	{FW_FEATURE_LLAN,		"hcall-lLAN"},
+	{FW_FEATURE_BULK,		"hcall-bulk"},
+	{FW_FEATURE_XDABR,		"hcall-xdabr"},
+	{FW_FEATURE_MULTITCE,		"hcall-multi-tce"},
+	{FW_FEATURE_SPLPAR,		"hcall-splpar"},
+};
+#endif

+ 206 - 317
arch/ppc64/kernel/head.S

@@ -23,14 +23,11 @@
  *  2 of the License, or (at your option) any later version.
  */
 
-#define SECONDARY_PROCESSORS
-
 #include <linux/config.h>
 #include <linux/threads.h>
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
-#include <asm/naca.h>
 #include <asm/systemcfg.h>
 #include <asm/ppc_asm.h>
 #include <asm/offsets.h>
@@ -44,19 +41,14 @@
 #define DO_SOFT_DISABLE
 #endif
 
-/*
- * hcall interface to pSeries LPAR
- */
-#define H_SET_ASR	0x30
-
 /*
  * We layout physical memory as follows:
  * 0x0000 - 0x00ff : Secondary processor spin code
  * 0x0100 - 0x2fff : pSeries Interrupt prologs
- * 0x3000 - 0x3fff : Interrupt support
- * 0x4000 - 0x4fff : NACA
- * 0x6000	   : iSeries and common interrupt prologs
- * 0x9000 - 0x9fff : Initial segment table
+ * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs
+ * 0x6000 - 0x6fff : Initial (CPU0) segment table
+ * 0x7000 - 0x7fff : FWNMI data area
+ * 0x8000 -        : Early init and support code
  */
 
 /*
@@ -94,6 +86,7 @@ END_FTR_SECTION(0, 1)
 
 	/* Catch branch to 0 in real mode */
 	trap
+
 #ifdef CONFIG_PPC_ISERIES
 	/*
 	 * At offset 0x20, there is a pointer to iSeries LPAR data.
@@ -103,12 +96,12 @@ END_FTR_SECTION(0, 1)
 	.llong hvReleaseData-KERNELBASE
 
 	/*
-	 * At offset 0x28 and 0x30 are offsets to the msChunks
+	 * At offset 0x28 and 0x30 are offsets to the mschunks_map
 	 * array (used by the iSeries LPAR debugger to do translation
 	 * between physical addresses and absolute addresses) and
 	 * to the pidhash table (also used by the debugger)
 	 */
-	.llong msChunks-KERNELBASE
+	.llong mschunks_map-KERNELBASE
 	.llong 0	/* pidhash-KERNELBASE SFRXXX */
 
 	/* Offset 0x38 - Pointer to start of embedded System.map */
@@ -120,7 +113,7 @@ embedded_sysmap_start:
 embedded_sysmap_end:
 	.llong	0
 
-#else /* CONFIG_PPC_ISERIES */
+#endif /* CONFIG_PPC_ISERIES */
 
 	/* Secondary processors spin on this value until it goes to 1. */
 	.globl  __secondary_hold_spinloop
@@ -155,7 +148,7 @@ _GLOBAL(__secondary_hold)
 	std	r24,__secondary_hold_acknowledge@l(0)
 	sync
 
-	/* All secondary cpu's wait here until told to start. */
+	/* All secondary cpus wait here until told to start. */
 100:	ld	r4,__secondary_hold_spinloop@l(0)
 	cmpdi	0,r4,1
 	bne	100b
@@ -170,7 +163,6 @@ _GLOBAL(__secondary_hold)
 	BUG_OPCODE
 #endif
 #endif
-#endif
 
 /* This value is used to mark exception frames on the stack. */
 	.section ".toc","aw"
@@ -502,33 +494,37 @@ system_call_pSeries:
 	STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
 	STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
 
+	. = 0x3000
+
+/*** pSeries interrupt support ***/
+
 	/* moved from 0xf00 */
-	STD_EXCEPTION_PSERIES(0x3000, performance_monitor)
+	STD_EXCEPTION_PSERIES(., performance_monitor)
 
-	. = 0x3100
+	.align	7
 _GLOBAL(do_stab_bolted_pSeries)
 	mtcrf	0x80,r12
 	mfspr	r12,SPRG2
 	EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
 
-	
-	/* Space for the naca.  Architected to be located at real address
-	 * NACA_PHYS_ADDR.  Various tools rely on this location being fixed.
-	 * The first dword of the naca is required by iSeries LPAR to
-	 * point to itVpdAreas.  On pSeries native, this value is not used.
-	 */
-	. = NACA_PHYS_ADDR
-	.globl __end_interrupts
-__end_interrupts:
-#ifdef CONFIG_PPC_ISERIES
-	.globl naca
-naca:
-	.llong	itVpdAreas
-	.llong	0		/* xRamDisk */
-	.llong	0		/* xRamDiskSize */
+/*
+ * Vectors for the FWNMI option.  Share common code.
+ */
+      .globl system_reset_fwnmi
+system_reset_fwnmi:
+      HMT_MEDIUM
+      mtspr   SPRG1,r13               /* save r13 */
+      RUNLATCH_ON(r13)
+      EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
 
-	. = 0x6100
+      .globl machine_check_fwnmi
+machine_check_fwnmi:
+      HMT_MEDIUM
+      mtspr   SPRG1,r13               /* save r13 */
+      RUNLATCH_ON(r13)
+      EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
 
+#ifdef CONFIG_PPC_ISERIES
 /***  ISeries-LPAR interrupt handlers ***/
 
 	STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC)
@@ -626,9 +622,7 @@ system_reset_iSeries:
 
 	cmpwi	0,r23,0
 	beq	iSeries_secondary_smp_loop	/* Loop until told to go */
-#ifdef SECONDARY_PROCESSORS
 	bne	.__secondary_start		/* Loop until told to go */
-#endif
 iSeries_secondary_smp_loop:
 	/* Let the Hypervisor know we are alive */
 	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
@@ -671,51 +665,8 @@ hardware_interrupt_iSeries_masked:
 	ld	r13,PACA_EXGEN+EX_R13(r13)
 	rfid
 	b	.	/* prevent speculative execution */
-#endif
-
-/*
- * Data area reserved for FWNMI option.
- */
-	.= 0x7000
-	.globl fwnmi_data_area
-fwnmi_data_area:
-
-#ifdef CONFIG_PPC_ISERIES
-	. = LPARMAP_PHYS
-#include "lparmap.s"
 #endif /* CONFIG_PPC_ISERIES */
 
-/*
- * Vectors for the FWNMI option.  Share common code.
- */
-	. = 0x8000
-	.globl system_reset_fwnmi
-system_reset_fwnmi:
-	HMT_MEDIUM
-	mtspr	SPRG1,r13		/* save r13 */
-	RUNLATCH_ON(r13)
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
-	.globl machine_check_fwnmi
-machine_check_fwnmi:
-	HMT_MEDIUM
-	mtspr	SPRG1,r13		/* save r13 */
-	RUNLATCH_ON(r13)
-	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
-
-	/*
-	 * Space for the initial segment table
-	 * For LPAR, the hypervisor must fill in at least one entry
-	 * before we get control (with relocate on)
-	 */
-	. = STAB0_PHYS_ADDR
-	.globl __start_stab
-__start_stab:
-
-	. = (STAB0_PHYS_ADDR + PAGE_SIZE)
-	.globl __end_stab
-__end_stab:
-
-
 /*** Common interrupt handlers ***/
 
 	STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
@@ -752,8 +703,8 @@ machine_check_common:
  * R9 contains the saved CR, r13 points to the paca,
  * r10 contains the (bad) kernel stack pointer,
  * r11 and r12 contain the saved SRR0 and SRR1.
- * We switch to using the paca guard page as an emergency stack,
- * save the registers there, and call kernel_bad_stack(), which panics.
+ * We switch to using an emergency stack, save the registers there,
+ * and call kernel_bad_stack(), which panics.
  */
 bad_stack:
 	ld	r1,PACAEMERGSP(r13)
@@ -906,6 +857,62 @@ fp_unavailable_common:
 	bl	.kernel_fp_unavailable_exception
 	BUG_OPCODE
 
+/*
+ * load_up_fpu(unused, unused, tsk)
+ * Disable FP for the task which had the FPU previously,
+ * and save its floating-point registers in its thread_struct.
+ * Enables the FPU for use in the kernel on return.
+ * On SMP we know the fpu is free, since we give it up every
+ * switch (ie, no lazy save of the FP registers).
+ * On entry: r13 == 'current' && last_task_used_math != 'current'
+ */
+_STATIC(load_up_fpu)
+	mfmsr	r5			/* grab the current MSR */
+	ori	r5,r5,MSR_FP
+	mtmsrd	r5			/* enable use of fpu now */
+	isync
+/*
+ * For SMP, we don't do lazy FPU switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another.  Instead we call giveup_fpu in switch_to.
+ *
+ */
+#ifndef CONFIG_SMP
+	ld	r3,last_task_used_math@got(r2)
+	ld	r4,0(r3)
+	cmpdi	0,r4,0
+	beq	1f
+	/* Save FP state to last_task_used_math's THREAD struct */
+	addi	r4,r4,THREAD
+	SAVE_32FPRS(0, r4)
+	mffs	fr0
+	stfd	fr0,THREAD_FPSCR(r4)
+	/* Disable FP for last_task_used_math */
+	ld	r5,PT_REGS(r4)
+	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	li	r6,MSR_FP|MSR_FE0|MSR_FE1
+	andc	r4,r4,r6
+	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+	/* enable use of FP after return */
+	ld	r4,PACACURRENT(r13)
+	addi	r5,r4,THREAD		/* Get THREAD */
+	ld	r4,THREAD_FPEXC_MODE(r5)
+	ori	r12,r12,MSR_FP
+	or	r12,r12,r4
+	std	r12,_MSR(r1)
+	lfd	fr0,THREAD_FPSCR(r5)
+	mtfsf	0xff,fr0
+	REST_32FPRS(0, r5)
+#ifndef CONFIG_SMP
+	/* Update last_task_used_math to 'current' */
+	subi	r4,r5,THREAD		/* Back to 'current' */
+	std	r4,0(r3)
+#endif /* CONFIG_SMP */
+	/* restore registers and return */
+	b	fast_exception_return
+
 	.align	7
 	.globl altivec_unavailable_common
 altivec_unavailable_common:
@@ -921,6 +928,80 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	bl	.altivec_unavailable_exception
 	b	.ret_from_except
 
+#ifdef CONFIG_ALTIVEC
+/*
+ * load_up_altivec(unused, unused, tsk)
+ * Disable VMX for the task which had it previously,
+ * and save its vector registers in its thread_struct.
+ * Enables the VMX for use in the kernel on return.
+ * On SMP we know the VMX is free, since we give it up every
+ * switch (ie, no lazy save of the vector registers).
+ * On entry: r13 == 'current' && last_task_used_altivec != 'current'
+ */
+_STATIC(load_up_altivec)
+	mfmsr	r5			/* grab the current MSR */
+	oris	r5,r5,MSR_VEC@h
+	mtmsrd	r5			/* enable use of VMX now */
+	isync
+
+/*
+ * For SMP, we don't do lazy VMX switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another.  Instead we call giveup_altvec in switch_to.
+ * VRSAVE isn't dealt with here, that is done in the normal context
+ * switch code. Note that we could rely on vrsave value to eventually
+ * avoid saving all of the VREGs here...
+ */
+#ifndef CONFIG_SMP
+	ld	r3,last_task_used_altivec@got(r2)
+	ld	r4,0(r3)
+	cmpdi	0,r4,0
+	beq	1f
+	/* Save VMX state to last_task_used_altivec's THREAD struct */
+	addi	r4,r4,THREAD
+	SAVE_32VRS(0,r5,r4)
+	mfvscr	vr0
+	li	r10,THREAD_VSCR
+	stvx	vr0,r10,r4
+	/* Disable VMX for last_task_used_altivec */
+	ld	r5,PT_REGS(r4)
+	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	lis	r6,MSR_VEC@h
+	andc	r4,r4,r6
+	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+	/* Hack: if we get an altivec unavailable trap with VRSAVE
+	 * set to all zeros, we assume this is a broken application
+	 * that fails to set it properly, and thus we switch it to
+	 * all 1's
+	 */
+	mfspr	r4,SPRN_VRSAVE
+	cmpdi	0,r4,0
+	bne+	1f
+	li	r4,-1
+	mtspr	SPRN_VRSAVE,r4
+1:
+	/* enable use of VMX after return */
+	ld	r4,PACACURRENT(r13)
+	addi	r5,r4,THREAD		/* Get THREAD */
+	oris	r12,r12,MSR_VEC@h
+	std	r12,_MSR(r1)
+	li	r4,1
+	li	r10,THREAD_VSCR
+	stw	r4,THREAD_USED_VR(r5)
+	lvx	vr0,r10,r5
+	mtvscr	vr0
+	REST_32VRS(0,r4,r5)
+#ifndef CONFIG_SMP
+	/* Update last_task_used_math to 'current' */
+	subi	r4,r5,THREAD		/* Back to 'current' */
+	std	r4,0(r3)
+#endif /* CONFIG_SMP */
+	/* restore registers and return */
+	b	fast_exception_return
+#endif /* CONFIG_ALTIVEC */
+
 /*
  * Hash table stuff
  */
@@ -1167,6 +1248,42 @@ unrecov_slb:
 	bl	.unrecoverable_exception
 	b	1b
 
+/*
+ * Space for CPU0's segment table.
+ *
+ * On iSeries, the hypervisor must fill in at least one entry before
+ * we get control (with relocate on).  The address is give to the hv
+ * as a page number (see xLparMap in LparData.c), so this must be at a
+ * fixed address (the linker can't compute (u64)&initial_stab >>
+ * PAGE_SHIFT).
+ */
+	. = STAB0_PHYS_ADDR	/* 0x6000 */
+	.globl initial_stab
+initial_stab:
+	.space	4096
+
+/*
+ * Data area reserved for FWNMI option.
+ * This address (0x7000) is fixed by the RPA.
+ */
+	.= 0x7000
+	.globl fwnmi_data_area
+fwnmi_data_area:
+
+	/* iSeries does not use the FWNMI stuff, so it is safe to put
+	 * this here, even if we later allow kernels that will boot on
+	 * both pSeries and iSeries */
+#ifdef CONFIG_PPC_ISERIES
+        . = LPARMAP_PHYS
+#include "lparmap.s"
+/*
+ * This ".text" is here for old compilers that generate a trailing
+ * .note section when compiling .c files to .s
+ */
+	.text
+#endif /* CONFIG_PPC_ISERIES */
+
+        . = 0x8000
 
 /*
  * On pSeries, secondary processors spin in the following code.
@@ -1200,7 +1317,7 @@ _GLOBAL(pSeries_secondary_smp_init)
 	b	.kexec_wait		/* next kernel might do better	 */
 
 2:	mtspr	SPRG3,r13		/* Save vaddr of paca in SPRG3	 */
-	/* From now on, r24 is expected to be logica cpuid */
+	/* From now on, r24 is expected to be logical cpuid */
 	mr	r24,r5
 3:	HMT_LOW
 	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor should */
@@ -1213,9 +1330,7 @@ _GLOBAL(pSeries_secondary_smp_init)
 
 	cmpwi	0,r23,0
 #ifdef CONFIG_SMP
-#ifdef SECONDARY_PROCESSORS
 	bne	.__secondary_start
-#endif
 #endif
 	b 	3b			/* Loop until told to go	 */
 
@@ -1430,228 +1545,6 @@ _GLOBAL(copy_and_flush)
 .align 8
 copy_to_here:
 
-/*
- * load_up_fpu(unused, unused, tsk)
- * Disable FP for the task which had the FPU previously,
- * and save its floating-point registers in its thread_struct.
- * Enables the FPU for use in the kernel on return.
- * On SMP we know the fpu is free, since we give it up every
- * switch (ie, no lazy save of the FP registers).
- * On entry: r13 == 'current' && last_task_used_math != 'current'
- */
-_STATIC(load_up_fpu)
-	mfmsr	r5			/* grab the current MSR */
-	ori	r5,r5,MSR_FP
-	mtmsrd	r5			/* enable use of fpu now */
-	isync
-/*
- * For SMP, we don't do lazy FPU switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another.  Instead we call giveup_fpu in switch_to.
- *
- */
-#ifndef CONFIG_SMP
-	ld	r3,last_task_used_math@got(r2)
-	ld	r4,0(r3)
-	cmpdi	0,r4,0
-	beq	1f
-	/* Save FP state to last_task_used_math's THREAD struct */
-	addi	r4,r4,THREAD
-	SAVE_32FPRS(0, r4)
-	mffs	fr0
-	stfd	fr0,THREAD_FPSCR(r4)
-	/* Disable FP for last_task_used_math */
-	ld	r5,PT_REGS(r4)
-	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-	li	r6,MSR_FP|MSR_FE0|MSR_FE1
-	andc	r4,r4,r6
-	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
-	/* enable use of FP after return */
-	ld	r4,PACACURRENT(r13)
-	addi	r5,r4,THREAD		/* Get THREAD */
-	ld	r4,THREAD_FPEXC_MODE(r5)
-	ori	r12,r12,MSR_FP
-	or	r12,r12,r4
-	std	r12,_MSR(r1)
-	lfd	fr0,THREAD_FPSCR(r5)
-	mtfsf	0xff,fr0
-	REST_32FPRS(0, r5)
-#ifndef CONFIG_SMP
-	/* Update last_task_used_math to 'current' */
-	subi	r4,r5,THREAD		/* Back to 'current' */
-	std	r4,0(r3)
-#endif /* CONFIG_SMP */
-	/* restore registers and return */
-	b	fast_exception_return
-
-/*
- * disable_kernel_fp()
- * Disable the FPU.
- */
-_GLOBAL(disable_kernel_fp)
-	mfmsr	r3
-	rldicl	r0,r3,(63-MSR_FP_LG),1
-	rldicl	r3,r0,(MSR_FP_LG+1),0
-	mtmsrd	r3			/* disable use of fpu now */
-	isync
-	blr
-
-/*
- * giveup_fpu(tsk)
- * Disable FP for the task given as the argument,
- * and save the floating-point registers in its thread_struct.
- * Enables the FPU for use in the kernel on return.
- */
-_GLOBAL(giveup_fpu)
-	mfmsr	r5
-	ori	r5,r5,MSR_FP
-	mtmsrd	r5			/* enable use of fpu now */
-	isync
-	cmpdi	0,r3,0
-	beqlr-				/* if no previous owner, done */
-	addi	r3,r3,THREAD		/* want THREAD of task */
-	ld	r5,PT_REGS(r3)
-	cmpdi	0,r5,0
-	SAVE_32FPRS(0, r3)
-	mffs	fr0
-	stfd	fr0,THREAD_FPSCR(r3)
-	beq	1f
-	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-	li	r3,MSR_FP|MSR_FE0|MSR_FE1
-	andc	r4,r4,r3		/* disable FP for previous task */
-	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#ifndef CONFIG_SMP
-	li	r5,0
-	ld	r4,last_task_used_math@got(r2)
-	std	r5,0(r4)
-#endif /* CONFIG_SMP */
-	blr
-
-
-#ifdef CONFIG_ALTIVEC
-		
-/*
- * load_up_altivec(unused, unused, tsk)
- * Disable VMX for the task which had it previously,
- * and save its vector registers in its thread_struct.
- * Enables the VMX for use in the kernel on return.
- * On SMP we know the VMX is free, since we give it up every
- * switch (ie, no lazy save of the vector registers).
- * On entry: r13 == 'current' && last_task_used_altivec != 'current'
- */
-_STATIC(load_up_altivec)
-	mfmsr	r5			/* grab the current MSR */
-	oris	r5,r5,MSR_VEC@h
-	mtmsrd	r5			/* enable use of VMX now */
-	isync
-	
-/*
- * For SMP, we don't do lazy VMX switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another.  Instead we call giveup_altvec in switch_to.
- * VRSAVE isn't dealt with here, that is done in the normal context
- * switch code. Note that we could rely on vrsave value to eventually
- * avoid saving all of the VREGs here...
- */
-#ifndef CONFIG_SMP
-	ld	r3,last_task_used_altivec@got(r2)
-	ld	r4,0(r3)
-	cmpdi	0,r4,0
-	beq	1f
-	/* Save VMX state to last_task_used_altivec's THREAD struct */
-	addi	r4,r4,THREAD
-	SAVE_32VRS(0,r5,r4)
-	mfvscr	vr0
-	li	r10,THREAD_VSCR
-	stvx	vr0,r10,r4
-	/* Disable VMX for last_task_used_altivec */
-	ld	r5,PT_REGS(r4)
-	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-	lis	r6,MSR_VEC@h
-	andc	r4,r4,r6
-	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
-	/* Hack: if we get an altivec unavailable trap with VRSAVE
-	 * set to all zeros, we assume this is a broken application
-	 * that fails to set it properly, and thus we switch it to
-	 * all 1's
-	 */
-	mfspr	r4,SPRN_VRSAVE
-	cmpdi	0,r4,0
-	bne+	1f
-	li	r4,-1
-	mtspr	SPRN_VRSAVE,r4
-1:
-	/* enable use of VMX after return */
-	ld	r4,PACACURRENT(r13)
-	addi	r5,r4,THREAD		/* Get THREAD */
-	oris	r12,r12,MSR_VEC@h
-	std	r12,_MSR(r1)
-	li	r4,1
-	li	r10,THREAD_VSCR
-	stw	r4,THREAD_USED_VR(r5)
-	lvx	vr0,r10,r5
-	mtvscr	vr0
-	REST_32VRS(0,r4,r5)
-#ifndef CONFIG_SMP
-	/* Update last_task_used_math to 'current' */
-	subi	r4,r5,THREAD		/* Back to 'current' */
-	std	r4,0(r3)
-#endif /* CONFIG_SMP */
-	/* restore registers and return */
-	b	fast_exception_return
-
-/*
- * disable_kernel_altivec()
- * Disable the VMX.
- */
-_GLOBAL(disable_kernel_altivec)
-	mfmsr	r3
-	rldicl	r0,r3,(63-MSR_VEC_LG),1
-	rldicl	r3,r0,(MSR_VEC_LG+1),0
-	mtmsrd	r3			/* disable use of VMX now */
-	isync
-	blr
-
-/*
- * giveup_altivec(tsk)
- * Disable VMX for the task given as the argument,
- * and save the vector registers in its thread_struct.
- * Enables the VMX for use in the kernel on return.
- */
-_GLOBAL(giveup_altivec)
-	mfmsr	r5
-	oris	r5,r5,MSR_VEC@h
-	mtmsrd	r5			/* enable use of VMX now */
-	isync
-	cmpdi	0,r3,0
-	beqlr-				/* if no previous owner, done */
-	addi	r3,r3,THREAD		/* want THREAD of task */
-	ld	r5,PT_REGS(r3)
-	cmpdi	0,r5,0
-	SAVE_32VRS(0,r4,r3)
-	mfvscr	vr0
-	li	r4,THREAD_VSCR
-	stvx	vr0,r4,r3
-	beq	1f
-	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-	lis	r3,MSR_VEC@h
-	andc	r4,r4,r3		/* disable FP for previous task */
-	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#ifndef CONFIG_SMP
-	li	r5,0
-	ld	r4,last_task_used_altivec@got(r2)
-	std	r5,0(r4)
-#endif /* CONFIG_SMP */
-	blr
-
-#endif /* CONFIG_ALTIVEC */
-
 #ifdef CONFIG_SMP
 #ifdef CONFIG_PPC_PMAC
 /*
@@ -2002,9 +1895,6 @@ _STATIC(start_here_common)
 
 	bl .start_kernel
 
-_GLOBAL(__setup_cpu_power3)
-	blr
-
 _GLOBAL(hmt_init)
 #ifdef CONFIG_HMT
 	LOADADDR(r5, hmt_thread_data)
@@ -2095,20 +1985,19 @@ _GLOBAL(smp_release_cpus)
 
 /*
  * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
+ * This stuff goes at the beginning of the bss, which is page-aligned.
  */
-	.data
+	.section ".bss"
+
 	.align	12
-	.globl	sdata
-sdata:
+
 	.globl	empty_zero_page
 empty_zero_page:
-	.space	4096
+	.space	PAGE_SIZE
 
 	.globl	swapper_pg_dir
 swapper_pg_dir:
-	.space	4096
+	.space	PAGE_SIZE
 
 /*
  * This space gets a copy of optional info passed to us by the bootstrap

+ 4 - 1
arch/ppc64/kernel/iSeries_htab.c

@@ -41,6 +41,7 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
 				unsigned long prpn, unsigned long vflags,
 				unsigned long rflags)
 {
+	unsigned long arpn;
 	long slot;
 	hpte_t lhpte;
 	int secondary = 0;
@@ -70,8 +71,10 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
 		slot &= 0x7fffffffffffffff;
 	}
 
+	arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT;
+
 	lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
-	lhpte.r = (physRpn_to_absRpn(prpn) << HPTE_R_RPN_SHIFT) | rflags;
+	lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
 
 	/* Now fill in the actual HPTE */
 	HvCallHpt_addValidate(slot, secondary, &lhpte);

+ 26 - 4
arch/ppc64/kernel/iSeries_setup.c

@@ -39,6 +39,7 @@
 #include <asm/cputable.h>
 #include <asm/sections.h>
 #include <asm/iommu.h>
+#include <asm/firmware.h>
 
 #include <asm/time.h>
 #include "iSeries_setup.h"
@@ -314,6 +315,8 @@ static void __init iSeries_init_early(void)
 
 	DBG(" -> iSeries_init_early()\n");
 
+	ppc64_firmware_features = FW_FEATURE_ISERIES;
+
 	ppcdbg_initialize();
 
 #if defined(CONFIG_BLK_DEV_INITRD)
@@ -412,6 +415,22 @@ static void __init iSeries_init_early(void)
 	DBG(" <- iSeries_init_early()\n");
 }
 
+struct mschunks_map mschunks_map = {
+	/* XXX We don't use these, but Piranha might need them. */
+	.chunk_size  = MSCHUNKS_CHUNK_SIZE,
+	.chunk_shift = MSCHUNKS_CHUNK_SHIFT,
+	.chunk_mask  = MSCHUNKS_OFFSET_MASK,
+};
+EXPORT_SYMBOL(mschunks_map);
+
+void mschunks_alloc(unsigned long num_chunks)
+{
+	klimit = _ALIGN(klimit, sizeof(u32));
+	mschunks_map.mapping = (u32 *)klimit;
+	klimit += num_chunks * sizeof(u32);
+	mschunks_map.num_chunks = num_chunks;
+}
+
 /*
  * The iSeries may have very large memories ( > 128 GB ) and a partition
  * may get memory in "chunks" that may be anywhere in the 2**52 real
@@ -449,7 +468,7 @@ static void __init build_iSeries_Memory_Map(void)
 
 	/* Chunk size on iSeries is 256K bytes */
 	totalChunks = (u32)HvLpConfig_getMsChunks();
-	klimit = msChunks_alloc(klimit, totalChunks, 1UL << 18);
+	mschunks_alloc(totalChunks);
 
 	/*
 	 * Get absolute address of our load area
@@ -486,7 +505,7 @@ static void __init build_iSeries_Memory_Map(void)
 	printk("Load area size %dK\n", loadAreaSize * 256);
 
 	for (nextPhysChunk = 0; nextPhysChunk < loadAreaSize; ++nextPhysChunk)
-		msChunks.abs[nextPhysChunk] =
+		mschunks_map.mapping[nextPhysChunk] =
 			loadAreaFirstChunk + nextPhysChunk;
 
 	/*
@@ -495,7 +514,7 @@ static void __init build_iSeries_Memory_Map(void)
 	 */
 	hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress());
 	hptSizePages = (u32)HvCallHpt_getHptPages();
-	hptSizeChunks = hptSizePages >> (msChunks.chunk_shift - PAGE_SHIFT);
+	hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT);
 	hptLastChunk = hptFirstChunk + hptSizeChunks - 1;
 
 	printk("HPT absolute addr = %016lx, size = %dK\n",
@@ -552,7 +571,8 @@ static void __init build_iSeries_Memory_Map(void)
 				     (absChunk > hptLastChunk)) &&
 				    ((absChunk < loadAreaFirstChunk) ||
 				     (absChunk > loadAreaLastChunk))) {
-					msChunks.abs[nextPhysChunk] = absChunk;
+					mschunks_map.mapping[nextPhysChunk] =
+						absChunk;
 					++nextPhysChunk;
 				}
 			}
@@ -944,6 +964,8 @@ void __init iSeries_early_setup(void)
 	ppc_md.calibrate_decr = iSeries_calibrate_decr;
 	ppc_md.progress = iSeries_progress;
 
+	/* XXX Implement enable_pmcs for iSeries */
+
 	if (get_paca()->lppaca.shared_proc) {
 		ppc_md.idle_loop = iseries_shared_idle;
 		printk(KERN_INFO "Using shared processor idle loop\n");

+ 155 - 0
arch/ppc64/kernel/iSeries_vio.c

@@ -0,0 +1,155 @@
+/*
+ * IBM PowerPC iSeries Virtual I/O Infrastructure Support.
+ *
+ *    Copyright (c) 2005 Stephen Rothwell, IBM Corp.
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/init.h>
+
+#include <asm/vio.h>
+#include <asm/iommu.h>
+#include <asm/abs_addr.h>
+#include <asm/page.h>
+#include <asm/iSeries/vio.h>
+#include <asm/iSeries/HvTypes.h>
+#include <asm/iSeries/HvLpConfig.h>
+#include <asm/iSeries/HvCallXm.h>
+
+struct device *iSeries_vio_dev = &vio_bus_device.dev;
+EXPORT_SYMBOL(iSeries_vio_dev);
+
+static struct iommu_table veth_iommu_table;
+static struct iommu_table vio_iommu_table;
+
+static void __init iommu_vio_init(void)
+{
+	struct iommu_table *t;
+	struct iommu_table_cb cb;
+	unsigned long cbp;
+	unsigned long itc_entries;
+
+	cb.itc_busno = 255;    /* Bus 255 is the virtual bus */
+	cb.itc_virtbus = 0xff; /* Ask for virtual bus */
+
+	cbp = virt_to_abs(&cb);
+	HvCallXm_getTceTableParms(cbp);
+
+	itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
+	veth_iommu_table.it_size        = itc_entries / 2;
+	veth_iommu_table.it_busno       = cb.itc_busno;
+	veth_iommu_table.it_offset      = cb.itc_offset;
+	veth_iommu_table.it_index       = cb.itc_index;
+	veth_iommu_table.it_type        = TCE_VB;
+	veth_iommu_table.it_blocksize	= 1;
+
+	t = iommu_init_table(&veth_iommu_table);
+
+	if (!t)
+		printk("Virtual Bus VETH TCE table failed.\n");
+
+	vio_iommu_table.it_size         = itc_entries - veth_iommu_table.it_size;
+	vio_iommu_table.it_busno        = cb.itc_busno;
+	vio_iommu_table.it_offset       = cb.itc_offset +
+					  veth_iommu_table.it_size;
+	vio_iommu_table.it_index        = cb.itc_index;
+	vio_iommu_table.it_type         = TCE_VB;
+	vio_iommu_table.it_blocksize	= 1;
+
+	t = iommu_init_table(&vio_iommu_table);
+
+	if (!t)
+		printk("Virtual Bus VIO TCE table failed.\n");
+}
+
+/**
+ * vio_register_device_iseries: - Register a new iSeries vio device.
+ * @voidev:	The device to register.
+ */
+static struct vio_dev *__init vio_register_device_iseries(char *type,
+		uint32_t unit_num)
+{
+	struct vio_dev *viodev;
+
+	/* allocate a vio_dev for this device */
+	viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
+	if (!viodev)
+		return NULL;
+	memset(viodev, 0, sizeof(struct vio_dev));
+
+	snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num);
+
+	viodev->name = viodev->dev.bus_id;
+	viodev->type = type;
+	viodev->unit_address = unit_num;
+	viodev->iommu_table = &vio_iommu_table;
+	if (vio_register_device(viodev) == NULL) {
+		kfree(viodev);
+		return NULL;
+	}
+	return viodev;
+}
+
+void __init probe_bus_iseries(void)
+{
+	HvLpIndexMap vlan_map;
+	struct vio_dev *viodev;
+	int i;
+
+	/* there is only one of each of these */
+	vio_register_device_iseries("viocons", 0);
+	vio_register_device_iseries("vscsi", 0);
+
+	vlan_map = HvLpConfig_getVirtualLanIndexMap();
+	for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
+		if ((vlan_map & (0x8000 >> i)) == 0)
+			continue;
+		viodev = vio_register_device_iseries("vlan", i);
+		/* veth is special and has it own iommu_table */
+		viodev->iommu_table = &veth_iommu_table;
+	}
+	for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++)
+		vio_register_device_iseries("viodasd", i);
+	for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++)
+		vio_register_device_iseries("viocd", i);
+	for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++)
+		vio_register_device_iseries("viotape", i);
+}
+
+/**
+ * vio_match_device_iseries: - Tell if a iSeries VIO device matches a
+ *	vio_device_id
+ */
+static int vio_match_device_iseries(const struct vio_device_id *id,
+		const struct vio_dev *dev)
+{
+	return strncmp(dev->type, id->type, strlen(id->type)) == 0;
+}
+
+static struct vio_bus_ops vio_bus_ops_iseries = {
+	.match = vio_match_device_iseries,
+};
+
+/**
+ * vio_bus_init_iseries: - Initialize the iSeries virtual IO bus
+ */
+static int __init vio_bus_init_iseries(void)
+{
+	int err;
+
+	err = vio_bus_init(&vio_bus_ops_iseries);
+	if (err == 0) {
+		iommu_vio_init();
+		vio_bus_device.iommu_table = &vio_iommu_table;
+		iSeries_vio_dev = &vio_bus_device.dev;
+		probe_bus_iseries();
+	}
+	return err;
+}
+
+__initcall(vio_bus_init_iseries);

+ 39 - 112
arch/ppc64/kernel/lmb.c

@@ -28,33 +28,28 @@ void lmb_dump_all(void)
 {
 #ifdef DEBUG
 	unsigned long i;
-	struct lmb *_lmb  = &lmb;
 
 	udbg_printf("lmb_dump_all:\n");
 	udbg_printf("    memory.cnt		  = 0x%lx\n",
-		    _lmb->memory.cnt);
+		    lmb.memory.cnt);
 	udbg_printf("    memory.size		  = 0x%lx\n",
-		    _lmb->memory.size);
-	for (i=0; i < _lmb->memory.cnt ;i++) {
+		    lmb.memory.size);
+	for (i=0; i < lmb.memory.cnt ;i++) {
 		udbg_printf("    memory.region[0x%x].base       = 0x%lx\n",
-			    i, _lmb->memory.region[i].base);
-		udbg_printf("		      .physbase = 0x%lx\n",
-			    _lmb->memory.region[i].physbase);
+			    i, lmb.memory.region[i].base);
 		udbg_printf("		      .size     = 0x%lx\n",
-			    _lmb->memory.region[i].size);
+			    lmb.memory.region[i].size);
 	}
 
 	udbg_printf("\n    reserved.cnt	  = 0x%lx\n",
-		    _lmb->reserved.cnt);
+		    lmb.reserved.cnt);
 	udbg_printf("    reserved.size	  = 0x%lx\n",
-		    _lmb->reserved.size);
-	for (i=0; i < _lmb->reserved.cnt ;i++) {
+		    lmb.reserved.size);
+	for (i=0; i < lmb.reserved.cnt ;i++) {
 		udbg_printf("    reserved.region[0x%x].base       = 0x%lx\n",
-			    i, _lmb->reserved.region[i].base);
-		udbg_printf("		      .physbase = 0x%lx\n",
-			    _lmb->reserved.region[i].physbase);
+			    i, lmb.reserved.region[i].base);
 		udbg_printf("		      .size     = 0x%lx\n",
-			    _lmb->reserved.region[i].size);
+			    lmb.reserved.region[i].size);
 	}
 #endif /* DEBUG */
 }
@@ -98,7 +93,6 @@ lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2)
 	rgn->region[r1].size += rgn->region[r2].size;
 	for (i=r2; i < rgn->cnt-1; i++) {
 		rgn->region[i].base = rgn->region[i+1].base;
-		rgn->region[i].physbase = rgn->region[i+1].physbase;
 		rgn->region[i].size = rgn->region[i+1].size;
 	}
 	rgn->cnt--;
@@ -108,49 +102,29 @@ lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2)
 void __init
 lmb_init(void)
 {
-	struct lmb *_lmb = &lmb;
-
 	/* Create a dummy zero size LMB which will get coalesced away later.
 	 * This simplifies the lmb_add() code below...
 	 */
-	_lmb->memory.region[0].base = 0;
-	_lmb->memory.region[0].size = 0;
-	_lmb->memory.cnt = 1;
+	lmb.memory.region[0].base = 0;
+	lmb.memory.region[0].size = 0;
+	lmb.memory.cnt = 1;
 
 	/* Ditto. */
-	_lmb->reserved.region[0].base = 0;
-	_lmb->reserved.region[0].size = 0;
-	_lmb->reserved.cnt = 1;
+	lmb.reserved.region[0].base = 0;
+	lmb.reserved.region[0].size = 0;
+	lmb.reserved.cnt = 1;
 }
 
 /* This routine called with relocation disabled. */
 void __init
 lmb_analyze(void)
 {
-	unsigned long i;
-	unsigned long mem_size = 0;
-	unsigned long size_mask = 0;
-	struct lmb *_lmb = &lmb;
-#ifdef CONFIG_MSCHUNKS
-	unsigned long physbase = 0;
-#endif
-
-	for (i=0; i < _lmb->memory.cnt; i++) {
-		unsigned long lmb_size;
-
-		lmb_size = _lmb->memory.region[i].size;
-
-#ifdef CONFIG_MSCHUNKS
-		_lmb->memory.region[i].physbase = physbase;
-		physbase += lmb_size;
-#else
-		_lmb->memory.region[i].physbase = _lmb->memory.region[i].base;
-#endif
-		mem_size += lmb_size;
-		size_mask |= lmb_size;
-	}
+	int i;
+
+	lmb.memory.size = 0;
 
-	_lmb->memory.size = mem_size;
+	for (i = 0; i < lmb.memory.cnt; i++)
+		lmb.memory.size += lmb.memory.region[i].size;
 }
 
 /* This routine called with relocation disabled. */
@@ -168,7 +142,6 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size)
 		adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize);
 		if ( adjacent > 0 ) {
 			rgn->region[i].base -= size;
-			rgn->region[i].physbase -= size;
 			rgn->region[i].size += size;
 			coalesced++;
 			break;
@@ -195,11 +168,9 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size)
 	for (i=rgn->cnt-1; i >= 0; i--) {
 		if (base < rgn->region[i].base) {
 			rgn->region[i+1].base = rgn->region[i].base;
-			rgn->region[i+1].physbase = rgn->region[i].physbase;
 			rgn->region[i+1].size = rgn->region[i].size;
 		}  else {
 			rgn->region[i+1].base = base;
-			rgn->region[i+1].physbase = lmb_abs_to_phys(base);
 			rgn->region[i+1].size = size;
 			break;
 		}
@@ -213,12 +184,11 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size)
 long __init
 lmb_add(unsigned long base, unsigned long size)
 {
-	struct lmb *_lmb = &lmb;
-	struct lmb_region *_rgn = &(_lmb->memory);
+	struct lmb_region *_rgn = &(lmb.memory);
 
 	/* On pSeries LPAR systems, the first LMB is our RMO region. */
 	if ( base == 0 )
-		_lmb->rmo_size = size;
+		lmb.rmo_size = size;
 
 	return lmb_add_region(_rgn, base, size);
 
@@ -227,8 +197,7 @@ lmb_add(unsigned long base, unsigned long size)
 long __init
 lmb_reserve(unsigned long base, unsigned long size)
 {
-	struct lmb *_lmb = &lmb;
-	struct lmb_region *_rgn = &(_lmb->reserved);
+	struct lmb_region *_rgn = &(lmb.reserved);
 
 	return lmb_add_region(_rgn, base, size);
 }
@@ -260,13 +229,10 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr)
 {
 	long i, j;
 	unsigned long base = 0;
-	struct lmb *_lmb = &lmb;
-	struct lmb_region *_mem = &(_lmb->memory);
-	struct lmb_region *_rsv = &(_lmb->reserved);
 
-	for (i=_mem->cnt-1; i >= 0; i--) {
-		unsigned long lmbbase = _mem->region[i].base;
-		unsigned long lmbsize = _mem->region[i].size;
+	for (i=lmb.memory.cnt-1; i >= 0; i--) {
+		unsigned long lmbbase = lmb.memory.region[i].base;
+		unsigned long lmbsize = lmb.memory.region[i].size;
 
 		if ( max_addr == LMB_ALLOC_ANYWHERE )
 			base = _ALIGN_DOWN(lmbbase+lmbsize-size, align);
@@ -276,8 +242,8 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr)
 			continue;
 
 		while ( (lmbbase <= base) &&
-			((j = lmb_overlaps_region(_rsv,base,size)) >= 0) ) {
-			base = _ALIGN_DOWN(_rsv->region[j].base-size, align);
+			((j = lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) {
+			base = _ALIGN_DOWN(lmb.reserved.region[j].base-size, align);
 		}
 
 		if ( (base != 0) && (lmbbase <= base) )
@@ -287,62 +253,24 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr)
 	if ( i < 0 )
 		return 0;
 
-	lmb_add_region(_rsv, base, size);
+	lmb_add_region(&lmb.reserved, base, size);
 
 	return base;
 }
 
+/* You must call lmb_analyze() before this. */
 unsigned long __init
 lmb_phys_mem_size(void)
 {
-	struct lmb *_lmb = &lmb;
-#ifdef CONFIG_MSCHUNKS
-	return _lmb->memory.size;
-#else
-	struct lmb_region *_mem = &(_lmb->memory);
-	unsigned long total = 0;
-	int i;
-
-	/* add all physical memory to the bootmem map */
-	for (i=0; i < _mem->cnt; i++)
-		total += _mem->region[i].size;
-	return total;
-#endif /* CONFIG_MSCHUNKS */
+	return lmb.memory.size;
 }
 
 unsigned long __init
 lmb_end_of_DRAM(void)
 {
-	struct lmb *_lmb = &lmb;
-	struct lmb_region *_mem = &(_lmb->memory);
-	int idx = _mem->cnt - 1;
-
-#ifdef CONFIG_MSCHUNKS
-	return (_mem->region[idx].physbase + _mem->region[idx].size);
-#else
-	return (_mem->region[idx].base + _mem->region[idx].size);
-#endif /* CONFIG_MSCHUNKS */
-
-	return 0;
-}
-
-unsigned long __init
-lmb_abs_to_phys(unsigned long aa)
-{
-	unsigned long i, pa = aa;
-	struct lmb *_lmb = &lmb;
-	struct lmb_region *_mem = &(_lmb->memory);
-
-	for (i=0; i < _mem->cnt; i++) {
-		unsigned long lmbbase = _mem->region[i].base;
-		unsigned long lmbsize = _mem->region[i].size;
-		if ( lmb_addrs_overlap(aa,1,lmbbase,lmbsize) ) {
-			pa = _mem->region[i].physbase + (aa - lmbbase);
-			break;
-		}
-	}
+	int idx = lmb.memory.cnt - 1;
 
-	return pa;
+	return (lmb.memory.region[idx].base + lmb.memory.region[idx].size);
 }
 
 /*
@@ -353,20 +281,19 @@ void __init lmb_enforce_memory_limit(void)
 {
 	extern unsigned long memory_limit;
 	unsigned long i, limit;
-	struct lmb_region *mem = &(lmb.memory);
 
 	if (! memory_limit)
 		return;
 
 	limit = memory_limit;
-	for (i = 0; i < mem->cnt; i++) {
-		if (limit > mem->region[i].size) {
-			limit -= mem->region[i].size;
+	for (i = 0; i < lmb.memory.cnt; i++) {
+		if (limit > lmb.memory.region[i].size) {
+			limit -= lmb.memory.region[i].size;
 			continue;
 		}
 
-		mem->region[i].size = limit;
-		mem->cnt = i + 1;
+		lmb.memory.region[i].size = limit;
+		lmb.memory.cnt = i + 1;
 		break;
 	}
 }

+ 4 - 3
arch/ppc64/kernel/lparcfg.c

@@ -29,7 +29,7 @@
 #include <asm/iSeries/HvLpConfig.h>
 #include <asm/lppaca.h>
 #include <asm/hvcall.h>
-#include <asm/cputable.h>
+#include <asm/firmware.h>
 #include <asm/rtas.h>
 #include <asm/system.h>
 #include <asm/time.h>
@@ -273,6 +273,7 @@ static void parse_system_parameter_string(struct seq_file *m)
 		if (!workbuffer) {
 			printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
 			       __FILE__, __FUNCTION__, __LINE__);
+			kfree(local_buffer);			
 			return;
 		}
 #ifdef LPARCFG_DEBUG
@@ -377,7 +378,7 @@ static int lparcfg_data(struct seq_file *m, void *v)
 
 	partition_active_processors = lparcfg_count_active_processors();
 
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
 		unsigned long h_entitled, h_unallocated;
 		unsigned long h_aggregation, h_resource;
 		unsigned long pool_idle_time, pool_procs;
@@ -571,7 +572,7 @@ int __init lparcfg_init(void)
 	mode_t mode = S_IRUSR;
 
 	/* Allow writing if we have FW_FEATURE_SPLPAR */
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
 		lparcfg_fops.write = lparcfg_write;
 		mode |= S_IWUSR;
 	}

+ 98 - 0
arch/ppc64/kernel/misc.S

@@ -680,6 +680,104 @@ _GLOBAL(kernel_thread)
 	ld	r30,-16(r1)
 	blr
 
+/*
+ * disable_kernel_fp()
+ * Disable the FPU.
+ */
+_GLOBAL(disable_kernel_fp)
+	mfmsr	r3
+	rldicl	r0,r3,(63-MSR_FP_LG),1
+	rldicl	r3,r0,(MSR_FP_LG+1),0
+	mtmsrd	r3			/* disable use of fpu now */
+	isync
+	blr
+
+/*
+ * giveup_fpu(tsk)
+ * Disable FP for the task given as the argument,
+ * and save the floating-point registers in its thread_struct.
+ * Enables the FPU for use in the kernel on return.
+ */
+_GLOBAL(giveup_fpu)
+	mfmsr	r5
+	ori	r5,r5,MSR_FP
+	mtmsrd	r5			/* enable use of fpu now */
+	isync
+	cmpdi	0,r3,0
+	beqlr-				/* if no previous owner, done */
+	addi	r3,r3,THREAD		/* want THREAD of task */
+	ld	r5,PT_REGS(r3)
+	cmpdi	0,r5,0
+	SAVE_32FPRS(0, r3)
+	mffs	fr0
+	stfd	fr0,THREAD_FPSCR(r3)
+	beq	1f
+	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	li	r3,MSR_FP|MSR_FE0|MSR_FE1
+	andc	r4,r4,r3		/* disable FP for previous task */
+	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+	li	r5,0
+	ld	r4,last_task_used_math@got(r2)
+	std	r5,0(r4)
+#endif /* CONFIG_SMP */
+	blr
+
+#ifdef CONFIG_ALTIVEC
+
+#if 0 /* this has no callers for now */
+/*
+ * disable_kernel_altivec()
+ * Disable the VMX.
+ */
+_GLOBAL(disable_kernel_altivec)
+	mfmsr	r3
+	rldicl	r0,r3,(63-MSR_VEC_LG),1
+	rldicl	r3,r0,(MSR_VEC_LG+1),0
+	mtmsrd	r3			/* disable use of VMX now */
+	isync
+	blr
+#endif /* 0 */
+
+/*
+ * giveup_altivec(tsk)
+ * Disable VMX for the task given as the argument,
+ * and save the vector registers in its thread_struct.
+ * Enables the VMX for use in the kernel on return.
+ */
+_GLOBAL(giveup_altivec)
+	mfmsr	r5
+	oris	r5,r5,MSR_VEC@h
+	mtmsrd	r5			/* enable use of VMX now */
+	isync
+	cmpdi	0,r3,0
+	beqlr-				/* if no previous owner, done */
+	addi	r3,r3,THREAD		/* want THREAD of task */
+	ld	r5,PT_REGS(r3)
+	cmpdi	0,r5,0
+	SAVE_32VRS(0,r4,r3)
+	mfvscr	vr0
+	li	r4,THREAD_VSCR
+	stvx	vr0,r4,r3
+	beq	1f
+	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	lis	r3,MSR_VEC@h
+	andc	r4,r4,r3		/* disable FP for previous task */
+	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+	li	r5,0
+	ld	r4,last_task_used_altivec@got(r2)
+	std	r5,0(r4)
+#endif /* CONFIG_SMP */
+	blr
+
+#endif /* CONFIG_ALTIVEC */
+
+_GLOBAL(__setup_cpu_power3)
+	blr
+
 /* kexec_wait(phys_cpu)
  *
  * wait for the flag to change, indicating this kernel is going away but

+ 0 - 2
arch/ppc64/kernel/of_device.c

@@ -236,7 +236,6 @@ void of_device_unregister(struct of_device *ofdev)
 struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id)
 {
 	struct of_device *dev;
-	u32 *reg;
 
 	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
 	if (!dev)
@@ -250,7 +249,6 @@ struct of_device* of_platform_device_create(struct device_node *np, const char *
 	dev->dev.bus = &of_platform_bus_type;
 	dev->dev.release = of_release_dev;
 
-	reg = (u32 *)get_property(np, "reg", NULL);
 	strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE);
 
 	if (of_device_register(dev) != 0) {

+ 2 - 1
arch/ppc64/kernel/pSeries_iommu.c

@@ -45,6 +45,7 @@
 #include <asm/plpar_wrappers.h>
 #include <asm/pSeries_reconfig.h>
 #include <asm/systemcfg.h>
+#include <asm/firmware.h>
 #include "pci.h"
 
 #define DBG(fmt...)
@@ -546,7 +547,7 @@ void iommu_init_early_pSeries(void)
 	}
 
 	if (systemcfg->platform & PLATFORM_LPAR) {
-		if (cur_cpu_spec->firmware_features & FW_FEATURE_MULTITCE) {
+		if (firmware_has_feature(FW_FEATURE_MULTITCE)) {
 			ppc_md.tce_build = tce_buildmulti_pSeriesLP;
 			ppc_md.tce_free	 = tce_freemulti_pSeriesLP;
 		} else {

+ 1 - 3
arch/ppc64/kernel/pSeries_lpar.c

@@ -52,7 +52,6 @@ EXPORT_SYMBOL(plpar_hcall_4out);
 EXPORT_SYMBOL(plpar_hcall_norets);
 EXPORT_SYMBOL(plpar_hcall_8arg_2ret);
 
-extern void fw_feature_init(void);
 extern void pSeries_find_serial_port(void);
 
 
@@ -279,7 +278,6 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 			      unsigned long va, unsigned long prpn,
 			      unsigned long vflags, unsigned long rflags)
 {
-	unsigned long arpn = physRpn_to_absRpn(prpn);
 	unsigned long lpar_rc;
 	unsigned long flags;
 	unsigned long slot;
@@ -290,7 +288,7 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	if (vflags & HPTE_V_LARGE)
 		hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT);
 
-	hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
+	hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
 
 	/* Now fill in the actual HPTE */
 	/* Set CEC cookie to 0         */

+ 29 - 10
arch/ppc64/kernel/pSeries_setup.c

@@ -60,7 +60,8 @@
 #include <asm/nvram.h>
 #include <asm/plpar_wrappers.h>
 #include <asm/xics.h>
-#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/pmc.h>
 
 #include "i8259.h"
 #include "mpic.h"
@@ -187,6 +188,21 @@ static void __init pSeries_setup_mpic(void)
 				  " MPIC     ");
 }
 
+static void pseries_lpar_enable_pmcs(void)
+{
+	unsigned long set, reset;
+
+	power4_enable_pmcs();
+
+	set = 1UL << 63;
+	reset = 0;
+	plpar_hcall_norets(H_PERFMON, set, reset);
+
+	/* instruct hypervisor to maintain PMCs */
+	if (firmware_has_feature(FW_FEATURE_SPLPAR))
+		get_paca()->lppaca.pmcregs_in_use = 1;
+}
+
 static void __init pSeries_setup_arch(void)
 {
 	/* Fixup ppc_md depending on the type of interrupt controller */
@@ -231,11 +247,9 @@ static void __init pSeries_setup_arch(void)
 
 	pSeries_nvram_init();
 
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
-		vpa_init(boot_cpuid);
-
 	/* Choose an idle loop */
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		vpa_init(boot_cpuid);
 		if (get_paca()->lppaca.shared_proc) {
 			printk(KERN_INFO "Using shared processor idle loop\n");
 			ppc_md.idle_loop = pseries_shared_idle;
@@ -247,6 +261,11 @@ static void __init pSeries_setup_arch(void)
 		printk(KERN_INFO "Using default idle loop\n");
 		ppc_md.idle_loop = default_idle;
 	}
+
+	if (systemcfg->platform & PLATFORM_LPAR)
+		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
+	else
+		ppc_md.enable_pmcs = power4_enable_pmcs;
 }
 
 static int __init pSeries_init_panel(void)
@@ -260,11 +279,11 @@ static int __init pSeries_init_panel(void)
 arch_initcall(pSeries_init_panel);
 
 
-/* Build up the firmware_features bitmask field
+/* Build up the ppc64_firmware_features bitmask field
  * using contents of device-tree/ibm,hypertas-functions.
  * Ultimately this functionality may be moved into prom.c prom_init().
  */
-void __init fw_feature_init(void)
+static void __init fw_feature_init(void)
 {
 	struct device_node * dn;
 	char * hypertas;
@@ -272,7 +291,7 @@ void __init fw_feature_init(void)
 
 	DBG(" -> fw_feature_init()\n");
 
-	cur_cpu_spec->firmware_features = 0;
+	ppc64_firmware_features = 0;
 	dn = of_find_node_by_path("/rtas");
 	if (dn == NULL) {
 		printk(KERN_ERR "WARNING ! Cannot find RTAS in device-tree !\n");
@@ -288,7 +307,7 @@ void __init fw_feature_init(void)
 				if ((firmware_features_table[i].name) &&
 				    (strcmp(firmware_features_table[i].name,hypertas))==0) {
 					/* we have a match */
-					cur_cpu_spec->firmware_features |= 
+					ppc64_firmware_features |= 
 						(firmware_features_table[i].val);
 					break;
 				} 
@@ -302,7 +321,7 @@ void __init fw_feature_init(void)
 	of_node_put(dn);
  no_rtas:
 	printk(KERN_INFO "firmware_features = 0x%lx\n", 
-	       cur_cpu_spec->firmware_features);
+	       ppc64_firmware_features);
 
 	DBG(" <- fw_feature_init()\n");
 }

+ 2 - 1
arch/ppc64/kernel/pSeries_smp.c

@@ -41,6 +41,7 @@
 #include <asm/machdep.h>
 #include <asm/xics.h>
 #include <asm/cputable.h>
+#include <asm/firmware.h>
 #include <asm/system.h>
 #include <asm/rtas.h>
 #include <asm/plpar_wrappers.h>
@@ -326,7 +327,7 @@ static void __devinit smp_xics_setup_cpu(int cpu)
 	if (cpu != boot_cpuid)
 		xics_setup_cpu();
 
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
+	if (firmware_has_feature(FW_FEATURE_SPLPAR))
 		vpa_init(cpu);
 
 	cpu_clear(cpu, of_spin_map);

+ 273 - 0
arch/ppc64/kernel/pSeries_vio.c

@@ -0,0 +1,273 @@
+/*
+ * IBM PowerPC pSeries Virtual I/O Infrastructure Support.
+ *
+ *    Copyright (c) 2003-2005 IBM Corp.
+ *     Dave Engebretsen engebret@us.ibm.com
+ *     Santiago Leon santil@us.ibm.com
+ *     Hollis Blanchard <hollisb@us.ibm.com>
+ *     Stephen Rothwell
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/kobject.h>
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/prom.h>
+#include <asm/vio.h>
+#include <asm/hvcall.h>
+
+extern struct subsystem devices_subsys; /* needed for vio_find_name() */
+
+static void probe_bus_pseries(void)
+{
+	struct device_node *node_vroot, *of_node;
+
+	node_vroot = find_devices("vdevice");
+	if ((node_vroot == NULL) || (node_vroot->child == NULL))
+		/* this machine doesn't do virtual IO, and that's ok */
+		return;
+
+	/*
+	 * Create struct vio_devices for each virtual device in the device tree.
+	 * Drivers will associate with them later.
+	 */
+	for (of_node = node_vroot->child; of_node != NULL;
+			of_node = of_node->sibling) {
+		printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node);
+		vio_register_device_node(of_node);
+	}
+}
+
+/**
+ * vio_match_device_pseries: - Tell if a pSeries VIO device matches a
+ *	vio_device_id
+ */
+static int vio_match_device_pseries(const struct vio_device_id *id,
+		const struct vio_dev *dev)
+{
+	return (strncmp(dev->type, id->type, strlen(id->type)) == 0) &&
+			device_is_compatible(dev->dev.platform_data, id->compat);
+}
+
+static void vio_release_device_pseries(struct device *dev)
+{
+	/* XXX free TCE table */
+	of_node_put(dev->platform_data);
+}
+
+static ssize_t viodev_show_devspec(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct device_node *of_node = dev->platform_data;
+
+	return sprintf(buf, "%s\n", of_node->full_name);
+}
+DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL);
+
+static void vio_unregister_device_pseries(struct vio_dev *viodev)
+{
+	device_remove_file(&viodev->dev, &dev_attr_devspec);
+}
+
+static struct vio_bus_ops vio_bus_ops_pseries = {
+	.match = vio_match_device_pseries,
+	.unregister_device = vio_unregister_device_pseries,
+	.release_device = vio_release_device_pseries,
+};
+
+/**
+ * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus
+ */
+static int __init vio_bus_init_pseries(void)
+{
+	int err;
+
+	err = vio_bus_init(&vio_bus_ops_pseries);
+	if (err == 0)
+		probe_bus_pseries();
+	return err;
+}
+
+__initcall(vio_bus_init_pseries);
+
+/**
+ * vio_build_iommu_table: - gets the dma information from OF and
+ *	builds the TCE tree.
+ * @dev: the virtual device.
+ *
+ * Returns a pointer to the built tce tree, or NULL if it can't
+ * find property.
+*/
+static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
+{
+	unsigned int *dma_window;
+	struct iommu_table *newTceTable;
+	unsigned long offset;
+	int dma_window_property_size;
+
+	dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size);
+	if(!dma_window) {
+		return NULL;
+	}
+
+	newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
+
+	/*  There should be some code to extract the phys-encoded offset
+		using prom_n_addr_cells(). However, according to a comment
+		on earlier versions, it's always zero, so we don't bother */
+	offset = dma_window[1] >>  PAGE_SHIFT;
+
+	/* TCE table size - measured in tce entries */
+	newTceTable->it_size		= dma_window[4] >> PAGE_SHIFT;
+	/* offset for VIO should always be 0 */
+	newTceTable->it_offset		= offset;
+	newTceTable->it_busno		= 0;
+	newTceTable->it_index		= (unsigned long)dma_window[0];
+	newTceTable->it_type		= TCE_VB;
+
+	return iommu_init_table(newTceTable);
+}
+
+/**
+ * vio_register_device_node: - Register a new vio device.
+ * @of_node:	The OF node for this device.
+ *
+ * Creates and initializes a vio_dev structure from the data in
+ * of_node (dev.platform_data) and adds it to the list of virtual devices.
+ * Returns a pointer to the created vio_dev or NULL if node has
+ * NULL device_type or compatible fields.
+ */
+struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
+{
+	struct vio_dev *viodev;
+	unsigned int *unit_address;
+	unsigned int *irq_p;
+
+	/* we need the 'device_type' property, in order to match with drivers */
+	if ((NULL == of_node->type)) {
+		printk(KERN_WARNING
+			"%s: node %s missing 'device_type'\n", __FUNCTION__,
+			of_node->name ? of_node->name : "<unknown>");
+		return NULL;
+	}
+
+	unit_address = (unsigned int *)get_property(of_node, "reg", NULL);
+	if (!unit_address) {
+		printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__,
+			of_node->name ? of_node->name : "<unknown>");
+		return NULL;
+	}
+
+	/* allocate a vio_dev for this node */
+	viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
+	if (!viodev) {
+		return NULL;
+	}
+	memset(viodev, 0, sizeof(struct vio_dev));
+
+	viodev->dev.platform_data = of_node_get(of_node);
+
+	viodev->irq = NO_IRQ;
+	irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL);
+	if (irq_p) {
+		int virq = virt_irq_create_mapping(*irq_p);
+		if (virq == NO_IRQ) {
+			printk(KERN_ERR "Unable to allocate interrupt "
+			       "number for %s\n", of_node->full_name);
+		} else
+			viodev->irq = irq_offset_up(virq);
+	}
+
+	snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address);
+	viodev->name = of_node->name;
+	viodev->type = of_node->type;
+	viodev->unit_address = *unit_address;
+	viodev->iommu_table = vio_build_iommu_table(viodev);
+
+	/* register with generic device framework */
+	if (vio_register_device(viodev) == NULL) {
+		/* XXX free TCE table */
+		kfree(viodev);
+		return NULL;
+	}
+	device_create_file(&viodev->dev, &dev_attr_devspec);
+
+	return viodev;
+}
+EXPORT_SYMBOL(vio_register_device_node);
+
+/**
+ * vio_get_attribute: - get attribute for virtual device
+ * @vdev:	The vio device to get property.
+ * @which:	The property/attribute to be extracted.
+ * @length:	Pointer to length of returned data size (unused if NULL).
+ *
+ * Calls prom.c's get_property() to return the value of the
+ * attribute specified by the preprocessor constant @which
+*/
+const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length)
+{
+	return get_property(vdev->dev.platform_data, (char*)which, length);
+}
+EXPORT_SYMBOL(vio_get_attribute);
+
+/* vio_find_name() - internal because only vio.c knows how we formatted the
+ * kobject name
+ * XXX once vio_bus_type.devices is actually used as a kset in
+ * drivers/base/bus.c, this function should be removed in favor of
+ * "device_find(kobj_name, &vio_bus_type)"
+ */
+static struct vio_dev *vio_find_name(const char *kobj_name)
+{
+	struct kobject *found;
+
+	found = kset_find_obj(&devices_subsys.kset, kobj_name);
+	if (!found)
+		return NULL;
+
+	return to_vio_dev(container_of(found, struct device, kobj));
+}
+
+/**
+ * vio_find_node - find an already-registered vio_dev
+ * @vnode: device_node of the virtual device we're looking for
+ */
+struct vio_dev *vio_find_node(struct device_node *vnode)
+{
+	uint32_t *unit_address;
+	char kobj_name[BUS_ID_SIZE];
+
+	/* construct the kobject name from the device node */
+	unit_address = (uint32_t *)get_property(vnode, "reg", NULL);
+	if (!unit_address)
+		return NULL;
+	snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address);
+
+	return vio_find_name(kobj_name);
+}
+EXPORT_SYMBOL(vio_find_node);
+
+int vio_enable_interrupts(struct vio_dev *dev)
+{
+	int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
+	if (rc != H_Success)
+		printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
+	return rc;
+}
+EXPORT_SYMBOL(vio_enable_interrupts);
+
+int vio_disable_interrupts(struct vio_dev *dev)
+{
+	int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
+	if (rc != H_Success)
+		printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
+	return rc;
+}
+EXPORT_SYMBOL(vio_disable_interrupts);

+ 2 - 2
arch/ppc64/kernel/pacaData.c

@@ -78,7 +78,7 @@ extern unsigned long __toc_start;
 
 #define BOOTCPU_PACA_INIT(number)					    \
 {									    \
-	PACA_INIT_COMMON(number, 1, 0, STAB0_VIRT_ADDR)			    \
+	PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab)		    \
 	PACA_INIT_ISERIES(number)					    \
 }
 
@@ -90,7 +90,7 @@ extern unsigned long __toc_start;
 
 #define BOOTCPU_PACA_INIT(number)					    \
 {									    \
-	PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR)	    \
+	PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab)    \
 }
 #endif
 

+ 2 - 0
arch/ppc64/kernel/pmac_setup.c

@@ -71,6 +71,7 @@
 #include <asm/of_device.h>
 #include <asm/lmb.h>
 #include <asm/smu.h>
+#include <asm/pmc.h>
 
 #include "pmac.h"
 #include "mpic.h"
@@ -511,4 +512,5 @@ struct machdep_calls __initdata pmac_md = {
 	.progress		= pmac_progress,
 	.check_legacy_ioport	= pmac_check_legacy_ioport,
 	.idle_loop		= native_idle,
+	.enable_pmcs		= power4_enable_pmcs,
 };

+ 21 - 0
arch/ppc64/kernel/pmc.c

@@ -65,3 +65,24 @@ void release_pmc_hardware(void)
 	spin_unlock(&pmc_owner_lock);
 }
 EXPORT_SYMBOL_GPL(release_pmc_hardware);
+
+void power4_enable_pmcs(void)
+{
+	unsigned long hid0;
+
+	hid0 = mfspr(HID0);
+	hid0 |= 1UL << (63 - 20);
+
+	/* POWER4 requires the following sequence */
+	asm volatile(
+		"sync\n"
+		"mtspr     %1, %0\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"mfspr     %0, %1\n"
+		"isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0):
+		"memory");
+}

+ 5 - 7
arch/ppc64/kernel/process.c

@@ -50,6 +50,7 @@
 #include <asm/machdep.h>
 #include <asm/iSeries/HvCallHpt.h>
 #include <asm/cputable.h>
+#include <asm/firmware.h>
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
 #include <asm/time.h>
@@ -202,11 +203,10 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	new_thread = &new->thread;
 	old_thread = &current->thread;
 
-/* Collect purr utilization data per process and per processor wise */
-/* purr is nothing but processor time base                          */
-
-#if defined(CONFIG_PPC_PSERIES)
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+	/* Collect purr utilization data per process and per processor
+	 * wise purr is nothing but processor time base
+	 */
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
 		struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
 		long unsigned start_tb, current_tb;
 		start_tb = old_thread->start_tb;
@@ -214,8 +214,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
 		old_thread->accum_tb += (current_tb - start_tb);
 		new_thread->start_tb = current_tb;
 	}
-#endif
-
 
 	local_irq_save(flags);
 	last = _switch(old_thread, new_thread);

+ 152 - 32
arch/ppc64/kernel/prom.c

@@ -625,8 +625,8 @@ void __init finish_device_tree(void)
 
 static inline char *find_flat_dt_string(u32 offset)
 {
-	return ((char *)initial_boot_params) + initial_boot_params->off_dt_strings
-		+ offset;
+	return ((char *)initial_boot_params) +
+		initial_boot_params->off_dt_strings + offset;
 }
 
 /**
@@ -635,26 +635,33 @@ static inline char *find_flat_dt_string(u32 offset)
  * unflatten the tree
  */
 static int __init scan_flat_dt(int (*it)(unsigned long node,
-					 const char *full_path, void *data),
+					 const char *uname, int depth,
+					 void *data),
 			       void *data)
 {
 	unsigned long p = ((unsigned long)initial_boot_params) +
 		initial_boot_params->off_dt_struct;
 	int rc = 0;
+	int depth = -1;
 
 	do {
 		u32 tag = *((u32 *)p);
 		char *pathp;
 		
 		p += 4;
-		if (tag == OF_DT_END_NODE)
+		if (tag == OF_DT_END_NODE) {
+			depth --;
+			continue;
+		}
+		if (tag == OF_DT_NOP)
 			continue;
 		if (tag == OF_DT_END)
 			break;
 		if (tag == OF_DT_PROP) {
 			u32 sz = *((u32 *)p);
 			p += 8;
-			p = _ALIGN(p, sz >= 8 ? 8 : 4);
+			if (initial_boot_params->version < 0x10)
+				p = _ALIGN(p, sz >= 8 ? 8 : 4);
 			p += sz;
 			p = _ALIGN(p, 4);
 			continue;
@@ -664,9 +671,18 @@ static int __init scan_flat_dt(int (*it)(unsigned long node,
 			       " device tree !\n", tag);
 			return -EINVAL;
 		}
+		depth++;
 		pathp = (char *)p;
 		p = _ALIGN(p + strlen(pathp) + 1, 4);
-		rc = it(p, pathp, data);
+		if ((*pathp) == '/') {
+			char *lp, *np;
+			for (lp = NULL, np = pathp; *np; np++)
+				if ((*np) == '/')
+					lp = np+1;
+			if (lp != NULL)
+				pathp = lp;
+		}
+		rc = it(p, pathp, depth, data);
 		if (rc != 0)
 			break;		
 	} while(1);
@@ -689,17 +705,21 @@ static void* __init get_flat_dt_prop(unsigned long node, const char *name,
 		const char *nstr;
 
 		p += 4;
+		if (tag == OF_DT_NOP)
+			continue;
 		if (tag != OF_DT_PROP)
 			return NULL;
 
 		sz = *((u32 *)p);
 		noff = *((u32 *)(p + 4));
 		p += 8;
-		p = _ALIGN(p, sz >= 8 ? 8 : 4);
+		if (initial_boot_params->version < 0x10)
+			p = _ALIGN(p, sz >= 8 ? 8 : 4);
 
 		nstr = find_flat_dt_string(noff);
 		if (nstr == NULL) {
-			printk(KERN_WARNING "Can't find property index name !\n");
+			printk(KERN_WARNING "Can't find property index"
+			       " name !\n");
 			return NULL;
 		}
 		if (strcmp(name, nstr) == 0) {
@@ -713,7 +733,7 @@ static void* __init get_flat_dt_prop(unsigned long node, const char *name,
 }
 
 static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size,
-					       unsigned long align)
+				       unsigned long align)
 {
 	void *res;
 
@@ -727,13 +747,16 @@ static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size,
 static unsigned long __init unflatten_dt_node(unsigned long mem,
 					      unsigned long *p,
 					      struct device_node *dad,
-					      struct device_node ***allnextpp)
+					      struct device_node ***allnextpp,
+					      unsigned long fpsize)
 {
 	struct device_node *np;
 	struct property *pp, **prev_pp = NULL;
 	char *pathp;
 	u32 tag;
-	unsigned int l;
+	unsigned int l, allocl;
+	int has_name = 0;
+	int new_format = 0;
 
 	tag = *((u32 *)(*p));
 	if (tag != OF_DT_BEGIN_NODE) {
@@ -742,21 +765,62 @@ static unsigned long __init unflatten_dt_node(unsigned long mem,
 	}
 	*p += 4;
 	pathp = (char *)*p;
-	l = strlen(pathp) + 1;
+	l = allocl = strlen(pathp) + 1;
 	*p = _ALIGN(*p + l, 4);
 
-	np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + l,
+	/* version 0x10 has a more compact unit name here instead of the full
+	 * path. we accumulate the full path size using "fpsize", we'll rebuild
+	 * it later. We detect this because the first character of the name is
+	 * not '/'.
+	 */
+	if ((*pathp) != '/') {
+		new_format = 1;
+		if (fpsize == 0) {
+			/* root node: special case. fpsize accounts for path
+			 * plus terminating zero. root node only has '/', so
+			 * fpsize should be 2, but we want to avoid the first
+			 * level nodes to have two '/' so we use fpsize 1 here
+			 */
+			fpsize = 1;
+			allocl = 2;
+		} else {
+			/* account for '/' and path size minus terminal 0
+			 * already in 'l'
+			 */
+			fpsize += l;
+			allocl = fpsize;
+		}
+	}
+
+
+	np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl,
 				__alignof__(struct device_node));
 	if (allnextpp) {
 		memset(np, 0, sizeof(*np));
 		np->full_name = ((char*)np) + sizeof(struct device_node);
-		memcpy(np->full_name, pathp, l);
+		if (new_format) {
+			char *p = np->full_name;
+			/* rebuild full path for new format */
+			if (dad && dad->parent) {
+				strcpy(p, dad->full_name);
+#ifdef DEBUG
+				if ((strlen(p) + l + 1) != allocl) {
+					DBG("%s: p: %d, l: %d, a: %d\n",
+					    pathp, strlen(p), l, allocl);
+				}
+#endif
+				p += strlen(p);
+			}
+			*(p++) = '/';
+			memcpy(p, pathp, l);
+		} else
+			memcpy(np->full_name, pathp, l);
 		prev_pp = &np->properties;
 		**allnextpp = np;
 		*allnextpp = &np->allnext;
 		if (dad != NULL) {
 			np->parent = dad;
-			/* we temporarily use the `next' field as `last_child'. */
+			/* we temporarily use the next field as `last_child'*/
 			if (dad->next == 0)
 				dad->child = np;
 			else
@@ -770,18 +834,26 @@ static unsigned long __init unflatten_dt_node(unsigned long mem,
 		char *pname;
 
 		tag = *((u32 *)(*p));
+		if (tag == OF_DT_NOP) {
+			*p += 4;
+			continue;
+		}
 		if (tag != OF_DT_PROP)
 			break;
 		*p += 4;
 		sz = *((u32 *)(*p));
 		noff = *((u32 *)((*p) + 4));
-		*p = _ALIGN((*p) + 8, sz >= 8 ? 8 : 4);
+		*p += 8;
+		if (initial_boot_params->version < 0x10)
+			*p = _ALIGN(*p, sz >= 8 ? 8 : 4);
 
 		pname = find_flat_dt_string(noff);
 		if (pname == NULL) {
 			printk("Can't find property name in list !\n");
 			break;
 		}
+		if (strcmp(pname, "name") == 0)
+			has_name = 1;
 		l = strlen(pname) + 1;
 		pp = unflatten_dt_alloc(&mem, sizeof(struct property),
 					__alignof__(struct property));
@@ -801,6 +873,36 @@ static unsigned long __init unflatten_dt_node(unsigned long mem,
 		}
 		*p = _ALIGN((*p) + sz, 4);
 	}
+	/* with version 0x10 we may not have the name property, recreate
+	 * it here from the unit name if absent
+	 */
+	if (!has_name) {
+		char *p = pathp, *ps = pathp, *pa = NULL;
+		int sz;
+
+		while (*p) {
+			if ((*p) == '@')
+				pa = p;
+			if ((*p) == '/')
+				ps = p + 1;
+			p++;
+		}
+		if (pa < ps)
+			pa = p;
+		sz = (pa - ps) + 1;
+		pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz,
+					__alignof__(struct property));
+		if (allnextpp) {
+			pp->name = "name";
+			pp->length = sz;
+			pp->value = (unsigned char *)(pp + 1);
+			*prev_pp = pp;
+			prev_pp = &pp->next;
+			memcpy(pp->value, ps, sz - 1);
+			((char *)pp->value)[sz - 1] = 0;
+			DBG("fixed up name for %s -> %s\n", pathp, pp->value);
+		}
+	}
 	if (allnextpp) {
 		*prev_pp = NULL;
 		np->name = get_property(np, "name", NULL);
@@ -812,11 +914,11 @@ static unsigned long __init unflatten_dt_node(unsigned long mem,
 			np->type = "<NULL>";
 	}
 	while (tag == OF_DT_BEGIN_NODE) {
-		mem = unflatten_dt_node(mem, p, np, allnextpp);
+		mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize);
 		tag = *((u32 *)(*p));
 	}
 	if (tag != OF_DT_END_NODE) {
-		printk("Weird tag at start of node: %x\n", tag);
+		printk("Weird tag at end of node: %x\n", tag);
 		return mem;
 	}
 	*p += 4;
@@ -842,21 +944,32 @@ void __init unflatten_device_tree(void)
 	/* First pass, scan for size */
 	start = ((unsigned long)initial_boot_params) +
 		initial_boot_params->off_dt_struct;
-	size = unflatten_dt_node(0, &start, NULL, NULL);
+	size = unflatten_dt_node(0, &start, NULL, NULL, 0);
+	size = (size | 3) + 1;
 
 	DBG("  size is %lx, allocating...\n", size);
 
 	/* Allocate memory for the expanded device tree */
-	mem = (unsigned long)abs_to_virt(lmb_alloc(size,
-						   __alignof__(struct device_node)));
+	mem = lmb_alloc(size + 4, __alignof__(struct device_node));
+	if (!mem) {
+		DBG("Couldn't allocate memory with lmb_alloc()!\n");
+		panic("Couldn't allocate memory with lmb_alloc()!\n");
+	}
+	mem = (unsigned long)abs_to_virt(mem);
+
+	((u32 *)mem)[size / 4] = 0xdeadbeef;
+
 	DBG("  unflattening...\n", mem);
 
 	/* Second pass, do actual unflattening */
 	start = ((unsigned long)initial_boot_params) +
 		initial_boot_params->off_dt_struct;
-	unflatten_dt_node(mem, &start, NULL, &allnextp);
+	unflatten_dt_node(mem, &start, NULL, &allnextp, 0);
 	if (*((u32 *)start) != OF_DT_END)
-		printk(KERN_WARNING "Weird tag at end of tree: %x\n", *((u32 *)start));
+		printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start));
+	if (((u32 *)mem)[size / 4] != 0xdeadbeef)
+		printk(KERN_WARNING "End of tree marker overwritten: %08x\n",
+		       ((u32 *)mem)[size / 4] );
 	*allnextp = NULL;
 
 	/* Get pointer to OF "/chosen" node for use everywhere */
@@ -880,7 +993,7 @@ void __init unflatten_device_tree(void)
 
 
 static int __init early_init_dt_scan_cpus(unsigned long node,
-					  const char *full_path, void *data)
+					  const char *uname, int depth, void *data)
 {
 	char *type = get_flat_dt_prop(node, "device_type", NULL);
 	u32 *prop;
@@ -947,13 +1060,15 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 }
 
 static int __init early_init_dt_scan_chosen(unsigned long node,
-					    const char *full_path, void *data)
+					    const char *uname, int depth, void *data)
 {
 	u32 *prop;
 	u64 *prop64;
 	extern unsigned long memory_limit, tce_alloc_start, tce_alloc_end;
 
-	if (strcmp(full_path, "/chosen") != 0)
+	DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
+
+	if (depth != 1 || strcmp(uname, "chosen") != 0)
 		return 0;
 
 	/* get platform type */
@@ -1003,18 +1118,20 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
 }
 
 static int __init early_init_dt_scan_root(unsigned long node,
-					  const char *full_path, void *data)
+					  const char *uname, int depth, void *data)
 {
 	u32 *prop;
 
-	if (strcmp(full_path, "/") != 0)
+	if (depth != 0)
 		return 0;
 
 	prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL);
 	dt_root_size_cells = (prop == NULL) ? 1 : *prop;
-		
+	DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
+
 	prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL);
 	dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
+	DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
 	
 	/* break now */
 	return 1;
@@ -1042,7 +1159,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
 
 
 static int __init early_init_dt_scan_memory(unsigned long node,
-					    const char *full_path, void *data)
+					    const char *uname, int depth, void *data)
 {
 	char *type = get_flat_dt_prop(node, "device_type", NULL);
 	cell_t *reg, *endp;
@@ -1058,7 +1175,9 @@ static int __init early_init_dt_scan_memory(unsigned long node,
 
 	endp = reg + (l / sizeof(cell_t));
 
-	DBG("memory scan node %s ...\n", full_path);
+	DBG("memory scan node %s ..., reg size %ld, data: %x %x %x %x, ...\n",
+	    uname, l, reg[0], reg[1], reg[2], reg[3]);
+
 	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
 		unsigned long base, size;
 
@@ -1469,10 +1588,11 @@ struct device_node *of_find_node_by_path(const char *path)
 	struct device_node *np = allnodes;
 
 	read_lock(&devtree_lock);
-	for (; np != 0; np = np->allnext)
+	for (; np != 0; np = np->allnext) {
 		if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0
 		    && of_node_get(np))
 			break;
+	}
 	read_unlock(&devtree_lock);
 	return np;
 }

+ 59 - 34
arch/ppc64/kernel/prom_init.c

@@ -892,7 +892,10 @@ static void __init prom_init_mem(void)
 	if ( RELOC(of_platform) == PLATFORM_PSERIES_LPAR )
 		RELOC(alloc_top) = RELOC(rmo_top);
 	else
-		RELOC(alloc_top) = RELOC(rmo_top) = min(0x40000000ul, RELOC(ram_top));
+		/* Some RS64 machines have buggy firmware where claims up at 1GB
+		 * fails. Cap at 768MB as a workaround. Still plenty of room.
+		 */
+		RELOC(alloc_top) = RELOC(rmo_top) = min(0x30000000ul, RELOC(ram_top));
 
 	prom_printf("memory layout at init:\n");
 	prom_printf("  memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit));
@@ -1534,7 +1537,8 @@ static unsigned long __init dt_find_string(char *str)
  */
 #define MAX_PROPERTY_NAME 64
 
-static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start,
+static void __init scan_dt_build_strings(phandle node,
+					 unsigned long *mem_start,
 					 unsigned long *mem_end)
 {
 	unsigned long offset = reloc_offset();
@@ -1547,16 +1551,21 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start,
 	/* get and store all property names */
 	prev_name = RELOC("");
 	for (;;) {
-		int rc;
-
 		/* 64 is max len of name including nul. */
 		namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1);
-		rc = call_prom("nextprop", 3, 1, node, prev_name, namep);
-		if (rc != 1) {
+		if (call_prom("nextprop", 3, 1, node, prev_name, namep) != 1) {
 			/* No more nodes: unwind alloc */
 			*mem_start = (unsigned long)namep;
 			break;
 		}
+
+ 		/* skip "name" */
+ 		if (strcmp(namep, RELOC("name")) == 0) {
+ 			*mem_start = (unsigned long)namep;
+ 			prev_name = RELOC("name");
+ 			continue;
+ 		}
+		/* get/create string entry */
 		soff = dt_find_string(namep);
 		if (soff != 0) {
 			*mem_start = (unsigned long)namep;
@@ -1571,7 +1580,7 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start,
 
 	/* do all our children */
 	child = call_prom("child", 1, 1, node);
-	while (child != (phandle)0) {
+	while (child != 0) {
 		scan_dt_build_strings(child, mem_start, mem_end);
 		child = call_prom("peer", 1, 1, child);
 	}
@@ -1580,16 +1589,13 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start,
 static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
 					unsigned long *mem_end)
 {
-	int l, align;
 	phandle child;
-	char *namep, *prev_name, *sstart, *p, *ep;
+	char *namep, *prev_name, *sstart, *p, *ep, *lp, *path;
 	unsigned long soff;
 	unsigned char *valp;
 	unsigned long offset = reloc_offset();
-	char pname[MAX_PROPERTY_NAME];
-	char *path;
-
-	path = RELOC(prom_scratch);
+	static char pname[MAX_PROPERTY_NAME];
+	int l;
 
 	dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end);
 
@@ -1599,23 +1605,33 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
 		      namep, *mem_end - *mem_start);
 	if (l >= 0) {
 		/* Didn't fit?  Get more room. */
-		if (l+1 > *mem_end - *mem_start) {
+		if ((l+1) > (*mem_end - *mem_start)) {
 			namep = make_room(mem_start, mem_end, l+1, 1);
 			call_prom("package-to-path", 3, 1, node, namep, l);
 		}
 		namep[l] = '\0';
+
 		/* Fixup an Apple bug where they have bogus \0 chars in the
 		 * middle of the path in some properties
 		 */
 		for (p = namep, ep = namep + l; p < ep; p++)
 			if (*p == '\0') {
 				memmove(p, p+1, ep - p);
-				ep--; l--;
+				ep--; l--; p--;
 			}
-		*mem_start = _ALIGN(((unsigned long) namep) + strlen(namep) + 1, 4);
+
+		/* now try to extract the unit name in that mess */
+		for (p = namep, lp = NULL; *p; p++)
+			if (*p == '/')
+				lp = p + 1;
+		if (lp != NULL)
+			memmove(namep, lp, strlen(lp) + 1);
+		*mem_start = _ALIGN(((unsigned long) namep) +
+				    strlen(namep) + 1, 4);
 	}
 
 	/* get it again for debugging */
+	path = RELOC(prom_scratch);
 	memset(path, 0, PROM_SCRATCH_SIZE);
 	call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
 
@@ -1623,23 +1639,27 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
 	prev_name = RELOC("");
 	sstart = (char *)RELOC(dt_string_start);
 	for (;;) {
-		int rc;
-
-		rc = call_prom("nextprop", 3, 1, node, prev_name, pname);
-		if (rc != 1)
+		if (call_prom("nextprop", 3, 1, node, prev_name,
+			      RELOC(pname)) != 1)
 			break;
 
+ 		/* skip "name" */
+ 		if (strcmp(RELOC(pname), RELOC("name")) == 0) {
+ 			prev_name = RELOC("name");
+ 			continue;
+ 		}
+
 		/* find string offset */
-		soff = dt_find_string(pname);
+		soff = dt_find_string(RELOC(pname));
 		if (soff == 0) {
-			prom_printf("WARNING: Can't find string index for <%s>, node %s\n",
-				    pname, path);
+			prom_printf("WARNING: Can't find string index for"
+				    " <%s>, node %s\n", RELOC(pname), path);
 			break;
 		}
 		prev_name = sstart + soff;
 
 		/* get length */
-		l = call_prom("getproplen", 2, 1, node, pname);
+		l = call_prom("getproplen", 2, 1, node, RELOC(pname));
 
 		/* sanity checks */
 		if (l == PROM_ERROR)
@@ -1648,7 +1668,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
 			prom_printf("WARNING: ignoring large property ");
 			/* It seems OF doesn't null-terminate the path :-( */
 			prom_printf("[%s] ", path);
-			prom_printf("%s length 0x%x\n", pname, l);
+			prom_printf("%s length 0x%x\n", RELOC(pname), l);
 			continue;
 		}
 
@@ -1658,17 +1678,16 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
 		dt_push_token(soff, mem_start, mem_end);
 
 		/* push property content */
-		align = (l >= 8) ? 8 : 4;
-		valp = make_room(mem_start, mem_end, l, align);
-		call_prom("getprop", 4, 1, node, pname, valp, l);
+		valp = make_room(mem_start, mem_end, l, 4);
+		call_prom("getprop", 4, 1, node, RELOC(pname), valp, l);
 		*mem_start = _ALIGN(*mem_start, 4);
 	}
 
 	/* Add a "linux,phandle" property. */
 	soff = dt_find_string(RELOC("linux,phandle"));
 	if (soff == 0)
-		prom_printf("WARNING: Can't find string index for <linux-phandle>"
-			    " node %s\n", path);
+		prom_printf("WARNING: Can't find string index for"
+			    " <linux-phandle> node %s\n", path);
 	else {
 		dt_push_token(OF_DT_PROP, mem_start, mem_end);
 		dt_push_token(4, mem_start, mem_end);
@@ -1679,7 +1698,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
 
 	/* do all our children */
 	child = call_prom("child", 1, 1, node);
-	while (child != (phandle)0) {
+	while (child != 0) {
 		scan_dt_build_struct(child, mem_start, mem_end);
 		child = call_prom("peer", 1, 1, child);
 	}
@@ -1718,7 +1737,8 @@ static void __init flatten_device_tree(void)
 
 	/* Build header and make room for mem rsv map */ 
 	mem_start = _ALIGN(mem_start, 4);
-	hdr = make_room(&mem_start, &mem_end, sizeof(struct boot_param_header), 4);
+	hdr = make_room(&mem_start, &mem_end,
+			sizeof(struct boot_param_header), 4);
 	RELOC(dt_header_start) = (unsigned long)hdr;
 	rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8);
 
@@ -1731,11 +1751,11 @@ static void __init flatten_device_tree(void)
 	namep = make_room(&mem_start, &mem_end, 16, 1);
 	strcpy(namep, RELOC("linux,phandle"));
 	mem_start = (unsigned long)namep + strlen(namep) + 1;
-	RELOC(dt_string_end) = mem_start;
 
 	/* Build string array */
 	prom_printf("Building dt strings...\n"); 
 	scan_dt_build_strings(root, &mem_start, &mem_end);
+	RELOC(dt_string_end) = mem_start;
 
 	/* Build structure */
 	mem_start = PAGE_ALIGN(mem_start);
@@ -1750,9 +1770,11 @@ static void __init flatten_device_tree(void)
 	hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start);
 	hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start);
 	hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start);
+	hdr->dt_strings_size = RELOC(dt_string_end) - RELOC(dt_string_start);
 	hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start);
 	hdr->version = OF_DT_VERSION;
-	hdr->last_comp_version = 1;
+	/* Version 16 is not backward compatible */
+	hdr->last_comp_version = 0x10;
 
 	/* Reserve the whole thing and copy the reserve map in, we
 	 * also bump mem_reserve_cnt to cause further reservations to
@@ -1808,6 +1830,9 @@ static void __init fixup_device_tree(void)
 	/* does it need fixup ? */
 	if (prom_getproplen(i2c, "interrupts") > 0)
 		return;
+
+	prom_printf("fixing up bogus interrupts for u3 i2c...\n");
+
 	/* interrupt on this revision of u3 is number 0 and level */
 	interrupts[0] = 0;
 	interrupts[1] = 1;

+ 17 - 2
arch/ppc64/kernel/rtas_pci.c

@@ -58,6 +58,21 @@ static int config_access_valid(struct device_node *dn, int where)
 	return 0;
 }
 
+static int of_device_available(struct device_node * dn)
+{
+        char * status;
+
+        status = get_property(dn, "status", NULL);
+
+        if (!status)
+                return 1;
+
+        if (!strcmp(status, "okay"))
+                return 1;
+
+        return 0;
+}
+
 static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val)
 {
 	int returnval = -1;
@@ -103,7 +118,7 @@ static int rtas_pci_read_config(struct pci_bus *bus,
 
 	/* Search only direct children of the bus */
 	for (dn = busdn->child; dn; dn = dn->sibling)
-		if (dn->devfn == devfn)
+		if (dn->devfn == devfn && of_device_available(dn))
 			return rtas_read_config(dn, where, size, val);
 	return PCIBIOS_DEVICE_NOT_FOUND;
 }
@@ -146,7 +161,7 @@ static int rtas_pci_write_config(struct pci_bus *bus,
 
 	/* Search only direct children of the bus */
 	for (dn = busdn->child; dn; dn = dn->sibling)
-		if (dn->devfn == devfn)
+		if (dn->devfn == devfn && of_device_available(dn))
 			return rtas_write_config(dn, where, size, val);
 	return PCIBIOS_DEVICE_NOT_FOUND;
 }

+ 17 - 11
arch/ppc64/kernel/setup.c

@@ -536,15 +536,19 @@ static void __init check_for_initrd(void)
 
 	DBG(" -> check_for_initrd()\n");
 
-	prop = (u64 *)get_property(of_chosen, "linux,initrd-start", NULL);
-	if (prop != NULL) {
-		initrd_start = (unsigned long)__va(*prop);
-		prop = (u64 *)get_property(of_chosen, "linux,initrd-end", NULL);
+	if (of_chosen) {
+		prop = (u64 *)get_property(of_chosen,
+				"linux,initrd-start", NULL);
 		if (prop != NULL) {
-			initrd_end = (unsigned long)__va(*prop);
-			initrd_below_start_ok = 1;
-		} else
-			initrd_start = 0;
+			initrd_start = (unsigned long)__va(*prop);
+			prop = (u64 *)get_property(of_chosen,
+					"linux,initrd-end", NULL);
+			if (prop != NULL) {
+				initrd_end = (unsigned long)__va(*prop);
+				initrd_below_start_ok = 1;
+			} else
+				initrd_start = 0;
+		}
 	}
 
 	/* If we were passed an initrd, set the ROOT_DEV properly if the values
@@ -627,7 +631,7 @@ void __init setup_system(void)
 	 * Initialize xmon
 	 */
 #ifdef CONFIG_XMON_DEFAULT
-	xmon_init();
+	xmon_init(1);
 #endif
 	/*
 	 * Register early console
@@ -1343,11 +1347,13 @@ static int __init early_xmon(char *p)
 	/* ensure xmon is enabled */
 	if (p) {
 		if (strncmp(p, "on", 2) == 0)
-			xmon_init();
+			xmon_init(1);
+		if (strncmp(p, "off", 3) == 0)
+			xmon_init(0);
 		if (strncmp(p, "early", 5) != 0)
 			return 0;
 	}
-	xmon_init();
+	xmon_init(1);
 	debugger(NULL);
 
 	return 0;

+ 3 - 2
arch/ppc64/kernel/signal.c

@@ -481,10 +481,11 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
 	/* Set up Signal Frame */
 	ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
-	if (ret && !(ka->sa.sa_flags & SA_NODEFER)) {
+	if (ret) {
 		spin_lock_irq(&current->sighand->siglock);
 		sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
-		sigaddset(&current->blocked,sig);
+		if (!(ka->sa.sa_flags & SA_NODEFER))
+			sigaddset(&current->blocked,sig);
 		recalc_sigpending();
 		spin_unlock_irq(&current->sighand->siglock);
 	}

+ 3 - 2
arch/ppc64/kernel/signal32.c

@@ -976,11 +976,12 @@ int do_signal32(sigset_t *oldset, struct pt_regs *regs)
 	else
 		ret = handle_signal32(signr, &ka, &info, oldset, regs, newsp);
 
-	if (ret && !(ka.sa.sa_flags & SA_NODEFER)) {
+	if (ret) {
 		spin_lock_irq(&current->sighand->siglock);
 		sigorsets(&current->blocked, &current->blocked,
 			  &ka.sa.sa_mask);
-		sigaddset(&current->blocked, signr);
+		if (!(ka.sa.sa_flags & SA_NODEFER))
+			sigaddset(&current->blocked, signr);
 		recalc_sigpending();
 		spin_unlock_irq(&current->sighand->siglock);
 	}

+ 5 - 52
arch/ppc64/kernel/sysfs.c

@@ -13,6 +13,7 @@
 #include <asm/current.h>
 #include <asm/processor.h>
 #include <asm/cputable.h>
+#include <asm/firmware.h>
 #include <asm/hvcall.h>
 #include <asm/prom.h>
 #include <asm/systemcfg.h>
@@ -100,6 +101,8 @@ static int __init setup_smt_snooze_delay(char *str)
 }
 __setup("smt-snooze-delay=", setup_smt_snooze_delay);
 
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
 /*
  * Enabling PMCs will slow partition context switch times so we only do
  * it the first time we write to the PMCs.
@@ -109,65 +112,15 @@ static DEFINE_PER_CPU(char, pmcs_enabled);
 
 void ppc64_enable_pmcs(void)
 {
-	unsigned long hid0;
-#ifdef CONFIG_PPC_PSERIES
-	unsigned long set, reset;
-#endif /* CONFIG_PPC_PSERIES */
-
 	/* Only need to enable them once */
 	if (__get_cpu_var(pmcs_enabled))
 		return;
 
 	__get_cpu_var(pmcs_enabled) = 1;
 
-	switch (systemcfg->platform) {
-	case PLATFORM_PSERIES:
-	case PLATFORM_POWERMAC:
-		hid0 = mfspr(HID0);
-		hid0 |= 1UL << (63 - 20);
-
-		/* POWER4 requires the following sequence */
-		asm volatile(
-			     "sync\n"
-			     "mtspr	%1, %0\n"
-			     "mfspr	%0, %1\n"
-			     "mfspr	%0, %1\n"
-			     "mfspr	%0, %1\n"
-			     "mfspr	%0, %1\n"
-			     "mfspr	%0, %1\n"
-			     "mfspr	%0, %1\n"
-			     "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0):
-			     "memory");
-		break;
-
-#ifdef CONFIG_PPC_PSERIES
-	case PLATFORM_PSERIES_LPAR:
-		set = 1UL << 63;
-		reset = 0;
-		plpar_hcall_norets(H_PERFMON, set, reset);
-		break;
-#endif /* CONFIG_PPC_PSERIES */
-
-	default:
-		break;
-	}
-
-#ifdef CONFIG_PPC_PSERIES
-	/* instruct hypervisor to maintain PMCs */
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
-		get_paca()->lppaca.pmcregs_in_use = 1;
-#endif /* CONFIG_PPC_PSERIES */
+	if (ppc_md.enable_pmcs)
+		ppc_md.enable_pmcs();
 }
-
-#else
-
-/* PMC stuff */
-void ppc64_enable_pmcs(void)
-{
-	/* XXX Implement for iseries */
-}
-#endif /* CONFIG_PPC_MULTIPLATFORM */
-
 EXPORT_SYMBOL(ppc64_enable_pmcs);
 
 /* XXX convert to rusty's on_one_cpu */

+ 3 - 4
arch/ppc64/kernel/time.c

@@ -67,6 +67,7 @@
 #include <asm/prom.h>
 #include <asm/sections.h>
 #include <asm/systemcfg.h>
+#include <asm/firmware.h>
 
 u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
 
@@ -370,13 +371,11 @@ int timer_interrupt(struct pt_regs * regs)
 		process_hvlpevents(regs);
 #endif
 
-/* collect purr register values often, for accurate calculations */
-#if defined(CONFIG_PPC_PSERIES)
-	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
+	/* collect purr register values often, for accurate calculations */
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
 		struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
 		cu->current_tb = mfspr(SPRN_PURR);
 	}
-#endif
 
 	irq_exit();
 

+ 35 - 409
arch/ppc64/kernel/vio.c

@@ -1,10 +1,11 @@
 /*
  * IBM PowerPC Virtual I/O Infrastructure Support.
  *
- *    Copyright (c) 2003 IBM Corp.
+ *    Copyright (c) 2003-2005 IBM Corp.
  *     Dave Engebretsen engebret@us.ibm.com
  *     Santiago Leon santil@us.ibm.com
  *     Hollis Blanchard <hollisb@us.ibm.com>
+ *     Stephen Rothwell
  *
  *      This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -14,61 +15,30 @@
 
 #include <linux/init.h>
 #include <linux/console.h>
-#include <linux/version.h>
 #include <linux/module.h>
-#include <linux/kobject.h>
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
-#include <asm/rtas.h>
 #include <asm/iommu.h>
 #include <asm/dma.h>
-#include <asm/ppcdebug.h>
 #include <asm/vio.h>
-#include <asm/hvcall.h>
-#include <asm/iSeries/vio.h>
-#include <asm/iSeries/HvTypes.h>
-#include <asm/iSeries/HvCallXm.h>
-#include <asm/iSeries/HvLpConfig.h>
-
-#define DBGENTER() pr_debug("%s entered\n", __FUNCTION__)
-
-extern struct subsystem devices_subsys; /* needed for vio_find_name() */
 
 static const struct vio_device_id *vio_match_device(
 		const struct vio_device_id *, const struct vio_dev *);
 
-#ifdef CONFIG_PPC_PSERIES
-static struct iommu_table *vio_build_iommu_table(struct vio_dev *);
-static int vio_num_address_cells;
-#endif
-#ifdef CONFIG_PPC_ISERIES
-static struct iommu_table veth_iommu_table;
-static struct iommu_table vio_iommu_table;
-#endif
-static struct vio_dev vio_bus_device  = { /* fake "parent" device */
+struct vio_dev vio_bus_device  = { /* fake "parent" device */
 	.name = vio_bus_device.dev.bus_id,
 	.type = "",
-#ifdef CONFIG_PPC_ISERIES
-	.iommu_table = &vio_iommu_table,
-#endif
 	.dev.bus_id = "vio",
 	.dev.bus = &vio_bus_type,
 };
 
-#ifdef CONFIG_PPC_ISERIES
-static struct vio_dev *__init vio_register_device_iseries(char *type,
-		uint32_t unit_num);
-
-struct device *iSeries_vio_dev = &vio_bus_device.dev;
-EXPORT_SYMBOL(iSeries_vio_dev);
-
-#define device_is_compatible(a, b)	1
+static struct vio_bus_ops vio_bus_ops;
 
-#endif
-
-/* convert from struct device to struct vio_dev and pass to driver.
+/*
+ * Convert from struct device to struct vio_dev and pass to driver.
  * dev->driver has already been set by generic code because vio_bus_match
- * succeeded. */
+ * succeeded.
+ */
 static int vio_bus_probe(struct device *dev)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
@@ -76,15 +46,12 @@ static int vio_bus_probe(struct device *dev)
 	const struct vio_device_id *id;
 	int error = -ENODEV;
 
-	DBGENTER();
-
 	if (!viodrv->probe)
 		return error;
 
 	id = vio_match_device(viodrv->id_table, viodev);
-	if (id) {
+	if (id)
 		error = viodrv->probe(viodev, id);
-	}
 
 	return error;
 }
@@ -95,11 +62,8 @@ static int vio_bus_remove(struct device *dev)
 	struct vio_dev *viodev = to_vio_dev(dev);
 	struct vio_driver *viodrv = to_vio_driver(dev->driver);
 
-	DBGENTER();
-
-	if (viodrv->remove) {
+	if (viodrv->remove)
 		return viodrv->remove(viodev);
-	}
 
 	/* driver can't remove */
 	return 1;
@@ -135,193 +99,72 @@ void vio_unregister_driver(struct vio_driver *viodrv)
 EXPORT_SYMBOL(vio_unregister_driver);
 
 /**
- * vio_match_device: - Tell if a VIO device has a matching VIO device id structure.
- * @ids: 	array of VIO device id structures to search in
- * @dev: 	the VIO device structure to match against
+ * vio_match_device: - Tell if a VIO device has a matching
+ *			VIO device id structure.
+ * @ids:	array of VIO device id structures to search in
+ * @dev:	the VIO device structure to match against
  *
  * Used by a driver to check whether a VIO device present in the
  * system is in its list of supported devices. Returns the matching
  * vio_device_id structure or NULL if there is no match.
  */
-static const struct vio_device_id * vio_match_device(const struct vio_device_id *ids,
-	const struct vio_dev *dev)
+static const struct vio_device_id *vio_match_device(
+		const struct vio_device_id *ids, const struct vio_dev *dev)
 {
-	DBGENTER();
-
-	while (ids->type) {
-		if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) &&
-			device_is_compatible(dev->dev.platform_data, ids->compat))
+	while (ids->type[0] != '\0') {
+		if (vio_bus_ops.match(ids, dev))
 			return ids;
 		ids++;
 	}
 	return NULL;
 }
 
-#ifdef CONFIG_PPC_ISERIES
-void __init iommu_vio_init(void)
-{
-	struct iommu_table *t;
-	struct iommu_table_cb cb;
-	unsigned long cbp;
-	unsigned long itc_entries;
-
-	cb.itc_busno = 255;    /* Bus 255 is the virtual bus */
-	cb.itc_virtbus = 0xff; /* Ask for virtual bus */
-
-	cbp = virt_to_abs(&cb);
-	HvCallXm_getTceTableParms(cbp);
-
-	itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry);
-	veth_iommu_table.it_size        = itc_entries / 2;
-	veth_iommu_table.it_busno       = cb.itc_busno;
-	veth_iommu_table.it_offset      = cb.itc_offset;
-	veth_iommu_table.it_index       = cb.itc_index;
-	veth_iommu_table.it_type        = TCE_VB;
-	veth_iommu_table.it_blocksize	= 1;
-
-	t = iommu_init_table(&veth_iommu_table);
-
-	if (!t)
-		printk("Virtual Bus VETH TCE table failed.\n");
-
-	vio_iommu_table.it_size         = itc_entries - veth_iommu_table.it_size;
-	vio_iommu_table.it_busno        = cb.itc_busno;
-	vio_iommu_table.it_offset       = cb.itc_offset +
-					  veth_iommu_table.it_size;
-	vio_iommu_table.it_index        = cb.itc_index;
-	vio_iommu_table.it_type         = TCE_VB;
-	vio_iommu_table.it_blocksize	= 1;
-
-	t = iommu_init_table(&vio_iommu_table);
-
-	if (!t)
-		printk("Virtual Bus VIO TCE table failed.\n");
-}
-#endif
-
-#ifdef CONFIG_PPC_PSERIES
-static void probe_bus_pseries(void)
-{
-	struct device_node *node_vroot, *of_node;
-
-	node_vroot = find_devices("vdevice");
-	if ((node_vroot == NULL) || (node_vroot->child == NULL))
-		/* this machine doesn't do virtual IO, and that's ok */
-		return;
-
-	vio_num_address_cells = prom_n_addr_cells(node_vroot->child);
-
-	/*
-	 * Create struct vio_devices for each virtual device in the device tree.
-	 * Drivers will associate with them later.
-	 */
-	for (of_node = node_vroot->child; of_node != NULL;
-			of_node = of_node->sibling) {
-		printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node);
-		vio_register_device_node(of_node);
-	}
-}
-#endif
-
-#ifdef CONFIG_PPC_ISERIES
-static void probe_bus_iseries(void)
-{
-	HvLpIndexMap vlan_map = HvLpConfig_getVirtualLanIndexMap();
-	struct vio_dev *viodev;
-	int i;
-
-	/* there is only one of each of these */
-	vio_register_device_iseries("viocons", 0);
-	vio_register_device_iseries("vscsi", 0);
-
-	vlan_map = HvLpConfig_getVirtualLanIndexMap();
-	for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
-		if ((vlan_map & (0x8000 >> i)) == 0)
-			continue;
-		viodev = vio_register_device_iseries("vlan", i);
-		/* veth is special and has it own iommu_table */
-		viodev->iommu_table = &veth_iommu_table;
-	}
-	for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++)
-		vio_register_device_iseries("viodasd", i);
-	for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++)
-		vio_register_device_iseries("viocd", i);
-	for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++)
-		vio_register_device_iseries("viotape", i);
-}
-#endif
-
 /**
  * vio_bus_init: - Initialize the virtual IO bus
  */
-static int __init vio_bus_init(void)
+int __init vio_bus_init(struct vio_bus_ops *ops)
 {
 	int err;
 
+	vio_bus_ops = *ops;
+
 	err = bus_register(&vio_bus_type);
 	if (err) {
 		printk(KERN_ERR "failed to register VIO bus\n");
 		return err;
 	}
 
-	/* the fake parent of all vio devices, just to give us a nice directory */
+	/*
+	 * The fake parent of all vio devices, just to give us
+	 * a nice directory
+	 */
 	err = device_register(&vio_bus_device.dev);
 	if (err) {
-		printk(KERN_WARNING "%s: device_register returned %i\n", __FUNCTION__,
-			err);
+		printk(KERN_WARNING "%s: device_register returned %i\n",
+				__FUNCTION__, err);
 		return err;
 	}
 
-#ifdef CONFIG_PPC_PSERIES
-	probe_bus_pseries();
-#endif
-#ifdef CONFIG_PPC_ISERIES
-	probe_bus_iseries();
-#endif
-
 	return 0;
 }
 
-__initcall(vio_bus_init);
-
 /* vio_dev refcount hit 0 */
 static void __devinit vio_dev_release(struct device *dev)
 {
-	DBGENTER();
-
-#ifdef CONFIG_PPC_PSERIES
-	/* XXX free TCE table */
-	of_node_put(dev->platform_data);
-#endif
+	if (vio_bus_ops.release_device)
+		vio_bus_ops.release_device(dev);
 	kfree(to_vio_dev(dev));
 }
 
-#ifdef CONFIG_PPC_PSERIES
-static ssize_t viodev_show_devspec(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct device_node *of_node = dev->platform_data;
-
-	return sprintf(buf, "%s\n", of_node->full_name);
-}
-DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL);
-#endif
-
-static ssize_t viodev_show_name(struct device *dev, struct device_attribute *attr, char *buf)
+static ssize_t viodev_show_name(struct device *dev,
+		struct device_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%s\n", to_vio_dev(dev)->name);
 }
 DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL);
 
-static struct vio_dev * __devinit vio_register_device_common(
-		struct vio_dev *viodev, char *name, char *type,
-		uint32_t unit_address, struct iommu_table *iommu_table)
+struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev)
 {
-	DBGENTER();
-
-	viodev->name = name;
-	viodev->type = type;
-	viodev->unit_address = unit_address;
-	viodev->iommu_table = iommu_table;
 	/* init generic 'struct device' fields: */
 	viodev->dev.parent = &vio_bus_device.dev;
 	viodev->dev.bus = &vio_bus_type;
@@ -338,222 +181,15 @@ static struct vio_dev * __devinit vio_register_device_common(
 	return viodev;
 }
 
-#ifdef CONFIG_PPC_PSERIES
-/**
- * vio_register_device_node: - Register a new vio device.
- * @of_node:	The OF node for this device.
- *
- * Creates and initializes a vio_dev structure from the data in
- * of_node (dev.platform_data) and adds it to the list of virtual devices.
- * Returns a pointer to the created vio_dev or NULL if node has
- * NULL device_type or compatible fields.
- */
-struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
-{
-	struct vio_dev *viodev;
-	unsigned int *unit_address;
-	unsigned int *irq_p;
-
-	DBGENTER();
-
-	/* we need the 'device_type' property, in order to match with drivers */
-	if ((NULL == of_node->type)) {
-		printk(KERN_WARNING
-			"%s: node %s missing 'device_type'\n", __FUNCTION__,
-			of_node->name ? of_node->name : "<unknown>");
-		return NULL;
-	}
-
-	unit_address = (unsigned int *)get_property(of_node, "reg", NULL);
-	if (!unit_address) {
-		printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__,
-			of_node->name ? of_node->name : "<unknown>");
-		return NULL;
-	}
-
-	/* allocate a vio_dev for this node */
-	viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
-	if (!viodev) {
-		return NULL;
-	}
-	memset(viodev, 0, sizeof(struct vio_dev));
-
-	viodev->dev.platform_data = of_node_get(of_node);
-
-	viodev->irq = NO_IRQ;
-	irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL);
-	if (irq_p) {
-		int virq = virt_irq_create_mapping(*irq_p);
-		if (virq == NO_IRQ) {
-			printk(KERN_ERR "Unable to allocate interrupt "
-			       "number for %s\n", of_node->full_name);
-		} else
-			viodev->irq = irq_offset_up(virq);
-	}
-
-	snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address);
-
-	/* register with generic device framework */
-	if (vio_register_device_common(viodev, of_node->name, of_node->type,
-				*unit_address, vio_build_iommu_table(viodev))
-			== NULL) {
-		/* XXX free TCE table */
-		kfree(viodev);
-		return NULL;
-	}
-	device_create_file(&viodev->dev, &dev_attr_devspec);
-
-	return viodev;
-}
-EXPORT_SYMBOL(vio_register_device_node);
-#endif
-
-#ifdef CONFIG_PPC_ISERIES
-/**
- * vio_register_device: - Register a new vio device.
- * @voidev:	The device to register.
- */
-static struct vio_dev *__init vio_register_device_iseries(char *type,
-		uint32_t unit_num)
-{
-	struct vio_dev *viodev;
-
-	DBGENTER();
-
-	/* allocate a vio_dev for this node */
-	viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL);
-	if (!viodev)
-		return NULL;
-	memset(viodev, 0, sizeof(struct vio_dev));
-
-	snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num);
-
-	return vio_register_device_common(viodev, viodev->dev.bus_id, type,
-			unit_num, &vio_iommu_table);
-}
-#endif
-
 void __devinit vio_unregister_device(struct vio_dev *viodev)
 {
-	DBGENTER();
-#ifdef CONFIG_PPC_PSERIES
-	device_remove_file(&viodev->dev, &dev_attr_devspec);
-#endif
+	if (vio_bus_ops.unregister_device)
+		vio_bus_ops.unregister_device(viodev);
 	device_remove_file(&viodev->dev, &dev_attr_name);
 	device_unregister(&viodev->dev);
 }
 EXPORT_SYMBOL(vio_unregister_device);
 
-#ifdef CONFIG_PPC_PSERIES
-/**
- * vio_get_attribute: - get attribute for virtual device
- * @vdev:	The vio device to get property.
- * @which:	The property/attribute to be extracted.
- * @length:	Pointer to length of returned data size (unused if NULL).
- *
- * Calls prom.c's get_property() to return the value of the
- * attribute specified by the preprocessor constant @which
-*/
-const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length)
-{
-	return get_property(vdev->dev.platform_data, (char*)which, length);
-}
-EXPORT_SYMBOL(vio_get_attribute);
-
-/* vio_find_name() - internal because only vio.c knows how we formatted the
- * kobject name
- * XXX once vio_bus_type.devices is actually used as a kset in
- * drivers/base/bus.c, this function should be removed in favor of
- * "device_find(kobj_name, &vio_bus_type)"
- */
-static struct vio_dev *vio_find_name(const char *kobj_name)
-{
-	struct kobject *found;
-
-	found = kset_find_obj(&devices_subsys.kset, kobj_name);
-	if (!found)
-		return NULL;
-
-	return to_vio_dev(container_of(found, struct device, kobj));
-}
-
-/**
- * vio_find_node - find an already-registered vio_dev
- * @vnode: device_node of the virtual device we're looking for
- */
-struct vio_dev *vio_find_node(struct device_node *vnode)
-{
-	uint32_t *unit_address;
-	char kobj_name[BUS_ID_SIZE];
-
-	/* construct the kobject name from the device node */
-	unit_address = (uint32_t *)get_property(vnode, "reg", NULL);
-	if (!unit_address)
-		return NULL;
-	snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address);
-
-	return vio_find_name(kobj_name);
-}
-EXPORT_SYMBOL(vio_find_node);
-
-/**
- * vio_build_iommu_table: - gets the dma information from OF and builds the TCE tree.
- * @dev: the virtual device.
- *
- * Returns a pointer to the built tce tree, or NULL if it can't
- * find property.
-*/
-static struct iommu_table * vio_build_iommu_table(struct vio_dev *dev)
-{
-	unsigned int *dma_window;
-	struct iommu_table *newTceTable;
-	unsigned long offset;
-	int dma_window_property_size;
-
-	dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size);
-	if(!dma_window) {
-		return NULL;
-	}
-
-	newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
-
-	/*  There should be some code to extract the phys-encoded offset
-		using prom_n_addr_cells(). However, according to a comment
-		on earlier versions, it's always zero, so we don't bother */
-	offset = dma_window[1] >>  PAGE_SHIFT;
-
-	/* TCE table size - measured in tce entries */
-	newTceTable->it_size		= dma_window[4] >> PAGE_SHIFT;
-	/* offset for VIO should always be 0 */
-	newTceTable->it_offset		= offset;
-	newTceTable->it_busno		= 0;
-	newTceTable->it_index		= (unsigned long)dma_window[0];
-	newTceTable->it_type		= TCE_VB;
-
-	return iommu_init_table(newTceTable);
-}
-
-int vio_enable_interrupts(struct vio_dev *dev)
-{
-	int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
-	if (rc != H_Success) {
-		printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
-	}
-	return rc;
-}
-EXPORT_SYMBOL(vio_enable_interrupts);
-
-int vio_disable_interrupts(struct vio_dev *dev)
-{
-	int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
-	if (rc != H_Success) {
-		printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
-	}
-	return rc;
-}
-EXPORT_SYMBOL(vio_disable_interrupts);
-#endif
-
 static dma_addr_t vio_map_single(struct device *dev, void *vaddr,
 			  size_t size, enum dma_data_direction direction)
 {
@@ -615,18 +251,8 @@ static int vio_bus_match(struct device *dev, struct device_driver *drv)
 	const struct vio_dev *vio_dev = to_vio_dev(dev);
 	struct vio_driver *vio_drv = to_vio_driver(drv);
 	const struct vio_device_id *ids = vio_drv->id_table;
-	const struct vio_device_id *found_id;
-
-	DBGENTER();
 
-	if (!ids)
-		return 0;
-
-	found_id = vio_match_device(ids, vio_dev);
-	if (found_id)
-		return 1;
-
-	return 0;
+	return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
 }
 
 struct bus_type vio_bus_type = {

+ 1 - 3
arch/ppc64/mm/hash_low.S

@@ -128,13 +128,11 @@ _GLOBAL(__hash_page)
 	/* We eventually do the icache sync here (maybe inline that
 	 * code rather than call a C function...) 
 	 */
-BEGIN_FTR_SECTION
 BEGIN_FTR_SECTION
 	mr	r4,r30
 	mr	r5,r7
 	bl	.hash_page_do_lazy_icache
-END_FTR_SECTION_IFSET(CPU_FTR_NOEXECUTE)
-END_FTR_SECTION_IFCLR(CPU_FTR_COHERENT_ICACHE)
+END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
 
 	/* At this point, r3 contains new PP bits, save them in
 	 * place of "access" in the param area (sic)

+ 1 - 2
arch/ppc64/mm/hash_native.c

@@ -51,7 +51,6 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
 			unsigned long prpn, unsigned long vflags,
 			unsigned long rflags)
 {
-	unsigned long arpn = physRpn_to_absRpn(prpn);
 	hpte_t *hptep = htab_address + hpte_group;
 	unsigned long hpte_v, hpte_r;
 	int i;
@@ -74,7 +73,7 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
 	hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
 	if (vflags & HPTE_V_LARGE)
 		va &= ~(1UL << HPTE_V_AVPN_SHIFT);
-	hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags;
+	hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
 
 	hptep->r = hpte_r;
 	/* Guarantee the second dword is visible before the valid bit */

+ 2 - 2
arch/ppc64/mm/hash_utils.c

@@ -210,7 +210,7 @@ void __init htab_initialize(void)
 
 	/* create bolted the linear mapping in the hash table */
 	for (i=0; i < lmb.memory.cnt; i++) {
-		base = lmb.memory.region[i].physbase + KERNELBASE;
+		base = lmb.memory.region[i].base + KERNELBASE;
 		size = lmb.memory.region[i].size;
 
 		DBG("creating mapping for region: %lx : %lx\n", base, size);
@@ -302,7 +302,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 	int local = 0;
 	cpumask_t tmp;
 
-	if ((ea & ~REGION_MASK) > EADDR_MASK)
+	if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
 		return 1;
 
  	switch (REGION_ID(ea)) {

+ 212 - 176
arch/ppc64/mm/hugetlbpage.c

@@ -27,124 +27,94 @@
 
 #include <linux/sysctl.h>
 
-#define	HUGEPGDIR_SHIFT		(HPAGE_SHIFT + PAGE_SHIFT - 3)
-#define HUGEPGDIR_SIZE		(1UL << HUGEPGDIR_SHIFT)
-#define HUGEPGDIR_MASK		(~(HUGEPGDIR_SIZE-1))
+#define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
+#define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
 
-#define HUGEPTE_INDEX_SIZE	9
-#define HUGEPGD_INDEX_SIZE	10
-
-#define PTRS_PER_HUGEPTE	(1 << HUGEPTE_INDEX_SIZE)
-#define PTRS_PER_HUGEPGD	(1 << HUGEPGD_INDEX_SIZE)
-
-static inline int hugepgd_index(unsigned long addr)
-{
-	return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT;
-}
-
-static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr)
+/* Modelled after find_linux_pte() */
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
-	int index;
+	pgd_t *pg;
+	pud_t *pu;
+	pmd_t *pm;
+	pte_t *pt;
 
-	if (! mm->context.huge_pgdir)
-		return NULL;
+	BUG_ON(! in_hugepage_area(mm->context, addr));
 
+	addr &= HPAGE_MASK;
+
+	pg = pgd_offset(mm, addr);
+	if (!pgd_none(*pg)) {
+		pu = pud_offset(pg, addr);
+		if (!pud_none(*pu)) {
+			pm = pmd_offset(pu, addr);
+			pt = (pte_t *)pm;
+			BUG_ON(!pmd_none(*pm)
+			       && !(pte_present(*pt) && pte_huge(*pt)));
+			return pt;
+		}
+	}
 
-	index = hugepgd_index(addr);
-	BUG_ON(index >= PTRS_PER_HUGEPGD);
-	return (pud_t *)(mm->context.huge_pgdir + index);
+	return NULL;
 }
 
-static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
-	int index;
-
-	if (pud_none(*dir))
-		return NULL;
+	pgd_t *pg;
+	pud_t *pu;
+	pmd_t *pm;
+	pte_t *pt;
 
-	index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE;
-	return (pte_t *)pud_page(*dir) + index;
-}
-
-static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr)
-{
 	BUG_ON(! in_hugepage_area(mm->context, addr));
 
-	if (! mm->context.huge_pgdir) {
-		pgd_t *new;
-		spin_unlock(&mm->page_table_lock);
-		/* Don't use pgd_alloc(), because we want __GFP_REPEAT */
-		new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
-		BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
-		spin_lock(&mm->page_table_lock);
+	addr &= HPAGE_MASK;
 
-		/*
-		 * Because we dropped the lock, we should re-check the
-		 * entry, as somebody else could have populated it..
-		 */
-		if (mm->context.huge_pgdir)
-			pgd_free(new);
-		else
-			mm->context.huge_pgdir = new;
-	}
-	return hugepgd_offset(mm, addr);
-}
+	pg = pgd_offset(mm, addr);
+	pu = pud_alloc(mm, pg, addr);
 
-static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr)
-{
-	if (! pud_present(*dir)) {
-		pte_t *new;
-
-		spin_unlock(&mm->page_table_lock);
-		new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
-		BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
-		spin_lock(&mm->page_table_lock);
-		/*
-		 * Because we dropped the lock, we should re-check the
-		 * entry, as somebody else could have populated it..
-		 */
-		if (pud_present(*dir)) {
-			if (new)
-				kmem_cache_free(zero_cache, new);
-		} else {
-			struct page *ptepage;
-
-			if (! new)
-				return NULL;
-			ptepage = virt_to_page(new);
-			ptepage->mapping = (void *) mm;
-			ptepage->index = addr & HUGEPGDIR_MASK;
-			pud_populate(mm, dir, new);
+	if (pu) {
+		pm = pmd_alloc(mm, pu, addr);
+		if (pm) {
+			pt = (pte_t *)pm;
+			BUG_ON(!pmd_none(*pm)
+			       && !(pte_present(*pt) && pte_huge(*pt)));
+			return pt;
 		}
 	}
 
-	return hugepte_offset(dir, addr);
+	return NULL;
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
-	pud_t *pud;
+#define HUGEPTE_BATCH_SIZE	(HPAGE_SIZE / PMD_SIZE)
 
-	BUG_ON(! in_hugepage_area(mm->context, addr));
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte)
+{
+	int i;
 
-	pud = hugepgd_offset(mm, addr);
-	if (! pud)
-		return NULL;
+	if (pte_present(*ptep)) {
+		pte_clear(mm, addr, ptep);
+		flush_tlb_pending();
+	}
 
-	return hugepte_offset(pud, addr);
+	for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
+		*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+		ptep++;
+	}
 }
 
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep)
 {
-	pud_t *pud;
+	unsigned long old = pte_update(ptep, ~0UL);
+	int i;
 
-	BUG_ON(! in_hugepage_area(mm->context, addr));
+	if (old & _PAGE_HASHPTE)
+		hpte_update(mm, addr, old, 0);
 
-	pud = hugepgd_alloc(mm, addr);
-	if (! pud)
-		return NULL;
+	for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
+		ptep[i] = __pte(0);
 
-	return hugepte_alloc(mm, pud, addr);
+	return __pte(old);
 }
 
 /*
@@ -162,15 +132,17 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
 	return 0;
 }
 
-static void flush_segments(void *parm)
+static void flush_low_segments(void *parm)
 {
-	u16 segs = (unsigned long) parm;
+	u16 areas = (unsigned long) parm;
 	unsigned long i;
 
 	asm volatile("isync" : : : "memory");
 
-	for (i = 0; i < 16; i++) {
-		if (! (segs & (1U << i)))
+	BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS);
+
+	for (i = 0; i < NUM_LOW_AREAS; i++) {
+		if (! (areas & (1U << i)))
 			continue;
 		asm volatile("slbie %0" : : "r" (i << SID_SHIFT));
 	}
@@ -178,13 +150,33 @@ static void flush_segments(void *parm)
 	asm volatile("isync" : : : "memory");
 }
 
-static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg)
+static void flush_high_segments(void *parm)
 {
-	unsigned long start = seg << SID_SHIFT;
-	unsigned long end = (seg+1) << SID_SHIFT;
+	u16 areas = (unsigned long) parm;
+	unsigned long i, j;
+
+	asm volatile("isync" : : : "memory");
+
+	BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS);
+
+	for (i = 0; i < NUM_HIGH_AREAS; i++) {
+		if (! (areas & (1U << i)))
+			continue;
+		for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
+			asm volatile("slbie %0"
+				     :: "r" ((i << HTLB_AREA_SHIFT) + (j << SID_SHIFT)));
+	}
+
+	asm volatile("isync" : : : "memory");
+}
+
+static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
+{
+	unsigned long start = area << SID_SHIFT;
+	unsigned long end = (area+1) << SID_SHIFT;
 	struct vm_area_struct *vma;
 
-	BUG_ON(seg >= 16);
+	BUG_ON(area >= NUM_LOW_AREAS);
 
 	/* Check no VMAs are in the region */
 	vma = find_vma(mm, start);
@@ -194,20 +186,39 @@ static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg)
 	return 0;
 }
 
-static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs)
+static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
+{
+	unsigned long start = area << HTLB_AREA_SHIFT;
+	unsigned long end = (area+1) << HTLB_AREA_SHIFT;
+	struct vm_area_struct *vma;
+
+	BUG_ON(area >= NUM_HIGH_AREAS);
+
+	/* Check no VMAs are in the region */
+	vma = find_vma(mm, start);
+	if (vma && (vma->vm_start < end))
+		return -EBUSY;
+
+	return 0;
+}
+
+static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
 {
 	unsigned long i;
 
-	newsegs &= ~(mm->context.htlb_segs);
-	if (! newsegs)
+	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
+	BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);
+
+	newareas &= ~(mm->context.low_htlb_areas);
+	if (! newareas)
 		return 0; /* The segments we want are already open */
 
-	for (i = 0; i < 16; i++)
-		if ((1 << i) & newsegs)
-			if (prepare_low_seg_for_htlb(mm, i) != 0)
+	for (i = 0; i < NUM_LOW_AREAS; i++)
+		if ((1 << i) & newareas)
+			if (prepare_low_area_for_htlb(mm, i) != 0)
 				return -EBUSY;
 
-	mm->context.htlb_segs |= newsegs;
+	mm->context.low_htlb_areas |= newareas;
 
 	/* update the paca copy of the context struct */
 	get_paca()->context = mm->context;
@@ -215,29 +226,63 @@ static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs)
 	/* the context change must make it to memory before the flush,
 	 * so that further SLB misses do the right thing. */
 	mb();
-	on_each_cpu(flush_segments, (void *)(unsigned long)newsegs, 0, 1);
+	on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1);
+
+	return 0;
+}
+
+static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
+{
+	unsigned long i;
+
+	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
+	BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
+		     != NUM_HIGH_AREAS);
+
+	newareas &= ~(mm->context.high_htlb_areas);
+	if (! newareas)
+		return 0; /* The areas we want are already open */
+
+	for (i = 0; i < NUM_HIGH_AREAS; i++)
+		if ((1 << i) & newareas)
+			if (prepare_high_area_for_htlb(mm, i) != 0)
+				return -EBUSY;
+
+	mm->context.high_htlb_areas |= newareas;
+
+	/* update the paca copy of the context struct */
+	get_paca()->context = mm->context;
+
+	/* the context change must make it to memory before the flush,
+	 * so that further SLB misses do the right thing. */
+	mb();
+	on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1);
 
 	return 0;
 }
 
 int prepare_hugepage_range(unsigned long addr, unsigned long len)
 {
-	if (within_hugepage_high_range(addr, len))
-		return 0;
-	else if ((addr < 0x100000000UL) && ((addr+len) < 0x100000000UL)) {
-		int err;
-		/* Yes, we need both tests, in case addr+len overflows
-		 * 64-bit arithmetic */
-		err = open_low_hpage_segs(current->mm,
+	int err;
+
+	if ( (addr+len) < addr )
+		return -EINVAL;
+
+	if ((addr + len) < 0x100000000UL)
+		err = open_low_hpage_areas(current->mm,
 					  LOW_ESID_MASK(addr, len));
-		if (err)
-			printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
-			       " failed (segs: 0x%04hx)\n", addr, len,
-			       LOW_ESID_MASK(addr, len));
+	else
+		err = open_high_hpage_areas(current->mm,
+					    HTLB_AREA_MASK(addr, len));
+	if (err) {
+		printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
+		       " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
+		       addr, len,
+		       LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
 		return err;
 	}
 
-	return -EINVAL;
+	return 0;
 }
 
 struct page *
@@ -309,8 +354,8 @@ full_search:
 			vma = find_vma(mm, addr);
 			continue;
 		}
-		if (touches_hugepage_high_range(addr, len)) {
-			addr = TASK_HPAGE_END;
+		if (touches_hugepage_high_range(mm, addr, len)) {
+			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
 			vma = find_vma(mm, addr);
 			continue;
 		}
@@ -389,8 +434,9 @@ hugepage_recheck:
 		if (touches_hugepage_low_range(mm, addr, len)) {
 			addr = (addr & ((~0) << SID_SHIFT)) - len;
 			goto hugepage_recheck;
-		} else if (touches_hugepage_high_range(addr, len)) {
-			addr = TASK_HPAGE_BASE - len;
+		} else if (touches_hugepage_high_range(mm, addr, len)) {
+			addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
+			goto hugepage_recheck;
 		}
 
 		/*
@@ -481,23 +527,28 @@ static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
 	return -ENOMEM;
 }
 
-static unsigned long htlb_get_high_area(unsigned long len)
+static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
 {
-	unsigned long addr = TASK_HPAGE_BASE;
+	unsigned long addr = 0x100000000UL;
 	struct vm_area_struct *vma;
 
 	vma = find_vma(current->mm, addr);
-	for (vma = find_vma(current->mm, addr);
-	     addr + len <= TASK_HPAGE_END;
-	     vma = vma->vm_next) {
+	while (addr + len <= TASK_SIZE_USER64) {
 		BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
-		BUG_ON(! within_hugepage_high_range(addr, len));
+
+		if (! __within_hugepage_high_range(addr, len, areamask)) {
+			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
+			vma = find_vma(current->mm, addr);
+			continue;
+		}
 
 		if (!vma || (addr + len) <= vma->vm_start)
 			return addr;
 		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
-		/* Because we're in a hugepage region, this alignment
-		 * should not skip us over any VMAs */
+		/* Depending on segmask this might not be a confirmed
+		 * hugepage region, so the ALIGN could have skipped
+		 * some VMAs */
+		vma = find_vma(current->mm, addr);
 	}
 
 	return -ENOMEM;
@@ -507,6 +558,9 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long len, unsigned long pgoff,
 					unsigned long flags)
 {
+	int lastshift;
+	u16 areamask, curareas;
+
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
 
@@ -514,67 +568,49 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 		return -EINVAL;
 
 	if (test_thread_flag(TIF_32BIT)) {
-		int lastshift = 0;
-		u16 segmask, cursegs = current->mm->context.htlb_segs;
+		curareas = current->mm->context.low_htlb_areas;
 
 		/* First see if we can do the mapping in the existing
-		 * low hpage segments */
-		addr = htlb_get_low_area(len, cursegs);
+		 * low areas */
+		addr = htlb_get_low_area(len, curareas);
 		if (addr != -ENOMEM)
 			return addr;
 
-		for (segmask = LOW_ESID_MASK(0x100000000UL-len, len);
-		     ! lastshift; segmask >>=1) {
-			if (segmask & 1)
+		lastshift = 0;
+		for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
+		     ! lastshift; areamask >>=1) {
+			if (areamask & 1)
 				lastshift = 1;
 
-			addr = htlb_get_low_area(len, cursegs | segmask);
+			addr = htlb_get_low_area(len, curareas | areamask);
 			if ((addr != -ENOMEM)
-			    && open_low_hpage_segs(current->mm, segmask) == 0)
+			    && open_low_hpage_areas(current->mm, areamask) == 0)
 				return addr;
 		}
-		printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
-		       " enough segments\n");
-		return -ENOMEM;
 	} else {
-		return htlb_get_high_area(len);
-	}
-}
-
-void hugetlb_mm_free_pgd(struct mm_struct *mm)
-{
-	int i;
-	pgd_t *pgdir;
-
-	spin_lock(&mm->page_table_lock);
-
-	pgdir = mm->context.huge_pgdir;
-	if (! pgdir)
-		goto out;
-
-	mm->context.huge_pgdir = NULL;
+		curareas = current->mm->context.high_htlb_areas;
 
-	/* cleanup any hugepte pages leftover */
-	for (i = 0; i < PTRS_PER_HUGEPGD; i++) {
-		pud_t *pud = (pud_t *)(pgdir + i);
-
-		if (! pud_none(*pud)) {
-			pte_t *pte = (pte_t *)pud_page(*pud);
-			struct page *ptepage = virt_to_page(pte);
+		/* First see if we can do the mapping in the existing
+		 * high areas */
+		addr = htlb_get_high_area(len, curareas);
+		if (addr != -ENOMEM)
+			return addr;
 
-			ptepage->mapping = NULL;
+		lastshift = 0;
+		for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
+		     ! lastshift; areamask >>=1) {
+			if (areamask & 1)
+				lastshift = 1;
 
-			BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE));
-			kmem_cache_free(zero_cache, pte);
+			addr = htlb_get_high_area(len, curareas | areamask);
+			if ((addr != -ENOMEM)
+			    && open_high_hpage_areas(current->mm, areamask) == 0)
+				return addr;
 		}
-		pud_clear(pud);
 	}
-
-	BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE));
-	kmem_cache_free(zero_cache, pgdir);
-
- out:
-	spin_unlock(&mm->page_table_lock);
+	printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
+	       " enough areas\n");
+	return -ENOMEM;
 }
 
 int hash_huge_page(struct mm_struct *mm, unsigned long access,

+ 1 - 1
arch/ppc64/mm/imalloc.c

@@ -31,7 +31,7 @@ static int get_free_im_addr(unsigned long size, unsigned long *im_addr)
 			break;
 		if ((unsigned long)tmp->addr >= ioremap_bot)
 			addr = tmp->size + (unsigned long) tmp->addr;
-		if (addr > IMALLOC_END-size) 
+		if (addr >= IMALLOC_END-size)
 			return 1;
 	}
 	*im_addr = addr;

+ 54 - 42
arch/ppc64/mm/init.c

@@ -42,7 +42,6 @@
 
 #include <asm/pgalloc.h>
 #include <asm/page.h>
-#include <asm/abs_addr.h>
 #include <asm/prom.h>
 #include <asm/lmb.h>
 #include <asm/rtas.h>
@@ -66,6 +65,14 @@
 #include <asm/vdso.h>
 #include <asm/imalloc.h>
 
+#if PGTABLE_RANGE > USER_VSID_RANGE
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
+#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
+#warning TASK_SIZE is smaller than it needs to be.
+#endif
+
 int mem_init_done;
 unsigned long ioremap_bot = IMALLOC_BASE;
 static unsigned long phbs_io_bot = PHBS_IO_BASE;
@@ -159,7 +166,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
 		ptep = pte_alloc_kernel(&init_mm, pmdp, ea);
 		if (!ptep)
 			return -ENOMEM;
-		pa = abs_to_phys(pa);
 		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
 							  __pgprot(flags)));
 		spin_unlock(&init_mm.page_table_lock);
@@ -226,7 +232,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size,
 	 * Before that, we map using addresses going
 	 * up from ioremap_bot.  imalloc will use
 	 * the addresses from ioremap_bot through
-	 * IMALLOC_END (0xE000001fffffffff)
+	 * IMALLOC_END
 	 * 
 	 */
 	pa = addr & PAGE_MASK;
@@ -417,12 +423,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	int index;
 	int err;
 
-#ifdef CONFIG_HUGETLB_PAGE
-	/* We leave htlb_segs as it was, but for a fork, we need to
-	 * clear the huge_pgdir. */
-	mm->context.huge_pgdir = NULL;
-#endif
-
 again:
 	if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
 		return -ENOMEM;
@@ -453,8 +453,6 @@ void destroy_context(struct mm_struct *mm)
 	spin_unlock(&mmu_context_lock);
 
 	mm->context.id = NO_CONTEXT;
-
-	hugetlb_mm_free_pgd(mm);
 }
 
 /*
@@ -484,9 +482,9 @@ void __init mm_init_ppc64(void)
 	for (i = 1; i < lmb.memory.cnt; i++) {
 		unsigned long base, prevbase, prevsize;
 
-		prevbase = lmb.memory.region[i-1].physbase;
+		prevbase = lmb.memory.region[i-1].base;
 		prevsize = lmb.memory.region[i-1].size;
-		base = lmb.memory.region[i].physbase;
+		base = lmb.memory.region[i].base;
 		if (base > (prevbase + prevsize)) {
 			io_hole_start = prevbase + prevsize;
 			io_hole_size = base  - (prevbase + prevsize);
@@ -513,11 +511,8 @@ int page_is_ram(unsigned long pfn)
 	for (i=0; i < lmb.memory.cnt; i++) {
 		unsigned long base;
 
-#ifdef CONFIG_MSCHUNKS
-		base = lmb.memory.region[i].physbase;
-#else
 		base = lmb.memory.region[i].base;
-#endif
+
 		if ((paddr >= base) &&
 			(paddr < (base + lmb.memory.region[i].size))) {
 			return 1;
@@ -547,7 +542,7 @@ void __init do_init_bootmem(void)
 	 */
 	bootmap_pages = bootmem_bootmap_pages(total_pages);
 
-	start = abs_to_phys(lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE));
+	start = lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
 	BUG_ON(!start);
 
 	boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
@@ -558,25 +553,25 @@ void __init do_init_bootmem(void)
 	 * present.
 	 */
 	for (i=0; i < lmb.memory.cnt; i++) {
-		unsigned long physbase, size;
+		unsigned long base, size;
 		unsigned long start_pfn, end_pfn;
 
-		physbase = lmb.memory.region[i].physbase;
+		base = lmb.memory.region[i].base;
 		size = lmb.memory.region[i].size;
 
-		start_pfn = physbase >> PAGE_SHIFT;
+		start_pfn = base >> PAGE_SHIFT;
 		end_pfn = start_pfn + (size >> PAGE_SHIFT);
 		memory_present(0, start_pfn, end_pfn);
 
-		free_bootmem(physbase, size);
+		free_bootmem(base, size);
 	}
 
 	/* reserve the sections we're already using */
 	for (i=0; i < lmb.reserved.cnt; i++) {
-		unsigned long physbase = lmb.reserved.region[i].physbase;
+		unsigned long base = lmb.reserved.region[i].base;
 		unsigned long size = lmb.reserved.region[i].size;
 
-		reserve_bootmem(physbase, size);
+		reserve_bootmem(base, size);
 	}
 }
 
@@ -615,10 +610,10 @@ static int __init setup_kcore(void)
 	int i;
 
 	for (i=0; i < lmb.memory.cnt; i++) {
-		unsigned long physbase, size;
+		unsigned long base, size;
 		struct kcore_list *kcore_mem;
 
-		physbase = lmb.memory.region[i].physbase;
+		base = lmb.memory.region[i].base;
 		size = lmb.memory.region[i].size;
 
 		/* GFP_ATOMIC to avoid might_sleep warnings during boot */
@@ -626,7 +621,7 @@ static int __init setup_kcore(void)
 		if (!kcore_mem)
 			panic("mem_init: kmalloc failed\n");
 
-		kclist_add(kcore_mem, __va(physbase), size);
+		kclist_add(kcore_mem, __va(base), size);
 	}
 
 	kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
@@ -686,9 +681,6 @@ void __init mem_init(void)
 
 	mem_init_done = 1;
 
-#ifdef CONFIG_PPC_ISERIES
-	iommu_vio_init();
-#endif
 	/* Initialize the vDSO */
 	vdso_init();
 }
@@ -833,23 +825,43 @@ void __iomem * reserve_phb_iospace(unsigned long size)
 	return virt_addr;
 }
 
-kmem_cache_t *zero_cache;
-
-static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
+static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
 {
-	memset(pte, 0, PAGE_SIZE);
+	memset(addr, 0, kmem_cache_size(cache));
 }
 
+static const int pgtable_cache_size[2] = {
+	PTE_TABLE_SIZE, PMD_TABLE_SIZE
+};
+static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
+	"pgd_pte_cache", "pud_pmd_cache",
+};
+
+kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
+
 void pgtable_cache_init(void)
 {
-	zero_cache = kmem_cache_create("zero",
-				PAGE_SIZE,
-				0,
-				SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN,
-				zero_ctor,
-				NULL);
-	if (!zero_cache)
-		panic("pgtable_cache_init(): could not create zero_cache!\n");
+	int i;
+
+	BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
+	BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
+	BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
+	BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
+
+	for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
+		int size = pgtable_cache_size[i];
+		const char *name = pgtable_cache_name[i];
+
+		pgtable_cache[i] = kmem_cache_create(name,
+						     size, size,
+						     SLAB_HWCACHE_ALIGN
+						     | SLAB_MUST_HWCACHE_ALIGN,
+						     zero_ctor,
+						     NULL);
+		if (! pgtable_cache[i])
+			panic("pgtable_cache_init(): could not create %s!\n",
+			      name);
+	}
 }
 
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,

+ 1 - 1
arch/ppc64/mm/numa.c

@@ -671,7 +671,7 @@ new_range:
 		 * Mark reserved regions on this node
 		 */
 		for (i = 0; i < lmb.reserved.cnt; i++) {
-			unsigned long physbase = lmb.reserved.region[i].physbase;
+			unsigned long physbase = lmb.reserved.region[i].base;
 			unsigned long size = lmb.reserved.region[i].size;
 
 			if (pa_to_nid(physbase) != nid &&

+ 13 - 12
arch/ppc64/mm/slb_low.S

@@ -89,28 +89,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
 	b	9f
 
 0:	/* user address: proto-VSID = context<<15 | ESID */
-	li	r11,SLB_VSID_USER
-
-	srdi.	r9,r3,13
+	srdi.	r9,r3,USER_ESID_BITS
 	bne-	8f			/* invalid ea bits set */
 
 #ifdef CONFIG_HUGETLB_PAGE
 BEGIN_FTR_SECTION
-	/* check against the hugepage ranges */
-	cmpldi	r3,(TASK_HPAGE_END>>SID_SHIFT)
-	bge	6f			/* >= TASK_HPAGE_END */
-	cmpldi	r3,(TASK_HPAGE_BASE>>SID_SHIFT)
-	bge	5f			/* TASK_HPAGE_BASE..TASK_HPAGE_END */
+	lhz	r9,PACAHIGHHTLBAREAS(r13)
+	srdi	r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT)
+	srd	r9,r9,r11
+	andi.	r9,r9,1
+	bne	5f
+
+	li	r11,SLB_VSID_USER
+
 	cmpldi	r3,16
-	bge	6f			/* 4GB..TASK_HPAGE_BASE */
+	bge	6f
 
-	lhz	r9,PACAHTLBSEGS(r13)
+	lhz	r9,PACALOWHTLBAREAS(r13)
 	srd	r9,r9,r3
 	andi.	r9,r9,1
+
 	beq	6f
 
-5:	/* this is a hugepage user address */
-	li	r11,(SLB_VSID_USER|SLB_VSID_L)
+5:	li	r11,SLB_VSID_USER|SLB_VSID_L
 END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
 #endif /* CONFIG_HUGETLB_PAGE */
 

+ 55 - 40
arch/ppc64/mm/tlb.c

@@ -41,7 +41,58 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
 unsigned long pte_freelist_forced_free;
 
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage)
+struct pte_freelist_batch
+{
+	struct rcu_head	rcu;
+	unsigned int	index;
+	pgtable_free_t	tables[0];
+};
+
+DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+unsigned long pte_freelist_forced_free;
+
+#define PTE_FREELIST_SIZE \
+	((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
+	  / sizeof(pgtable_free_t))
+
+#ifdef CONFIG_SMP
+static void pte_free_smp_sync(void *arg)
+{
+	/* Do nothing, just ensure we sync with all CPUs */
+}
+#endif
+
+/* This is only called when we are critically out of memory
+ * (and fail to get a page in pte_free_tlb).
+ */
+static void pgtable_free_now(pgtable_free_t pgf)
+{
+	pte_freelist_forced_free++;
+
+	smp_call_function(pte_free_smp_sync, NULL, 0, 1);
+
+	pgtable_free(pgf);
+}
+
+static void pte_free_rcu_callback(struct rcu_head *head)
+{
+	struct pte_freelist_batch *batch =
+		container_of(head, struct pte_freelist_batch, rcu);
+	unsigned int i;
+
+	for (i = 0; i < batch->index; i++)
+		pgtable_free(batch->tables[i]);
+
+	free_page((unsigned long)batch);
+}
+
+static void pte_free_submit(struct pte_freelist_batch *batch)
+{
+	INIT_RCU_HEAD(&batch->rcu);
+	call_rcu(&batch->rcu, pte_free_rcu_callback);
+}
+
+void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
 {
 	/* This is safe as we are holding page_table_lock */
         cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
@@ -49,19 +100,19 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage)
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
 	    cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
-		pte_free(ptepage);
+		pgtable_free(pgf);
 		return;
 	}
 
 	if (*batchp == NULL) {
 		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
 		if (*batchp == NULL) {
-			pte_free_now(ptepage);
+			pgtable_free_now(pgf);
 			return;
 		}
 		(*batchp)->index = 0;
 	}
-	(*batchp)->pages[(*batchp)->index++] = ptepage;
+	(*batchp)->tables[(*batchp)->index++] = pgf;
 	if ((*batchp)->index == PTE_FREELIST_SIZE) {
 		pte_free_submit(*batchp);
 		*batchp = NULL;
@@ -132,42 +183,6 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
 	put_cpu();
 }
 
-#ifdef CONFIG_SMP
-static void pte_free_smp_sync(void *arg)
-{
-	/* Do nothing, just ensure we sync with all CPUs */
-}
-#endif
-
-/* This is only called when we are critically out of memory
- * (and fail to get a page in pte_free_tlb).
- */
-void pte_free_now(struct page *ptepage)
-{
-	pte_freelist_forced_free++;
-
-	smp_call_function(pte_free_smp_sync, NULL, 0, 1);
-
-	pte_free(ptepage);
-}
-
-static void pte_free_rcu_callback(struct rcu_head *head)
-{
-	struct pte_freelist_batch *batch =
-		container_of(head, struct pte_freelist_batch, rcu);
-	unsigned int i;
-
-	for (i = 0; i < batch->index; i++)
-		pte_free(batch->pages[i]);
-	free_page((unsigned long)batch);
-}
-
-void pte_free_submit(struct pte_freelist_batch *batch)
-{
-	INIT_RCU_HEAD(&batch->rcu);
-	call_rcu(&batch->rcu, pte_free_rcu_callback);
-}
-
 void pte_free_finish(void)
 {
 	/* This is safe as we are holding page_table_lock */

+ 1 - 0
arch/ppc64/oprofile/common.c

@@ -153,6 +153,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 
 		case PV_970:
 		case PV_970FX:
+		case PV_970MP:
 			model = &op_model_power4;
 			model->num_counters = 8;
 			ops->cpu_type = "ppc64/970";

+ 1 - 1
arch/ppc64/xmon/start.c

@@ -27,7 +27,7 @@ static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs,
 			      struct tty_struct *tty) 
 {
 	/* ensure xmon is enabled */
-	xmon_init();
+	xmon_init(1);
 	debugger(pt_regs);
 }
 

+ 19 - 9
arch/ppc64/xmon/xmon.c

@@ -2496,15 +2496,25 @@ static void dump_stab(void)
 	}
 }
 
-void xmon_init(void)
-{
-	__debugger = xmon;
-	__debugger_ipi = xmon_ipi;
-	__debugger_bpt = xmon_bpt;
-	__debugger_sstep = xmon_sstep;
-	__debugger_iabr_match = xmon_iabr_match;
-	__debugger_dabr_match = xmon_dabr_match;
-	__debugger_fault_handler = xmon_fault_handler;
+void xmon_init(int enable)
+{
+	if (enable) {
+		__debugger = xmon;
+		__debugger_ipi = xmon_ipi;
+		__debugger_bpt = xmon_bpt;
+		__debugger_sstep = xmon_sstep;
+		__debugger_iabr_match = xmon_iabr_match;
+		__debugger_dabr_match = xmon_dabr_match;
+		__debugger_fault_handler = xmon_fault_handler;
+	} else {
+		__debugger = NULL;
+		__debugger_ipi = NULL;
+		__debugger_bpt = NULL;
+		__debugger_sstep = NULL;
+		__debugger_iabr_match = NULL;
+		__debugger_dabr_match = NULL;
+		__debugger_fault_handler = NULL;
+	}
 }
 
 void dump_segments(void)

+ 5 - 6
arch/s390/kernel/compat_signal.c

@@ -637,12 +637,11 @@ handle_signal32(unsigned long sig, struct k_sigaction *ka,
 	else
 		setup_frame32(sig, ka, oldset, regs);
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 

+ 5 - 6
arch/s390/kernel/signal.c

@@ -429,13 +429,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
 	else
 		setup_frame(sig, ka, oldset, regs);
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 /*

+ 5 - 6
arch/sh/kernel/signal.c

@@ -546,13 +546,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 	if (ka->sa.sa_flags & SA_ONESHOT)
 		ka->sa.sa_handler = SIG_DFL;
 
-	if (!(ka->sa.sa_flags & SA_NODEFER)) {
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
 		sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
 }
 
 /*

Энэ ялгаанд хэт олон файл өөрчлөгдсөн тул зарим файлыг харуулаагүй болно