
Automatic merge of rsync://rsync.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git branch HEAD

20 years ago
parent
commit
949d33e70f
100 files changed with 1944 additions and 1300 deletions
  1. CREDITS (+2 -3)
  2. Documentation/cpusets.txt (+1 -2)
  3. Documentation/dvb/README.flexcop (+205 -0)
  4. Documentation/dvb/bt8xx.txt (+24 -45)
  5. Documentation/dvb/ci.txt (+219 -0)
  6. Documentation/dvb/get_dvb_firmware (+2 -2)
  7. Documentation/feature-removal-schedule.txt (+20 -0)
  8. Documentation/filesystems/sysfs-pci.txt (+2 -4)
  9. Documentation/power/devices.txt (+0 -21)
  10. Documentation/powerpc/hvcs.txt (+2 -2)
  11. Documentation/x86_64/boot-options.txt (+3 -0)
  12. Makefile (+2 -2)
  13. arch/alpha/kernel/osf_sys.c (+2 -5)
  14. arch/arm/mach-s3c2410/clock.c (+1 -1)
  15. arch/arm/mach-s3c2410/s3c2440.c (+4 -2)
  16. arch/arm/mm/Kconfig (+10 -11)
  17. arch/arm/mm/copypage-v4mc.S (+0 -80)
  18. arch/arm/mm/copypage-v4mc.c (+111 -0)
  19. arch/arm/mm/copypage-v6.c (+6 -22)
  20. arch/arm/mm/flush.c (+35 -2)
  21. arch/arm/mm/mm-armv.c (+19 -8)
  22. arch/i386/kernel/cpu/amd.c (+7 -5)
  23. arch/i386/kernel/cpu/common.c (+4 -0)
  24. arch/i386/kernel/smpboot.c (+1 -0)
  25. arch/i386/mach-voyager/voyager_smp.c (+8 -9)
  26. arch/i386/mm/ioremap.c (+6 -4)
  27. arch/i386/pci/fixup.c (+1 -1)
  28. arch/ia64/ia32/ia32_ioctl.c (+0 -1)
  29. arch/mips/vr41xx/common/pmu.c (+48 -7)
  30. arch/ppc/kernel/head_44x.S (+9 -6)
  31. arch/ppc/kernel/setup.c (+3 -1)
  32. arch/ppc/lib/string.S (+5 -2)
  33. arch/ppc/mm/init.c (+1 -0)
  34. arch/ppc/syslib/ipic.c (+1 -1)
  35. arch/ppc/syslib/mpc83xx_devices.c (+1 -0)
  36. arch/ppc/syslib/mpc85xx_devices.c (+1 -0)
  37. arch/ppc/syslib/open_pic.c (+0 -2)
  38. arch/ppc64/kernel/mf.c (+65 -20)
  39. arch/ppc64/kernel/pmac_smp.c (+21 -7)
  40. arch/ppc64/kernel/prom_init.c (+43 -1)
  41. arch/ppc64/kernel/rtc.c (+1 -38)
  42. arch/ppc64/kernel/time.c (+1 -0)
  43. arch/sparc64/kernel/pci_iommu.c (+67 -100)
  44. arch/sparc64/kernel/sbus.c (+23 -8)
  45. arch/sparc64/kernel/setup.c (+11 -0)
  46. arch/sparc64/kernel/smp.c (+3 -0)
  47. arch/sparc64/kernel/traps.c (+19 -0)
  48. arch/um/Kconfig_x86_64 (+4 -0)
  49. arch/um/drivers/chan_kern.c (+15 -7)
  50. arch/um/drivers/mcast_kern.c (+1 -3)
  51. arch/um/drivers/mcast_user.c (+15 -32)
  52. arch/um/drivers/ubd_kern.c (+3 -293)
  53. arch/um/include/sysdep-i386/ptrace.h (+1 -1)
  54. arch/um/include/sysdep-x86_64/checksum.h (+10 -16)
  55. arch/um/include/sysdep-x86_64/ptrace.h (+26 -36)
  56. arch/um/kernel/Makefile (+1 -1)
  57. arch/um/kernel/checksum.c (+0 -0)
  58. arch/um/kernel/initrd.c (+78 -0)
  59. arch/um/kernel/irq_user.c (+8 -2)
  60. arch/um/kernel/ksyms.c (+1 -0)
  61. arch/um/kernel/mem.c (+31 -9)
  62. arch/um/kernel/ptrace.c (+3 -3)
  63. arch/um/kernel/trap_kern.c (+4 -32)
  64. arch/um/kernel/tt/ksyms.c (+1 -0)
  65. arch/um/kernel/uml.lds.S (+2 -0)
  66. arch/um/sys-i386/Makefile (+2 -2)
  67. arch/um/sys-i386/delay.c (+12 -4)
  68. arch/um/sys-x86_64/Makefile (+2 -4)
  69. arch/um/sys-x86_64/delay.c (+15 -18)
  70. arch/um/sys-x86_64/ksyms.c (+1 -2)
  71. arch/um/sys-x86_64/ptrace.c (+5 -4)
  72. arch/um/sys-x86_64/syscalls.c (+1 -0)
  73. arch/um/sys-x86_64/user-offsets.c (+8 -0)
  74. arch/x86_64/Kconfig (+14 -0)
  75. arch/x86_64/defconfig (+30 -28)
  76. arch/x86_64/kernel/Makefile (+1 -0)
  77. arch/x86_64/kernel/apic.c (+3 -2)
  78. arch/x86_64/kernel/entry.S (+9 -2)
  79. arch/x86_64/kernel/io_apic.c (+10 -71)
  80. arch/x86_64/kernel/mpparse.c (+15 -7)
  81. arch/x86_64/kernel/nmi.c (+175 -73)
  82. arch/x86_64/kernel/pmtimer.c (+101 -0)
  83. arch/x86_64/kernel/ptrace.c (+11 -6)
  84. arch/x86_64/kernel/setup.c (+18 -12)
  85. arch/x86_64/kernel/signal.c (+3 -1)
  86. arch/x86_64/kernel/smpboot.c (+186 -77)
  87. arch/x86_64/kernel/time.c (+45 -17)
  88. arch/x86_64/kernel/traps.c (+2 -0)
  89. arch/x86_64/kernel/vsyscall.c (+3 -2)
  90. arch/x86_64/kernel/x8664_ksyms.c (+2 -1)
  91. arch/x86_64/mm/fault.c (+9 -2)
  92. arch/x86_64/mm/ioremap.c (+8 -21)
  93. crypto/crypto_null.c (+15 -13)
  94. crypto/internal.h (+1 -1)
  95. drivers/base/Makefile (+1 -1)
  96. drivers/base/bus.c (+0 -1)
  97. drivers/base/core.c (+2 -5)
  98. drivers/base/interface.c (+0 -51)
  99. drivers/base/power/power.h (+0 -11)
  100. drivers/base/power/resume.c (+10 -1)

+ 2 - 3
CREDITS

@@ -882,13 +882,12 @@ S: Blacksburg, Virginia 24061
 S: USA
 S: USA
 
 
 N: Randy Dunlap
 N: Randy Dunlap
-E: rddunlap@osdl.org
+E: rdunlap@xenotime.net
 W: http://www.xenotime.net/linux/linux.html
 W: http://www.xenotime.net/linux/linux.html
 W: http://www.linux-usb.org
 W: http://www.linux-usb.org
 D: Linux-USB subsystem, USB core/UHCI/printer/storage drivers
 D: Linux-USB subsystem, USB core/UHCI/printer/storage drivers
 D: x86 SMP, ACPI, bootflag hacking
 D: x86 SMP, ACPI, bootflag hacking
-S: 12725 SW Millikan Way, Suite 400
-S: Beaverton, Oregon 97005
+S: (ask for current address)
 S: USA
 S: USA
 
 
 N: Bob Dunlop
 N: Bob Dunlop

+ 1 - 2
Documentation/cpusets.txt

@@ -252,8 +252,7 @@ in a tasks processor placement.
 There is an exception to the above.  If hotplug funtionality is used
 There is an exception to the above.  If hotplug funtionality is used
 to remove all the CPUs that are currently assigned to a cpuset,
 to remove all the CPUs that are currently assigned to a cpuset,
 then the kernel will automatically update the cpus_allowed of all
 then the kernel will automatically update the cpus_allowed of all
-tasks attached to CPUs in that cpuset with the online CPUs of the
-nearest parent cpuset that still has some CPUs online.  When memory
+tasks attached to CPUs in that cpuset to allow all CPUs.  When memory
 hotplug functionality for removing Memory Nodes is available, a
 hotplug functionality for removing Memory Nodes is available, a
 similar exception is expected to apply there as well.  In general,
 similar exception is expected to apply there as well.  In general,
 the kernel prefers to violate cpuset placement, over starving a task
 the kernel prefers to violate cpuset placement, over starving a task

+ 205 - 0
Documentation/dvb/README.flexcop

@@ -0,0 +1,205 @@
+This README escorted the skystar2-driver rewriting procedure. It describes the
+state of the new flexcop-driver set and some internals are written down here
+too.
+
+This document hopefully describes things about the flexcop and its
+device-offsprings. Goal was to write an easy-to-write and easy-to-read set of
+drivers based on the skystar2.c and other information.
+
+Remark: flexcop-pci.c was a copy of skystar2.c, but every line has been
+touched and rewritten.
+
+History & News
+==============
+  2005-04-01 - correct USB ISOC transfers (thanks to Vadim Catana)
+
+
+
+
+General coding processing
+=========================
+
+We should proceed as follows (as long as no one complains):
+
+0) Think before start writing code!
+
+1) rewriting the skystar2.c with the help of the flexcop register descriptions
+and splitting up the files to a pci-bus-part and a flexcop-part.
+The new driver will be called b2c2-flexcop-pci.ko/b2c2-flexcop-usb.ko for the
+device-specific part and b2c2-flexcop.ko for the common flexcop-functions.
+
+2) Search for errors in the leftover of flexcop-pci.c (compare with pluto2.c
+and other pci drivers)
+
+3) make some beautification (see 'Improvements when rewriting (refactoring) is
+done')
+
+4) Testing the new driver and maybe substitute the skystar2.c with it, to reach
+a wider tester audience.
+
+5) creating an usb-bus-part using the already written flexcop code for the pci
+card.
+
+Idea: create a kernel-object for the flexcop and export all important
+functions. This option saves kernel-memory, but maybe a lot of functions have
+to be exported to kernel namespace.
+
+
+Current situation
+=================
+
+0) Done :)
+1) Done (some minor issues left)
+2) Done
+3) Not ready yet, more information is necessary
+4) next to be done (see the table below)
+5) USB driver is working (yes, there are some minor issues)
+
+What seems to be ready?
+-----------------------
+
+1) Rewriting
+1a) i2c is cut off from the flexcop-pci.c and seems to work
+1b) moved tuner and demod stuff from flexcop-pci.c to flexcop-tuner-fe.c
+1c) moved lnb and diseqc stuff from flexcop-pci.c to flexcop-tuner-fe.c
+1e) eeprom (reading MAC address)
+1d) sram (no dynamic sll size detection (commented out) (using default as JJ told me))
+1f) misc. register accesses for reading parameters (e.g. resetting, revision)
+1g) pid/mac filter (flexcop-hw-filter.c)
+1i) dvb-stuff initialization in flexcop.c (done)
+1h) dma stuff (now just using the size-irq, instead of all-together, to be done)
+1j) remove flexcop initialization from flexcop-pci.c completely (done)
+1l) use a well working dma IRQ method (done, see 'Known bugs and problems and TODO')
+1k) cleanup flexcop-files (remove unused EXPORT_SYMBOLs, make static from
+non-static where possible, moved code to proper places)
+
+2) Search for errors in the leftover of flexcop-pci.c (partially done)
+5a) add MAC address reading
+5c) feeding of ISOC data to the software demux (format of the isochronous data
+and speed optimization, no real error) (thanks to Vadim Catana)
+
+What to do in the near future?
+--------------------------------------
+(no special order here)
+
+5) USB driver
+5b) optimize isoc-transfer (submitting/killing isoc URBs when transfer is starting)
+
+Testing changes
+---------------
+
+O             = item is working
+P             = item is partially working
+X             = item is not working
+N             = item does not apply here
+<empty field> = item need to be examined
+
+       | PCI                               | USB
+item   | mt352 | nxt2002 | stv0299 | mt312 | mt352 | nxt2002 | stv0299 | mt312
+-------+-------+---------+---------+-------+-------+---------+---------+-------
+1a)    | O     |         |         |       | N     | N       | N       | N
+1b)    | O     |         |         |       |       |         | O       |
+1c)    | N     | N       |         |       | N     | N       | O       |
+1d)    |                 O                 |                 O
+1e)    |                 O                 |                 O
+1f)    |                                   P
+1g)    |                                   O
+1h)    |                 P                 |
+1i)    |                 O                 |                 N
+1j)    |                 O                 |                 N
+1l)    |                 O                 |                 N
+2)     |                 O                 |                 N
+5a)    |                 N                 |                 O
+5b)*   |                 N                 |
+5c)    |                 N                 |                 O
+
+* - not done yet
+
+Known bugs and problems and TODO
+--------------------------------
+
+1g/h/l) when pid filtering is enabled on the pci card
+
+DMA usage currently:
+  The DMA is splitted in 2 equal-sized subbuffers. The Flexcop writes to first
+  address and triggers an IRQ when it's full and starts writing to the second
+  address. When the second address is full, the IRQ is triggered again, and
+  the flexcop writes to first address again, and so on.
+  The buffersize of each address is currently 640*188 bytes.
+
+  Problem is, when using hw-pid-filtering and doing some low-bandwidth
+  operation (like scanning) the buffers won't be filled enough to trigger
+  the IRQ. That's why:
+
+  When PID filtering is activated, the timer IRQ is used. Every 1.97 ms the IRQ
+  is triggered.  Is the current write address of DMA1 different to the one
+  during the last IRQ, then the data is passed to the demuxer.
+
+  There is an additional DMA-IRQ-method: packet count IRQ. This isn't
+  implemented correctly yet.
+
+  The solution is to disable HW PID filtering, but I don't know how the DVB
+  API software demux behaves on slow systems with 45MBit/s TS.
+
+Solved bugs :)
+--------------
+1g) pid-filtering (somehow pid index 4 and 5 (EMM_PID and ECM_PID) aren't
+working)
+SOLUTION: also index 0 was affected, because net_translation is done for
+these indexes by default
+
+5b) isochronous transfer does only work in the first attempt (for the Sky2PC
+USB, Air2PC is working) SOLUTION: the flexcop was going asleep and never really
+woke up again (don't know if this need fixes, see
+flexcop-fe-tuner.c:flexcop_sleep)
+
+NEWS: when the driver is loaded and unloaded and loaded again (w/o doing
+anything in the while the driver is loaded the first time), no transfers take
+place anymore.
+
+Improvements when rewriting (refactoring) is done
+=================================================
+
+- split sleeping of the flexcop (misc_204.ACPI3_sig = 1;) from lnb_control
+  (enable sleeping for other demods than dvb-s)
+- add support for CableStar (stv0297 Microtune 203x/ALPS) (almost done, incompatibilities with the Nexus-CA)
+
+Debugging
+---------
+- add verbose debugging to skystar2.c (dump the reg_dw_data) and compare it
+  with this flexcop, this is important, because i2c is now using the
+  flexcop_ibi_value union from flexcop-reg.h (do you have a better idea for
+  that, please tell us so).
+
+Everything which is identical in the following table, can be put into a common
+flexcop-module.
+
+                  PCI                  USB
+-------------------------------------------------------------------------------
+Different:
+Register access:  accessing IO memory  USB control message
+I2C bus:          I2C bus of the FC    USB control message
+Data transfer:    DMA                  isochronous transfer
+EEPROM transfer:  through i2c bus      not clear yet
+
+Identical:
+Streaming:                 accessing registers
+PID Filtering:             accessing registers
+Sram destinations:         accessing registers
+Tuner/Demod:                     I2C bus
+DVB-stuff:            can be written for common use
+
+Acknowledgements (just for the rewriting part)
+================
+
+Bjarne Steinsbo thought a lot in the first place of the pci part for this code
+sharing idea.
+
+Andreas Oberritter for providing a recent PCI initialization template
+(pluto2.c).
+
+Boleslaw Ciesielski for pointing out a problem with firmware loader.
+
+Vadim Catana for correcting the USB transfer.
+
+comments, critics and ideas to linux-dvb@linuxtv.org.
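
The timer-IRQ scheme described in the README above is easy to picture with a small stand-alone C program. It is not taken from the driver; only the 640*188 sub-buffer size comes from the text, everything else is made up for illustration. On every tick the handler compares the current DMA write offset with the one remembered from the previous tick, and only then hands the newly written bytes to the demuxer.

/* Toy simulation of the timer-IRQ scheme described above: two equal
 * sub-buffers, and a periodic check whether the (simulated) DMA write
 * offset moved since the last check.  Purely illustrative. */
#include <stdio.h>

#define SUBBUF_SIZE (640 * 188)		/* per-address buffer size from the text */

static unsigned int write_off;		/* simulated DMA1 write offset */
static unsigned int last_off;		/* offset seen at the previous timer IRQ */

static void timer_irq(void)
{
	if (write_off != last_off) {
		/* in the driver this is where data would go to the demuxer */
		printf("pass %u new bytes to demux\n",
		       (write_off - last_off + 2 * SUBBUF_SIZE) % (2 * SUBBUF_SIZE));
		last_off = write_off;
	}
}

int main(void)
{
	write_off = 376;	/* low-bandwidth case: only two TS packets arrived */
	timer_irq();		/* the timer IRQ still delivers them to the demuxer */
	timer_irq();		/* nothing new since last tick: no work */
	return 0;
}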

+ 24 - 45
Documentation/dvb/bt8xx.txt

@@ -17,74 +17,53 @@ Because of this, you need to enable
 "Device drivers" => "Multimedia devices"
 "Device drivers" => "Multimedia devices"
   => "Video For Linux" => "BT848 Video For Linux"
   => "Video For Linux" => "BT848 Video For Linux"
 
 
+Furthermore you need to enable
+"Device drivers" => "Multimedia devices" => "Digital Video Broadcasting Devices"
+  => "DVB for Linux" "DVB Core Support" "Nebula/Pinnacle PCTV/TwinHan PCI Cards"
+
 2) Loading Modules
 2) Loading Modules
 ==================
 ==================
 
 
 In general you need to load the bttv driver, which will handle the gpio and
 In general you need to load the bttv driver, which will handle the gpio and
-i2c communication for us. Next you need the common dvb-bt8xx device driver
-and one frontend driver.
-
-The bttv driver will HANG YOUR SYSTEM IF YOU DO NOT SPECIFY THE CORRECT 
-CARD ID!
-
-(If you don't get your card running and you suspect that the card id you're
-using is wrong, have a look at "bttv-cards.c" for a list of possible card
-ids.)
-
-Pay attention to failures when you load the frontend drivers
-(e.g. dmesg, /var/log/messages).
+i2c communication for us, plus the common dvb-bt8xx device driver.
+The frontends for Nebula (nxt6000), Pinnacle PCTV (cx24110) and
+TwinHan (dst) are loaded automatically by the dvb-bt8xx device driver.
 
 
 3a) Nebula / Pinnacle PCTV
 3a) Nebula / Pinnacle PCTV
 --------------------------
 --------------------------
 
 
-   $ modprobe bttv i2c_hw=1 card=0x68
-   $ modprobe dvb-bt8xx
-   
-For Nebula cards use the "nxt6000" frontend driver:
-   $ modprobe nxt6000
+   $ modprobe bttv (normally bttv is being loaded automatically by kmod)
+   $ modprobe dvb-bt8xx (or just place dvb-bt8xx in /etc/modules for automatic loading)
 
 
-For Pinnacle PCTV cards use the "cx24110" frontend driver:
-   $ modprobe cx24110
 
 
-3b) TwinHan
------------
+3b) TwinHan and Clones
+--------------------------
 
 
    $ modprobe bttv i2c_hw=1 card=0x71
    $ modprobe bttv i2c_hw=1 card=0x71
    $ modprobe dvb-bt8xx
    $ modprobe dvb-bt8xx
    $ modprobe dst
    $ modprobe dst
 
 
-The value 0x71 will override the PCI type detection for dvb-bt8xx, which 
-is necessary for TwinHan cards.#
+The value 0x71 will override the PCI type detection for dvb-bt8xx,
+which  is necessary for TwinHan cards.
 
 
-If you're having an older card (blue color circuit) and card=0x71 locks your
-machine, try using 0x68, too. If that does not work, ask on the DVB mailing list.
+If you're having an older card (blue color circuit) and card=0x71 locks
+your machine, try using 0x68, too. If that does not work, ask on the
+mailing list.
 
 
-The DST module takes a couple of useful parameters, in case the
-dst drivers fails to detect your type of card correctly.
+The DST module takes a couple of useful parameters.
 
 
-dst_type takes values 0 (satellite), 1 (terrestial TV), 2 (cable).
+verbose takes values 0 to 5. These values control the verbosity level.
 
 
-dst_type_flags takes bit combined values:
-1 = new tuner type packets. You can use this if your card is detected
-    and you have debug and you continually see the tuner packets not
-    working (make sure not a basic problem like dish alignment etc.)
+debug takes values 0 and 1. You can either disable or enable debugging.
 
 
-2 = TS 204. If your card tunes OK, but the picture is terrible, seemingly
-    breaking up in one half continually, and crc fails a lot, then
-    this is worth a try (or trying to turn off)
+dst_addons takes values 0 and 0x20. A value of 0 means it is a FTA card.
+0x20 means it has a Conditional Access slot.
 
 
-4 = has symdiv. Some cards, mostly without new tuner packets, require
-    a symbol division algorithm. Doesn't apply to terrestial TV.
-
-You can also specify a value to have the autodetected values turned off
-(e.g. 0). The autodected values are determined bythe cards 'response
+The autodected values are determined bythe cards 'response
 string' which you can see in your logs e.g.
 string' which you can see in your logs e.g.
 
 
-dst_check_ci: recognize DST-MOT
-
-or 
+dst_get_device_id: Recognise [DSTMCI]
 
 
-dst_check_ci: unable to recognize DSTXCI or STXCI
 
 
 --
 --
-Authors: Richard Walker, Jamie Honan, Michael Hunold
+Authors: Richard Walker, Jamie Honan, Michael Hunold, Manu Abraham

+ 219 - 0
Documentation/dvb/ci.txt

@@ -0,0 +1,219 @@
+* For the user
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+NOTE: This document describes the usage of the high level CI API as
+in accordance to the Linux DVB API. This is a not a documentation for the,
+existing low level CI API.
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To utilize the High Level CI capabilities,
+
+(1*) This point is valid only for the Twinhan/clones
+  For the Twinhan/Twinhan clones, the dst_ca module handles the CI
+  hardware handling.This module is loaded automatically if a CI
+  (Common Interface, that holds the CAM (Conditional Access Module)
+  is detected.
+
+(2) one requires a userspace application, ca_zap. This small userland
+  application is in charge of sending the descrambling related information
+  to the CAM.
+
+This application requires the following to function properly as of now.
+
+	(a) Tune to a valid channel, with szap.
+	  eg: $ szap -c channels.conf -r "TMC" -x
+
+	(b) a channels.conf containing a valid PMT PID
+
+	  eg: TMC:11996:h:0:27500:278:512:650:321
+
+	  here 278 is a valid PMT PID. the rest of the values are the
+	  same ones that szap uses.
+
+	(c) after running a szap, you have to run ca_zap, for the
+	  descrambler to function,
+
+	  eg: $ ca_zap patched_channels.conf "TMC"
+
+	  The patched means a patch to apply to scan, such that scan can
+	  generate a channels.conf_with pmt, which has this PMT PID info
+	  (NOTE: szap cannot use this channels.conf with the PMT_PID)
+
+
+	(d) Hopeflly Enjoy your favourite subscribed channel as you do with
+	  a FTA card.
+
+(3) Currently ca_zap, and dst_test, both are meant for demonstration
+  purposes only, they can become full fledged applications if necessary.
+
+
+* Cards that fall in this category
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+At present the cards that fall in this category are the Twinhan and it's
+clones, these cards are available as VVMER, Tomato, Hercules, Orange and
+so on.
+
+* CI modules that are supported
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The CI module support is largely dependant upon the firmware on the cards
+Some cards do support almost all of the available CI modules. There is
+nothing much that can be done in order to make additional CI modules
+working with these cards.
+
+Modules that have been tested by this driver at present are
+
+(1) Irdeto 1 and 2 from SCM
+(2) Viaccess from SCM
+(3) Dragoncam
+
+* The High level CI API
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* For the programmer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+With the High Level CI approach any new card with almost any random
+architecture can be implemented with this style, the definitions
+insidethe switch statement can be easily adapted for any card, thereby
+eliminating the need for any additional ioctls.
+
+The disadvantage is that the driver/hardware has to manage the rest. For
+the application programmer it would be as simple as sending/receiving an
+array to/from the CI ioctls as defined in the Linux DVB API. No changes
+have been made in the API to accomodate this feature.
+
+
+* Why the need for another CI interface ?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+This is one of the most commonly asked question. Well a nice question.
+Strictly speaking this is not a new interface.
+
+The CI interface is defined in the DVB API in ca.h as
+
+typedef struct ca_slot_info {
+	int num;               /* slot number */
+
+	int type;              /* CA interface this slot supports */
+#define CA_CI            1     /* CI high level interface */
+#define CA_CI_LINK       2     /* CI link layer level interface */
+#define CA_CI_PHYS       4     /* CI physical layer level interface */
+#define CA_DESCR         8     /* built-in descrambler */
+#define CA_SC          128     /* simple smart card interface */
+
+	unsigned int flags;
+#define CA_CI_MODULE_PRESENT 1 /* module (or card) inserted */
+#define CA_CI_MODULE_READY   2
+} ca_slot_info_t;
+
+
+
+This CI interface follows the CI high level interface, which is not
+implemented by most applications. Hence this area is revisited.
+
+This CI interface is quite different in the case that it tries to
+accomodate all other CI based devices, that fall into the other categories
+
+This means that this CI interface handles the EN50221 style tags in the
+Application layer only and no session management is taken care of by the
+application. The driver/hardware will take care of all that.
+
+This interface is purely an EN50221 interface exchanging APDU's. This
+means that no session management, link layer or a transport layer do
+exist in this case in the application to driver communication. It is
+as simple as that. The driver/hardware has to take care of that.
+
+
+With this High Level CI interface, the interface can be defined with the
+regular ioctls.
+
+All these ioctls are also valid for the High level CI interface
+
+#define CA_RESET          _IO('o', 128)
+#define CA_GET_CAP        _IOR('o', 129, ca_caps_t)
+#define CA_GET_SLOT_INFO  _IOR('o', 130, ca_slot_info_t)
+#define CA_GET_DESCR_INFO _IOR('o', 131, ca_descr_info_t)
+#define CA_GET_MSG        _IOR('o', 132, ca_msg_t)
+#define CA_SEND_MSG       _IOW('o', 133, ca_msg_t)
+#define CA_SET_DESCR      _IOW('o', 134, ca_descr_t)
+#define CA_SET_PID        _IOW('o', 135, ca_pid_t)
+
+
+On querying the device, the device yields information thus
+
+CA_GET_SLOT_INFO
+----------------------------
+Command = [info]
+APP: Number=[1]
+APP: Type=[1]
+APP: flags=[1]
+APP: CI High level interface
+APP: CA/CI Module Present
+
+CA_GET_CAP
+----------------------------
+Command = [caps]
+APP: Slots=[1]
+APP: Type=[1]
+APP: Descrambler keys=[16]
+APP: Type=[1]
+
+CA_SEND_MSG
+----------------------------
+Descriptors(Program Level)=[ 09 06 06 04 05 50 ff f1]
+Found CA descriptor @ program level
+
+(20) ES type=[2] ES pid=[201]  ES length =[0 (0x0)]
+(25) ES type=[4] ES pid=[301]  ES length =[0 (0x0)]
+ca_message length is 25 (0x19) bytes
+EN50221 CA MSG=[ 9f 80 32 19 03 01 2d d1 f0 08 01 09 06 06 04 05 50 ff f1 02 e0 c9 00 00 04 e1 2d 00 00]
+
+
+Not all ioctl's are implemented in the driver from the API, the other
+features of the hardware that cannot be implemented by the API are achieved
+using the CA_GET_MSG and CA_SEND_MSG ioctls. An EN50221 style wrapper is
+used to exchange the data to maintain compatibility with other hardware.
+
+
+/* a message to/from a CI-CAM */
+typedef struct ca_msg {
+	unsigned int index;
+	unsigned int type;
+	unsigned int length;
+	unsigned char msg[256];
+} ca_msg_t;
+
+
+The flow of data can be described thus,
+
+
+
+
+
+	App (User)
+	-----
+	parse
+	  |
+	  |
+	  v
+	en50221 APDU (package)
+   --------------------------------------
+   |	  |				| High Level CI driver
+   |	  |				|
+   |	  v				|
+   |	en50221 APDU (unpackage)	|
+   |	  |				|
+   |	  |				|
+   |	  v				|
+   |	sanity checks			|
+   |	  |				|
+   |	  |				|
+   |	  v				|
+   |	do (H/W dep)			|
+   --------------------------------------
+	  |    Hardware
+	  |
+	  v
+
+
+
+
+The High Level CI interface uses the EN50221 DVB standard, following a
+standard ensures futureproofness.
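
As a rough sketch of how a userspace tool such as ca_zap might drive the ioctls listed in ci.txt above (this fragment is not taken from ca_zap; the device node path and the APDU bytes are assumptions made for illustration):

/* Minimal userspace sketch: query slot info and push one application-layer
 * APDU through the high level CI ioctls.  Adjust the device path for your
 * adapter. */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/dvb/ca.h>

int main(void)
{
	ca_slot_info_t info;
	ca_msg_t msg;
	int fd = open("/dev/dvb/adapter0/ca0", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&info, 0, sizeof(info));
	info.num = 0;				/* first slot */
	if (ioctl(fd, CA_GET_SLOT_INFO, &info) < 0) {
		perror("CA_GET_SLOT_INFO");
		close(fd);
		return 1;
	}
	printf("slot %d: type=%d flags=0x%x\n", info.num, info.type, info.flags);
	if (info.flags & CA_CI_MODULE_READY)
		printf("CAM is ready\n");

	/* hand an application-layer APDU to the driver; session and transport
	 * layer handling is left to the driver/hardware, as described above */
	memset(&msg, 0, sizeof(msg));
	msg.length = 3;
	msg.msg[0] = 0x9f;			/* illustrative EN50221 tag bytes */
	msg.msg[1] = 0x80;
	msg.msg[2] = 0x20;
	if (ioctl(fd, CA_SEND_MSG, &msg) < 0)
		perror("CA_SEND_MSG");

	close(fd);
	return 0;
}

The real ca_zap additionally builds the descrambling information (CA PMT) from the channel's PMT PID, as described in the usage notes above.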

+ 2 - 2
Documentation/dvb/get_dvb_firmware

@@ -107,7 +107,7 @@ sub tda10045 {
 sub tda10046 {
 sub tda10046 {
     my $sourcefile = "tt_budget_217g.zip";
     my $sourcefile = "tt_budget_217g.zip";
     my $url = "http://www.technotrend.de/new/217g/$sourcefile";
     my $url = "http://www.technotrend.de/new/217g/$sourcefile";
-    my $hash = "a25b579e37109af60f4a36c37893957c";
+    my $hash = "6a7e1e2f2644b162ff0502367553c72d";
     my $outfile = "dvb-fe-tda10046.fw";
     my $outfile = "dvb-fe-tda10046.fw";
     my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
     my $tmpdir = tempdir(DIR => "/tmp", CLEANUP => 1);
 
 
@@ -115,7 +115,7 @@ sub tda10046 {
 
 
     wgetfile($sourcefile, $url);
     wgetfile($sourcefile, $url);
     unzip($sourcefile, $tmpdir);
     unzip($sourcefile, $tmpdir);
-    extract("$tmpdir/software/OEM/PCI/App/ttlcdacc.dll", 0x3f731, 24479, "$tmpdir/fwtmp");
+    extract("$tmpdir/software/OEM/PCI/App/ttlcdacc.dll", 0x3f731, 24478, "$tmpdir/fwtmp");
     verify("$tmpdir/fwtmp", $hash);
     verify("$tmpdir/fwtmp", $hash);
     copy("$tmpdir/fwtmp", $outfile);
     copy("$tmpdir/fwtmp", $outfile);
 
 

+ 20 - 0
Documentation/feature-removal-schedule.txt

@@ -63,3 +63,23 @@ Why:	Outside of Linux, the only implementations of anything even
 	people, who might be using implementations that I am not aware
 	people, who might be using implementations that I am not aware
 	of, to adjust to this upcoming change.
 	of, to adjust to this upcoming change.
 Who:	Paul E. McKenney <paulmck@us.ibm.com>
 Who:	Paul E. McKenney <paulmck@us.ibm.com>
+
+---------------------------
+
+What:	IEEE1394 Audio and Music Data Transmission Protocol driver,
+	Connection Management Procedures driver
+When:	November 2005
+Files:	drivers/ieee1394/{amdtp,cmp}*
+Why:	These are incomplete, have never worked, and are better implemented
+	in userland via raw1394 (see http://freebob.sourceforge.net/ for
+	example.)
+Who:	Jody McIntyre <scjody@steamballoon.com>
+
+---------------------------
+
+What:	raw1394: requests of type RAW1394_REQ_ISO_SEND, RAW1394_REQ_ISO_LISTEN
+When:	November 2005
+Why:	Deprecated in favour of the new ioctl-based rawiso interface, which is
+	more efficient.  You should really be using libraw1394 for raw1394
+	access anyway.
+Who:	Jody McIntyre <scjody@steamballoon.com>

+ 2 - 4
Documentation/filesystems/sysfs-pci.txt

@@ -7,7 +7,6 @@ that support it.  For example, a given bus might look like this:
      |-- 0000:17:00.0
      |-- 0000:17:00.0
      |   |-- class
      |   |-- class
      |   |-- config
      |   |-- config
-     |   |-- detach_state
      |   |-- device
      |   |-- device
      |   |-- irq
      |   |-- irq
      |   |-- local_cpus
      |   |-- local_cpus
@@ -19,7 +18,7 @@ that support it.  For example, a given bus might look like this:
      |   |-- subsystem_device
      |   |-- subsystem_device
      |   |-- subsystem_vendor
      |   |-- subsystem_vendor
      |   `-- vendor
      |   `-- vendor
-     `-- detach_state
+     `-- ...
 
 
 The topmost element describes the PCI domain and bus number.  In this case,
 The topmost element describes the PCI domain and bus number.  In this case,
 the domain number is 0000 and the bus number is 17 (both values are in hex).
 the domain number is 0000 and the bus number is 17 (both values are in hex).
@@ -31,7 +30,6 @@ files, each with their own function.
        ----		   --------
        ----		   --------
        class		   PCI class (ascii, ro)
        class		   PCI class (ascii, ro)
        config		   PCI config space (binary, rw)
        config		   PCI config space (binary, rw)
-       detach_state	   connection status (bool, rw)
        device		   PCI device (ascii, ro)
        device		   PCI device (ascii, ro)
        irq		   IRQ number (ascii, ro)
        irq		   IRQ number (ascii, ro)
        local_cpus	   nearby CPU mask (cpumask, ro)
        local_cpus	   nearby CPU mask (cpumask, ro)
@@ -85,4 +83,4 @@ useful return codes should be provided.
 
 
 Legacy resources are protected by the HAVE_PCI_LEGACY define.  Platforms
 Legacy resources are protected by the HAVE_PCI_LEGACY define.  Platforms
 wishing to support legacy functionality should define it and provide
 wishing to support legacy functionality should define it and provide
-pci_legacy_read, pci_legacy_write and pci_mmap_legacy_page_range functions.
+pci_legacy_read, pci_legacy_write and pci_mmap_legacy_page_range functions.

+ 0 - 21
Documentation/power/devices.txt

@@ -207,27 +207,6 @@ SYSTEM_SHUTDOWN, I do not understand this one too much. probably event
 #READY_AFTER_RESUME
 #READY_AFTER_RESUME
 #
 #
 
 
-Driver Detach Power Management
-
-The kernel now supports the ability to place a device in a low-power
-state when it is detached from its driver, which happens when its
-module is removed. 
-
-Each device contains a 'detach_state' file in its sysfs directory
-which can be used to control this state. Reading from this file
-displays what the current detach state is set to. This is 0 (On) by
-default. A user may write a positive integer value to this file in the
-range of 1-4 inclusive. 
-
-A value of 1-3 will indicate the device should be placed in that
-low-power state, which will cause ->suspend() to be called for that
-device. A value of 4 indicates that the device should be shutdown, so
-->shutdown() will be called for that device. 
-
-The driver is responsible for reinitializing the device when the
-module is re-inserted during it's ->probe() (or equivalent) method. 
-The driver core will not call any extra functions when binding the
-device to the driver. 
 
 
 pm_message_t meaning
 pm_message_t meaning
 
 

+ 2 - 2
Documentation/powerpc/hvcs.txt

@@ -347,8 +347,8 @@ address that is created by firmware.  An example vty-server sysfs entry
 looks like the following:
 looks like the following:
 
 
 	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # ls
 	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # ls
-	.   current_vty   devspec  name          partner_vtys
-	..  detach_state  index    partner_clcs  vterm_state
+	.   current_vty   devspec       name          partner_vtys
+	..  index         partner_clcs  vterm_state
 
 
 Each entry is provided, by default with a "name" attribute.  Reading the
 Each entry is provided, by default with a "name" attribute.  Reading the
 "name" attribute will reveal the device type as shown in the following
 "name" attribute will reveal the device type as shown in the following

+ 3 - 0
Documentation/x86_64/boot-options.txt

@@ -25,6 +25,9 @@ APICs
 
 
    noapictimer	 Don't set up the APIC timer
    noapictimer	 Don't set up the APIC timer
 
 
+   no_timer_check Don't check the IO-APIC timer. This can work around
+		 problems with incorrect timer initialization on some boards.
+
 Early Console
 Early Console
 
 
    syntax: earlyprintk=vga
    syntax: earlyprintk=vga

+ 2 - 2
Makefile

@@ -1,7 +1,7 @@
 VERSION = 2
 VERSION = 2
 PATCHLEVEL = 6
 PATCHLEVEL = 6
 SUBLEVEL = 12
 SUBLEVEL = 12
-EXTRAVERSION =-rc4
+EXTRAVERSION =-rc5
 NAME=Woozy Numbat
 NAME=Woozy Numbat
 
 
 # *DOCUMENTATION*
 # *DOCUMENTATION*
@@ -530,7 +530,7 @@ endif
 include $(srctree)/arch/$(ARCH)/Makefile
 include $(srctree)/arch/$(ARCH)/Makefile
 
 
 # arch Makefile may override CC so keep this after arch Makefile is included
 # arch Makefile may override CC so keep this after arch Makefile is included
-NOSTDINC_FLAGS := -nostdinc -isystem $(shell $(CC) -print-file-name=include)
+NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
 CHECKFLAGS     += $(NOSTDINC_FLAGS)
 CHECKFLAGS     += $(NOSTDINC_FLAGS)
 
 
 # warn about C99 declaration after statement
 # warn about C99 declaration after statement

+ 2 - 5
arch/alpha/kernel/osf_sys.c

@@ -1150,16 +1150,13 @@ osf_usleep_thread(struct timeval32 __user *sleep, struct timeval32 __user *remai
 	if (get_tv32(&tmp, sleep))
 	if (get_tv32(&tmp, sleep))
 		goto fault;
 		goto fault;
 
 
-	ticks = tmp.tv_usec;
-	ticks = (ticks + (1000000 / HZ) - 1) / (1000000 / HZ);
-	ticks += tmp.tv_sec * HZ;
+	ticks = timeval_to_jiffies(&tmp);
 
 
 	current->state = TASK_INTERRUPTIBLE;
 	current->state = TASK_INTERRUPTIBLE;
 	ticks = schedule_timeout(ticks);
 	ticks = schedule_timeout(ticks);
 
 
 	if (remain) {
 	if (remain) {
-		tmp.tv_sec = ticks / HZ;
-		tmp.tv_usec = ticks % HZ;
+		jiffies_to_timeval(ticks, &tmp);
 		if (put_tv32(remain, &tmp))
 		if (put_tv32(remain, &tmp))
 			goto fault;
 			goto fault;
 	}
 	}
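
A stand-alone illustration of the conversion being replaced here (the HZ value is made up; only the helper names come from the patch). It also shows why the open-coded reverse conversion was dubious: it left a jiffy count rather than microseconds in tv_usec, which jiffies_to_timeval() gets right.

/* Worked example of the conversion that timeval_to_jiffies() /
 * jiffies_to_timeval() now handle.  HZ is purely illustrative. */
#include <stdio.h>

#define HZ 1000				/* illustrative tick rate */

int main(void)
{
	long tv_sec = 1, tv_usec = 500;	/* sleep for 1.000500 s */
	long ticks;

	/* old open-coded forward conversion: round the usec part up to
	 * whole ticks, then add the seconds */
	ticks = (tv_usec + (1000000 / HZ) - 1) / (1000000 / HZ);
	ticks += tv_sec * HZ;
	printf("%ld.%06lds -> %ld ticks\n", tv_sec, tv_usec, ticks);	/* 1001 */

	/* old reverse conversion: tv_usec ends up holding ticks, not
	 * microseconds, which is one reason the helpers are preferable */
	printf("back: %ld.%06ld (should be 1.001000)\n", ticks / HZ, ticks % HZ);
	return 0;
}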

+ 1 - 1
arch/arm/mach-s3c2410/clock.c

@@ -478,7 +478,7 @@ static int s3c2440_clk_add(struct sys_device *sysdev)
 {
 {
 	unsigned long upllcon = __raw_readl(S3C2410_UPLLCON);
 	unsigned long upllcon = __raw_readl(S3C2410_UPLLCON);
 
 
-	s3c2440_clk_upll.rate = s3c2410_get_pll(upllcon, clk_xtal.rate) * 2;
+	s3c2440_clk_upll.rate = s3c2410_get_pll(upllcon, clk_xtal.rate);
 
 
 	printk("S3C2440: Clock Support, UPLL %ld.%03ld MHz\n",
 	printk("S3C2440: Clock Support, UPLL %ld.%03ld MHz\n",
 	       print_mhz(s3c2440_clk_upll.rate));
 	       print_mhz(s3c2440_clk_upll.rate));

+ 4 - 2
arch/arm/mach-s3c2410/s3c2440.c

@@ -192,9 +192,11 @@ void __init s3c2440_map_io(struct map_desc *mach_desc, int size)
 
 
 	iotable_init(s3c2440_iodesc, ARRAY_SIZE(s3c2440_iodesc));
 	iotable_init(s3c2440_iodesc, ARRAY_SIZE(s3c2440_iodesc));
 	iotable_init(mach_desc, size);
 	iotable_init(mach_desc, size);
+
 	/* rename any peripherals used differing from the s3c2410 */
 	/* rename any peripherals used differing from the s3c2410 */
 
 
-	s3c_device_i2c.name = "s3c2440-i2c";
+	s3c_device_i2c.name  = "s3c2440-i2c";
+	s3c_device_nand.name = "s3c2440-nand";
 
 
 	/* change irq for watchdog */
 	/* change irq for watchdog */
 
 
@@ -225,7 +227,7 @@ void __init s3c2440_init_clocks(int xtal)
 		break;
 		break;
 
 
 	case S3C2440_CLKDIVN_HDIVN_2:
 	case S3C2440_CLKDIVN_HDIVN_2:
-		hdiv = 1;
+		hdiv = 2;
 		break;
 		break;
 
 
 	case S3C2440_CLKDIVN_HDIVN_4_8:
 	case S3C2440_CLKDIVN_HDIVN_4_8:

+ 10 - 11
arch/arm/mm/Kconfig

@@ -412,21 +412,20 @@ config CPU_BPREDICT_DISABLE
 
 
 config TLS_REG_EMUL
 config TLS_REG_EMUL
 	bool
 	bool
-	default y if (SMP || CPU_32v6) && (CPU_32v5 || CPU_32v4 || CPU_32v3)
+	default y if SMP && (CPU_32v5 || CPU_32v4 || CPU_32v3)
 	help
 	help
-	  We might be running on an ARMv6+ processor which should have the TLS
-	  register but for some reason we can't use it, or maybe an SMP system
-	  using a pre-ARMv6 processor (there are apparently a few prototypes
-	  like that in existence) and therefore access to that register must
-	  be emulated.
+	  An SMP system using a pre-ARMv6 processor (there are apparently
+	  a few prototypes like that in existence) and therefore access to
+	  that required register must be emulated.
 
 
 config HAS_TLS_REG
 config HAS_TLS_REG
 	bool
 	bool
-	depends on CPU_32v6
-	default y if !TLS_REG_EMUL
+	depends on !TLS_REG_EMUL
+	default y if SMP || CPU_32v7
 	help
 	help
 	  This selects support for the CP15 thread register.
 	  This selects support for the CP15 thread register.
-	  It is defined to be available on ARMv6 or later.  If a particular
-	  ARMv6 or later CPU doesn't support it then it must omc;ide "select
-	  TLS_REG_EMUL" along with its other caracteristics.
+	  It is defined to be available on some ARMv6 processors (including
+	  all SMP capable ARMv6's) or later processors.  User space may
+	  assume directly accessing that register and always obtain the
+	  expected value only on ARMv7 and above.
 
 

+ 0 - 80
arch/arm/mm/copypage-v4mc.S

@@ -1,80 +0,0 @@
-/*
- *  linux/arch/arm/lib/copy_page-armv4mc.S
- *
- *  Copyright (C) 1995-2001 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  ASM optimised string functions
- */
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <asm/constants.h>
-
-	.text
-	.align	5
-/*
- * ARMv4 mini-dcache optimised copy_user_page
- *
- * We flush the destination cache lines just before we write the data into the
- * corresponding address.  Since the Dcache is read-allocate, this removes the
- * Dcache aliasing issue.  The writes will be forwarded to the write buffer,
- * and merged as appropriate.
- *
- * Note: We rely on all ARMv4 processors implementing the "invalidate D line"
- * instruction.  If your processor does not supply this, you have to write your
- * own copy_user_page that does the right thing.
- */
-ENTRY(v4_mc_copy_user_page)
-	stmfd	sp!, {r4, lr}			@ 2
-	mov	r4, r0
-	mov	r0, r1
-	bl	map_page_minicache
-	mov	r1, #PAGE_SZ/64			@ 1
-	ldmia	r0!, {r2, r3, ip, lr}		@ 4
-1:	mcr	p15, 0, r4, c7, c6, 1		@ 1   invalidate D line
-	stmia	r4!, {r2, r3, ip, lr}		@ 4
-	ldmia	r0!, {r2, r3, ip, lr}		@ 4+1
-	stmia	r4!, {r2, r3, ip, lr}		@ 4
-	ldmia	r0!, {r2, r3, ip, lr}		@ 4
-	mcr	p15, 0, r4, c7, c6, 1		@ 1   invalidate D line
-	stmia	r4!, {r2, r3, ip, lr}		@ 4
-	ldmia	r0!, {r2, r3, ip, lr}		@ 4
-	subs	r1, r1, #1			@ 1
-	stmia	r4!, {r2, r3, ip, lr}		@ 4
-	ldmneia	r0!, {r2, r3, ip, lr}		@ 4
-	bne	1b				@ 1
-	ldmfd	sp!, {r4, pc}			@ 3
-
-	.align	5
-/*
- * ARMv4 optimised clear_user_page
- *
- * Same story as above.
- */
-ENTRY(v4_mc_clear_user_page)
-	str	lr, [sp, #-4]!
-	mov	r1, #PAGE_SZ/64			@ 1
-	mov	r2, #0				@ 1
-	mov	r3, #0				@ 1
-	mov	ip, #0				@ 1
-	mov	lr, #0				@ 1
-1:	mcr	p15, 0, r0, c7, c6, 1		@ 1   invalidate D line
-	stmia	r0!, {r2, r3, ip, lr}		@ 4
-	stmia	r0!, {r2, r3, ip, lr}		@ 4
-	mcr	p15, 0, r0, c7, c6, 1		@ 1   invalidate D line
-	stmia	r0!, {r2, r3, ip, lr}		@ 4
-	stmia	r0!, {r2, r3, ip, lr}		@ 4
-	subs	r1, r1, #1			@ 1
-	bne	1b				@ 1
-	ldr	pc, [sp], #4
-
-	__INITDATA
-
-	.type	v4_mc_user_fns, #object
-ENTRY(v4_mc_user_fns)
-	.long	v4_mc_clear_user_page
-	.long	v4_mc_copy_user_page
-	.size	v4_mc_user_fns, . - v4_mc_user_fns

+ 111 - 0
arch/arm/mm/copypage-v4mc.c

@@ -0,0 +1,111 @@
+/*
+ *  linux/arch/arm/lib/copypage-armv4mc.S
+ *
+ *  Copyright (C) 1995-2005 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This handles the mini data cache, as found on SA11x0 and XScale
+ * processors.  When we copy a user page page, we map it in such a way
+ * that accesses to this page will not touch the main data cache, but
+ * will be cached in the mini data cache.  This prevents us thrashing
+ * the main data cache on page faults.
+ */
+#include <linux/init.h>
+#include <linux/mm.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+/*
+ * 0xffff8000 to 0xffffffff is reserved for any ARM architecture
+ * specific hacks for copying pages efficiently.
+ */
+#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
+				  L_PTE_CACHEABLE)
+
+#define TOP_PTE(x)	pte_offset_kernel(top_pmd, x)
+
+static DEFINE_SPINLOCK(minicache_lock);
+
+/*
+ * ARMv4 mini-dcache optimised copy_user_page
+ *
+ * We flush the destination cache lines just before we write the data into the
+ * corresponding address.  Since the Dcache is read-allocate, this removes the
+ * Dcache aliasing issue.  The writes will be forwarded to the write buffer,
+ * and merged as appropriate.
+ *
+ * Note: We rely on all ARMv4 processors implementing the "invalidate D line"
+ * instruction.  If your processor does not supply this, you have to write your
+ * own copy_user_page that does the right thing.
+ */
+static void __attribute__((naked))
+mc_copy_user_page(void *from, void *to)
+{
+	asm volatile(
+	"stmfd	sp!, {r4, lr}			@ 2\n\
+	mov	r4, %2				@ 1\n\
+	ldmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+1:	mcr	p15, 0, %1, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	%1!, {r2, r3, ip, lr}		@ 4\n\
+	ldmia	%0!, {r2, r3, ip, lr}		@ 4+1\n\
+	stmia	%1!, {r2, r3, ip, lr}		@ 4\n\
+	ldmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	mcr	p15, 0, %1, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	%1!, {r2, r3, ip, lr}		@ 4\n\
+	ldmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	subs	r4, r4, #1			@ 1\n\
+	stmia	%1!, {r2, r3, ip, lr}		@ 4\n\
+	ldmneia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	bne	1b				@ 1\n\
+	ldmfd	sp!, {r4, pc}			@ 3"
+	:
+	: "r" (from), "r" (to), "I" (PAGE_SIZE / 64));
+}
+
+void v4_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
+{
+	spin_lock(&minicache_lock);
+
+	set_pte(TOP_PTE(0xffff8000), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot));
+	flush_tlb_kernel_page(0xffff8000);
+
+	mc_copy_user_page((void *)0xffff8000, kto);
+
+	spin_unlock(&minicache_lock);
+}
+
+/*
+ * ARMv4 optimised clear_user_page
+ */
+void __attribute__((naked))
+v4_mc_clear_user_page(void *kaddr, unsigned long vaddr)
+{
+	asm volatile(
+	"str	lr, [sp, #-4]!\n\
+	mov	r1, %0				@ 1\n\
+	mov	r2, #0				@ 1\n\
+	mov	r3, #0				@ 1\n\
+	mov	ip, #0				@ 1\n\
+	mov	lr, #0				@ 1\n\
+1:	mcr	p15, 0, r0, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	r0!, {r2, r3, ip, lr}		@ 4\n\
+	stmia	r0!, {r2, r3, ip, lr}		@ 4\n\
+	mcr	p15, 0, r0, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	r0!, {r2, r3, ip, lr}		@ 4\n\
+	stmia	r0!, {r2, r3, ip, lr}		@ 4\n\
+	subs	r1, r1, #1			@ 1\n\
+	bne	1b				@ 1\n\
+	ldr	pc, [sp], #4"
+	:
+	: "I" (PAGE_SIZE / 64));
+}
+
+struct cpu_user_fns v4_mc_user_fns __initdata = {
+	.cpu_clear_user_page	= v4_mc_clear_user_page, 
+	.cpu_copy_user_page	= v4_mc_copy_user_page,
+};

+ 6 - 22
arch/arm/mm/copypage-v6.c

@@ -26,8 +26,8 @@
 #define to_address	(0xffffc000)
 #define to_address	(0xffffc000)
 #define to_pgprot	PAGE_KERNEL
 #define to_pgprot	PAGE_KERNEL
 
 
-static pte_t *from_pte;
-static pte_t *to_pte;
+#define TOP_PTE(x)	pte_offset_kernel(top_pmd, x)
+
 static DEFINE_SPINLOCK(v6_lock);
 static DEFINE_SPINLOCK(v6_lock);
 
 
 #define DCACHE_COLOUR(vaddr) ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT)
 #define DCACHE_COLOUR(vaddr) ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT)
@@ -74,8 +74,8 @@ void v6_copy_user_page_aliasing(void *kto, const void *kfrom, unsigned long vadd
 	 */
 	 */
 	spin_lock(&v6_lock);
 	spin_lock(&v6_lock);
 
 
-	set_pte(from_pte + offset, pfn_pte(__pa(kfrom) >> PAGE_SHIFT, from_pgprot));
-	set_pte(to_pte + offset, pfn_pte(__pa(kto) >> PAGE_SHIFT, to_pgprot));
+	set_pte(TOP_PTE(from_address) + offset, pfn_pte(__pa(kfrom) >> PAGE_SHIFT, from_pgprot));
+	set_pte(TOP_PTE(to_address) + offset, pfn_pte(__pa(kto) >> PAGE_SHIFT, to_pgprot));
 
 
 	from = from_address + (offset << PAGE_SHIFT);
 	from = from_address + (offset << PAGE_SHIFT);
 	to   = to_address + (offset << PAGE_SHIFT);
 	to   = to_address + (offset << PAGE_SHIFT);
@@ -114,7 +114,7 @@ void v6_clear_user_page_aliasing(void *kaddr, unsigned long vaddr)
 	 */
 	 */
 	spin_lock(&v6_lock);
 	spin_lock(&v6_lock);
 
 
-	set_pte(to_pte + offset, pfn_pte(__pa(kaddr) >> PAGE_SHIFT, to_pgprot));
+	set_pte(TOP_PTE(to_address) + offset, pfn_pte(__pa(kaddr) >> PAGE_SHIFT, to_pgprot));
 	flush_tlb_kernel_page(to);
 	flush_tlb_kernel_page(to);
 	clear_page((void *)to);
 	clear_page((void *)to);
 
 
@@ -129,21 +129,6 @@ struct cpu_user_fns v6_user_fns __initdata = {
 static int __init v6_userpage_init(void)
 static int __init v6_userpage_init(void)
 {
 {
 	if (cache_is_vipt_aliasing()) {
 	if (cache_is_vipt_aliasing()) {
-		pgd_t *pgd;
-		pmd_t *pmd;
-
-		pgd = pgd_offset_k(from_address);
-		pmd = pmd_alloc(&init_mm, pgd, from_address);
-		if (!pmd)
-			BUG();
-		from_pte = pte_alloc_kernel(&init_mm, pmd, from_address);
-		if (!from_pte)
-			BUG();
-
-		to_pte = pte_alloc_kernel(&init_mm, pmd, to_address);
-		if (!to_pte)
-			BUG();
-
 		cpu_user.cpu_clear_user_page = v6_clear_user_page_aliasing;
 		cpu_user.cpu_clear_user_page = v6_clear_user_page_aliasing;
 		cpu_user.cpu_copy_user_page = v6_copy_user_page_aliasing;
 		cpu_user.cpu_copy_user_page = v6_copy_user_page_aliasing;
 	}
 	}
@@ -151,5 +136,4 @@ static int __init v6_userpage_init(void)
 	return 0;
 	return 0;
 }
 }
 
 
-__initcall(v6_userpage_init);
-
+core_initcall(v6_userpage_init);

+ 35 - 2
arch/arm/mm/flush.c

@@ -13,6 +13,29 @@
 
 
 #include <asm/cacheflush.h>
 #include <asm/cacheflush.h>
 #include <asm/system.h>
 #include <asm/system.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_CPU_CACHE_VIPT
+#define ALIAS_FLUSH_START	0xffff4000
+
+#define TOP_PTE(x)	pte_offset_kernel(top_pmd, x)
+
+static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr)
+{
+	unsigned long to = ALIAS_FLUSH_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT);
+
+	set_pte(TOP_PTE(to), pfn_pte(pfn, PAGE_KERNEL));
+	flush_tlb_kernel_page(to);
+
+	asm(	"mcrr	p15, 0, %1, %0, c14\n"
+	"	mcrr	p15, 0, %1, %0, c5\n"
+	    :
+	    : "r" (to), "r" (to + PAGE_SIZE - L1_CACHE_BYTES)
+	    : "cc");
+}
+#else
+#define flush_pfn_alias(pfn,vaddr)	do { } while (0)
+#endif
 
 
 static void __flush_dcache_page(struct address_space *mapping, struct page *page)
 static void __flush_dcache_page(struct address_space *mapping, struct page *page)
 {
 {
@@ -36,6 +59,18 @@ static void __flush_dcache_page(struct address_space *mapping, struct page *page
 	if (!mapping)
 	if (!mapping)
 		return;
 		return;
 
 
+	/*
+	 * This is a page cache page.  If we have a VIPT cache, we
+	 * only need to do one flush - which would be at the relevant
+	 * userspace colour, which is congruent with page->index.
+	 */
+	if (cache_is_vipt()) {
+		if (cache_is_vipt_aliasing())
+			flush_pfn_alias(page_to_pfn(page),
+					page->index << PAGE_CACHE_SHIFT);
+		return;
+	}
+
 	/*
 	/*
 	 * There are possible user space mappings of this page:
 	 * There are possible user space mappings of this page:
 	 * - VIVT cache: we need to also write back and invalidate all user
 	 * - VIVT cache: we need to also write back and invalidate all user
@@ -57,8 +92,6 @@ static void __flush_dcache_page(struct address_space *mapping, struct page *page
 			continue;
 			continue;
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
 		flush_cache_page(mpnt, mpnt->vm_start + offset, page_to_pfn(page));
 		flush_cache_page(mpnt, mpnt->vm_start + offset, page_to_pfn(page));
-		if (cache_is_vipt())
-			break;
 	}
 	}
 	flush_dcache_mmap_unlock(mapping);
 	flush_dcache_mmap_unlock(mapping);
 }
 }

+ 19 - 8
arch/arm/mm/mm-armv.c

@@ -37,6 +37,8 @@ pgprot_t pgprot_kernel;
 
 
 EXPORT_SYMBOL(pgprot_kernel);
 EXPORT_SYMBOL(pgprot_kernel);
 
 
+pmd_t *top_pmd;
+
 struct cachepolicy {
 struct cachepolicy {
 	const char	policy[16];
 	const char	policy[16];
 	unsigned int	cr_mask;
 	unsigned int	cr_mask;
@@ -142,6 +144,16 @@ __setup("noalign", noalign_setup);
 
 
 #define FIRST_KERNEL_PGD_NR	(FIRST_USER_PGD_NR + USER_PTRS_PER_PGD)
 #define FIRST_KERNEL_PGD_NR	(FIRST_USER_PGD_NR + USER_PTRS_PER_PGD)
 
 
+static inline pmd_t *pmd_off(pgd_t *pgd, unsigned long virt)
+{
+	return pmd_offset(pgd, virt);
+}
+
+static inline pmd_t *pmd_off_k(unsigned long virt)
+{
+	return pmd_off(pgd_offset_k(virt), virt);
+}
+
 /*
 /*
  * need to get a 16k page for level 1
  * need to get a 16k page for level 1
  */
  */
@@ -220,7 +232,7 @@ void free_pgd_slow(pgd_t *pgd)
 		return;
 		return;
 
 
 	/* pgd is always present and good */
 	/* pgd is always present and good */
-	pmd = (pmd_t *)pgd;
+	pmd = pmd_off(pgd, 0);
 	if (pmd_none(*pmd))
 	if (pmd_none(*pmd))
 		goto free;
 		goto free;
 	if (pmd_bad(*pmd)) {
 	if (pmd_bad(*pmd)) {
@@ -246,9 +258,8 @@ free:
 static inline void
 static inline void
 alloc_init_section(unsigned long virt, unsigned long phys, int prot)
 alloc_init_section(unsigned long virt, unsigned long phys, int prot)
 {
 {
-	pmd_t *pmdp;
+	pmd_t *pmdp = pmd_off_k(virt);
 
 
-	pmdp = pmd_offset(pgd_offset_k(virt), virt);
 	if (virt & (1 << 20))
 	if (virt & (1 << 20))
 		pmdp++;
 		pmdp++;
 
 
@@ -283,11 +294,9 @@ alloc_init_supersection(unsigned long virt, unsigned long phys, int prot)
 static inline void
 static inline void
 alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pgprot_t prot)
 alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pgprot_t prot)
 {
 {
-	pmd_t *pmdp;
+	pmd_t *pmdp = pmd_off_k(virt);
 	pte_t *ptep;
 	pte_t *ptep;
 
 
-	pmdp = pmd_offset(pgd_offset_k(virt), virt);
-
 	if (pmd_none(*pmdp)) {
 	if (pmd_none(*pmdp)) {
 		unsigned long pmdval;
 		unsigned long pmdval;
 		ptep = alloc_bootmem_low_pages(2 * PTRS_PER_PTE *
 		ptep = alloc_bootmem_low_pages(2 * PTRS_PER_PTE *
@@ -310,7 +319,7 @@ alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pg
  */
  */
 static inline void clear_mapping(unsigned long virt)
 static inline void clear_mapping(unsigned long virt)
 {
 {
-	pmd_clear(pmd_offset(pgd_offset_k(virt), virt));
+	pmd_clear(pmd_off_k(virt));
 }
 }
 
 
 struct mem_types {
 struct mem_types {
@@ -578,7 +587,7 @@ void setup_mm_for_reboot(char mode)
 			 PMD_TYPE_SECT;
 			 PMD_TYPE_SECT;
 		if (cpu_arch <= CPU_ARCH_ARMv5)
 		if (cpu_arch <= CPU_ARCH_ARMv5)
 			pmdval |= PMD_BIT4;
 			pmdval |= PMD_BIT4;
-		pmd = pmd_offset(pgd + i, i << PGDIR_SHIFT);
+		pmd = pmd_off(pgd, i << PGDIR_SHIFT);
 		pmd[0] = __pmd(pmdval);
 		pmd[0] = __pmd(pmdval);
 		pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1)));
 		pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1)));
 		flush_pmd_entry(pmd);
 		flush_pmd_entry(pmd);
@@ -675,6 +684,8 @@ void __init memtable_init(struct meminfo *mi)
 
 
 	flush_cache_all();
 	flush_cache_all();
 	flush_tlb_all();
 	flush_tlb_all();
+
+	top_pmd = pmd_off_k(0xffff0000);
 }
 }
 
 
 /*
 /*

+ 7 - 5
arch/i386/kernel/cpu/amd.c

@@ -24,9 +24,6 @@ __asm__(".align 4\nvide: ret");
 
 
 static void __init init_amd(struct cpuinfo_x86 *c)
 static void __init init_amd(struct cpuinfo_x86 *c)
 {
 {
-#ifdef CONFIG_X86_SMP
-	int cpu = c == &boot_cpu_data ? 0 : c - cpu_data;
-#endif
 	u32 l, h;
 	u32 l, h;
 	int mbytes = num_physpages >> (20-PAGE_SHIFT);
 	int mbytes = num_physpages >> (20-PAGE_SHIFT);
 	int r;
 	int r;
@@ -198,14 +195,19 @@ static void __init init_amd(struct cpuinfo_x86 *c)
 			c->x86_num_cores = 1;
 			c->x86_num_cores = 1;
 	}
 	}
 
 
-#ifdef CONFIG_X86_SMP
+#ifdef CONFIG_X86_HT
 	/*
 	/*
 	 * On a AMD dual core setup the lower bits of the APIC id
 	 * On a AMD dual core setup the lower bits of the APIC id
 	 * distingush the cores.  Assumes number of cores is a power
 	 * distingush the cores.  Assumes number of cores is a power
 	 * of two.
 	 * of two.
 	 */
 	 */
 	if (c->x86_num_cores > 1) {
 	if (c->x86_num_cores > 1) {
-		cpu_core_id[cpu] = cpu >> hweight32(c->x86_num_cores - 1);
+		int cpu = smp_processor_id();
+		unsigned bits = 0;
+		while ((1 << bits) < c->x86_num_cores)
+			bits++;
+		cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
+		phys_proc_id[cpu] >>= bits;
 		printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
 		printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
 		       cpu, c->x86_num_cores, cpu_core_id[cpu]);
 		       cpu, c->x86_num_cores, cpu_core_id[cpu]);
 	}
 	}
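
The bit arithmetic above splits the initial APIC id into a core number (low bits) and a package id (high bits). A tiny stand-alone illustration with made-up values follows; phys_proc_id itself is filled in from cpuid in the common.c hunk below.

/* Worked example of the core/package split introduced above (values are
 * illustrative; in the kernel phys_proc_id comes from the initial APIC id). */
#include <stdio.h>

int main(void)
{
	unsigned num_cores = 2;		/* dual-core AMD */
	unsigned phys_proc_id = 3;	/* initial APIC id */
	unsigned bits = 0;

	while ((1u << bits) < num_cores)	/* bits = log2(number of cores) */
		bits++;

	printf("core %u in package %u\n",
	       phys_proc_id & ((1u << bits) - 1),	/* low bits: core 1 */
	       phys_proc_id >> bits);			/* high bits: package 1 */
	return 0;
}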

+ 4 - 0
arch/i386/kernel/cpu/common.c

@@ -243,6 +243,10 @@ static void __init early_cpu_detect(void)
 	}
 	}
 
 
 	early_intel_workaround(c);
 	early_intel_workaround(c);
+
+#ifdef CONFIG_X86_HT
+	phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+#endif
 }
 }
 
 
 void __init generic_identify(struct cpuinfo_x86 * c)
 void __init generic_identify(struct cpuinfo_x86 * c)

+ 1 - 0
arch/i386/kernel/smpboot.c

@@ -888,6 +888,7 @@ void *xquad_portio;
 
 
 cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
 cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_core_map);
 
 
 static void __init smp_boot_cpus(unsigned int max_cpus)
 static void __init smp_boot_cpus(unsigned int max_cpus)
 {
 {

+ 8 - 9
arch/i386/mach-voyager/voyager_smp.c

@@ -97,7 +97,6 @@ static void ack_vic_irq(unsigned int irq);
 static void vic_enable_cpi(void);
 static void vic_enable_cpi(void);
 static void do_boot_cpu(__u8 cpuid);
 static void do_boot_cpu(__u8 cpuid);
 static void do_quad_bootstrap(void);
 static void do_quad_bootstrap(void);
-static inline void wrapper_smp_local_timer_interrupt(struct pt_regs *);
 
 
 int hard_smp_processor_id(void);
 int hard_smp_processor_id(void);
 
 
@@ -125,6 +124,14 @@ send_QIC_CPI(__u32 cpuset, __u8 cpi)
 	}
 	}
 }
 }
 
 
+static inline void
+wrapper_smp_local_timer_interrupt(struct pt_regs *regs)
+{
+	irq_enter();
+	smp_local_timer_interrupt(regs);
+	irq_exit();
+}
+
 static inline void
 static inline void
 send_one_CPI(__u8 cpu, __u8 cpi)
 send_one_CPI(__u8 cpu, __u8 cpi)
 {
 {
@@ -1249,14 +1256,6 @@ smp_vic_timer_interrupt(struct pt_regs *regs)
 	smp_local_timer_interrupt(regs);
 	smp_local_timer_interrupt(regs);
 }
 }
 
 
-static inline void
-wrapper_smp_local_timer_interrupt(struct pt_regs *regs)
-{
-	irq_enter();
-	smp_local_timer_interrupt(regs);
-	irq_exit();
-}
-
 /* local (per CPU) timer interrupt.  It does both profiling and
 /* local (per CPU) timer interrupt.  It does both profiling and
  * process statistics/rescheduling.
  * process statistics/rescheduling.
  *
  *

+ 6 - 4
arch/i386/mm/ioremap.c

@@ -238,19 +238,21 @@ void iounmap(volatile void __iomem *addr)
 			addr < phys_to_virt(ISA_END_ADDRESS))
 			addr < phys_to_virt(ISA_END_ADDRESS))
 		return;
 		return;
 
 
-	p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
+	write_lock(&vmlist_lock);
+	p = __remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
 	if (!p) { 
 	if (!p) { 
-		printk("__iounmap: bad address %p\n", addr);
-		return;
+		printk("iounmap: bad address %p\n", addr);
+		goto out_unlock;
 	}
 	}
 
 
 	if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) {
 	if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) {
-		/* p->size includes the guard page, but cpa doesn't like that */
 		change_page_attr(virt_to_page(__va(p->phys_addr)),
 		change_page_attr(virt_to_page(__va(p->phys_addr)),
 				 p->size >> PAGE_SHIFT,
 				 p->size >> PAGE_SHIFT,
 				 PAGE_KERNEL);
 				 PAGE_KERNEL);
 		global_flush_tlb();
 		global_flush_tlb();
 	} 
 	} 
+out_unlock:
+	write_unlock(&vmlist_lock);
 	kfree(p); 
 	kfree(p); 
 }
 }
 
 

+ 1 - 1
arch/i386/pci/fixup.c

@@ -253,7 +253,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci
 #define MAX_PCIEROOT	6
 #define MAX_PCIEROOT	6
 static int quirk_aspm_offset[MAX_PCIEROOT << 3];
 static int quirk_aspm_offset[MAX_PCIEROOT << 3];
 
 
-#define GET_INDEX(a, b) (((a - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + b)
+#define GET_INDEX(a, b) ((((a) - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + ((b) & 7))

 static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
 {

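The GET_INDEX change above is a macro-hygiene fix: the arguments are now parenthesised and the second index is masked into the eight per-root-port slots of quirk_aspm_offset[]. A small standalone illustration of why that matters follows; DEMO_BASE and the sample values are invented stand-ins, not taken from the fixup code.

#include <stdio.h>

#define DEMO_BASE 0x100

/* old shape: arguments substituted without parentheses, second index unclamped */
#define GET_INDEX_OLD(a, b) (((a - DEMO_BASE) << 3) + b)
/* new shape: parenthesised arguments, b masked to the 8 slots per root port */
#define GET_INDEX_NEW(a, b) ((((a) - DEMO_BASE) << 3) + ((b) & 7))

int main(void)
{
	int dev = DEMO_BASE + 1, fn = 6;

	/* "fn & 3" mis-parses in the old macro: "+ fn & 3" groups as "(... + fn) & 3" */
	printf("old: %d\n", GET_INDEX_OLD(dev, fn & 3));	/* prints 2, the wrong slot */
	printf("new: %d\n", GET_INDEX_NEW(dev, fn & 3));	/* prints 10 = 1*8 + 2 */
	/* the "& 7" also keeps an out-of-range second argument inside the table */
	printf("clamped: %d\n", GET_INDEX_NEW(dev, 9));		/* 9 & 7 = 1, so 9 */
	return 0;
}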
+ 0 - 1
arch/ia64/ia32/ia32_ioctl.c

@@ -13,7 +13,6 @@
   
 #define	INCLUDES
 #include "compat_ioctl.c"
-#include <asm/ioctl32.h>

 #define IOCTL_NR(a)	((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT))


+ 48 - 7
arch/mips/vr41xx/common/pmu.c

@@ -1,7 +1,7 @@
 /*
  *  pmu.c, Power Management Unit routines for NEC VR4100 series.
  *
- *  Copyright (C) 2003-2004  Yoichi Yuasa <yuasa@hh.iij4u.or.jp>
+ *  Copyright (C) 2003-2005  Yoichi Yuasa <yuasa@hh.iij4u.or.jp>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -17,7 +17,9 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
+#include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/ioport.h>
 #include <linux/kernel.h>
 #include <linux/smp.h>
 #include <linux/types.h>
@@ -27,20 +29,31 @@
 #include <asm/reboot.h>
 #include <asm/system.h>

-#define PMUCNT2REG	KSEG1ADDR(0x0f0000c6)
+#define PMU_TYPE1_BASE	0x0b0000a0UL
+#define PMU_TYPE1_SIZE	0x0eUL
+
+#define PMU_TYPE2_BASE	0x0f0000c0UL
+#define PMU_TYPE2_SIZE	0x10UL
+
+#define PMUCNT2REG	0x06
  #define SOFTRST	0x0010

+static void __iomem *pmu_base;
+
+#define pmu_read(offset)		readw(pmu_base + (offset))
+#define pmu_write(offset, value)	writew((value), pmu_base + (offset))
+
 static inline void software_reset(void)
 {
-	uint16_t val;
+	uint16_t pmucnt2;

 	switch (current_cpu_data.cputype) {
 	case CPU_VR4122:
 	case CPU_VR4131:
 	case CPU_VR4133:
-		val = readw(PMUCNT2REG);
-		val |= SOFTRST;
-		writew(val, PMUCNT2REG);
+		pmucnt2 = pmu_read(PMUCNT2REG);
+		pmucnt2 |= SOFTRST;
+		pmu_write(PMUCNT2REG, pmucnt2);
 		break;
 	default:
 		break;
@@ -71,6 +84,34 @@ static void vr41xx_power_off(void)

 static int __init vr41xx_pmu_init(void)
 {
+	unsigned long start, size;
+
+	switch (current_cpu_data.cputype) {
+	case CPU_VR4111:
+	case CPU_VR4121:
+		start = PMU_TYPE1_BASE;
+		size = PMU_TYPE1_SIZE;
+		break;
+	case CPU_VR4122:
+	case CPU_VR4131:
+	case CPU_VR4133:
+		start = PMU_TYPE2_BASE;
+		size = PMU_TYPE2_SIZE;
+		break;
+	default:
+		printk("Unexpected CPU of NEC VR4100 series\n");
+		return -ENODEV;
+	}
+
+	if (request_mem_region(start, size, "PMU") == NULL)
+		return -EBUSY;
+
+	pmu_base = ioremap(start, size);
+	if (pmu_base == NULL) {
+		release_mem_region(start, size);
+		return -EBUSY;
+	}
+
 	_machine_restart = vr41xx_restart;
 	_machine_halt = vr41xx_halt;
 	_machine_power_off = vr41xx_power_off;
@@ -78,4 +119,4 @@ static int __init vr41xx_pmu_init(void)
 	return 0;
 }

-early_initcall(vr41xx_pmu_init);
+core_initcall(vr41xx_pmu_init);

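For reference, the new vr41xx_pmu_init() follows a fixed ordering: pick the register window for the detected CPU, reserve it, map it, and back the reservation out if the mapping fails. Below is a hedged userspace sketch of that acquire/unwind shape only; reserve_window(), map_window() and the return codes are invented stand-ins for request_mem_region(), ioremap(), -ENODEV and -EBUSY, not driver code.

#include <stdio.h>
#include <stdlib.h>

/* invented stand-ins for request_mem_region()/release_mem_region()/ioremap() */
static int reserve_window(unsigned long start, unsigned long size) { (void)start; (void)size; return 0; }
static void release_window(unsigned long start, unsigned long size) { (void)start; (void)size; }
static void *map_window(unsigned long start, unsigned long size) { (void)start; return malloc(size); }

static int demo_pmu_init(int cpu_type)
{
	unsigned long start, size;
	void *base;

	switch (cpu_type) {			/* register window depends on the CPU variant */
	case 1: start = 0x0b0000a0UL; size = 0x0e; break;
	case 2: start = 0x0f0000c0UL; size = 0x10; break;
	default: return -1;			/* unknown CPU: nothing acquired, nothing to undo */
	}

	if (reserve_window(start, size))
		return -2;			/* region already claimed */

	base = map_window(start, size);
	if (base == NULL) {
		release_window(start, size);	/* unwind the reservation on failure */
		return -2;
	}

	free(base);
	return 0;
}

int main(void) { return demo_pmu_init(2) ? 1 : 0; }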
+ 9 - 6
arch/ppc/kernel/head_44x.S

@@ -330,8 +330,9 @@ interrupt_base:
 	/* If we are faulting a kernel address, we have to use the
 	 * kernel page tables.
 	 */
-	andis.	r11, r10, 0x8000
-	beq	3f
+	lis	r11, TASK_SIZE@h
+	cmplw	r10, r11
+	blt+	3f
 	lis	r11, swapper_pg_dir@h
 	ori	r11, r11, swapper_pg_dir@l

@@ -464,8 +465,9 @@ interrupt_base:
 	/* If we are faulting a kernel address, we have to use the
 	 * kernel page tables.
 	 */
-	andis.	r11, r10, 0x8000
-	beq	3f
+	lis	r11, TASK_SIZE@h
+	cmplw	r10, r11
+	blt+	3f
 	lis	r11, swapper_pg_dir@h
 	ori	r11, r11, swapper_pg_dir@l

@@ -533,8 +535,9 @@ interrupt_base:
 	/* If we are faulting a kernel address, we have to use the
 	 * kernel page tables.
 	 */
-	andis.	r11, r10, 0x8000
-	beq	3f
+	lis	r11, TASK_SIZE@h
+	cmplw	r10, r11
+	blt+	3f
 	lis	r11, swapper_pg_dir@h
 	ori	r11, r11, swapper_pg_dir@l


+ 3 - 1
arch/ppc/kernel/setup.c

@@ -499,7 +499,7 @@ static int __init set_preferred_console(void)
 {
 	struct device_node *prom_stdout;
 	char *name;
-	int offset;
+	int offset = 0;

 	if (of_stdout_device == NULL)
 		return -ENODEV;
@@ -753,6 +753,8 @@ void __init setup_arch(char **cmdline_p)
 	strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
 	*cmdline_p = cmd_line;

+	parse_early_param();
+
 	/* set up the bootmem stuff with available memory */
 	do_init_bootmem();
 	if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab);

+ 5 - 2
arch/ppc/lib/string.S

@@ -446,6 +446,7 @@ _GLOBAL(__copy_tofrom_user)
 #ifdef CONFIG_8xx
 	/* Don't use prefetch on 8xx */
 	mtctr	r0
+	li	r0,0
 53:	COPY_16_BYTES_WITHEX(0)
 	bdnz	53b

@@ -564,7 +565,9 @@ _GLOBAL(__copy_tofrom_user)
 /* or write fault in cacheline loop */
 105:	li	r9,1
 92:	li	r3,LG_CACHELINE_BYTES
-	b	99f
+	mfctr	r8
+	add	r0,r0,r8
+	b	106f
 /* read fault in final word loop */
 108:	li	r9,0
 	b	93f
@@ -585,7 +588,7 @@ _GLOBAL(__copy_tofrom_user)
  * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
  */
 99:	mfctr	r0
-	slw	r3,r0,r3
+106:	slw	r3,r0,r3
 	add.	r3,r3,r5
 	beq	120f			/* shouldn't happen */
 	cmpwi	0,r9,0

+ 1 - 0
arch/ppc/mm/init.c

@@ -179,6 +179,7 @@ void free_initmem(void)
 	if (!have_of)
 		FREESEC(openfirmware);
  	printk("\n");
+	ppc_md.progress = NULL;
 #undef FREESEC
 }


+ 1 - 1
arch/ppc/syslib/ipic.c

@@ -479,7 +479,7 @@ void __init ipic_init(phys_addr_t phys_addr,
 	temp = 0;
 	for (i = 0 ; i < senses_count ; i++) {
 		if ((senses[i] & IRQ_SENSE_MASK) == IRQ_SENSE_EDGE) {
-			temp |= 1 << (16 - i);
+			temp |= 1 << (15 - i);
 			if (i != 0)
 				irq_desc[i + irq_offset + MPC83xx_IRQ_EXT1 - 1].status = 0;
 			else

+ 1 - 0
arch/ppc/syslib/mpc83xx_devices.c

@@ -61,6 +61,7 @@ static struct plat_serial8250_port serial_platform_data[] = {
 		.iotype		= UPIO_MEM,
 		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
 	},
+	{ },
 };

 struct platform_device ppc_sys_platform_devices[] = {

+ 1 - 0
arch/ppc/syslib/mpc85xx_devices.c

@@ -61,6 +61,7 @@ static struct plat_serial8250_port serial_platform_data[] = {
 		.iotype		= UPIO_MEM,
 		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ,
 	},
+	{ },
 };

 struct platform_device ppc_sys_platform_devices[] = {

+ 0 - 2
arch/ppc/syslib/open_pic.c

@@ -557,12 +557,10 @@ static void __init openpic_initipi(u_int ipi, u_int pri, u_int vec)
  */
 void openpic_cause_IPI(u_int ipi, cpumask_t cpumask)
 {
-	cpumask_t phys;
 	DECL_THIS_CPU;

 	CHECK_THIS_CPU;
 	check_arg_ipi(ipi);
-	phys = physmask(cpumask);
 	openpic_write(&OpenPIC->THIS_CPU.IPI_Dispatch(ipi),
 		      cpus_addr(physmask(cpumask))[0]);
 }

+ 65 - 20
arch/ppc64/kernel/mf.c

@@ -1,7 +1,7 @@
 /*
   * mf.c
   * Copyright (C) 2001 Troy D. Armstrong  IBM Corporation
-  * Copyright (C) 2004 Stephen Rothwell  IBM Corporation
+  * Copyright (C) 2004-2005 Stephen Rothwell  IBM Corporation
   *
   * This modules exists as an interface between a Linux secondary partition
   * running on an iSeries and the primary partition's Virtual Service
@@ -36,10 +36,12 @@

 #include <asm/time.h>
 #include <asm/uaccess.h>
+#include <asm/paca.h>
 #include <asm/iSeries/vio.h>
 #include <asm/iSeries/mf.h>
 #include <asm/iSeries/HvLpConfig.h>
 #include <asm/iSeries/ItSpCommArea.h>
+#include <asm/iSeries/ItLpQueue.h>

 /*
  * This is the structure layout for the Machine Facilites LPAR event
@@ -696,36 +698,23 @@ static void get_rtc_time_complete(void *token, struct ce_msg_data *ce_msg)
 	complete(&rtc->com);
 }

-int mf_get_rtc(struct rtc_time *tm)
+static int rtc_set_tm(int rc, u8 *ce_msg, struct rtc_time *tm)
 {
-	struct ce_msg_comp_data ce_complete;
-	struct rtc_time_data rtc_data;
-	int rc;
-
-	memset(&ce_complete, 0, sizeof(ce_complete));
-	memset(&rtc_data, 0, sizeof(rtc_data));
-	init_completion(&rtc_data.com);
-	ce_complete.handler = &get_rtc_time_complete;
-	ce_complete.token = &rtc_data;
-	rc = signal_ce_msg_simple(0x40, &ce_complete);
-	if (rc)
-		return rc;
-	wait_for_completion(&rtc_data.com);
 	tm->tm_wday = 0;
 	tm->tm_yday = 0;
 	tm->tm_isdst = 0;
-	if (rtc_data.rc) {
+	if (rc) {
 		tm->tm_sec = 0;
 		tm->tm_min = 0;
 		tm->tm_hour = 0;
 		tm->tm_mday = 15;
 		tm->tm_mon = 5;
 		tm->tm_year = 52;
-		return rtc_data.rc;
+		return rc;
 	}

-	if ((rtc_data.ce_msg.ce_msg[2] == 0xa9) ||
-	    (rtc_data.ce_msg.ce_msg[2] == 0xaf)) {
+	if ((ce_msg[2] == 0xa9) ||
+	    (ce_msg[2] == 0xaf)) {
 		/* TOD clock is not set */
 		tm->tm_sec = 1;
 		tm->tm_min = 1;
@@ -736,7 +725,6 @@ int mf_get_rtc(struct rtc_time *tm)
 		mf_set_rtc(tm);
 	}
 	{
-		u8 *ce_msg = rtc_data.ce_msg.ce_msg;
 		u8 year = ce_msg[5];
 		u8 sec = ce_msg[6];
 		u8 min = ce_msg[7];
@@ -765,6 +753,63 @@ int mf_get_rtc(struct rtc_time *tm)
 	return 0;
 }

+int mf_get_rtc(struct rtc_time *tm)
+{
+	struct ce_msg_comp_data ce_complete;
+	struct rtc_time_data rtc_data;
+	int rc;
+
+	memset(&ce_complete, 0, sizeof(ce_complete));
+	memset(&rtc_data, 0, sizeof(rtc_data));
+	init_completion(&rtc_data.com);
+	ce_complete.handler = &get_rtc_time_complete;
+	ce_complete.token = &rtc_data;
+	rc = signal_ce_msg_simple(0x40, &ce_complete);
+	if (rc)
+		return rc;
+	wait_for_completion(&rtc_data.com);
+	return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm);
+}
+
+struct boot_rtc_time_data {
+	int busy;
+	struct ce_msg_data ce_msg;
+	int rc;
+};
+
+static void get_boot_rtc_time_complete(void *token, struct ce_msg_data *ce_msg)
+{
+	struct boot_rtc_time_data *rtc = token;
+
+	memcpy(&rtc->ce_msg, ce_msg, sizeof(rtc->ce_msg));
+	rtc->rc = 0;
+	rtc->busy = 0;
+}
+
+int mf_get_boot_rtc(struct rtc_time *tm)
+{
+	struct ce_msg_comp_data ce_complete;
+	struct boot_rtc_time_data rtc_data;
+	int rc;
+
+	memset(&ce_complete, 0, sizeof(ce_complete));
+	memset(&rtc_data, 0, sizeof(rtc_data));
+	rtc_data.busy = 1;
+	ce_complete.handler = &get_boot_rtc_time_complete;
+	ce_complete.token = &rtc_data;
+	rc = signal_ce_msg_simple(0x40, &ce_complete);
+	if (rc)
+		return rc;
+	/* We need to poll here as we are not yet taking interrupts */
+	while (rtc_data.busy) {
+		extern unsigned long lpevent_count;
+		struct ItLpQueue *lpq = get_paca()->lpqueue_ptr;
+		if (lpq && ItLpQueue_isLpIntPending(lpq))
+			lpevent_count += ItLpQueue_process(lpq, NULL);
+	}
+	return rtc_set_tm(rtc_data.rc, rtc_data.ce_msg.ce_msg, tm);
+}
+
 int mf_set_rtc(struct rtc_time *tm)
 {
 	char ce_time[12];

+ 21 - 7
arch/ppc64/kernel/pmac_smp.c

@@ -68,6 +68,7 @@ extern struct smp_ops_t *smp_ops;

 static void (*pmac_tb_freeze)(int freeze);
 static struct device_node *pmac_tb_clock_chip_host;
+static u8 pmac_tb_pulsar_addr;
 static DEFINE_SPINLOCK(timebase_lock);
 static unsigned long timebase;

@@ -106,12 +107,9 @@ static void smp_core99_pulsar_tb_freeze(int freeze)
 	u8 data;
 	int rc;

-	/* Strangely, the device-tree says address is 0xd2, but darwin
-	 * accesses 0xd0 ...
-	 */
 	pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_combined);
 	rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host,
-			       0xd4 | pmac_low_i2c_read,
+			       pmac_tb_pulsar_addr | pmac_low_i2c_read,
 			       0x2e, &data, 1);
 	if (rc != 0)
 		goto bail;
@@ -120,7 +118,7 @@ static void smp_core99_pulsar_tb_freeze(int freeze)

 	pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_stdsub);
 	rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host,
-			       0xd4 | pmac_low_i2c_write,
+			       pmac_tb_pulsar_addr | pmac_low_i2c_write,
 			       0x2e, &data, 1);
  bail:
 	if (rc != 0) {
@@ -185,6 +183,12 @@ static int __init smp_core99_probe(void)
 	if (ncpus <= 1)
 		return 1;

+	/* HW sync only on these platforms */
+	if (!machine_is_compatible("PowerMac7,2") &&
+	    !machine_is_compatible("PowerMac7,3") &&
+	    !machine_is_compatible("RackMac3,1"))
+		goto nohwsync;
+
 	/* Look for the clock chip */
 	for (cc = NULL; (cc = of_find_node_by_name(cc, "i2c-hwclock")) != NULL;) {
 		struct device_node *p = of_get_parent(cc);
@@ -198,11 +202,18 @@ static int __init smp_core99_probe(void)
 			goto next;
 		switch (*reg) {
 		case 0xd2:
-			pmac_tb_freeze = smp_core99_cypress_tb_freeze;
-			printk(KERN_INFO "Timebase clock is Cypress chip\n");
+			if (device_is_compatible(cc, "pulsar-legacy-slewing")) {
+				pmac_tb_freeze = smp_core99_pulsar_tb_freeze;
+				pmac_tb_pulsar_addr = 0xd2;
+				printk(KERN_INFO "Timebase clock is Pulsar chip\n");
+			} else if (device_is_compatible(cc, "cy28508")) {
+				pmac_tb_freeze = smp_core99_cypress_tb_freeze;
+				printk(KERN_INFO "Timebase clock is Cypress chip\n");
+			}
 			break;
 		case 0xd4:
 			pmac_tb_freeze = smp_core99_pulsar_tb_freeze;
+			pmac_tb_pulsar_addr = 0xd4;
 			printk(KERN_INFO "Timebase clock is Pulsar chip\n");
 			break;
 		}
@@ -210,12 +221,15 @@ static int __init smp_core99_probe(void)
 			pmac_tb_clock_chip_host = p;
 			smp_ops->give_timebase = smp_core99_give_timebase;
 			smp_ops->take_timebase = smp_core99_take_timebase;
+			of_node_put(cc);
+			of_node_put(p);
 			break;
 		}
 	next:
 		of_node_put(p);
 	}

+ nohwsync:
 	mpic_request_ipis();

 	return ncpus;

+ 43 - 1
arch/ppc64/kernel/prom_init.c

@@ -1750,7 +1750,44 @@ static void __init flatten_device_tree(void)
 	prom_printf("Device tree struct  0x%x -> 0x%x\n",
 		    RELOC(dt_struct_start), RELOC(dt_struct_end));

- }
+}
+
+
+static void __init fixup_device_tree(void)
+{
+	unsigned long offset = reloc_offset();
+	phandle u3, i2c, mpic;
+	u32 u3_rev;
+	u32 interrupts[2];
+	u32 parent;
+
+	/* Some G5s have a missing interrupt definition, fix it up here */
+	u3 = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000"));
+	if ((long)u3 <= 0)
+		return;
+	i2c = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/i2c@f8001000"));
+	if ((long)i2c <= 0)
+		return;
+	mpic = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/mpic@f8040000"));
+	if ((long)mpic <= 0)
+		return;
+
+	/* check if proper rev of u3 */
+	if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev)) <= 0)
+		return;
+	if (u3_rev != 0x35)
+		return;
+	/* does it need fixup ? */
+	if (prom_getproplen(i2c, "interrupts") > 0)
+		return;
+	/* interrupt on this revision of u3 is number 0 and level */
+	interrupts[0] = 0;
+	interrupts[1] = 1;
+	prom_setprop(i2c, "interrupts", &interrupts, sizeof(interrupts));
+	parent = (u32)mpic;
+	prom_setprop(i2c, "interrupt-parent", &parent, sizeof(parent));
+}
+

 static void __init prom_find_boot_cpu(void)
 {
@@ -1919,6 +1956,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long
 			PTRRELOC(&prom_tce_alloc_end), sizeof(RELOC(prom_tce_alloc_end)));
 	}

+	/*
+	 * Fixup any known bugs in the device-tree
+	 */
+	fixup_device_tree();
+
 	/*
 	 * Now finally create the flattened device-tree
 	 */

+ 1 - 38
arch/ppc64/kernel/rtc.c

@@ -292,47 +292,10 @@ int iSeries_set_rtc_time(struct rtc_time *tm)

 void iSeries_get_boot_time(struct rtc_time *tm)
 {
-	unsigned long time;
-	static unsigned long lastsec = 1;
-
-	u32 dataWord1 = *((u32 *)(&xSpCommArea.xBcdTimeAtIplStart));
-	u32 dataWord2 = *(((u32 *)&(xSpCommArea.xBcdTimeAtIplStart)) + 1);
-	int year = 1970;
-	int year1 = ( dataWord1 >> 24 ) & 0x000000FF;
-	int year2 = ( dataWord1 >> 16 ) & 0x000000FF;
-	int sec = ( dataWord1 >> 8 ) & 0x000000FF;
-	int min = dataWord1 & 0x000000FF;
-	int hour = ( dataWord2 >> 24 ) & 0x000000FF;
-	int day = ( dataWord2 >> 8 ) & 0x000000FF;
-	int mon = dataWord2 & 0x000000FF;
-
 	if ( piranha_simulator )
 		return;

-	BCD_TO_BIN(sec);
-	BCD_TO_BIN(min);
-	BCD_TO_BIN(hour);
-	BCD_TO_BIN(day);
-	BCD_TO_BIN(mon);
-	BCD_TO_BIN(year1);
-	BCD_TO_BIN(year2);
-	year = year1 * 100 + year2;
-
-	time = mktime(year, mon, day, hour, min, sec);
-	time += ( jiffies / HZ );
-
-	/* Now THIS is a nasty hack!
-	* It ensures that the first two calls get different answers.  
-	* That way the loop in init_time (time.c) will not think
-	* the clock is stuck.
-	*/
-	if ( lastsec ) {
-		time -= lastsec;
-		--lastsec;
-	}
-
-	to_tm(time, tm); 
-	tm->tm_year -= 1900;
+	mf_get_boot_rtc(tm);
 	tm->tm_mon  -= 1;
 }
 #endif

+ 1 - 0
arch/ppc64/kernel/time.c

@@ -515,6 +515,7 @@ void __init time_init(void)
 	do_gtod.varp = &do_gtod.vars[0];
 	do_gtod.var_idx = 0;
 	do_gtod.varp->tb_orig_stamp = tb_last_stamp;
+	get_paca()->next_jiffy_update_tb = tb_last_stamp + tb_ticks_per_jiffy;
 	do_gtod.varp->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
 	do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
 	do_gtod.varp->tb_to_xs = tb_to_xs;

+ 67 - 100
arch/sparc64/kernel/pci_iommu.c

@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/delay.h>

 #include <asm/pbm.h>

@@ -379,6 +380,56 @@ bad:
 	return PCI_DMA_ERROR_CODE;
 }

+static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, u32 vaddr, unsigned long ctx, unsigned long npages)
+{
+	int limit;
+
+	PCI_STC_FLUSHFLAG_INIT(strbuf);
+	if (strbuf->strbuf_ctxflush &&
+	    iommu->iommu_ctxflush) {
+		unsigned long matchreg, flushreg;
+
+		flushreg = strbuf->strbuf_ctxflush;
+		matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
+
+		limit = 100000;
+		pci_iommu_write(flushreg, ctx);
+		for(;;) {
+			if (((long)pci_iommu_read(matchreg)) >= 0L)
+				break;
+			limit--;
+			if (!limit)
+				break;
+			udelay(1);
+		}
+		if (!limit)
+			printk(KERN_WARNING "pci_strbuf_flush: ctx flush "
+			       "timeout vaddr[%08x] ctx[%lx]\n",
+			       vaddr, ctx);
+	} else {
+		unsigned long i;
+
+		for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
+			pci_iommu_write(strbuf->strbuf_pflush, vaddr);
+	}
+
+	pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
+	(void) pci_iommu_read(iommu->write_complete_reg);
+
+	limit = 100000;
+	while (!PCI_STC_FLUSHFLAG_SET(strbuf)) {
+		limit--;
+		if (!limit)
+			break;
+		udelay(1);
+		membar("#LoadLoad");
+	}
+	if (!limit)
+		printk(KERN_WARNING "pci_strbuf_flush: flushflag timeout "
+		       "vaddr[%08x] ctx[%lx] npages[%ld]\n",
+		       vaddr, ctx, npages);
+}
+
 /* Unmap a single streaming mode DMA translation. */
 void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
 {
@@ -386,7 +437,7 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int
 	struct pci_iommu *iommu;
 	struct pci_strbuf *strbuf;
 	iopte_t *base;
-	unsigned long flags, npages, i, ctx;
+	unsigned long flags, npages, ctx;

 	if (direction == PCI_DMA_NONE)
 		BUG();
@@ -414,29 +465,8 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int
 		ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;

 	/* Step 1: Kick data out of streaming buffers if necessary. */
-	if (strbuf->strbuf_enabled) {
-		u32 vaddr = bus_addr;
-
-		PCI_STC_FLUSHFLAG_INIT(strbuf);
-		if (strbuf->strbuf_ctxflush &&
-		    iommu->iommu_ctxflush) {
-			unsigned long matchreg, flushreg;
-
-			flushreg = strbuf->strbuf_ctxflush;
-			matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
-			do {
-				pci_iommu_write(flushreg, ctx);
-			} while(((long)pci_iommu_read(matchreg)) < 0L);
-		} else {
-			for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
-				pci_iommu_write(strbuf->strbuf_pflush, vaddr);
-		}
-
-		pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
-		(void) pci_iommu_read(iommu->write_complete_reg);
-		while (!PCI_STC_FLUSHFLAG_SET(strbuf))
-			membar("#LoadLoad");
-	}
+	if (strbuf->strbuf_enabled)
+		pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);

 	/* Step 2: Clear out first TSB entry. */
 	iopte_make_dummy(iommu, base);
@@ -647,29 +677,8 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
 		ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;

 	/* Step 1: Kick data out of streaming buffers if necessary. */
-	if (strbuf->strbuf_enabled) {
-		u32 vaddr = (u32) bus_addr;
-
-		PCI_STC_FLUSHFLAG_INIT(strbuf);
-		if (strbuf->strbuf_ctxflush &&
-		    iommu->iommu_ctxflush) {
-			unsigned long matchreg, flushreg;
-
-			flushreg = strbuf->strbuf_ctxflush;
-			matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
-			do {
-				pci_iommu_write(flushreg, ctx);
-			} while(((long)pci_iommu_read(matchreg)) < 0L);
-		} else {
-			for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
-				pci_iommu_write(strbuf->strbuf_pflush, vaddr);
-		}
-
-		pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
-		(void) pci_iommu_read(iommu->write_complete_reg);
-		while (!PCI_STC_FLUSHFLAG_SET(strbuf))
-			membar("#LoadLoad");
-	}
+	if (strbuf->strbuf_enabled)
+		pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);

 	/* Step 2: Clear out first TSB entry. */
 	iopte_make_dummy(iommu, base);
@@ -715,28 +724,7 @@ void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size
 	}

 	/* Step 2: Kick data out of streaming buffers. */
-	PCI_STC_FLUSHFLAG_INIT(strbuf);
-	if (iommu->iommu_ctxflush &&
-	    strbuf->strbuf_ctxflush) {
-		unsigned long matchreg, flushreg;
-
-		flushreg = strbuf->strbuf_ctxflush;
-		matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
-		do {
-			pci_iommu_write(flushreg, ctx);
-		} while(((long)pci_iommu_read(matchreg)) < 0L);
-	} else {
-		unsigned long i;
-
-		for (i = 0; i < npages; i++, bus_addr += IO_PAGE_SIZE)
-			pci_iommu_write(strbuf->strbuf_pflush, bus_addr);
-	}
-
-	/* Step 3: Perform flush synchronization sequence. */
-	pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
-	(void) pci_iommu_read(iommu->write_complete_reg);
-	while (!PCI_STC_FLUSHFLAG_SET(strbuf))
-		membar("#LoadLoad");
+	pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);

 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -749,7 +737,8 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, i
 	struct pcidev_cookie *pcp;
 	struct pci_iommu *iommu;
 	struct pci_strbuf *strbuf;
-	unsigned long flags, ctx;
+	unsigned long flags, ctx, npages, i;
+	u32 bus_addr;

 	pcp = pdev->sysdata;
 	iommu = pcp->pbm->iommu;
@@ -772,36 +761,14 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, i
 	}

 	/* Step 2: Kick data out of streaming buffers. */
-	PCI_STC_FLUSHFLAG_INIT(strbuf);
-	if (iommu->iommu_ctxflush &&
-	    strbuf->strbuf_ctxflush) {
-		unsigned long matchreg, flushreg;
-
-		flushreg = strbuf->strbuf_ctxflush;
-		matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
-		do {
-			pci_iommu_write(flushreg, ctx);
-		} while (((long)pci_iommu_read(matchreg)) < 0L);
-	} else {
-		unsigned long i, npages;
-		u32 bus_addr;
-
-		bus_addr = sglist[0].dma_address & IO_PAGE_MASK;
-
-		for(i = 1; i < nelems; i++)
-			if (!sglist[i].dma_length)
-				break;
-		i--;
-		npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT;
-		for (i = 0; i < npages; i++, bus_addr += IO_PAGE_SIZE)
-			pci_iommu_write(strbuf->strbuf_pflush, bus_addr);
-	}
-
-	/* Step 3: Perform flush synchronization sequence. */
-	pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
-	(void) pci_iommu_read(iommu->write_complete_reg);
-	while (!PCI_STC_FLUSHFLAG_SET(strbuf))
-		membar("#LoadLoad");
+	bus_addr = sglist[0].dma_address & IO_PAGE_MASK;
+	for(i = 1; i < nelems; i++)
+		if (!sglist[i].dma_length)
+			break;
+	i--;
+	npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length)
+		  - bus_addr) >> IO_PAGE_SHIFT;
+	pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);

 	spin_unlock_irqrestore(&iommu->lock, flags);
 }

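The four open-coded spin loops above were consolidated into a single pci_strbuf_flush() helper that bounds each wait and warns instead of hanging forever. A minimal userspace sketch of that bounded-polling idiom follows; poll_flag() and usleep() are stand-ins for the real flush-flag register read and udelay(), not code taken from the driver.

#include <stdio.h>
#include <unistd.h>

static volatile int flush_flag;		/* would be set by the hardware on completion */

static int poll_flag(void)		/* stand-in for reading the flush flag */
{
	return flush_flag;
}

static int wait_for_flush(const char *what)
{
	int limit = 100000;		/* same bound the new helper uses */

	while (!poll_flag()) {
		if (--limit == 0) {
			fprintf(stderr, "%s: flush timeout\n", what);
			return -1;	/* warn and give up instead of spinning forever */
		}
		usleep(1);		/* analogue of udelay(1) between polls */
	}
	return 0;
}

int main(void)
{
	flush_flag = 1;			/* pretend the flush already completed */
	return wait_for_flush("demo") ? 1 : 0;
}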
+ 23 - 8
arch/sparc64/kernel/sbus.c

@@ -117,19 +117,34 @@ static void iommu_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages

 #define STRBUF_TAG_VALID	0x02UL

-static void strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages)
+static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages)
 {
+	unsigned long n;
+	int limit;
+
 	iommu->strbuf_flushflag = 0UL;
-	while (npages--)
-		upa_writeq(base + (npages << IO_PAGE_SHIFT),
+	n = npages;
+	while (n--)
+		upa_writeq(base + (n << IO_PAGE_SHIFT),
 			   iommu->strbuf_regs + STRBUF_PFLUSH);

 	/* Whoopee cushion! */
 	upa_writeq(__pa(&iommu->strbuf_flushflag),
 		   iommu->strbuf_regs + STRBUF_FSYNC);
 	upa_readq(iommu->sbus_control_reg);
-	while (iommu->strbuf_flushflag == 0UL)
+
+	limit = 100000;
+	while (iommu->strbuf_flushflag == 0UL) {
+		limit--;
+		if (!limit)
+			break;
+		udelay(1);
 		membar("#LoadLoad");
+	}
+	if (!limit)
+		printk(KERN_WARNING "sbus_strbuf_flush: flushflag timeout "
+		       "vaddr[%08x] npages[%ld]\n",
+		       base, npages);
 }

 static iopte_t *alloc_streaming_cluster(struct sbus_iommu *iommu, unsigned long npages)
@@ -406,7 +421,7 @@ void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t dma_addr, size_t size,

 	spin_lock_irqsave(&iommu->lock, flags);
 	free_streaming_cluster(iommu, dma_base, size >> IO_PAGE_SHIFT);
-	strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT);
+	sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }

@@ -569,7 +584,7 @@ void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int
 	iommu = sdev->bus->iommu;
 	spin_lock_irqsave(&iommu->lock, flags);
 	free_streaming_cluster(iommu, dvma_base, size >> IO_PAGE_SHIFT);
-	strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT);
+	sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }

@@ -581,7 +596,7 @@ void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t base, size_t
 	size = (IO_PAGE_ALIGN(base + size) - (base & IO_PAGE_MASK));

 	spin_lock_irqsave(&iommu->lock, flags);
-	strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT);
+	sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }

@@ -605,7 +620,7 @@ void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sg, int
 	size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base;

 	spin_lock_irqsave(&iommu->lock, flags);
-	strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT);
+	sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }


+ 11 - 0
arch/sparc64/kernel/setup.c

@@ -383,6 +383,17 @@ static void __init process_switch(char c)
 		/* Use PROM debug console. */
 		register_console(&prom_debug_console);
 		break;
+	case 'P':
+		/* Force UltraSPARC-III P-Cache on. */
+		if (tlb_type != cheetah) {
+			printk("BOOT: Ignoring P-Cache force option.\n");
+			break;
+		}
+		cheetah_pcache_forced_on = 1;
+		add_taint(TAINT_MACHINE_CHECK);
+		cheetah_enable_pcache();
+		break;
+
 	default:
 		printk("Unknown boot switch (-%c)\n", c);
 		break;

+ 3 - 0
arch/sparc64/kernel/smp.c

@@ -123,6 +123,9 @@ void __init smp_callin(void)

 	smp_setup_percpu_timer();

+	if (cheetah_pcache_forced_on)
+		cheetah_enable_pcache();
+
 	local_irq_enable();

 	calibrate_delay();

+ 19 - 0
arch/sparc64/kernel/traps.c

@@ -421,6 +421,25 @@ asmlinkage void cee_log(unsigned long ce_status,
 	}
 }

+int cheetah_pcache_forced_on;
+
+void cheetah_enable_pcache(void)
+{
+	unsigned long dcr;
+
+	printk("CHEETAH: Enabling P-Cache on cpu %d.\n",
+	       smp_processor_id());
+
+	__asm__ __volatile__("ldxa [%%g0] %1, %0"
+			     : "=r" (dcr)
+			     : "i" (ASI_DCU_CONTROL_REG));
+	dcr |= (DCU_PE | DCU_HPE | DCU_SPE | DCU_SL);
+	__asm__ __volatile__("stxa %0, [%%g0] %1\n\t"
+			     "membar #Sync"
+			     : /* no outputs */
+			     : "r" (dcr), "i" (ASI_DCU_CONTROL_REG));
+}
+
 /* Cheetah error trap handling. */
 static unsigned long ecache_flush_physbase;
 static unsigned long ecache_flush_linesize;

+ 4 - 0
arch/um/Kconfig_x86_64

@@ -6,6 +6,10 @@ config 64BIT
 	bool
 	default y

+config TOP_ADDR
+ 	hex
+	default 0x80000000
+
 config 3_LEVEL_PGTABLES
        bool
        default y

+ 15 - 7
arch/um/drivers/chan_kern.c

@@ -20,9 +20,17 @@
 #include "os.h"

 #ifdef CONFIG_NOCONFIG_CHAN
+
+/* The printk's here are wrong because we are complaining that there is no
+ * output device, but printk is printing to that output device.  The user will
+ * never see the error.  printf would be better, except it can't run on a
+ * kernel stack because it will overflow it.
+ * Use printk for now since that will avoid crashing.
+ */
+
 static void *not_configged_init(char *str, int device, struct chan_opts *opts)
 {
-	printf(KERN_ERR "Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 	return(NULL);
 }
@@ -30,27 +38,27 @@ static void *not_configged_init(char *str, int device, struct chan_opts *opts)
 static int not_configged_open(int input, int output, int primary, void *data,
 			      char **dev_out)
 {
-	printf(KERN_ERR "Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 	return(-ENODEV);
 }

 static void not_configged_close(int fd, void *data)
 {
-	printf(KERN_ERR "Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 }

 static int not_configged_read(int fd, char *c_out, void *data)
 {
-	printf(KERN_ERR "Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 	return(-EIO);
 }

 static int not_configged_write(int fd, const char *buf, int len, void *data)
 {
-	printf(KERN_ERR "Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 	return(-EIO);
 }
@@ -58,7 +66,7 @@ static int not_configged_write(int fd, const char *buf, int len, void *data)
 static int not_configged_console_write(int fd, const char *buf, int len,
 				       void *data)
 {
-	printf(KERN_ERR "Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 	return(-EIO);
 }
@@ -66,7 +74,7 @@ static int not_configged_console_write(int fd, const char *buf, int len,
 static int not_configged_window_size(int fd, void *data, unsigned short *rows,
 				     unsigned short *cols)
 {
-	printf(KERN_ERR "Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 	return(-ENODEV);
 }

+ 1 - 3
arch/um/drivers/mcast_kern.c

@@ -73,7 +73,6 @@ int mcast_setup(char *str, char **mac_out, void *data)
 	struct mcast_init *init = data;
 	char *port_str = NULL, *ttl_str = NULL, *remain;
 	char *last;
-	int n;

 	*init = ((struct mcast_init)
 		{ .addr 	= "239.192.168.1",
@@ -89,13 +88,12 @@ int mcast_setup(char *str, char **mac_out, void *data)
 	}
 	
 	if(port_str != NULL){
-		n = simple_strtoul(port_str, &last, 10);
+		init->port = simple_strtoul(port_str, &last, 10);
 		if((*last != '\0') || (last == port_str)){
 			printk(KERN_ERR "mcast_setup - Bad port : '%s'\n", 
 			       port_str);
 			return(0);
 		}
-		init->port = htons(n);
 	}

 	if(ttl_str != NULL){

+ 15 - 32
arch/um/drivers/mcast_user.c

@@ -38,7 +38,7 @@ static struct sockaddr_in *new_addr(char *addr, unsigned short port)
 	}
 	sin->sin_family = AF_INET;
 	sin->sin_addr.s_addr = in_aton(addr);
-	sin->sin_port = port;
+	sin->sin_port = htons(port);
 	return(sin);
 }

@@ -55,28 +55,25 @@ static int mcast_open(void *data)
 	struct mcast_data *pri = data;
 	struct sockaddr_in *sin = pri->mcast_addr;
 	struct ip_mreq mreq;
-	int fd, yes = 1;
+	int fd = -EINVAL, yes = 1, err = -EINVAL;;


-	if ((sin->sin_addr.s_addr == 0) || (sin->sin_port == 0)) {
-		fd = -EINVAL;
+	if ((sin->sin_addr.s_addr == 0) || (sin->sin_port == 0))
 		goto out;
-	}

 	fd = socket(AF_INET, SOCK_DGRAM, 0);
+
 	if (fd < 0){
 		printk("mcast_open : data socket failed, errno = %d\n", 
 		       errno);
-		fd = -ENOMEM;
+		fd = -errno;
 		goto out;
 	}

 	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) {
 		printk("mcast_open: SO_REUSEADDR failed, errno = %d\n",
 			errno);
-		os_close_file(fd);
-		fd = -EINVAL;
-		goto out;
+		goto out_close;
 	}

 	/* set ttl according to config */
@@ -84,26 +81,20 @@ static int mcast_open(void *data)
 		       sizeof(pri->ttl)) < 0) {
 		printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n",
 			errno);
-		os_close_file(fd);
-		fd = -EINVAL;
-		goto out;
+		goto out_close;
 	}

 	/* set LOOP, so data does get fed back to local sockets */
 	if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) {
 		printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n",
 			errno);
-		os_close_file(fd);
-		fd = -EINVAL;
-		goto out;
+		goto out_close;
 	}

 	/* bind socket to mcast address */
 	if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) {
 		printk("mcast_open : data bind failed, errno = %d\n", errno);
-		os_close_file(fd);
-		fd = -EINVAL;
-		goto out;
+		goto out_close;
 	}		
 	
 	/* subscribe to the multicast group */
@@ -117,12 +108,15 @@ static int mcast_open(void *data)
 		       "interface on the host.\n");
 		printk("eth0 should be configured in order to use the "
 		       "multicast transport.\n");
-		os_close_file(fd);
-		fd = -EINVAL;
+                goto out_close;
 	}

  out:
-	return(fd);
+	return fd;
+
+ out_close:
+        os_close_file(fd);
+        return err;
 }

 static void mcast_close(int fd, void *data)
@@ -164,14 +158,3 @@ struct net_user_info mcast_user_info = {
 	.delete_address = NULL,
 	.max_packet	= MAX_PACKET - ETH_HEADER_OTHER
 };
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */

+ 3 - 293
arch/um/drivers/ubd_kern.c

@@ -55,7 +55,7 @@
 #include "mem_kern.h"
 #include "cow.h"

-enum ubd_req { UBD_READ, UBD_WRITE, UBD_MMAP };
+enum ubd_req { UBD_READ, UBD_WRITE };

 struct io_thread_req {
 	enum ubd_req op;
@@ -68,8 +68,6 @@ struct io_thread_req {
 	unsigned long sector_mask;
 	unsigned long long cow_offset;
 	unsigned long bitmap_words[2];
-	int map_fd;
-	unsigned long long map_offset;
 	int error;
 };

@@ -122,10 +120,6 @@ static int ubd_ioctl(struct inode * inode, struct file * file,

 #define MAX_DEV (8)

-/* Changed in early boot */
-static int ubd_do_mmap = 0;
-#define UBD_MMAP_BLOCK_SIZE PAGE_SIZE
-
 static struct block_device_operations ubd_blops = {
         .owner		= THIS_MODULE,
         .open		= ubd_open,
@@ -175,12 +169,6 @@ struct ubd {
 	int no_cow;
 	struct cow cow;
 	struct platform_device pdev;
-
-	int map_writes;
-	int map_reads;
-	int nomap_writes;
-	int nomap_reads;
-	int write_maps;
 };

 #define DEFAULT_COW { \
@@ -200,11 +188,6 @@ struct ubd {
 	.openflags =		OPEN_FLAGS, \
         .no_cow =               0, \
         .cow =			DEFAULT_COW, \
-	.map_writes		= 0, \
-	.map_reads		= 0, \
-	.nomap_writes		= 0, \
-	.nomap_reads		= 0, \
-	.write_maps		= 0, \
 }

 struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
@@ -314,13 +297,6 @@ static int ubd_setup_common(char *str, int *index_out)
 		int major;

 		str++;
-		if(!strcmp(str, "mmap")){
-			CHOOSE_MODE(printk("mmap not supported by the ubd "
-					   "driver in tt mode\n"),
-				    ubd_do_mmap = 1);
-			return(0);
-		}
-
 		if(!strcmp(str, "sync")){
 			global_openflags = of_sync(global_openflags);
 			return(0);
@@ -524,7 +500,7 @@ static void ubd_handler(void)
 {
 	struct io_thread_req req;
 	struct request *rq = elv_next_request(ubd_queue);
-	int n, err;
+	int n;

 	do_ubd = NULL;
 	intr_count++;
@@ -538,19 +514,6 @@ static void ubd_handler(void)
 		return;
 	}
         
-	if((req.op != UBD_MMAP) &&
-	   ((req.offset != ((__u64) (rq->sector)) << 9) ||
-	    (req.length != (rq->current_nr_sectors) << 9)))
-		panic("I/O op mismatch");
-	
-	if(req.map_fd != -1){
-		err = physmem_subst_mapping(req.buffer, req.map_fd,
-					    req.map_offset, 1);
-		if(err)
-			printk("ubd_handler - physmem_subst_mapping failed, "
-			       "err = %d\n", -err);
-	}
-
 	ubd_finish(rq, req.error);
 	reactivate_fd(thread_fd, UBD_IRQ);	
 	do_ubd_request(ubd_queue);
@@ -583,14 +546,10 @@ static int ubd_file_size(struct ubd *dev, __u64 *size_out)

 static void ubd_close(struct ubd *dev)
 {
-	if(ubd_do_mmap)
-		physmem_forget_descriptor(dev->fd);
 	os_close_file(dev->fd);
 	if(dev->cow.file == NULL)
 		return;

-	if(ubd_do_mmap)
-		physmem_forget_descriptor(dev->cow.fd);
 	os_close_file(dev->cow.fd);
 	vfree(dev->cow.bitmap);
 	dev->cow.bitmap = NULL;
@@ -1010,94 +969,13 @@ static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
 			   req->bitmap_words, bitmap_len);
 }

-static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset)
-{
-	__u64 sector;
-	unsigned char *bitmap;
-	int bit, i;
-
-	/* mmap must have been requested on the command line */
-	if(!ubd_do_mmap)
-		return(-1);
-
-	/* The buffer must be page aligned */
-	if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0)
-		return(-1);
-
-	/* The request must be a page long */
-	if((req->current_nr_sectors << 9) != PAGE_SIZE)
-		return(-1);
-
-	if(dev->cow.file == NULL)
-		return(dev->fd);
-
-	sector = offset >> 9;
-	bitmap = (unsigned char *) dev->cow.bitmap;
-	bit = ubd_test_bit(sector, bitmap);
-
-	for(i = 1; i < req->current_nr_sectors; i++){
-		if(ubd_test_bit(sector + i, bitmap) != bit)
-			return(-1);
-	}
-
-	if(bit || (rq_data_dir(req) == WRITE))
-		offset += dev->cow.data_offset;
-
-	/* The data on disk must be page aligned */
-	if((offset % UBD_MMAP_BLOCK_SIZE) != 0)
-		return(-1);
-
-	return(bit ? dev->fd : dev->cow.fd);
-}
-
-static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset,
-				struct request *req,
-				struct io_thread_req *io_req)
-{
-	int err;
-
-	if(rq_data_dir(req) == WRITE){
-		/* Writes are almost no-ops since the new data is already in the
-		 * host page cache
-		 */
-		dev->map_writes++;
-		if(dev->cow.file != NULL)
-			cowify_bitmap(io_req->offset, io_req->length,
-				      &io_req->sector_mask, &io_req->cow_offset,
-				      dev->cow.bitmap, dev->cow.bitmap_offset,
-				      io_req->bitmap_words,
-				      dev->cow.bitmap_len);
-	}
-	else {
-		int w;
-
-		if((dev->cow.file != NULL) && (fd == dev->cow.fd))
-			w = 0;
-		else w = dev->openflags.w;
-
-		if((dev->cow.file != NULL) && (fd == dev->fd))
-			offset += dev->cow.data_offset;
-
-		err = physmem_subst_mapping(req->buffer, fd, offset, w);
-		if(err){
-			printk("physmem_subst_mapping failed, err = %d\n",
-			       -err);
-			return(1);
-		}
-		dev->map_reads++;
-	}
-	io_req->op = UBD_MMAP;
-	io_req->buffer = req->buffer;
-	return(0);
-}
-
 /* Called with ubd_io_lock held */
 static int prepare_request(struct request *req, struct io_thread_req *io_req)
 {
 	struct gendisk *disk = req->rq_disk;
 	struct ubd *dev = disk->private_data;
 	__u64 offset;
-	int len, fd;
+	int len;

 	if(req->rq_status == RQ_INACTIVE) return(1);

@@ -1114,34 +992,12 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req)

 	io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
 	io_req->fds[1] = dev->fd;
-	io_req->map_fd = -1;
 	io_req->cow_offset = -1;
 	io_req->offset = offset;
 	io_req->length = len;
 	io_req->error = 0;
 	io_req->sector_mask = 0;

-	fd = mmap_fd(req, dev, io_req->offset);
-	if(fd > 0){
-		/* If mmapping is otherwise OK, but the first access to the
-		 * page is a write, then it's not mapped in yet.  So we have
-		 * to write the data to disk first, then we can map the disk
-		 * page in and continue normally from there.
-		 */
-		if((rq_data_dir(req) == WRITE) && !is_remapped(req->buffer)){
-			io_req->map_fd = dev->fd;
-			io_req->map_offset = io_req->offset +
-				dev->cow.data_offset;
-			dev->write_maps++;
-		}
-		else return(prepare_mmap_request(dev, fd, io_req->offset, req,
-						 io_req));
-	}
-
-	if(rq_data_dir(req) == READ)
-		dev->nomap_reads++;
-	else dev->nomap_writes++;
-
 	io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
 	io_req->offsets[0] = 0;
 	io_req->offsets[1] = dev->cow.data_offset;
@@ -1229,143 +1085,6 @@ static int ubd_ioctl(struct inode * inode, struct file * file,
 	return(-EINVAL);
 }

-static int ubd_check_remapped(int fd, unsigned long address, int is_write,
-			      __u64 offset)
-{
-	__u64 bitmap_offset;
-	unsigned long new_bitmap[2];
-	int i, err, n;
-
-	/* If it's not a write access, we can't do anything about it */
-	if(!is_write)
-		return(0);
-
-	/* We have a write */
-	for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){
-		struct ubd *dev = &ubd_dev[i];
-
-		if((dev->fd != fd) && (dev->cow.fd != fd))
-			continue;
-
-		/* It's a write to a ubd device */
-
-		/* This should be impossible now */
-		if(!dev->openflags.w){
-			/* It's a write access on a read-only device - probably
-			 * shouldn't happen.  If the kernel is trying to change
-			 * something with no intention of writing it back out,
-			 * then this message will clue us in that this needs
-			 * fixing
-			 */
-			printk("Write access to mapped page from readonly ubd "
-			       "device %d\n", i);
-			return(0);
-		}
-
-		/* It's a write to a writeable ubd device - it must be COWed
-		 * because, otherwise, the page would have been mapped in
-		 * writeable
-		 */
-
-		if(!dev->cow.file)
-			panic("Write fault on writeable non-COW ubd device %d",
-			      i);
-
-		/* It should also be an access to the backing file since the
-		 * COW pages should be mapped in read-write
-		 */
-
-		if(fd == dev->fd)
-			panic("Write fault on a backing page of ubd "
-			      "device %d\n", i);
-
-		/* So, we do the write, copying the backing data to the COW
-		 * file...
-		 */
-
-		err = os_seek_file(dev->fd, offset + dev->cow.data_offset);
-		if(err < 0)
-			panic("Couldn't seek to %lld in COW file of ubd "
-			      "device %d, err = %d",
-			      offset + dev->cow.data_offset, i, -err);
-
-		n = os_write_file(dev->fd, (void *) address, PAGE_SIZE);
-		if(n != PAGE_SIZE)
-			panic("Couldn't copy data to COW file of ubd "
-			      "device %d, err = %d", i, -n);
-
-		/* ... updating the COW bitmap... */
-
-		cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset,
-			      dev->cow.bitmap, dev->cow.bitmap_offset,
-			      new_bitmap, dev->cow.bitmap_len);
-
-		err = os_seek_file(dev->fd, bitmap_offset);
-		if(err < 0)
-			panic("Couldn't seek to %lld in COW file of ubd "
-			      "device %d, err = %d", bitmap_offset, i, -err);
-
-		n = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap));
-		if(n != sizeof(new_bitmap))
-			panic("Couldn't update bitmap  of ubd device %d, "
-			      "err = %d", i, -n);
-
-		/* Maybe we can map the COW page in, and maybe we can't.  If
-		 * it is a pre-V3 COW file, we can't, since the alignment will
-		 * be wrong.  If it is a V3 or later COW file which has been
-		 * moved to a system with a larger page size, then maybe we
-		 * can't, depending on the exact location of the page.
-		 */
-
-		offset += dev->cow.data_offset;
-
-		/* Remove the remapping, putting the original anonymous page
-		 * back.  If the COW file can be mapped in, that is done.
-		 * Otherwise, the COW page is read in.
-		 */
-
-		if(!physmem_remove_mapping((void *) address))
-			panic("Address 0x%lx not remapped by ubd device %d",
-			      address, i);
-		if((offset % UBD_MMAP_BLOCK_SIZE) == 0)
-			physmem_subst_mapping((void *) address, dev->fd,
-					      offset, 1);
-		else {
-			err = os_seek_file(dev->fd, offset);
-			if(err < 0)
-				panic("Couldn't seek to %lld in COW file of "
-				      "ubd device %d, err = %d", offset, i,
-				      -err);
-
-			n = os_read_file(dev->fd, (void *) address, PAGE_SIZE);
-			if(n != PAGE_SIZE)
-				panic("Failed to read page from offset %llx of "
-				      "COW file of ubd device %d, err = %d",
-				      offset, i, -n);
-		}
-
-		return(1);
-	}
-
-	/* It's not a write on a ubd device */
-	return(0);
-}
-
-static struct remapper ubd_remapper = {
-	.list	= LIST_HEAD_INIT(ubd_remapper.list),
-	.proc	= ubd_check_remapped,
-};
-
-static int ubd_remapper_setup(void)
-{
-	if(ubd_do_mmap)
-		register_remapper(&ubd_remapper);
-
-	return(0);
-}
-
-__initcall(ubd_remapper_setup);
-
 static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
 static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
 {
 {
 	struct uml_stat buf1, buf2;
 	struct uml_stat buf1, buf2;
@@ -1568,15 +1287,6 @@ void do_io(struct io_thread_req *req)
 	int err;
 	int err;
 	__u64 off;
 	__u64 off;
 
 
-	if(req->op == UBD_MMAP){
-		/* Touch the page to force the host to do any necessary IO to
-		 * get it into memory
-		 */
-		n = *((volatile int *) req->buffer);
-		req->error = update_bitmap(req);
-		return;
-	}
-
 	nsectors = req->length / req->sectorsize;
 	nsectors = req->length / req->sectorsize;
 	start = 0;
 	start = 0;
 	do {
 	do {

+ 1 - 1
arch/um/include/sysdep-i386/ptrace.h

@@ -31,7 +31,6 @@ extern int sysemu_supported;
 #ifdef UML_CONFIG_MODE_SKAS
 #ifdef UML_CONFIG_MODE_SKAS
 
 
 #include "skas_ptregs.h"
 #include "skas_ptregs.h"
-#include "sysdep/faultinfo.h"
 
 
 #define REGS_IP(r) ((r)[HOST_IP])
 #define REGS_IP(r) ((r)[HOST_IP])
 #define REGS_SP(r) ((r)[HOST_SP])
 #define REGS_SP(r) ((r)[HOST_SP])
@@ -59,6 +58,7 @@ extern int sysemu_supported;
 #define PTRACE_SYSEMU_SINGLESTEP 32
 #define PTRACE_SYSEMU_SINGLESTEP 32
 #endif
 #endif
 
 
+#include "sysdep/faultinfo.h"
 #include "choose-mode.h"
 #include "choose-mode.h"
 
 
 union uml_pt_regs {
 union uml_pt_regs {

+ 10 - 16
arch/um/include/sysdep-x86_64/checksum.h

@@ -9,8 +9,6 @@
 #include "linux/in6.h"
 #include "linux/in6.h"
 #include "asm/uaccess.h"
 #include "asm/uaccess.h"
 
 
-extern unsigned int csum_partial_copy_from(const unsigned char *src, unsigned char *dst, int len,
-					   int sum, int *err_ptr);
 extern unsigned csum_partial(const unsigned char *buff, unsigned len,
 extern unsigned csum_partial(const unsigned char *buff, unsigned len,
                              unsigned sum);
                              unsigned sum);
 
 
@@ -31,10 +29,15 @@ unsigned int csum_partial_copy_nocheck(const unsigned char *src, unsigned char *
 }
 }
 
 
 static __inline__
 static __inline__
-unsigned int csum_partial_copy_from_user(const unsigned char *src, unsigned char *dst,
-					 int len, int sum, int *err_ptr)
+unsigned int csum_partial_copy_from_user(const unsigned char *src,
+                                         unsigned char *dst, int len, int sum,
+                                         int *err_ptr)
 {
 {
-	return csum_partial_copy_from(src, dst, len, sum, err_ptr);
+        if(copy_from_user(dst, src, len)){
+                *err_ptr = -EFAULT;
+                return(-1);
+        }
+        return csum_partial(dst, len, sum);
 }
 }
 
 
 /**
 /**
@@ -137,15 +140,6 @@ static inline unsigned add32_with_carry(unsigned a, unsigned b)
         return a;
         return a;
 }
 }
 
 
-#endif
+extern unsigned short ip_compute_csum(unsigned char * buff, int len);
 
 
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
+#endif
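
The new csum_partial_copy_from_user() drops the dedicated copy-and-checksum primitive: it copies the user buffer first, then checksums the kernel copy, returning -1 and setting *err_ptr on a fault. A standalone sketch of that copy-then-checksum pattern; the csum16() helper mimics an Internet-style one's-complement sum, and the names, sizes and plain memcpy() are illustrative, not the kernel API:

/* Copy first, then checksum the local copy, as the new inline does. */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static uint16_t csum16(const void *buf, size_t len)
{
	const uint8_t *p = buf;
	uint32_t sum = 0;

	while (len > 1) {			/* sum 16-bit words */
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)				/* odd trailing byte */
		sum += (uint32_t)p[0] << 8;
	while (sum >> 16)			/* fold the carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	uint8_t user_src[64], kernel_dst[64];

	memset(user_src, 0xab, sizeof(user_src));
	/* stands in for the copy_from_user() step */
	memcpy(kernel_dst, user_src, sizeof(kernel_dst));
	printf("csum = 0x%04x\n", csum16(kernel_dst, sizeof(kernel_dst)));
	return 0;
}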

+ 26 - 36
arch/um/include/sysdep-x86_64/ptrace.h

@@ -135,6 +135,7 @@ extern int mode_tt;
 	__CHOOSE_MODE(SC_EFLAGS(UPT_SC(r)), REGS_EFLAGS((r)->skas.regs))
 	__CHOOSE_MODE(SC_EFLAGS(UPT_SC(r)), REGS_EFLAGS((r)->skas.regs))
 #define UPT_SC(r) ((r)->tt.sc)
 #define UPT_SC(r) ((r)->tt.sc)
 #define UPT_SYSCALL_NR(r) __CHOOSE_MODE((r)->tt.syscall, (r)->skas.syscall)
 #define UPT_SYSCALL_NR(r) __CHOOSE_MODE((r)->tt.syscall, (r)->skas.syscall)
+#define UPT_SYSCALL_RET(r) UPT_RAX(r)
 
 
 extern int user_context(unsigned long sp);
 extern int user_context(unsigned long sp);
 
 
@@ -196,32 +197,32 @@ struct syscall_args {
 
 
 
 
 #define UPT_SET(regs, reg, val) \
 #define UPT_SET(regs, reg, val) \
-        ({      unsigned long val; \
+        ({      unsigned long __upt_val = val; \
                 switch(reg){ \
                 switch(reg){ \
-		case R8: UPT_R8(regs) = val; break; \
-		case R9: UPT_R9(regs) = val; break; \
-		case R10: UPT_R10(regs) = val; break; \
-		case R11: UPT_R11(regs) = val; break; \
-		case R12: UPT_R12(regs) = val; break; \
-		case R13: UPT_R13(regs) = val; break; \
-		case R14: UPT_R14(regs) = val; break; \
-		case R15: UPT_R15(regs) = val; break; \
-                case RIP: UPT_IP(regs) = val; break; \
-                case RSP: UPT_SP(regs) = val; break; \
-                case RAX: UPT_RAX(regs) = val; break; \
-                case RBX: UPT_RBX(regs) = val; break; \
-                case RCX: UPT_RCX(regs) = val; break; \
-                case RDX: UPT_RDX(regs) = val; break; \
-                case RSI: UPT_RSI(regs) = val; break; \
-                case RDI: UPT_RDI(regs) = val; break; \
-                case RBP: UPT_RBP(regs) = val; break; \
-                case ORIG_RAX: UPT_ORIG_RAX(regs) = val; break; \
-                case CS: UPT_CS(regs) = val; break; \
-                case DS: UPT_DS(regs) = val; break; \
-                case ES: UPT_ES(regs) = val; break; \
-                case FS: UPT_FS(regs) = val; break; \
-                case GS: UPT_GS(regs) = val; break; \
-                case EFLAGS: UPT_EFLAGS(regs) = val; break; \
+                case R8: UPT_R8(regs) = __upt_val; break; \
+                case R9: UPT_R9(regs) = __upt_val; break; \
+                case R10: UPT_R10(regs) = __upt_val; break; \
+                case R11: UPT_R11(regs) = __upt_val; break; \
+                case R12: UPT_R12(regs) = __upt_val; break; \
+                case R13: UPT_R13(regs) = __upt_val; break; \
+                case R14: UPT_R14(regs) = __upt_val; break; \
+                case R15: UPT_R15(regs) = __upt_val; break; \
+                case RIP: UPT_IP(regs) = __upt_val; break; \
+                case RSP: UPT_SP(regs) = __upt_val; break; \
+                case RAX: UPT_RAX(regs) = __upt_val; break; \
+                case RBX: UPT_RBX(regs) = __upt_val; break; \
+                case RCX: UPT_RCX(regs) = __upt_val; break; \
+                case RDX: UPT_RDX(regs) = __upt_val; break; \
+                case RSI: UPT_RSI(regs) = __upt_val; break; \
+                case RDI: UPT_RDI(regs) = __upt_val; break; \
+                case RBP: UPT_RBP(regs) = __upt_val; break; \
+                case ORIG_RAX: UPT_ORIG_RAX(regs) = __upt_val; break; \
+                case CS: UPT_CS(regs) = __upt_val; break; \
+                case DS: UPT_DS(regs) = __upt_val; break; \
+                case ES: UPT_ES(regs) = __upt_val; break; \
+                case FS: UPT_FS(regs) = __upt_val; break; \
+                case GS: UPT_GS(regs) = __upt_val; break; \
+                case EFLAGS: UPT_EFLAGS(regs) = __upt_val; break; \
                 default :  \
                 default :  \
                         panic("Bad register in UPT_SET : %d\n", reg);  \
                         panic("Bad register in UPT_SET : %d\n", reg);  \
 			break; \
 			break; \
@@ -245,14 +246,3 @@ struct syscall_args {
         CHOOSE_MODE((&(r)->tt.faultinfo), (&(r)->skas.faultinfo))
         CHOOSE_MODE((&(r)->tt.faultinfo), (&(r)->skas.faultinfo))
 
 
 #endif
 #endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
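
The only functional change in this hunk is the temporary's name, but it matters: the old statement expression declared a local also called val, which after macro substitution shadowed whatever variable the caller passed, so the register ended up assigned an uninitialized value. A minimal standalone sketch of the pitfall, using hypothetical macros rather than the kernel's UPT_SET():

#include <stdio.h>

/* Broken: the local declared inside the statement expression takes the
 * caller's identifier and shadows it, so 'out' gets an uninitialized value
 * (gcc warns with -Wshadow / -Wuninitialized). */
#define SET_BROKEN(out, val)	({ unsigned long val; (out) = val; })

/* Fixed, same idea as __upt_val above: copy the argument into a uniquely
 * named temporary first. */
#define SET_FIXED(out, val)	({ unsigned long __tmp = (val); (out) = __tmp; })

int main(void)
{
	unsigned long src = 42, a = 0, b = 0;

	SET_BROKEN(a, src);	/* expands to { unsigned long src; a = src; } */
	SET_FIXED(b, src);	/* b == 42 as intended */
	printf("a=%lu (indeterminate) b=%lu\n", a, b);
	return 0;
}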

+ 1 - 1
arch/um/kernel/Makefile

@@ -14,7 +14,7 @@ obj-y = config.o exec_kern.o exitcode.o \
 	tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o umid.o \
 	tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o umid.o \
 	user_util.o
 	user_util.o
 
 
-obj-$(CONFIG_BLK_DEV_INITRD) += initrd_kern.o initrd_user.o
+obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
 obj-$(CONFIG_GPROF)	+= gprof_syms.o
 obj-$(CONFIG_GPROF)	+= gprof_syms.o
 obj-$(CONFIG_GCOV)	+= gmon_syms.o
 obj-$(CONFIG_GCOV)	+= gmon_syms.o
 obj-$(CONFIG_TTY_LOG)	+= tty_log.o
 obj-$(CONFIG_TTY_LOG)	+= tty_log.o

+ 0 - 0
arch/um/kernel/checksum.c


+ 78 - 0
arch/um/kernel/initrd.c

@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/init.h"
+#include "linux/bootmem.h"
+#include "linux/initrd.h"
+#include "asm/types.h"
+#include "user_util.h"
+#include "kern_util.h"
+#include "initrd.h"
+#include "init.h"
+#include "os.h"
+
+/* Changed by uml_initrd_setup, which is a setup */
+static char *initrd __initdata = NULL;
+
+static int __init read_initrd(void)
+{
+	void *area;
+	long long size;
+	int err;
+
+	if(initrd == NULL) return 0;
+	err = os_file_size(initrd, &size);
+	if(err) return 0;
+	area = alloc_bootmem(size);
+	if(area == NULL) return 0;
+	if(load_initrd(initrd, area, size) == -1) return 0;
+	initrd_start = (unsigned long) area;
+	initrd_end = initrd_start + size;
+	return 0;
+}
+
+__uml_postsetup(read_initrd);
+
+static int __init uml_initrd_setup(char *line, int *add)
+{
+	initrd = line;
+	return 0;
+}
+
+__uml_setup("initrd=", uml_initrd_setup,
+"initrd=<initrd image>\n"
+"    This is used to boot UML from an initrd image.  The argument is the\n"
+"    name of the file containing the image.\n\n"
+);
+
+int load_initrd(char *filename, void *buf, int size)
+{
+	int fd, n;
+
+	fd = os_open_file(filename, of_read(OPENFLAGS()), 0);
+	if(fd < 0){
+		printk("Opening '%s' failed - err = %d\n", filename, -fd);
+		return(-1);
+	}
+	n = os_read_file(fd, buf, size);
+	if(n != size){
+		printk("Read of %d bytes from '%s' failed, err = %d\n", size,
+		       filename, -n);
+		return(-1);
+	}
+
+	os_close_file(fd);
+	return(0);
+}
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
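
With the two initrd files folded into one, usage is unchanged: UML is booted with something like "./linux ubd0=root_fs initrd=initrd.img" (file names illustrative), and read_initrd() then pulls the image into bootmem via the __uml_postsetup hook before init runs.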

+ 8 - 2
arch/um/kernel/irq_user.c

@@ -236,9 +236,15 @@ static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
 				       (*prev)->fd, pollfds[i].fd);
 				       (*prev)->fd, pollfds[i].fd);
 				goto out;
 				goto out;
 			}
 			}
-			memcpy(&pollfds[i], &pollfds[i + 1],
-			       (pollfds_num - i - 1) * sizeof(pollfds[0]));
+
 			pollfds_num--;
 			pollfds_num--;
+
+			/* This moves the *whole* array after pollfds[i] (though
+			 * it may not look like it, since pollfds_num was
+			 * already decremented above)! */
+
+			memmove(&pollfds[i], &pollfds[i + 1],
+			       (pollfds_num - i) * sizeof(pollfds[0]));
+
 			if(last_irq_ptr == &old_fd->next) 
 			if(last_irq_ptr == &old_fd->next) 
 				last_irq_ptr = prev;
 				last_irq_ptr = prev;
 			*prev = (*prev)->next;
 			*prev = (*prev)->next;
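
Two things change in this hunk: the element count is taken after pollfds_num has been decremented, and memcpy() becomes memmove(), which is required because source and destination overlap when the tail of the array slides down by one slot. A standalone sketch of the same remove-one-entry pattern (array contents and sizes are illustrative):

#include <string.h>
#include <stdio.h>

#define N 5

int main(void)
{
	int fds[N] = { 10, 11, 12, 13, 14 };
	int num = N;
	int i = 1;			/* remove fds[1] */

	num--;				/* one fewer live entry */
	/* the regions overlap, so memmove() is required; memcpy() is not safe */
	memmove(&fds[i], &fds[i + 1], (num - i) * sizeof(fds[0]));

	for (i = 0; i < num; i++)
		printf("%d ", fds[i]);	/* prints: 10 12 13 14 */
	printf("\n");
	return 0;
}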

+ 1 - 0
arch/um/kernel/ksyms.c

@@ -57,6 +57,7 @@ EXPORT_SYMBOL(copy_to_user_tt);
 EXPORT_SYMBOL(strncpy_from_user_skas);
 EXPORT_SYMBOL(strncpy_from_user_skas);
 EXPORT_SYMBOL(copy_to_user_skas);
 EXPORT_SYMBOL(copy_to_user_skas);
 EXPORT_SYMBOL(copy_from_user_skas);
 EXPORT_SYMBOL(copy_from_user_skas);
+EXPORT_SYMBOL(clear_user_skas);
 #endif
 #endif
 EXPORT_SYMBOL(uml_strdup);
 EXPORT_SYMBOL(uml_strdup);
 
 

+ 31 - 9
arch/um/kernel/mem.c

@@ -100,12 +100,37 @@ void mem_init(void)
 #endif
 #endif
 }
 }
 
 
+/*
+ * Create a page table and place a pointer to it in a middle page
+ * directory entry.
+ */
+static void __init one_page_table_init(pmd_t *pmd)
+{
+	if (pmd_none(*pmd)) {
+		pte_t *pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+		set_pmd(pmd, __pmd(_KERNPG_TABLE +
+					   (unsigned long) __pa(pte)));
+		if (pte != pte_offset_kernel(pmd, 0))
+			BUG();
+	}
+}
+
+static void __init one_md_table_init(pud_t *pud)
+{
+#ifdef CONFIG_3_LEVEL_PGTABLES
+	pmd_t *pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+	set_pud(pud, __pud(_KERNPG_TABLE + (unsigned long) __pa(pmd_table)));
+	if (pmd_table != pmd_offset(pud, 0))
+		BUG();
+#endif
+}
+
 static void __init fixrange_init(unsigned long start, unsigned long end, 
 static void __init fixrange_init(unsigned long start, unsigned long end, 
 				 pgd_t *pgd_base)
 				 pgd_t *pgd_base)
 {
 {
 	pgd_t *pgd;
 	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pmd_t *pmd;
-	pte_t *pte;
 	int i, j;
 	int i, j;
 	unsigned long vaddr;
 	unsigned long vaddr;
 
 
@@ -115,15 +140,12 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
 	pgd = pgd_base + i;
 	pgd = pgd_base + i;
 
 
 	for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
 	for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) {
-		pmd = (pmd_t *)pgd;
+		pud = pud_offset(pgd, vaddr);
+		if (pud_none(*pud))
+			one_md_table_init(pud);
+		pmd = pmd_offset(pud, vaddr);
 		for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
 		for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
-			if (pmd_none(*pmd)) {
-				pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-				set_pmd(pmd, __pmd(_KERNPG_TABLE + 
-						   (unsigned long) __pa(pte)));
-				if (pte != pte_offset_kernel(pmd, 0))
-					BUG();
-			}
+			one_page_table_init(pmd);
 			vaddr += PMD_SIZE;
 			vaddr += PMD_SIZE;
 		}
 		}
 		j = 0;
 		j = 0;
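
The rework splits fixrange_init() into per-level helpers and walks pgd, then pud (a real table only when CONFIG_3_LEVEL_PGTABLES is set, otherwise folded back onto the pgd entry), then pmd. For orientation, a standalone sketch of how a virtual address decomposes into the per-level indices such a walk consumes; the shift and fan-out constants below are illustrative for a 4K-page, 512-entry layout, not UML's actual values:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21
#define PUD_SHIFT	30
#define PGDIR_SHIFT	39
#define PTRS_PER	512UL

int main(void)
{
	unsigned long vaddr = 0x7f1234567000UL;	/* arbitrary example address */

	printf("pgd %lu pud %lu pmd %lu pte %lu\n",
	       (vaddr >> PGDIR_SHIFT) & (PTRS_PER - 1),
	       (vaddr >> PUD_SHIFT)   & (PTRS_PER - 1),
	       (vaddr >> PMD_SHIFT)   & (PTRS_PER - 1),
	       (vaddr >> PAGE_SHIFT)  & (PTRS_PER - 1));
	return 0;
}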

+ 3 - 3
arch/um/kernel/ptrace.c

@@ -28,9 +28,9 @@ static inline void set_singlestepping(struct task_struct *child, int on)
         child->thread.singlestep_syscall = 0;
         child->thread.singlestep_syscall = 0;
 
 
 #ifdef SUBARCH_SET_SINGLESTEPPING
 #ifdef SUBARCH_SET_SINGLESTEPPING
-        SUBARCH_SET_SINGLESTEPPING(child, on)
+        SUBARCH_SET_SINGLESTEPPING(child, on);
 #endif
 #endif
-                }
+}
 
 
 /*
 /*
  * Called by kernel/ptrace.c when detaching..
  * Called by kernel/ptrace.c when detaching..
@@ -83,7 +83,7 @@ long sys_ptrace(long request, long pid, long addr, long data)
 	}
 	}
 
 
 #ifdef SUBACH_PTRACE_SPECIAL
 #ifdef SUBACH_PTRACE_SPECIAL
-        SUBARCH_PTRACE_SPECIAL(child,request,addr,data)
+        SUBARCH_PTRACE_SPECIAL(child,request,addr,data);
 #endif
 #endif
 
 
 	ret = ptrace_check_attach(child, request == PTRACE_KILL);
 	ret = ptrace_check_attach(child, request == PTRACE_KILL);

+ 4 - 32
arch/um/kernel/trap_kern.c

@@ -57,10 +57,11 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 	*code_out = SEGV_ACCERR;
 	*code_out = SEGV_ACCERR;
 	if(is_write && !(vma->vm_flags & VM_WRITE)) 
 	if(is_write && !(vma->vm_flags & VM_WRITE)) 
 		goto out;
 		goto out;
+
+        if(!(vma->vm_flags & (VM_READ | VM_EXEC)))
+                goto out;
+
 	page = address & PAGE_MASK;
 	page = address & PAGE_MASK;
-	pgd = pgd_offset(mm, page);
-	pud = pud_offset(pgd, page);
-	pmd = pmd_offset(pud, page);
 	do {
 	do {
  survive:
  survive:
 		switch (handle_mm_fault(mm, vma, address, is_write)){
 		switch (handle_mm_fault(mm, vma, address, is_write)){
@@ -106,33 +107,6 @@ out_of_memory:
 	goto out;
 	goto out;
 }
 }
 
 
-LIST_HEAD(physmem_remappers);
-
-void register_remapper(struct remapper *info)
-{
-	list_add(&info->list, &physmem_remappers);
-}
-
-static int check_remapped_addr(unsigned long address, int is_write)
-{
-	struct remapper *remapper;
-	struct list_head *ele;
-	__u64 offset;
-	int fd;
-
-	fd = phys_mapping(__pa(address), &offset);
-	if(fd == -1)
-		return(0);
-
-	list_for_each(ele, &physmem_remappers){
-		remapper = list_entry(ele, struct remapper, list);
-		if((*remapper->proc)(fd, address, is_write, offset))
-			return(1);
-	}
-
-	return(0);
-}
-
 /*
 /*
  * We give a *copy* of the faultinfo in the regs to segv.
  * We give a *copy* of the faultinfo in the regs to segv.
  * This must be done, since nesting SEGVs could overwrite
  * This must be done, since nesting SEGVs could overwrite
@@ -151,8 +125,6 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, void *sc)
                 flush_tlb_kernel_vm();
                 flush_tlb_kernel_vm();
                 return(0);
                 return(0);
         }
         }
-	else if(check_remapped_addr(address & PAGE_MASK, is_write))
-		return(0);
 	else if(current->mm == NULL)
 	else if(current->mm == NULL)
 		panic("Segfault with no mm");
 		panic("Segfault with no mm");
 	err = handle_page_fault(address, ip, is_write, is_user, &si.si_code);
 	err = handle_page_fault(address, ip, is_write, is_user, &si.si_code);

+ 1 - 0
arch/um/kernel/tt/ksyms.c

@@ -12,6 +12,7 @@ EXPORT_SYMBOL(__do_copy_to_user);
 EXPORT_SYMBOL(__do_strncpy_from_user);
 EXPORT_SYMBOL(__do_strncpy_from_user);
 EXPORT_SYMBOL(__do_strnlen_user); 
 EXPORT_SYMBOL(__do_strnlen_user); 
 EXPORT_SYMBOL(__do_clear_user);
 EXPORT_SYMBOL(__do_clear_user);
+EXPORT_SYMBOL(clear_user_tt);
 
 
 EXPORT_SYMBOL(tracing_pid);
 EXPORT_SYMBOL(tracing_pid);
 EXPORT_SYMBOL(honeypot);
 EXPORT_SYMBOL(honeypot);

+ 2 - 0
arch/um/kernel/uml.lds.S

@@ -73,6 +73,8 @@ SECTIONS
 
 
   .got           : { *(.got.plt) *(.got) }
   .got           : { *(.got.plt) *(.got) }
   .dynamic       : { *(.dynamic) }
   .dynamic       : { *(.dynamic) }
+  .tdata	  : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+  .tbss		  : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
   /* We want the small data sections together, so single-instruction offsets
   /* We want the small data sections together, so single-instruction offsets
      can access them all, and initialized data all before uninitialized, so
      can access them all, and initialized data all before uninitialized, so
      we can shorten the on-disk segment size.  */
      we can shorten the on-disk segment size.  */

+ 2 - 2
arch/um/sys-i386/Makefile

@@ -9,11 +9,11 @@ USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o
 
 
 SYMLINKS = bitops.c semaphore.c highmem.c module.c
 SYMLINKS = bitops.c semaphore.c highmem.c module.c
 
 
+include arch/um/scripts/Makefile.rules
+
 bitops.c-dir = lib
 bitops.c-dir = lib
 semaphore.c-dir = kernel
 semaphore.c-dir = kernel
 highmem.c-dir = mm
 highmem.c-dir = mm
 module.c-dir = kernel
 module.c-dir = kernel
 
 
 subdir- := util
 subdir- := util
-
-include arch/um/scripts/Makefile.rules

+ 12 - 4
arch/um/sys-i386/delay.c

@@ -1,5 +1,7 @@
-#include "linux/delay.h"
-#include "asm/param.h"
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <asm/param.h>
 
 
 void __delay(unsigned long time)
 void __delay(unsigned long time)
 {
 {
@@ -20,13 +22,19 @@ void __udelay(unsigned long usecs)
 	int i, n;
 	int i, n;
 
 
 	n = (loops_per_jiffy * HZ * usecs) / MILLION;
 	n = (loops_per_jiffy * HZ * usecs) / MILLION;
-	for(i=0;i<n;i++) ;
+        for(i=0;i<n;i++)
+                cpu_relax();
 }
 }
 
 
+EXPORT_SYMBOL(__udelay);
+
 void __const_udelay(unsigned long usecs)
 void __const_udelay(unsigned long usecs)
 {
 {
 	int i, n;
 	int i, n;
 
 
 	n = (loops_per_jiffy * HZ * usecs) / MILLION;
 	n = (loops_per_jiffy * HZ * usecs) / MILLION;
-	for(i=0;i<n;i++) ;
+        for(i=0;i<n;i++)
+                cpu_relax();
 }
 }
+
+EXPORT_SYMBOL(__const_udelay);
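
As a worked example of the loop count used above: with illustrative values loops_per_jiffy = 50,000 and HZ = 100, __udelay(200) computes n = 50,000 * 100 * 200 / 1,000,000 = 1,000, so a 200 microsecond delay spins through one thousand cpu_relax() iterations.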

+ 2 - 4
arch/um/sys-x86_64/Makefile

@@ -14,11 +14,11 @@ obj-$(CONFIG_MODULES) += module.o um_module.o
 
 
 USER_OBJS := ptrace_user.o sigcontext.o
 USER_OBJS := ptrace_user.o sigcontext.o
 
 
-include arch/um/scripts/Makefile.rules
-
 SYMLINKS = bitops.c csum-copy.S csum-partial.c csum-wrappers.c memcpy.S \
 SYMLINKS = bitops.c csum-copy.S csum-partial.c csum-wrappers.c memcpy.S \
 	semaphore.c thunk.S module.c
 	semaphore.c thunk.S module.c
 
 
+include arch/um/scripts/Makefile.rules
+
 bitops.c-dir = lib
 bitops.c-dir = lib
 csum-copy.S-dir = lib
 csum-copy.S-dir = lib
 csum-partial.c-dir = lib
 csum-partial.c-dir = lib
@@ -28,6 +28,4 @@ semaphore.c-dir = kernel
 thunk.S-dir = lib
 thunk.S-dir = lib
 module.c-dir = kernel
 module.c-dir = kernel
 
 
-CFLAGS_csum-partial.o := -Dcsum_partial=arch_csum_partial
-
 subdir- := util
 subdir- := util

+ 15 - 18
arch/um/sys-x86_64/delay.c

@@ -5,40 +5,37 @@
  * Licensed under the GPL
  * Licensed under the GPL
  */
  */
 
 
-#include "linux/delay.h"
-#include "asm/processor.h"
-#include "asm/param.h"
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <asm/processor.h>
+#include <asm/param.h>
 
 
 void __delay(unsigned long loops)
 void __delay(unsigned long loops)
 {
 {
 	unsigned long i;
 	unsigned long i;
 
 
-	for(i = 0; i < loops; i++) ;
+        for(i = 0; i < loops; i++)
+                cpu_relax();
 }
 }
 
 
 void __udelay(unsigned long usecs)
 void __udelay(unsigned long usecs)
 {
 {
-	int i, n;
+	unsigned long i, n;
 
 
 	n = (loops_per_jiffy * HZ * usecs) / MILLION;
 	n = (loops_per_jiffy * HZ * usecs) / MILLION;
-	for(i=0;i<n;i++) ;
+        for(i=0;i<n;i++)
+                cpu_relax();
 }
 }
 
 
+EXPORT_SYMBOL(__udelay);
+
 void __const_udelay(unsigned long usecs)
 void __const_udelay(unsigned long usecs)
 {
 {
-	int i, n;
+	unsigned long i, n;
 
 
 	n = (loops_per_jiffy * HZ * usecs) / MILLION;
 	n = (loops_per_jiffy * HZ * usecs) / MILLION;
-	for(i=0;i<n;i++) ;
+        for(i=0;i<n;i++)
+                cpu_relax();
 }
 }
 
 
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
+EXPORT_SYMBOL(__const_udelay);
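
Besides the cpu_relax() change, the x86_64 version also widens i and n from int to unsigned long. With illustrative values loops_per_jiffy = 2,500,000, HZ = 1000 and a 1,000 microsecond delay, n works out to 2,500,000 * 1000 * 1000 / 1,000,000 = 2,500,000,000, which no longer fits in a 32-bit int but is held comfortably by the 64-bit unsigned long; that is presumably the motivation for the type change.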

+ 1 - 2
arch/um/sys-x86_64/ksyms.c

@@ -16,5 +16,4 @@ EXPORT_SYMBOL(__up_wakeup);
 EXPORT_SYMBOL(__memcpy);
 EXPORT_SYMBOL(__memcpy);
 
 
 /* Networking helper routines. */
 /* Networking helper routines. */
-/*EXPORT_SYMBOL(csum_partial_copy_from);
-EXPORT_SYMBOL(csum_partial_copy_to);*/
+EXPORT_SYMBOL(ip_compute_csum);

+ 5 - 4
arch/um/sys-x86_64/ptrace.c

@@ -5,10 +5,11 @@
  */
  */
 
 
 #define __FRAME_OFFSETS
 #define __FRAME_OFFSETS
-#include "asm/ptrace.h"
-#include "linux/sched.h"
-#include "linux/errno.h"
-#include "asm/elf.h"
+#include <asm/ptrace.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include <asm/elf.h>
 
 
 /* XXX x86_64 */
 /* XXX x86_64 */
 unsigned long not_ss;
 unsigned long not_ss;

+ 1 - 0
arch/um/sys-x86_64/syscalls.c

@@ -15,6 +15,7 @@
 #include "asm/unistd.h"
 #include "asm/unistd.h"
 #include "asm/prctl.h" /* XXX This should get the constants from libc */
 #include "asm/prctl.h" /* XXX This should get the constants from libc */
 #include "choose-mode.h"
 #include "choose-mode.h"
+#include "kern.h"
 
 
 asmlinkage long sys_uname64(struct new_utsname __user * name)
 asmlinkage long sys_uname64(struct new_utsname __user * name)
 {
 {

+ 8 - 0
arch/um/sys-x86_64/user-offsets.c

@@ -3,6 +3,14 @@
 #include <signal.h>
 #include <signal.h>
 #define __FRAME_OFFSETS
 #define __FRAME_OFFSETS
 #include <asm/ptrace.h>
 #include <asm/ptrace.h>
+#include <asm/types.h>
+/* For some reason, x86_64 defines u64 and u32 only in <pci/types.h>, which I
+ * refuse to include here, even though they're used throughout the headers.
+ * These are used in asm/user.h, and that include can't be avoided because of
+ * the sizeof(struct user_regs_struct) below.
+ */
+typedef __u64 u64;
+typedef __u32 u32;
 #include <asm/user.h>
 #include <asm/user.h>
 
 
 #define DEFINE(sym, val) \
 #define DEFINE(sym, val) \

+ 14 - 0
arch/x86_64/Kconfig

@@ -303,6 +303,20 @@ config HPET_TIMER
 	  as it is off-chip.  You can find the HPET spec at
 	  as it is off-chip.  You can find the HPET spec at
 	  <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
 	  <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
 
 
+config X86_PM_TIMER
+	bool "PM timer"
+	default y
+	help
+	  Support the ACPI PM timer for time keeping. This is slow,
+	  but is useful on some chipsets without HPET on systems with more
+	  than one CPU. On a single processor or single socket multi core
+	  system it is normally not required.
+	  When the PM timer is active 64bit vsyscalls are disabled
+	  and should not be enabled (/proc/sys/kernel/vsyscall64 should
+	  not be changed).
+	  The kernel selects the PM timer only as a last resort, so it is
+	  useful to enable just in case.
+
 config HPET_EMULATE_RTC
 config HPET_EMULATE_RTC
 	bool "Provide RTC interrupt"
 	bool "Provide RTC interrupt"
 	depends on HPET_TIMER && RTC=y
 	depends on HPET_TIMER && RTC=y

+ 30 - 28
arch/x86_64/defconfig

@@ -1,7 +1,7 @@
 #
 #
 # Automatically generated make config: don't edit
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.11-bk7
-# Sat Mar 12 23:43:44 2005
+# Linux kernel version: 2.6.12-rc4
+# Fri May 13 06:39:11 2005
 #
 #
 CONFIG_X86_64=y
 CONFIG_X86_64=y
 CONFIG_64BIT=y
 CONFIG_64BIT=y
@@ -11,8 +11,6 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_X86_CMPXCHG=y
 CONFIG_X86_CMPXCHG=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_EARLY_PRINTK=y
-CONFIG_HPET_TIMER=y
-CONFIG_HPET_EMULATE_RTC=y
 CONFIG_GENERIC_ISA_DMA=y
 CONFIG_GENERIC_ISA_DMA=y
 CONFIG_GENERIC_IOMAP=y
 CONFIG_GENERIC_IOMAP=y
 
 
@@ -22,6 +20,7 @@ CONFIG_GENERIC_IOMAP=y
 CONFIG_EXPERIMENTAL=y
 CONFIG_EXPERIMENTAL=y
 CONFIG_CLEAN_COMPILE=y
 CONFIG_CLEAN_COMPILE=y
 CONFIG_LOCK_KERNEL=y
 CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
 
 
 #
 #
 # General setup
 # General setup
@@ -33,7 +32,6 @@ CONFIG_POSIX_MQUEUE=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_BSD_PROCESS_ACCT is not set
 CONFIG_SYSCTL=y
 CONFIG_SYSCTL=y
 # CONFIG_AUDIT is not set
 # CONFIG_AUDIT is not set
-CONFIG_LOG_BUF_SHIFT=18
 # CONFIG_HOTPLUG is not set
 # CONFIG_HOTPLUG is not set
 CONFIG_KOBJECT_UEVENT=y
 CONFIG_KOBJECT_UEVENT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG=y
@@ -43,10 +41,11 @@ CONFIG_IKCONFIG_PROC=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_PRINTK=y
+CONFIG_BUG=y
 CONFIG_BASE_FULL=y
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
 CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SHMEM=y
 CONFIG_SHMEM=y
 CONFIG_CC_ALIGN_FUNCTIONS=0
 CONFIG_CC_ALIGN_FUNCTIONS=0
 CONFIG_CC_ALIGN_LABELS=0
 CONFIG_CC_ALIGN_LABELS=0
@@ -93,6 +92,9 @@ CONFIG_DISCONTIGMEM=y
 CONFIG_NUMA=y
 CONFIG_NUMA=y
 CONFIG_HAVE_DEC_LOCK=y
 CONFIG_HAVE_DEC_LOCK=y
 CONFIG_NR_CPUS=8
 CONFIG_NR_CPUS=8
+CONFIG_HPET_TIMER=y
+CONFIG_X86_PM_TIMER=y
+CONFIG_HPET_EMULATE_RTC=y
 CONFIG_GART_IOMMU=y
 CONFIG_GART_IOMMU=y
 CONFIG_SWIOTLB=y
 CONFIG_SWIOTLB=y
 CONFIG_X86_MCE=y
 CONFIG_X86_MCE=y
@@ -100,6 +102,7 @@ CONFIG_X86_MCE_INTEL=y
 CONFIG_SECCOMP=y
 CONFIG_SECCOMP=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_ISA_DMA_API=y
 
 
 #
 #
 # Power management options
 # Power management options
@@ -129,7 +132,7 @@ CONFIG_ACPI_NUMA=y
 # CONFIG_ACPI_IBM is not set
 # CONFIG_ACPI_IBM is not set
 CONFIG_ACPI_TOSHIBA=y
 CONFIG_ACPI_TOSHIBA=y
 CONFIG_ACPI_BLACKLIST_YEAR=2001
 CONFIG_ACPI_BLACKLIST_YEAR=2001
-CONFIG_ACPI_DEBUG=y
+# CONFIG_ACPI_DEBUG is not set
 CONFIG_ACPI_BUS=y
 CONFIG_ACPI_BUS=y
 CONFIG_ACPI_EC=y
 CONFIG_ACPI_EC=y
 CONFIG_ACPI_POWER=y
 CONFIG_ACPI_POWER=y
@@ -141,6 +144,7 @@ CONFIG_ACPI_SYSTEM=y
 # CPU Frequency scaling
 # CPU Frequency scaling
 #
 #
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_TABLE=y
 # CONFIG_CPU_FREQ_DEBUG is not set
 # CONFIG_CPU_FREQ_DEBUG is not set
 CONFIG_CPU_FREQ_STAT=y
 CONFIG_CPU_FREQ_STAT=y
 # CONFIG_CPU_FREQ_STAT_DETAILS is not set
 # CONFIG_CPU_FREQ_STAT_DETAILS is not set
@@ -150,7 +154,6 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
 # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
 # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
 CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
-CONFIG_CPU_FREQ_TABLE=y
 
 
 #
 #
 # CPUFreq processor drivers
 # CPUFreq processor drivers
@@ -164,6 +167,7 @@ CONFIG_X86_ACPI_CPUFREQ=y
 # shared options
 # shared options
 #
 #
 CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y
 CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y
+# CONFIG_X86_SPEEDSTEP_LIB is not set
 
 
 #
 #
 # Bus options (PCI etc.)
 # Bus options (PCI etc.)
@@ -172,19 +176,17 @@ CONFIG_PCI=y
 CONFIG_PCI_DIRECT=y
 CONFIG_PCI_DIRECT=y
 CONFIG_PCI_MMCONFIG=y
 CONFIG_PCI_MMCONFIG=y
 CONFIG_UNORDERED_IO=y
 CONFIG_UNORDERED_IO=y
+# CONFIG_PCIEPORTBUS is not set
 CONFIG_PCI_MSI=y
 CONFIG_PCI_MSI=y
 # CONFIG_PCI_LEGACY_PROC is not set
 # CONFIG_PCI_LEGACY_PROC is not set
 # CONFIG_PCI_NAMES is not set
 # CONFIG_PCI_NAMES is not set
+# CONFIG_PCI_DEBUG is not set
 
 
 #
 #
 # PCCARD (PCMCIA/CardBus) support
 # PCCARD (PCMCIA/CardBus) support
 #
 #
 # CONFIG_PCCARD is not set
 # CONFIG_PCCARD is not set
 
 
-#
-# PC-card bridges
-#
-
 #
 #
 # PCI Hotplug Support
 # PCI Hotplug Support
 #
 #
@@ -254,7 +256,7 @@ CONFIG_LBD=y
 # IO Schedulers
 # IO Schedulers
 #
 #
 CONFIG_IOSCHED_NOOP=y
 CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
+# CONFIG_IOSCHED_AS is not set
 CONFIG_IOSCHED_DEADLINE=y
 CONFIG_IOSCHED_DEADLINE=y
 CONFIG_IOSCHED_CFQ=y
 CONFIG_IOSCHED_CFQ=y
 # CONFIG_ATA_OVER_ETH is not set
 # CONFIG_ATA_OVER_ETH is not set
@@ -308,7 +310,8 @@ CONFIG_BLK_DEV_AMD74XX=y
 CONFIG_BLK_DEV_PIIX=y
 CONFIG_BLK_DEV_PIIX=y
 # CONFIG_BLK_DEV_NS87415 is not set
 # CONFIG_BLK_DEV_NS87415 is not set
 # CONFIG_BLK_DEV_PDC202XX_OLD is not set
 # CONFIG_BLK_DEV_PDC202XX_OLD is not set
-# CONFIG_BLK_DEV_PDC202XX_NEW is not set
+CONFIG_BLK_DEV_PDC202XX_NEW=y
+# CONFIG_PDC202XX_FORCE is not set
 # CONFIG_BLK_DEV_SVWKS is not set
 # CONFIG_BLK_DEV_SVWKS is not set
 # CONFIG_BLK_DEV_SIIMAGE is not set
 # CONFIG_BLK_DEV_SIIMAGE is not set
 # CONFIG_BLK_DEV_SIS5513 is not set
 # CONFIG_BLK_DEV_SIS5513 is not set
@@ -353,7 +356,7 @@ CONFIG_BLK_DEV_SD=y
 #
 #
 # SCSI low-level drivers
 # SCSI low-level drivers
 #
 #
-CONFIG_BLK_DEV_3W_XXXX_RAID=y
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
 # CONFIG_SCSI_3W_9XXX is not set
 # CONFIG_SCSI_3W_9XXX is not set
 # CONFIG_SCSI_ACARD is not set
 # CONFIG_SCSI_ACARD is not set
 # CONFIG_SCSI_AACRAID is not set
 # CONFIG_SCSI_AACRAID is not set
@@ -384,7 +387,6 @@ CONFIG_SCSI_SATA_VIA=y
 # CONFIG_SCSI_BUSLOGIC is not set
 # CONFIG_SCSI_BUSLOGIC is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_EATA is not set
 # CONFIG_SCSI_EATA is not set
-# CONFIG_SCSI_EATA_PIO is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
 # CONFIG_SCSI_FUTURE_DOMAIN is not set
 # CONFIG_SCSI_GDTH is not set
 # CONFIG_SCSI_GDTH is not set
 # CONFIG_SCSI_IPS is not set
 # CONFIG_SCSI_IPS is not set
@@ -392,7 +394,6 @@ CONFIG_SCSI_SATA_VIA=y
 # CONFIG_SCSI_INIA100 is not set
 # CONFIG_SCSI_INIA100 is not set
 # CONFIG_SCSI_SYM53C8XX_2 is not set
 # CONFIG_SCSI_SYM53C8XX_2 is not set
 # CONFIG_SCSI_IPR is not set
 # CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_ISP is not set
 # CONFIG_SCSI_QLOGIC_FC is not set
 # CONFIG_SCSI_QLOGIC_FC is not set
 # CONFIG_SCSI_QLOGIC_1280 is not set
 # CONFIG_SCSI_QLOGIC_1280 is not set
 CONFIG_SCSI_QLA2XXX=y
 CONFIG_SCSI_QLA2XXX=y
@@ -401,6 +402,7 @@ CONFIG_SCSI_QLA2XXX=y
 # CONFIG_SCSI_QLA2300 is not set
 # CONFIG_SCSI_QLA2300 is not set
 # CONFIG_SCSI_QLA2322 is not set
 # CONFIG_SCSI_QLA2322 is not set
 # CONFIG_SCSI_QLA6312 is not set
 # CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_LPFC is not set
 # CONFIG_SCSI_DC395x is not set
 # CONFIG_SCSI_DC395x is not set
 # CONFIG_SCSI_DC390T is not set
 # CONFIG_SCSI_DC390T is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DEBUG is not set
@@ -437,7 +439,6 @@ CONFIG_NET=y
 #
 #
 CONFIG_PACKET=y
 CONFIG_PACKET=y
 # CONFIG_PACKET_MMAP is not set
 # CONFIG_PACKET_MMAP is not set
-# CONFIG_NETLINK_DEV is not set
 CONFIG_UNIX=y
 CONFIG_UNIX=y
 # CONFIG_NET_KEY is not set
 # CONFIG_NET_KEY is not set
 CONFIG_INET=y
 CONFIG_INET=y
@@ -502,7 +503,7 @@ CONFIG_NETDEVICES=y
 # CONFIG_DUMMY is not set
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
 # CONFIG_BONDING is not set
 # CONFIG_EQUALIZER is not set
 # CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
+CONFIG_TUN=y
 
 
 #
 #
 # ARCnet devices
 # ARCnet devices
@@ -525,8 +526,7 @@ CONFIG_MII=y
 # CONFIG_HP100 is not set
 # CONFIG_HP100 is not set
 CONFIG_NET_PCI=y
 CONFIG_NET_PCI=y
 # CONFIG_PCNET32 is not set
 # CONFIG_PCNET32 is not set
-CONFIG_AMD8111_ETH=y
-# CONFIG_AMD8111E_NAPI is not set
+# CONFIG_AMD8111_ETH is not set
 # CONFIG_ADAPTEC_STARFIRE is not set
 # CONFIG_ADAPTEC_STARFIRE is not set
 # CONFIG_B44 is not set
 # CONFIG_B44 is not set
 CONFIG_FORCEDETH=y
 CONFIG_FORCEDETH=y
@@ -536,7 +536,7 @@ CONFIG_FORCEDETH=y
 # CONFIG_FEALNX is not set
 # CONFIG_FEALNX is not set
 # CONFIG_NATSEMI is not set
 # CONFIG_NATSEMI is not set
 # CONFIG_NE2K_PCI is not set
 # CONFIG_NE2K_PCI is not set
-CONFIG_8139CP=m
+CONFIG_8139CP=y
 CONFIG_8139TOO=y
 CONFIG_8139TOO=y
 # CONFIG_8139TOO_PIO is not set
 # CONFIG_8139TOO_PIO is not set
 # CONFIG_8139TOO_TUNE_TWISTER is not set
 # CONFIG_8139TOO_TUNE_TWISTER is not set
@@ -671,6 +671,7 @@ CONFIG_SERIAL_8250_NR_UARTS=4
 #
 #
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
 CONFIG_LEGACY_PTY_COUNT=256
@@ -696,6 +697,7 @@ CONFIG_RTC=y
 #
 #
 CONFIG_AGP=y
 CONFIG_AGP=y
 CONFIG_AGP_AMD64=y
 CONFIG_AGP_AMD64=y
+CONFIG_AGP_INTEL=y
 # CONFIG_DRM is not set
 # CONFIG_DRM is not set
 # CONFIG_MWAVE is not set
 # CONFIG_MWAVE is not set
 CONFIG_RAW_DRIVER=y
 CONFIG_RAW_DRIVER=y
@@ -703,7 +705,7 @@ CONFIG_HPET=y
 # CONFIG_HPET_RTC_IRQ is not set
 # CONFIG_HPET_RTC_IRQ is not set
 CONFIG_HPET_MMAP=y
 CONFIG_HPET_MMAP=y
 CONFIG_MAX_RAW_DEVS=256
 CONFIG_MAX_RAW_DEVS=256
-CONFIG_HANGCHECK_TIMER=y
+# CONFIG_HANGCHECK_TIMER is not set
 
 
 #
 #
 # TPM devices
 # TPM devices
@@ -786,6 +788,8 @@ CONFIG_SOUND_ICH=y
 #
 #
 # USB support
 # USB support
 #
 #
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
 CONFIG_USB=y
 CONFIG_USB=y
 # CONFIG_USB_DEBUG is not set
 # CONFIG_USB_DEBUG is not set
 
 
@@ -797,8 +801,6 @@ CONFIG_USB_DEVICEFS=y
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_SUSPEND is not set
 # CONFIG_USB_SUSPEND is not set
 # CONFIG_USB_OTG is not set
 # CONFIG_USB_OTG is not set
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
 
 
 #
 #
 # USB Host Controller Drivers
 # USB Host Controller Drivers
@@ -826,7 +828,6 @@ CONFIG_USB_PRINTER=y
 #
 #
 CONFIG_USB_STORAGE=y
 CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
 # CONFIG_USB_STORAGE_DEBUG is not set
-# CONFIG_USB_STORAGE_RW_DETECT is not set
 # CONFIG_USB_STORAGE_DATAFAB is not set
 # CONFIG_USB_STORAGE_DATAFAB is not set
 # CONFIG_USB_STORAGE_FREECOM is not set
 # CONFIG_USB_STORAGE_FREECOM is not set
 # CONFIG_USB_STORAGE_ISD200 is not set
 # CONFIG_USB_STORAGE_ISD200 is not set
@@ -965,7 +966,7 @@ CONFIG_AUTOFS_FS=y
 # CD-ROM/DVD Filesystems
 # CD-ROM/DVD Filesystems
 #
 #
 CONFIG_ISO9660_FS=y
 CONFIG_ISO9660_FS=y
-# CONFIG_JOLIET is not set
+CONFIG_JOLIET=y
 # CONFIG_ZISOFS is not set
 # CONFIG_ZISOFS is not set
 # CONFIG_UDF_FS is not set
 # CONFIG_UDF_FS is not set
 
 
@@ -1092,9 +1093,10 @@ CONFIG_OPROFILE=y
 #
 #
 # Kernel hacking
 # Kernel hacking
 #
 #
+# CONFIG_PRINTK_TIME is not set
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_MAGIC_SYSRQ=y
-# CONFIG_PRINTK_TIME is not set
+CONFIG_LOG_BUF_SHIFT=18
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_DEBUG_SLAB is not set
 # CONFIG_DEBUG_SLAB is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_SPINLOCK is not set

+ 1 - 0
arch/x86_64/kernel/Makefile

@@ -28,6 +28,7 @@ obj-$(CONFIG_GART_IOMMU)	+= pci-gart.o aperture.o
 obj-$(CONFIG_DUMMY_IOMMU)	+= pci-nommu.o pci-dma.o
 obj-$(CONFIG_DUMMY_IOMMU)	+= pci-nommu.o pci-dma.o
 obj-$(CONFIG_SWIOTLB)		+= swiotlb.o
 obj-$(CONFIG_SWIOTLB)		+= swiotlb.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer.o
 
 
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_MODULES)		+= module.o
 
 

+ 3 - 2
arch/x86_64/kernel/apic.c

@@ -33,6 +33,7 @@
 #include <asm/mpspec.h>
 #include <asm/mpspec.h>
 #include <asm/pgalloc.h>
 #include <asm/pgalloc.h>
 #include <asm/mach_apic.h>
 #include <asm/mach_apic.h>
+#include <asm/nmi.h>
 
 
 int apic_verbosity;
 int apic_verbosity;
 
 
@@ -925,7 +926,7 @@ __init int oem_force_hpet_timer(void)
 	unsigned id;
 	unsigned id;
 	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
 	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
 
 
-	bitmap_empty(clustermap, NUM_APIC_CLUSTERS);
+	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
 
 	for (i = 0; i < NR_CPUS; i++) {
 	for (i = 0; i < NR_CPUS; i++) {
 		id = bios_cpu_apicid[i];
 		id = bios_cpu_apicid[i];
@@ -1056,7 +1057,7 @@ int __init APIC_init_uniprocessor (void)
 		nr_ioapics = 0;
 		nr_ioapics = 0;
 #endif
 #endif
 	setup_boot_APIC_clock();
 	setup_boot_APIC_clock();
-
+	check_nmi_watchdog();
 	return 0;
 	return 0;
 }
 }
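
The one-liner in oem_force_hpet_timer() matters because bitmap_empty() only tests whether the (still uninitialised) clustermap is all zero and throws the answer away, while bitmap_zero() actually clears it. A standalone illustration with simplified single-word stand-ins for the kernel helpers:

#include <stdio.h>

static int  bitmap_empty1(unsigned long map)  { return map == 0; }
static void bitmap_zero1(unsigned long *map)  { *map = 0; }

int main(void)
{
	unsigned long clustermap = 0xdeadbeefUL;	/* pretend stack garbage */

	(void)bitmap_empty1(clustermap);	/* old code: no effect on the map */
	printf("after bitmap_empty: %#lx\n", clustermap);

	bitmap_zero1(&clustermap);		/* new code: map really starts clear */
	printf("after bitmap_zero:  %#lx\n", clustermap);
	return 0;
}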
 
 

+ 9 - 2
arch/x86_64/kernel/entry.S

@@ -220,13 +220,18 @@ sysret_careful:
 	jmp sysret_check
 	jmp sysret_check
 
 
 	/* Handle a signal */ 
 	/* Handle a signal */ 
-	/* edx:	work flags (arg3) */
 sysret_signal:
 sysret_signal:
 	sti
 	sti
+	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+	jz    1f
+
+	/* Really a signal */
+	/* edx:	work flags (arg3) */
 	leaq do_notify_resume(%rip),%rax
 	leaq do_notify_resume(%rip),%rax
 	leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
 	leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
 	xorl %esi,%esi # oldset -> arg2
 	xorl %esi,%esi # oldset -> arg2
 	call ptregscall_common
 	call ptregscall_common
+1:	movl $_TIF_NEED_RESCHED,%edi
 	jmp sysret_check
 	jmp sysret_check
 	
 	
 	/* Do syscall tracing */
 	/* Do syscall tracing */
@@ -484,6 +489,8 @@ retint_careful:
 	jmp retint_check
 	jmp retint_check
 	
 	
 retint_signal:
 retint_signal:
+	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+	jz    retint_swapgs
 	sti
 	sti
 	SAVE_REST
 	SAVE_REST
 	movq $-1,ORIG_RAX(%rsp) 			
 	movq $-1,ORIG_RAX(%rsp) 			
@@ -492,8 +499,8 @@ retint_signal:
 	call do_notify_resume
 	call do_notify_resume
 	RESTORE_REST
 	RESTORE_REST
 	cli
 	cli
+	movl $_TIF_NEED_RESCHED,%edi
 	GET_THREAD_INFO(%rcx)
 	GET_THREAD_INFO(%rcx)
-	movl $_TIF_WORK_MASK,%edi
 	jmp retint_check
 	jmp retint_check
 
 
 #ifdef CONFIG_PREEMPT
 #ifdef CONFIG_PREEMPT

+ 10 - 71
arch/x86_64/kernel/io_apic.c

@@ -42,6 +42,8 @@
 
 
 int sis_apic_bug; /* not actually supported, dummy for compile */
 int sis_apic_bug; /* not actually supported, dummy for compile */
 
 
+static int no_timer_check;
+
 static DEFINE_SPINLOCK(ioapic_lock);
 static DEFINE_SPINLOCK(ioapic_lock);
 
 
 /*
 /*
@@ -1601,7 +1603,7 @@ static inline void check_timer(void)
 		 * Ok, does IRQ0 through the IOAPIC work?
 		 * Ok, does IRQ0 through the IOAPIC work?
 		 */
 		 */
 		unmask_IO_APIC_irq(0);
 		unmask_IO_APIC_irq(0);
-		if (timer_irq_works()) {
+		if (!no_timer_check && timer_irq_works()) {
 			nmi_watchdog_default();
 			nmi_watchdog_default();
 			if (nmi_watchdog == NMI_IO_APIC) {
 			if (nmi_watchdog == NMI_IO_APIC) {
 				disable_8259A_irq(0);
 				disable_8259A_irq(0);
@@ -1671,6 +1673,13 @@ static inline void check_timer(void)
 	panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
 	panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
 }
 }
 
 
+static int __init notimercheck(char *s)
+{
+	no_timer_check = 1;
+	return 1;
+}
+__setup("no_timer_check", notimercheck);
+
 /*
 /*
  *
  *
  * IRQ's that are handled by the PIC in the MPS IOAPIC case.
  * IRQ's that are handled by the PIC in the MPS IOAPIC case.
@@ -1804,76 +1813,6 @@ device_initcall(ioapic_init_sysfs);
 
 
 #define IO_APIC_MAX_ID		0xFE
 #define IO_APIC_MAX_ID		0xFE
 
 
-int __init io_apic_get_unique_id (int ioapic, int apic_id)
-{
-	union IO_APIC_reg_00 reg_00;
-	static physid_mask_t apic_id_map;
-	unsigned long flags;
-	int i = 0;
-
-	/*
-	 * The P4 platform supports up to 256 APIC IDs on two separate APIC 
-	 * buses (one for LAPICs, one for IOAPICs), where predecessors only 
-	 * supports up to 16 on one shared APIC bus.
-	 * 
-	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
-	 *      advantage of new APIC bus architecture.
-	 */
-
-	if (physids_empty(apic_id_map))
-		apic_id_map = phys_cpu_present_map;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(ioapic, 0);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	if (apic_id >= IO_APIC_MAX_ID) {
-		apic_printk(APIC_QUIET, KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
-			"%d\n", ioapic, apic_id, reg_00.bits.ID);
-		apic_id = reg_00.bits.ID;
-	}
-
-	/*
-	 * Every APIC in a system must have a unique ID or we get lots of nice 
-	 * 'stuck on smp_invalidate_needed IPI wait' messages.
-	 */
-	if (physid_isset(apic_id, apic_id_map)) {
-
-		for (i = 0; i < IO_APIC_MAX_ID; i++) {
-			if (!physid_isset(i, apic_id_map))
-				break;
-		}
-
-		if (i == IO_APIC_MAX_ID)
-			panic("Max apic_id exceeded!\n");
-
-		apic_printk(APIC_VERBOSE, KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
-			"trying %d\n", ioapic, apic_id, i);
-
-		apic_id = i;
-	} 
-
-	physid_set(apic_id, apic_id_map);
-
-	if (reg_00.bits.ID != apic_id) {
-		reg_00.bits.ID = apic_id;
-
-		spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(ioapic, 0, reg_00.raw);
-		reg_00.raw = io_apic_read(ioapic, 0);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-
-		/* Sanity check */
-		if (reg_00.bits.ID != apic_id)
-			panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
-	}
-
-	apic_printk(APIC_VERBOSE,KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
-
-	return apic_id;
-}
-
-
 int __init io_apic_get_version (int ioapic)
 int __init io_apic_get_version (int ioapic)
 {
 {
 	union IO_APIC_reg_01	reg_01;
 	union IO_APIC_reg_01	reg_01;
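
The notimercheck() handler and its __setup() registration add a new boot-time flag: passing no_timer_check on the kernel command line sets the variable consulted in check_timer() above, so the timer_irq_works() probe is skipped in that path on machines where it misbehaves.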

+ 15 - 7
arch/x86_64/kernel/mpparse.c

@@ -107,6 +107,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
 static void __init MP_processor_info (struct mpc_config_processor *m)
 static void __init MP_processor_info (struct mpc_config_processor *m)
 {
 {
 	int ver;
 	int ver;
+	static int found_bsp=0;
 
 
 	if (!(m->mpc_cpuflag & CPU_ENABLED))
 	if (!(m->mpc_cpuflag & CPU_ENABLED))
 		return;
 		return;
@@ -126,11 +127,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
 			" Processor ignored.\n", NR_CPUS);
 			" Processor ignored.\n", NR_CPUS);
 		return;
 		return;
 	}
 	}
-	if (num_processors >= maxcpus) {
-		printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
-			" Processor ignored.\n", maxcpus);
-		return;
-	}
 
 
 	num_processors++;
 	num_processors++;
 
 
@@ -150,7 +146,19 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
 		ver = 0x10;
 		ver = 0x10;
 	}
 	}
 	apic_version[m->mpc_apicid] = ver;
 	apic_version[m->mpc_apicid] = ver;
-	bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
+ 	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+ 		/*
+ 		 * bios_cpu_apicid is required to have processors listed
+ 		 * in same order as logical cpu numbers. Hence the first
+ 		 * entry is BSP, and so on.
+ 		 */
+ 		bios_cpu_apicid[0] = m->mpc_apicid;
+ 		x86_cpu_to_apicid[0] = m->mpc_apicid;
+ 		found_bsp = 1;
+ 	} else {
+ 		bios_cpu_apicid[num_processors - found_bsp] = m->mpc_apicid;
+ 		x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid;
+ 	}
 }
 }
 
 
 static void __init MP_bus_info (struct mpc_config_bus *m)
 static void __init MP_bus_info (struct mpc_config_bus *m)
@@ -759,7 +767,7 @@ void __init mp_register_ioapic (
 	mp_ioapics[idx].mpc_apicaddr = address;
 	mp_ioapics[idx].mpc_apicaddr = address;
 
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-	mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
+	mp_ioapics[idx].mpc_apicid = id;
 	mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
 	mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
 	
 	
 	/* 
 	/* 
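
A worked example of the new ordering, with illustrative APIC IDs: if the MP table lists enabled processors with IDs 4, 6 and 0 in that order and the ID-6 entry carries CPU_BOOTPROCESSOR, the code above fills bios_cpu_apicid (and x86_cpu_to_apicid) as {6, 4, 0}: slot 0 is reserved for the BSP, and the application processors keep their table order in the following slots.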

+ 175 - 73
arch/x86_64/kernel/nmi.c

@@ -33,6 +33,7 @@
 #include <asm/msr.h>
 #include <asm/msr.h>
 #include <asm/proto.h>
 #include <asm/proto.h>
 #include <asm/kdebug.h>
 #include <asm/kdebug.h>
+#include <asm/local.h>
 
 
 /*
 /*
  * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
  * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -59,7 +60,8 @@ int panic_on_timeout;
 
 
 unsigned int nmi_watchdog = NMI_DEFAULT;
 unsigned int nmi_watchdog = NMI_DEFAULT;
 static unsigned int nmi_hz = HZ;
 static unsigned int nmi_hz = HZ;
-unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
+static unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
+static unsigned int nmi_p4_cccr_val;
 
 
 /* Note that these events don't tick when the CPU idles. This means
 /* Note that these events don't tick when the CPU idles. This means
    the frequency varies with CPU load. */
    the frequency varies with CPU load. */
@@ -71,61 +73,87 @@ unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
 #define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
 #define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
 
 
-#define P6_EVNTSEL0_ENABLE	(1 << 22)
-#define P6_EVNTSEL_INT		(1 << 20)
-#define P6_EVNTSEL_OS		(1 << 17)
-#define P6_EVNTSEL_USR		(1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
-#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
+#define MSR_P4_MISC_ENABLE	0x1A0
+#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
+#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL	(1<<12)
+#define MSR_P4_PERFCTR0		0x300
+#define MSR_P4_CCCR0		0x360
+#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
+#define P4_ESCR_OS		(1<<3)
+#define P4_ESCR_USR		(1<<2)
+#define P4_CCCR_OVF_PMI0	(1<<26)
+#define P4_CCCR_OVF_PMI1	(1<<27)
+#define P4_CCCR_THRESHOLD(N)	((N)<<20)
+#define P4_CCCR_COMPLEMENT	(1<<19)
+#define P4_CCCR_COMPARE		(1<<18)
+#define P4_CCCR_REQUIRED	(3<<16)
+#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
+#define P4_CCCR_ENABLE		(1<<12)
+/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
+   CRU_ESCR0 (with any non-null event selector) through a complemented
+   max threshold. [IA32-Vol3, Section 14.9.9] */
+#define MSR_P4_IQ_COUNTER0	0x30C
+#define P4_NMI_CRU_ESCR0	(P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
+#define P4_NMI_IQ_CCCR0	\
+	(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|	\
+	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+
+static __init inline int nmi_known_cpu(void)
+{
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		return boot_cpu_data.x86 == 15;
+	case X86_VENDOR_INTEL:
+		return boot_cpu_data.x86 == 15;
+	}
+	return 0;
+}
 
 
 /* Run after command line and cpu_init init, but before all other checks */
 /* Run after command line and cpu_init init, but before all other checks */
 void __init nmi_watchdog_default(void)
 void __init nmi_watchdog_default(void)
 {
 {
 	if (nmi_watchdog != NMI_DEFAULT)
 	if (nmi_watchdog != NMI_DEFAULT)
 		return;
 		return;
-
-	/* For some reason the IO APIC watchdog doesn't work on the AMD
-	   8111 chipset. For now switch to local APIC mode using
-	   perfctr0 there.  On Intel CPUs we don't have code to handle
-	   the perfctr and the IO-APIC seems to work, so use that.  */
-
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
-		nmi_watchdog = NMI_LOCAL_APIC; 
-		printk(KERN_INFO 
-              "Using local APIC NMI watchdog using perfctr0\n");
-	} else {
-		printk(KERN_INFO "Using IO APIC NMI watchdog\n");
+	if (nmi_known_cpu())
+		nmi_watchdog = NMI_LOCAL_APIC;
+	else
 		nmi_watchdog = NMI_IO_APIC;
 		nmi_watchdog = NMI_IO_APIC;
-	}
 }
 }
 
 
-/* Why is there no CPUID flag for this? */
-static __init int cpu_has_lapic(void)
+#ifdef CONFIG_SMP
+/* The performance counters used by NMI_LOCAL_APIC don't trigger when
+ * the CPU is idle. To make sure the NMI watchdog really ticks on all
+ * CPUs during the test make them busy.
+ */
+static __init void nmi_cpu_busy(void *data)
 {
 {
-	switch (boot_cpu_data.x86_vendor) { 
-	case X86_VENDOR_INTEL:
-	case X86_VENDOR_AMD: 
-		return boot_cpu_data.x86 >= 6; 
-	/* .... add more cpus here or find a different way to figure this out. */	
-	default:
-		return 0;
-	} 	
+	volatile int *endflag = data;
+	local_irq_enable();
+	/* Intentionally don't use cpu_relax here. This is
+	   to make sure that the performance counter really ticks,
+	   even if there is a simulator or similar that catches the
+	   pause instruction. On a real HT machine this is fine because
+	   all other CPUs are busy with "useless" delay loops and don't
+	   care if they get somewhat less cycles. */
+	while (*endflag == 0)
+		barrier();
 }
 }
+#endif
 
 
-static int __init check_nmi_watchdog (void)
+int __init check_nmi_watchdog (void)
 {
 {
-	int counts[NR_CPUS];
+	volatile int endflag = 0;
+	int *counts;
 	int cpu;
 	int cpu;
 
 
-	if (nmi_watchdog == NMI_NONE)
-		return 0;
+	counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
+	if (!counts)
+		return -1;
 
 
-	if (nmi_watchdog == NMI_LOCAL_APIC && !cpu_has_lapic())  {
-		nmi_watchdog = NMI_NONE;
-		return -1; 
-	}	
+	printk(KERN_INFO "testing NMI watchdog ... ");
 
 
-	printk(KERN_INFO "Testing NMI watchdog ... ");
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
 
 
 	for (cpu = 0; cpu < NR_CPUS; cpu++)
 	for (cpu = 0; cpu < NR_CPUS; cpu++)
 		counts[cpu] = cpu_pda[cpu].__nmi_count; 
 		counts[cpu] = cpu_pda[cpu].__nmi_count; 
@@ -133,15 +161,22 @@ static int __init check_nmi_watchdog (void)
 	mdelay((10*1000)/nmi_hz); // wait 10 ticks
 	mdelay((10*1000)/nmi_hz); // wait 10 ticks
 
 
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		if (!cpu_online(cpu))
+			continue;
 		if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) {
 		if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) {
-			printk("CPU#%d: NMI appears to be stuck (%d)!\n", 
+			endflag = 1;
+			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
 			       cpu,
 			       cpu,
+			       counts[cpu],
 			       cpu_pda[cpu].__nmi_count);
 			       cpu_pda[cpu].__nmi_count);
 			nmi_active = 0;
 			nmi_active = 0;
 			lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
 			lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
+			nmi_perfctr_msr = 0;
+			kfree(counts);
 			return -1;
 			return -1;
 		}
 		}
 	}
 	}
+	endflag = 1;
 	printk("OK.\n");
 	printk("OK.\n");
 
 
 	/* now that we know it works we can reduce NMI frequency to
 	/* now that we know it works we can reduce NMI frequency to
@@ -149,10 +184,9 @@ static int __init check_nmi_watchdog (void)
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		nmi_hz = 1;
 		nmi_hz = 1;
 
 
+	kfree(counts);
 	return 0;
 	return 0;
 }
 }
-/* Have this called later during boot so counters are updating */
-late_initcall(check_nmi_watchdog);
 
 
 int __init setup_nmi_watchdog(char *str)
 int __init setup_nmi_watchdog(char *str)
 {
 {
@@ -170,7 +204,7 @@ int __init setup_nmi_watchdog(char *str)
 
 
 	if (nmi >= NMI_INVALID)
 	if (nmi >= NMI_INVALID)
 		return 0;
 		return 0;
-		nmi_watchdog = nmi;
+	nmi_watchdog = nmi;
 	return 1;
 	return 1;
 }
 }
 
 
@@ -185,7 +219,10 @@ static void disable_lapic_nmi_watchdog(void)
 		wrmsr(MSR_K7_EVNTSEL0, 0, 0);
 		wrmsr(MSR_K7_EVNTSEL0, 0, 0);
 		break;
 		break;
 	case X86_VENDOR_INTEL:
 	case X86_VENDOR_INTEL:
-		wrmsr(MSR_IA32_EVNTSEL0, 0, 0);
+		if (boot_cpu_data.x86 == 15) {
+			wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
+			wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
+		}
 		break;
 		break;
 	}
 	}
 	nmi_active = -1;
 	nmi_active = -1;
@@ -253,7 +290,7 @@ void enable_timer_nmi_watchdog(void)
 
 
 static int nmi_pm_active; /* nmi_active before suspend */
 static int nmi_pm_active; /* nmi_active before suspend */
 
 
-static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
+static int lapic_nmi_suspend(struct sys_device *dev, u32 state)
 {
 {
 	nmi_pm_active = nmi_active;
 	nmi_pm_active = nmi_active;
 	disable_lapic_nmi_watchdog();
 	disable_lapic_nmi_watchdog();
@@ -300,22 +337,27 @@ late_initcall(init_lapic_nmi_sysfs);
  * Original code written by Keith Owens.
  * Original code written by Keith Owens.
  */
  */
 
 
+static void clear_msr_range(unsigned int base, unsigned int n)
+{
+	unsigned int i;
+
+	for(i = 0; i < n; ++i)
+		wrmsr(base+i, 0, 0);
+}
+
 static void setup_k7_watchdog(void)
 static void setup_k7_watchdog(void)
 {
 {
 	int i;
 	int i;
 	unsigned int evntsel;
 	unsigned int evntsel;
 
 
-	/* No check, so can start with slow frequency */
-	nmi_hz = 1; 
-
-	/* XXX should check these in EFER */
-
 	nmi_perfctr_msr = MSR_K7_PERFCTR0;
 	nmi_perfctr_msr = MSR_K7_PERFCTR0;
 
 
 	for(i = 0; i < 4; ++i) {
 	for(i = 0; i < 4; ++i) {
 		/* Simulator may not support it */
 		/* Simulator may not support it */
-		if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL))
+		if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) {
+			nmi_perfctr_msr = 0;
 			return;
 			return;
+		}
 		wrmsrl(MSR_K7_PERFCTR0+i, 0UL);
 		wrmsrl(MSR_K7_PERFCTR0+i, 0UL);
 	}
 	}
 
 
@@ -325,12 +367,54 @@ static void setup_k7_watchdog(void)
 		| K7_NMI_EVENT;
 		| K7_NMI_EVENT;
 
 
 	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
 	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
-	wrmsrl(MSR_K7_PERFCTR0, -((u64)cpu_khz*1000) / nmi_hz);
+	wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= K7_EVNTSEL_ENABLE;
 	evntsel |= K7_EVNTSEL_ENABLE;
 	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
 	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
 }
 }
 
 
+
+static int setup_p4_watchdog(void)
+{
+	unsigned int misc_enable, dummy;
+
+	rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
+	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
+		return 0;
+
+	nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
+	nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
+#ifdef CONFIG_SMP
+	if (smp_num_siblings == 2)
+		nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
+#endif
+
+	if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
+		clear_msr_range(0x3F1, 2);
+	/* MSR 0x3F0 seems to have a default value of 0xFC00, but current
+	   docs don't fully define it, so leave it alone for now. */
+	if (boot_cpu_data.x86_model >= 0x3) {
+		/* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
+		clear_msr_range(0x3A0, 26);
+		clear_msr_range(0x3BC, 3);
+	} else {
+		clear_msr_range(0x3A0, 31);
+	}
+	clear_msr_range(0x3C0, 6);
+	clear_msr_range(0x3C8, 6);
+	clear_msr_range(0x3E0, 2);
+	clear_msr_range(MSR_P4_CCCR0, 18);
+	clear_msr_range(MSR_P4_PERFCTR0, 18);
+
+	wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
+	wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
+	Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
+	wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
+	return 1;
+}
+
 void setup_apic_nmi_watchdog(void)
 void setup_apic_nmi_watchdog(void)
 {
 {
 	switch (boot_cpu_data.x86_vendor) {
 	switch (boot_cpu_data.x86_vendor) {
@@ -341,6 +425,13 @@ void setup_apic_nmi_watchdog(void)
 			return;
 			return;
 		setup_k7_watchdog();
 		setup_k7_watchdog();
 		break;
 		break;
+	case X86_VENDOR_INTEL:
+		if (boot_cpu_data.x86 != 15)
+			return;
+		if (!setup_p4_watchdog())
+			return;
+		break;
+
 	default:
 	default:
 		return;
 		return;
 	}
 	}
@@ -355,56 +446,67 @@ void setup_apic_nmi_watchdog(void)
  *
  *
  * as these watchdog NMI IRQs are generated on every CPU, we only
  * as these watchdog NMI IRQs are generated on every CPU, we only
  * have to check the current processor.
  * have to check the current processor.
- *
- * since NMIs don't listen to _any_ locks, we have to be extremely
- * careful not to rely on unsafe variables. The printk might lock
- * up though, so we have to break up any console locks first ...
- * [when there will be more tty-related locks, break them up
- *  here too!]
  */
  */
 
 
-static unsigned int
-	last_irq_sums [NR_CPUS],
-	alert_counter [NR_CPUS];
+static DEFINE_PER_CPU(unsigned, last_irq_sum);
+static DEFINE_PER_CPU(local_t, alert_counter);
+static DEFINE_PER_CPU(int, nmi_touch);
 
 
 void touch_nmi_watchdog (void)
 void touch_nmi_watchdog (void)
 {
 {
 	int i;
 	int i;
 
 
 	/*
 	/*
-	 * Just reset the alert counters, (other CPUs might be
-	 * spinning on locks we hold):
+ 	 * Tell other CPUs to reset their alert counters. We cannot
+	 * do it ourselves because the alert count increase is not
+	 * atomic.
 	 */
 	for (i = 0; i < NR_CPUS; i++)
-		alert_counter[i] = 0;
+		per_cpu(nmi_touch, i) = 1;
 }

 void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
 {
-	int sum, cpu;
+	int sum;
+	int touched = 0;
 
-	cpu = safe_smp_processor_id();
 	sum = read_pda(apic_timer_irqs);
-	if (last_irq_sums[cpu] == sum) {
+	if (__get_cpu_var(nmi_touch)) {
+		__get_cpu_var(nmi_touch) = 0;
+		touched = 1;
+	}
+	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
 		/*
 		 * Ayiee, looks like this CPU is stuck ...
 		 * wait a few IRQs (5 seconds) before doing the oops ...
 		 */
-		alert_counter[cpu]++;
-		if (alert_counter[cpu] == 5*nmi_hz) {
+		local_inc(&__get_cpu_var(alert_counter));
+		if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) {
 			if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
 							== NOTIFY_STOP) {
-				alert_counter[cpu] = 0; 
+				local_set(&__get_cpu_var(alert_counter), 0);
 				return;
 			} 
 			die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs);
 		}
 	} else {
-		last_irq_sums[cpu] = sum;
-		alert_counter[cpu] = 0;
+		__get_cpu_var(last_irq_sum) = sum;
+		local_set(&__get_cpu_var(alert_counter), 0);
 	}
-	if (nmi_perfctr_msr)
+	if (nmi_perfctr_msr) {
+ 		if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
+ 			/*
+ 			 * P4 quirks:
+ 			 * - An overflown perfctr will assert its interrupt
+ 			 *   until the OVF flag in its CCCR is cleared.
+ 			 * - LVTPC is masked on interrupt and must be
+ 			 *   unmasked by the LVTPC handler.
+ 			 */
+ 			wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
+ 			apic_write(APIC_LVTPC, APIC_DM_NMI);
+ 		}
 		wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
+	}
 }

 static int dummy_nmi_callback(struct pt_regs * regs, int cpu)

+ 101 - 0
arch/x86_64/kernel/pmtimer.c

@@ -0,0 +1,101 @@
+/* Ported over from i386 by AK, original copyright was:
+ *
+ * (C) Dominik Brodowski <linux@brodo.de> 2003
+ *
+ * Driver to use the Power Management Timer (PMTMR) available in some
+ * southbridges as primary timing source for the Linux kernel.
+ *
+ * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
+ * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
+ *
+ * This file is licensed under the GPL v2.
+ *
+ * Dropped all the hardware bug workarounds for now. Hopefully they
+ * are not needed on 64bit chipsets.
+ */
+
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/cpumask.h>
+#include <asm/io.h>
+#include <asm/proto.h>
+#include <asm/msr.h>
+#include <asm/vsyscall.h>
+
+/* The I/O port the PMTMR resides at.
+ * The location is detected during setup_arch(),
+ * in arch/i386/kernel/acpi/boot.c */
+u32 pmtmr_ioport;
+
+/* value of the Power timer at last timer interrupt */
+static u32 offset_delay;
+static u32 last_pmtmr_tick;
+
+#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
+
+static inline u32 cyc2us(u32 cycles)
+{
+	/* The Power Management Timer ticks at 3.579545 ticks per microsecond.
+	 * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
+	 *
+	 * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
+	 * easily be multiplied with 286 (=0x11E) without having to fear
+	 * u32 overflows.
+	 */
+	cycles *= 286;
+	return (cycles >> 10);
+}
+
+int pmtimer_mark_offset(void)
+{
+	static int first_run = 1;
+	unsigned long tsc;
+	u32 lost;
+
+	u32 tick = inl(pmtmr_ioport);
+	u32 delta;
+
+	delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK);
+
+	last_pmtmr_tick = tick;
+	monotonic_base += delta * NSEC_PER_USEC;
+
+	delta += offset_delay;
+
+	lost = delta / (USEC_PER_SEC / HZ);
+	offset_delay = delta % (USEC_PER_SEC / HZ);
+
+	rdtscll(tsc);
+	vxtime.last_tsc = tsc - offset_delay * cpu_khz;
+
+	/* don't calculate delay for first run,
+	   or if we've got less then a tick */
+	if (first_run || (lost < 1)) {
+		first_run = 0;
+		offset_delay = 0;
+	}
+
+	return lost - 1;
+}
+
+unsigned int do_gettimeoffset_pm(void)
+{
+	u32 now, offset, delta = 0;
+
+	offset = last_pmtmr_tick;
+	now = inl(pmtmr_ioport);
+	delta = (now - offset) & ACPI_PM_MASK;
+
+	return offset_delay + cyc2us(delta);
+}
+
+
+static int __init nopmtimer_setup(char *s)
+{
+	pmtmr_ioport = 0;
+	return 0;
+}
+
+__setup("nopmtimer", nopmtimer_setup);

+ 11 - 6
arch/x86_64/kernel/ptrace.c

@@ -257,13 +257,13 @@ static int putreg(struct task_struct *child,
 			value &= 0xffff;
 			return 0;
 		case offsetof(struct user_regs_struct,fs_base):
-			if (!((value >> 48) == 0 || (value >> 48) == 0xffff))
-				return -EIO; 
+			if (value >= TASK_SIZE)
+				return -EIO;
 			child->thread.fs = value;
 			return 0;
 		case offsetof(struct user_regs_struct,gs_base):
-			if (!((value >> 48) == 0 || (value >> 48) == 0xffff))
-				return -EIO; 
+			if (value >= TASK_SIZE)
+				return -EIO;
 			child->thread.gs = value;
 			return 0;
 		case offsetof(struct user_regs_struct, eflags):
@@ -277,6 +277,11 @@ static int putreg(struct task_struct *child,
 				return -EIO;
 			value &= 0xffff;
 			break;
+		case offsetof(struct user_regs_struct, rip):
+			/* Check if the new RIP address is canonical */
+			if (value >= TASK_SIZE)
+				return -EIO;
+			break;
 	}
 	put_stack_long(child, regno - sizeof(struct pt_regs), value);
 	return 0;
@@ -375,7 +380,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data
 			break;

 		switch (addr) { 
-		case 0 ... sizeof(struct user_regs_struct):
+		case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
 			tmp = getreg(child, addr);
 			break;
 		case offsetof(struct user, u_debugreg[0]):
@@ -420,7 +425,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data
 			break;

 		switch (addr) { 
-		case 0 ... sizeof(struct user_regs_struct): 
+		case 0 ... sizeof(struct user_regs_struct) - sizeof(long):
 			ret = putreg(child, addr, data);
 			break;
 		/* Disallows to set a breakpoint into the vsyscall */

+ 18 - 12
arch/x86_64/kernel/setup.c

@@ -719,7 +719,6 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c)
 	}
 }

-#ifdef CONFIG_SMP
 /*
  * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
  * Assumes number of cores is a power of two.
@@ -727,17 +726,26 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c)
 static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-	int cpu = c->x86_apicid;
+	int cpu = smp_processor_id();
 	int node = 0;
+	unsigned bits;
 	if (c->x86_num_cores == 1)
 		return;
-	cpu_core_id[cpu] = cpu >> hweight32(c->x86_num_cores - 1);
+
+	bits = 0;
+	while ((1 << bits) < c->x86_num_cores)
+		bits++;
+
+	/* Low order bits define the core id (index of core in socket) */
+	cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
+	/* Convert the APIC ID into the socket ID */
+	phys_proc_id[cpu] >>= bits;
 
 #ifdef CONFIG_NUMA
 	/* When an ACPI SRAT table is available use the mappings from SRAT
  	   instead. */
 	if (acpi_numa <= 0) {
-		node = cpu_core_id[cpu];
+		node = phys_proc_id[cpu];
 		if (!node_online(node))
 			node = first_node(node_online_map);
 		cpu_to_node[cpu] = node;
@@ -745,15 +753,11 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 		node = cpu_to_node[cpu];
 	}
 #endif
+
 	printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
 			cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
 #endif
 }
-#else
-static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
-{
-}
-#endif
 
 static int __init init_amd(struct cpuinfo_x86 *c)
 {
@@ -925,7 +929,6 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	c->x86_clflush_size = 64;
 	c->x86_cache_alignment = c->x86_clflush_size;
 	c->x86_num_cores = 1;
-	c->x86_apicid = c == &boot_cpu_data ? 0 : c - cpu_data;
 	c->extended_cpuid_level = 0;
 	memset(&c->x86_capability, 0, sizeof c->x86_capability);

@@ -954,11 +957,14 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c)
 		} 
 		if (c->x86_capability[0] & (1<<19)) 
 			c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
-		c->x86_apicid = misc >> 24;
 	} else {
 		/* Have CPUID level 0 only - unheard of */
 		c->x86 = 4;
 	}
+
+#ifdef CONFIG_SMP
+	phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+#endif
 }

 /*
@@ -1088,7 +1094,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,

 		/* Other (Linux-defined) */
-		"cxmmx", NULL, "cyrix_arr", "centaur_mcr", "k8c+",
+		"cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
 		"constant_tsc", NULL, NULL,
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,

+ 3 - 1
arch/x86_64/kernel/signal.c

@@ -452,7 +452,9 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
 			regs->rip -= 2;
 		}
 		if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) {
-			regs->rax = __NR_restart_syscall;
+			regs->rax = test_thread_flag(TIF_IA32) ?
+					__NR_ia32_restart_syscall :
+					__NR_restart_syscall;
 			regs->rip -= 2;
 		}
 	}

+ 186 - 77
arch/x86_64/kernel/smpboot.c

@@ -56,6 +56,7 @@
 #include <asm/kdebug.h>
 #include <asm/tlbflush.h>
 #include <asm/proto.h>
+#include <asm/nmi.h>
 
 /* Change for real CPU hotplug. Note other files need to be fixed
    first too. */
@@ -93,6 +94,7 @@ int smp_threads_ready;
 
 cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_core_map);
 
 /*
  * Trampoline 80x86 program as an array.
@@ -125,96 +127,210 @@ static void __cpuinit smp_store_cpu_info(int id)
 
 	*c = boot_cpu_data;
 	identify_cpu(c);
+	print_cpu_info(c);
 }

 /*
- * Synchronize TSCs of CPUs
+ * New Funky TSC sync algorithm borrowed from IA64.
+ * Main advantage is that it doesn't reset the TSCs fully and
+ * in general looks more robust and it works better than my earlier
+ * attempts. I believe it was written by David Mosberger. Some minor
+ * adjustments for x86-64 by me -AK
  *
- * This new algorithm is less accurate than the old "zero TSCs"
- * one, but we cannot zero TSCs anymore in the new hotplug CPU
- * model.
+ * Original comment reproduced below.
+ *
+ * Synchronize TSC of the current (slave) CPU with the TSC of the
+ * MASTER CPU (normally the time-keeper CPU).  We use a closed loop to
+ * eliminate the possibility of unaccounted-for errors (such as
+ * getting a machine check in the middle of a calibration step).  The
+ * basic idea is for the slave to ask the master what itc value it has
+ * and to read its own itc before and after the master responds.  Each
+ * iteration gives us three timestamps:
+ *
+ *	slave		master
+ *
+ *	t0 ---\
+ *             ---\
+ *		   --->
+ *			tm
+ *		   /---
+ *	       /---
+ *	t1 <---
+ *
+ *
+ * The goal is to adjust the slave's TSC such that tm falls exactly
+ * half-way between t0 and t1.  If we achieve this, the clocks are
+ * synchronized provided the interconnect between the slave and the
+ * master is symmetric.  Even if the interconnect were asymmetric, we
+ * would still know that the synchronization error is smaller than the
+ * roundtrip latency (t0 - t1).
+ *
+ * When the interconnect is quiet and symmetric, this lets us
+ * synchronize the TSC to within one or two cycles.  However, we can
+ * only *guarantee* that the synchronization is accurate to within a
+ * round-trip time, which is typically in the range of several hundred
+ * cycles (e.g., ~500 cycles).  In practice, this means that the TSCs
+ * are usually almost perfectly synchronized, but we shouldn't assume
+ * that the accuracy is much better than half a micro second or so.
+ *
+ * [there are other errors like the latency of RDTSC and of the
+ * WRMSR. These can also account to hundreds of cycles. So it's
+ * probably worse. It claims 153 cycles error on a dual Opteron,
+ * but I suspect the numbers are actually somewhat worse -AK]
  */
  */
 
 
-static atomic_t __cpuinitdata tsc_flag;
+#define MASTER	0
+#define SLAVE	(SMP_CACHE_BYTES/8)
+
+/* Intentionally don't use cpu_relax() while TSC synchronization
+   because we don't want to go into funky power save modi or cause
+   hypervisors to schedule us away.  Going to sleep would likely affect
+   latency and low latency is the primary objective here. -AK */
+#define no_cpu_relax() barrier()
+
 static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
-static unsigned long long __cpuinitdata bp_tsc, ap_tsc;
+static volatile __cpuinitdata unsigned long go[SLAVE + 1];
+static int notscsync __cpuinitdata;
+
+#undef DEBUG_TSC_SYNC
 
-#define NR_LOOPS 5
+#define NUM_ROUNDS	64	/* magic value */
+#define NUM_ITERS	5	/* likewise */
 
-static void __cpuinit sync_tsc_bp_init(int init)
+/* Callback on boot CPU */
+static __cpuinit void sync_master(void *arg)
 {
-	if (init)
-		_raw_spin_lock(&tsc_sync_lock);
-	else
-		_raw_spin_unlock(&tsc_sync_lock);
-	atomic_set(&tsc_flag, 0);
+	unsigned long flags, i;
+
+	if (smp_processor_id() != boot_cpu_id)
+		return;
+
+	go[MASTER] = 0;
+
+	local_irq_save(flags);
+	{
+		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
+			while (!go[MASTER])
+				no_cpu_relax();
+			go[MASTER] = 0;
+			rdtscll(go[SLAVE]);
+		}
+	}
+	local_irq_restore(flags);
 }

 /*
- * Synchronize TSC on AP with BP.
+ * Return the number of cycles by which our tsc differs from the tsc
+ * on the master (time-keeper) CPU.  A positive number indicates our
+ * tsc is ahead of the master, negative that it is behind.
  */
-static void __cpuinit __sync_tsc_ap(void)
+static inline long
+get_delta(long *rt, long *master)
 {
-	if (!cpu_has_tsc)
-		return;
-	Dprintk("AP %d syncing TSC\n", smp_processor_id());
+	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
+	unsigned long tcenter, t0, t1, tm;
+	int i;
 
-	while (atomic_read(&tsc_flag) != 0)
-		cpu_relax();
-	atomic_inc(&tsc_flag);
-	mb();
-	_raw_spin_lock(&tsc_sync_lock);
-	wrmsrl(MSR_IA32_TSC, bp_tsc);
-	_raw_spin_unlock(&tsc_sync_lock);
-	rdtscll(ap_tsc);
-	mb();
-	atomic_inc(&tsc_flag);
-	mb();
+	for (i = 0; i < NUM_ITERS; ++i) {
+		rdtscll(t0);
+		go[MASTER] = 1;
+		while (!(tm = go[SLAVE]))
+			no_cpu_relax();
+		go[SLAVE] = 0;
+		rdtscll(t1);
+
+		if (t1 - t0 < best_t1 - best_t0)
+			best_t0 = t0, best_t1 = t1, best_tm = tm;
+	}
+
+	*rt = best_t1 - best_t0;
+	*master = best_tm - best_t0;
+
+	/* average best_t0 and best_t1 without overflow: */
+	tcenter = (best_t0/2 + best_t1/2);
+	if (best_t0 % 2 + best_t1 % 2 == 2)
+		++tcenter;
+	return tcenter - best_tm;
 }
-static void __cpuinit sync_tsc_ap(void)
+static __cpuinit void sync_tsc(void)
 {
-	int i;
-	for (i = 0; i < NR_LOOPS; i++)
-		__sync_tsc_ap();
+	int i, done = 0;
+	long delta, adj, adjust_latency = 0;
+	unsigned long flags, rt, master_time_stamp, bound;
+#if DEBUG_TSC_SYNC
+	static struct syncdebug {
+		long rt;	/* roundtrip time */
+		long master;	/* master's timestamp */
+		long diff;	/* difference between midpoint and master's timestamp */
+		long lat;	/* estimate of tsc adjustment latency */
+	} t[NUM_ROUNDS] __cpuinitdata;
+#endif
+
+	go[MASTER] = 1;
+
+	smp_call_function(sync_master, NULL, 1, 0);
+
+	while (go[MASTER])	/* wait for master to be ready */
+		no_cpu_relax();
+
+	spin_lock_irqsave(&tsc_sync_lock, flags);
+	{
+		for (i = 0; i < NUM_ROUNDS; ++i) {
+			delta = get_delta(&rt, &master_time_stamp);
+			if (delta == 0) {
+				done = 1;	/* let's lock on to this... */
+				bound = rt;
+			}
+
+			if (!done) {
+				unsigned long t;
+				if (i > 0) {
+					adjust_latency += -delta;
+					adj = -delta + adjust_latency/4;
+				} else
+					adj = -delta;
+
+				rdtscll(t);
+				wrmsrl(MSR_IA32_TSC, t + adj);
+			}
+#if DEBUG_TSC_SYNC
+			t[i].rt = rt;
+			t[i].master = master_time_stamp;
+			t[i].diff = delta;
+			t[i].lat = adjust_latency/4;
+#endif
+		}
+	}
+	spin_unlock_irqrestore(&tsc_sync_lock, flags);
+
+#if DEBUG_TSC_SYNC
+	for (i = 0; i < NUM_ROUNDS; ++i)
+		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
+		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
+#endif
+
+	printk(KERN_INFO
+	       "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
+	       "maxerr %lu cycles)\n",
+	       smp_processor_id(), boot_cpu_id, delta, rt);
 }
-/*
- * Synchronize TSC from BP to AP.
- */
-static void __cpuinit __sync_tsc_bp(int cpu)
+static void __cpuinit tsc_sync_wait(void)
 {
-	if (!cpu_has_tsc)
+	if (notscsync || !cpu_has_tsc)
 		return;
-
-	/* Wait for AP */
-	while (atomic_read(&tsc_flag) == 0)
-		cpu_relax();
-	/* Save BPs TSC */
-	sync_core();
-	rdtscll(bp_tsc);
-	/* Don't do the sync core here to avoid too much latency. */
-	mb();
-	/* Start the AP */
-	_raw_spin_unlock(&tsc_sync_lock);
-	/* Wait for AP again */
-	while (atomic_read(&tsc_flag) < 2)
-		cpu_relax();
-	rdtscl(bp_tsc);
-	barrier();
+	printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
+			boot_cpu_id);
+	sync_tsc();
 }
-static void __cpuinit sync_tsc_bp(int cpu)
+static __init int notscsync_setup(char *s)
 {
-	int i;
-	for (i = 0; i < NR_LOOPS - 1; i++) {
-		__sync_tsc_bp(cpu);
-		sync_tsc_bp_init(1);
-	}
-	__sync_tsc_bp(cpu);
-	printk(KERN_INFO "Synced TSC of CPU %d difference %Ld\n",
-	       cpu, ap_tsc - bp_tsc);
+	notscsync = 1;
+	return 0;
 }
+__setup("notscsync", notscsync_setup);
 
 static atomic_t init_deasserted __cpuinitdata;

@@ -315,11 +431,6 @@ void __cpuinit start_secondary(void)
 	cpu_init();
 	smp_callin();

-	/*
-	 * Synchronize the TSC with the BP
-	 */
-	sync_tsc_ap();
-
 	/* otherwise gcc will move up the smp_processor_id before the cpu_init */
 	barrier();

@@ -334,7 +445,6 @@ void __cpuinit start_secondary(void)
 		enable_8259A_irq(0);
 	}

-
 	enable_APIC_timer();

 	/*
@@ -343,6 +453,11 @@ void __cpuinit start_secondary(void)
 	cpu_set(smp_processor_id(), cpu_online_map);
 	mb();

+	/* Wait for TSC sync to not schedule things before.
+	   We still process interrupts, which could see an inconsistent
+	   time in that window unfortunately. */
+	tsc_sync_wait();
+
 	cpu_idle();
 }

@@ -531,7 +646,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
 		printk("failed fork for CPU %d\n", cpu);
 		return PTR_ERR(idle);
 	}
-	x86_cpu_to_apicid[cpu] = apicid;
 
 	cpu_pda[cpu].pcurrent = idle;

@@ -600,8 +714,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
 
 		if (cpu_isset(cpu, cpu_callin_map)) {
 			/* number CPUs logically, starting from 1 (BSP is 0) */
-			Dprintk("OK.\n");
-			print_cpu_info(&cpu_data[cpu]);
 			Dprintk("CPU has booted.\n");
 		} else {
 			boot_error = 1;
@@ -842,7 +954,6 @@ void __cpuinit smp_prepare_cpus(unsigned int max_cpus)
 		      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
 		/* Or can we switch back to PIC here? */
 	}
-	x86_cpu_to_apicid[0] = boot_cpu_id;
 
 	/*
 	 * Now start the IO-APICs
@@ -889,18 +1000,14 @@ int __cpuinit __cpu_up(unsigned int cpu)
 		printk("__cpu_up: bad cpu %d\n", cpu);
 		return -EINVAL;
 	}
-	sync_tsc_bp_init(1);
 
 	/* Boot it! */
 	err = do_boot_cpu(cpu, apicid);
 	if (err < 0) {
-		sync_tsc_bp_init(0);
 		Dprintk("do_boot_cpu failed %d\n", err);
 		return err;
 	}

-	sync_tsc_bp(cpu);
-
 	/* Unleash the CPU! */
 	Dprintk("waiting for cpu %d\n", cpu);

@@ -923,4 +1030,6 @@ void __cpuinit smp_cpus_done(unsigned int max_cpus)
 
 	detect_siblings();
 	time_init_gtod();
+
+	check_nmi_watchdog();
 }

+ 45 - 17
arch/x86_64/kernel/time.c

@@ -26,6 +26,8 @@
 #include <linux/sysdev.h>
 #include <linux/bcd.h>
 #include <linux/kallsyms.h>
+#include <linux/acpi.h>
+#include <acpi/achware.h>	/* for PM timer frequency */
 #include <asm/8253pit.h>
 #include <asm/pgtable.h>
 #include <asm/vsyscall.h>
@@ -396,6 +398,10 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 			(offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick;

 		vxtime.last = offset;
+#ifdef CONFIG_X86_PM_TIMER
+	} else if (vxtime.mode == VXTIME_PMTMR) {
+		lost = pmtimer_mark_offset();
+#endif
 	} else {
 		offset = (((tsc - vxtime.last_tsc) *
 			   vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ);
@@ -898,6 +904,13 @@ void __init time_init(void)
 			hpet_period;
 		cpu_khz = hpet_calibrate_tsc();
 		timename = "HPET";
+#ifdef CONFIG_X86_PM_TIMER
+	} else if (pmtmr_ioport) {
+		vxtime_hz = PM_TIMER_FREQUENCY;
+		timename = "PM";
+		pit_init();
+		cpu_khz = pit_calibrate_tsc();
+#endif
 	} else {
 		pit_init();
 		cpu_khz = pit_calibrate_tsc();
@@ -922,6 +935,27 @@ void __init time_init(void)
 #endif
 }

+/*
+ * Make an educated guess if the TSC is trustworthy and synchronized
+ * over all CPUs.
+ */
+static __init int unsynchronized_tsc(void)
+{
+#ifdef CONFIG_SMP
+	if (oem_force_hpet_timer())
+		return 1;
+ 	/* Intel systems are normally all synchronized. Exceptions
+ 	   are handled in the OEM check above. */
+ 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ 		return 0;
+ 	/* All in a single socket - should be synchronized */
+ 	if (cpus_weight(cpu_core_map[0]) == num_online_cpus())
+ 		return 0;
+#endif
+ 	/* Assume multi socket systems are not synchronized */
+ 	return num_online_cpus() > 1;
+}
+
 /*
 /*
  * Decide after all CPUs are booted what mode gettimeofday should use.
  * Decide after all CPUs are booted what mode gettimeofday should use.
  */
  */
@@ -929,29 +963,23 @@ void __init time_init_gtod(void)
 {
 {
 	char *timetype;
 	char *timetype;
 
 
-	/*
-	 * AMD systems with more than one CPU don't have fully synchronized
-	 * TSCs. Always use HPET gettimeofday for these, although it is slower.
-	 * Intel SMP systems usually have synchronized TSCs, so use always
-	 * the TSC.
-	 *
-	 * Exceptions:
-	 * IBM Summit2 checked by oem_force_hpet_timer().
- 	 * AMD dual core may also not need HPET. Check me.
-	 *
-	 * Can be turned off with "notsc".
-	 */
-	if (num_online_cpus() > 1 &&
-	    boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-		notsc = 1;
-	/* Some systems will want to disable TSC and use HPET. */
-	if (oem_force_hpet_timer())
+	if (unsynchronized_tsc())
 		notsc = 1;
 	if (vxtime.hpet_address && notsc) {
 		timetype = "HPET";
 		vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
 		vxtime.mode = VXTIME_HPET;
 		do_gettimeoffset = do_gettimeoffset_hpet;
+#ifdef CONFIG_X86_PM_TIMER
+	/* Using PM for gettimeofday is quite slow, but we have no other
+	   choice because the TSC is too unreliable on some systems. */
+	} else if (pmtmr_ioport && !vxtime.hpet_address && notsc) {
+		timetype = "PM";
+		do_gettimeoffset = do_gettimeoffset_pm;
+		vxtime.mode = VXTIME_PMTMR;
+		sysctl_vsyscall = 0;
+		printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n");
+#endif
 	} else {
 		timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC";
 		vxtime.mode = VXTIME_TSC;

+ 2 - 0
arch/x86_64/kernel/traps.c

@@ -332,10 +332,12 @@ void handle_BUG(struct pt_regs *regs)
 	printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", f.filename, f.line);
 } 

+#ifdef CONFIG_BUG
 void out_of_line_bug(void)
 { 
 	BUG(); 
 } 
+#endif
 
 static DEFINE_SPINLOCK(die_lock);
 static int die_owner = -1;

+ 3 - 2
arch/x86_64/kernel/vsyscall.c

@@ -65,7 +65,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv)
 		usec = (__xtime.tv_nsec / 1000) +
 			(__jiffies - __wall_jiffies) * (1000000 / HZ);

-		if (__vxtime.mode == VXTIME_TSC) {
+		if (__vxtime.mode != VXTIME_HPET) {
 			sync_core();
 			rdtscll(t);
 			if (t < __vxtime.last_tsc)
@@ -217,8 +217,9 @@ static int __init vsyscall_init(void)
 	BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
 	BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
 	map_vsyscall();
-	sysctl_vsyscall = 1;
+#ifdef CONFIG_SYSCTL
 	register_sysctl_table(kernel_root_table2, 0);
+#endif
 	return 0;
 }


+ 2 - 1
arch/x86_64/kernel/x8664_ksyms.c

@@ -193,8 +193,9 @@ EXPORT_SYMBOL(smp_num_siblings);
 extern void do_softirq_thunk(void);
 EXPORT_SYMBOL(do_softirq_thunk);

-void out_of_line_bug(void);
+#ifdef CONFIG_BUG
 EXPORT_SYMBOL(out_of_line_bug);
+#endif
 
 EXPORT_SYMBOL(init_level4_pgt);


+ 9 - 2
arch/x86_64/mm/fault.c

@@ -234,6 +234,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 
 /*
  * Handle a fault on the vmalloc or module mapping area
+ *
+ * This assumes no large pages in there.
  */
 static int vmalloc_fault(unsigned long address)
 {
@@ -272,7 +274,10 @@ static int vmalloc_fault(unsigned long address)
 	if (!pte_present(*pte_ref))
 		return -1;
 	pte = pte_offset_kernel(pmd, address);
-	if (!pte_present(*pte) || pte_page(*pte) != pte_page(*pte_ref))
+	/* Don't use pte_page here, because the mappings can point
+	   outside mem_map, and the NUMA hash lookup cannot handle
+	   that. */
+	if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
 		BUG();
 	__flush_tlb_all();
 	return 0;
@@ -346,7 +351,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	 * protection error (error_code & 1) == 0.
 	 */
 	if (unlikely(address >= TASK_SIZE)) {
-		if (!(error_code & 5)) {
+		if (!(error_code & 5) &&
+		      ((address >= VMALLOC_START && address < VMALLOC_END) ||
+		       (address >= MODULES_VADDR && address < MODULES_END))) {
 			if (vmalloc_fault(address) < 0)
 				goto bad_area_nosemaphore;
 			return;

+ 8 - 21
arch/x86_64/mm/ioremap.c

@@ -133,7 +133,7 @@ ioremap_change_attr(unsigned long phys_addr, unsigned long size,
 					unsigned long flags)
 {
 	int err = 0;
-	if (flags && phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) {
+	if (phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) {
 		unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		unsigned long vaddr = (unsigned long) __va(phys_addr);

@@ -214,7 +214,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
 		remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
 		return NULL;
 	}
-	if (ioremap_change_attr(phys_addr, size, flags) < 0) {
+	if (flags && ioremap_change_attr(phys_addr, size, flags) < 0) {
 		area->flags &= 0xffffff;
 		vunmap(addr);
 		return NULL;
@@ -251,7 +251,7 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
 
 void iounmap(volatile void __iomem *addr)
 {
-	struct vm_struct *p, **pprev;
+	struct vm_struct *p;
 
 	if (addr <= high_memory) 
 		return; 
@@ -260,24 +260,11 @@ void iounmap(volatile void __iomem *addr)
 		return;

 	write_lock(&vmlist_lock);
-	for (p = vmlist, pprev = &vmlist; p != NULL; pprev = &p->next, p = *pprev)
-		if (p->addr == (void *)(PAGE_MASK & (unsigned long)addr))
-			break;
-	if (!p) { 
-		printk("__iounmap: bad address %p\n", addr);
-		goto out_unlock;
-	}
-	*pprev = p->next;
-	unmap_vm_area(p);
-	if ((p->flags >> 20) &&
-		p->phys_addr + p->size - 1 < virt_to_phys(high_memory)) {
-		/* p->size includes the guard page, but cpa doesn't like that */
-		change_page_attr(virt_to_page(__va(p->phys_addr)),
-				 p->size >> PAGE_SHIFT,
-				 PAGE_KERNEL);
-		global_flush_tlb();
-	} 
-out_unlock:
+	p = __remove_vm_area((void *)((unsigned long)addr & PAGE_MASK));
+	if (!p)
+		printk("iounmap: bad address %p\n", addr);
+	else if (p->flags >> 20)
+		ioremap_change_attr(p->phys_addr, p->size, 0);
 	write_unlock(&vmlist_lock);
 	kfree(p); 
 }

+ 15 - 13
crypto/crypto_null.c

@@ -21,6 +21,7 @@
 #include <linux/mm.h>
 #include <asm/scatterlist.h>
 #include <linux/crypto.h>
+#include <linux/string.h>
 
 #define NULL_KEY_SIZE		0
 #define NULL_BLOCK_SIZE		1
@@ -28,11 +29,13 @@
 
 static int null_compress(void *ctx, const u8 *src, unsigned int slen,
                          u8 *dst, unsigned int *dlen)
-{ return 0; }
-
-static int null_decompress(void *ctx, const u8 *src, unsigned int slen,
-                           u8 *dst, unsigned int *dlen)
-{ return 0; }
+{
+	if (slen > *dlen)
+		return -EINVAL;
+	memcpy(dst, src, slen);
+	*dlen = slen;
+	return 0;
+}
 
 static void null_init(void *ctx)
 { }
@@ -47,11 +50,10 @@ static int null_setkey(void *ctx, const u8 *key,
                        unsigned int keylen, u32 *flags)
 { return 0; }

-static void null_encrypt(void *ctx, u8 *dst, const u8 *src)
-{ }
-
-static void null_decrypt(void *ctx, u8 *dst, const u8 *src)
-{ }
+static void null_crypt(void *ctx, u8 *dst, const u8 *src)
+{
+	memcpy(dst, src, NULL_BLOCK_SIZE);
+}
 
 static struct crypto_alg compress_null = {
 	.cra_name		=	"compress_null",
@@ -62,7 +64,7 @@ static struct crypto_alg compress_null = {
 	.cra_list		=       LIST_HEAD_INIT(compress_null.cra_list),
 	.cra_u			=	{ .compress = {
 	.coa_compress 		=	null_compress,
-	.coa_decompress		=	null_decompress } }
+	.coa_decompress		=	null_compress } }
 };

 static struct crypto_alg digest_null = {
@@ -90,8 +92,8 @@ static struct crypto_alg cipher_null = {
 	.cia_min_keysize	=	NULL_KEY_SIZE,
 	.cia_max_keysize	=	NULL_KEY_SIZE,
 	.cia_setkey		= 	null_setkey,
-	.cia_encrypt		=	null_encrypt,
-	.cia_decrypt		=	null_decrypt } }
+	.cia_encrypt		=	null_crypt,
+	.cia_decrypt		=	null_crypt } }
 };

 MODULE_ALIAS("compress_null");

+ 1 - 1
crypto/internal.h

@@ -38,7 +38,7 @@ static inline void crypto_kunmap(void *vaddr, int out)
 
 static inline void crypto_yield(struct crypto_tfm *tfm)
 {
-	if (!in_softirq())
+	if (!in_atomic())
 		cond_resched();
 }


+ 1 - 1
drivers/base/Makefile

@@ -1,6 +1,6 @@
 # Makefile for the Linux device tree
-obj-y			:= core.o sys.o interface.o bus.o \
+obj-y			:= core.o sys.o bus.o \
 			   driver.o class.o class_simple.o platform.o \
 			   cpu.o firmware.o init.o map.o dmapool.o \
 			   attribute_container.o transport_class.o

+ 0 - 1
drivers/base/bus.c

@@ -390,7 +390,6 @@ void device_release_driver(struct device * dev)
 		sysfs_remove_link(&drv->kobj, kobject_name(&dev->kobj));
 		sysfs_remove_link(&dev->kobj, "driver");
 		list_del_init(&dev->driver_list);
-		device_detach_shutdown(dev);
 		if (drv->remove)
 			drv->remove(dev);
 		dev->driver = NULL;

+ 2 - 5
drivers/base/core.c

@@ -31,8 +31,6 @@ int (*platform_notify_remove)(struct device * dev) = NULL;
 #define to_dev(obj) container_of(obj, struct device, kobj)
 #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr)

-extern struct attribute * dev_default_attrs[];
-
 static ssize_t
 dev_attr_show(struct kobject * kobj, struct attribute * attr, char * buf)
 {
@@ -89,7 +87,6 @@ static void device_release(struct kobject * kobj)
 static struct kobj_type ktype_device = {
 	.release	= device_release,
 	.sysfs_ops	= &dev_sysfs_ops,
-	.default_attrs	= dev_default_attrs,
 };


@@ -248,6 +245,7 @@ int device_add(struct device *dev)
 
 	if ((error = kobject_add(&dev->kobj)))
 		goto Error;
+	kobject_hotplug(&dev->kobj, KOBJ_ADD);
 	if ((error = device_pm_add(dev)))
 		goto PMError;
 	if ((error = bus_add_device(dev)))
@@ -260,14 +258,13 @@ int device_add(struct device *dev)
 	/* notify platform of device entry */
 	if (platform_notify)
 		platform_notify(dev);
-
-	kobject_hotplug(&dev->kobj, KOBJ_ADD);
  Done:
 	put_device(dev);
 	return error;
  BusError:
 	device_pm_remove(dev);
  PMError:
+	kobject_hotplug(&dev->kobj, KOBJ_REMOVE);
 	kobject_del(&dev->kobj);
  Error:
 	if (parent)

+ 0 - 51
drivers/base/interface.c

@@ -1,51 +0,0 @@
-/*
- * drivers/base/interface.c - common driverfs interface that's exported to
- * 	the world for all devices.
- *
- * Copyright (c) 2002-3 Patrick Mochel
- * Copyright (c) 2002-3 Open Source Development Labs
- *
- * This file is released under the GPLv2
- *
- */
-
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/stat.h>
-#include <linux/string.h>
-
-/**
- *	detach_state - control the default power state for the device.
- *
- *	This is the state the device enters when it's driver module is
- *	unloaded. The value is an unsigned integer, in the range of 0-4.
- *	'0' indicates 'On', so no action will be taken when the driver is
- *	unloaded. This is the default behavior.
- *	'4' indicates 'Off', meaning the driver core will call the driver's
- *	shutdown method to quiesce the device.
- *	1-3 indicate a low-power state for the device to enter via the
- *	driver's suspend method.
- */
-
-static ssize_t detach_show(struct device * dev, char * buf)
-{
-	return sprintf(buf, "%u\n", dev->detach_state);
-}
-
-static ssize_t detach_store(struct device * dev, const char * buf, size_t n)
-{
-	u32 state;
-	state = simple_strtoul(buf, NULL, 10);
-	if (state > 4)
-		return -EINVAL;
-	dev->detach_state = state;
-	return n;
-}
-
-static DEVICE_ATTR(detach_state, 0644, detach_show, detach_store);
-
-
-struct attribute * dev_default_attrs[] = {
-	&dev_attr_detach_state.attr,
-	NULL,
-};

+ 0 - 11
drivers/base/power/power.h

@@ -1,18 +1,7 @@
-
-
-enum {
-	DEVICE_PM_ON,
-	DEVICE_PM1,
-	DEVICE_PM2,
-	DEVICE_PM3,
-	DEVICE_PM_OFF,
-};
-
 /*
  * shutdown.c
  */

-extern int device_detach_shutdown(struct device *);
 extern void device_shutdown(void);



+ 10 - 1
drivers/base/power/resume.c

@@ -22,8 +22,17 @@ extern int sysdev_resume(void);
 
 int resume_device(struct device * dev)
 {
-	if (dev->bus && dev->bus->resume)
+	if (dev->power.pm_parent
+			&& dev->power.pm_parent->power.power_state) {
+		dev_err(dev, "PM: resume from %d, parent %s still %d\n",
+			dev->power.power_state,
+			dev->power.pm_parent->bus_id,
+			dev->power.pm_parent->power.power_state);
+	}
+	if (dev->bus && dev->bus->resume) {
+		dev_dbg(dev,"resuming\n");
 		return dev->bus->resume(dev);
+	}
 	return 0;
 }


Some files were not shown because too many files changed in this diff