Merge branches 'at91', 'dcache', 'ftrace', 'hwbpt', 'misc', 'mmci', 's3c', 'st-ux' and 'unwind' into devel

Russell King 14 years ago
100 changed files with 2798 additions and 667 deletions
  1. +0 -1    Documentation/DocBook/device-drivers.tmpl
  2. +0 -1    Documentation/DocBook/kernel-api.tmpl
  3. +6 -0    Documentation/DocBook/kernel-locking.tmpl
  4. +5 -0    Documentation/DocBook/tracepoint.tmpl
  5. +45 -0   Documentation/block/cfq-iosched.txt
  6. +28 -0   Documentation/cgroups/blkio-controller.txt
  7. +14 -8   Documentation/gpio.txt
  8. +3 -4    Documentation/hwmon/sysfs-interface
  9. +5 -0    Documentation/kernel-doc-nano-HOWTO.txt
  10. +10 -7   Documentation/kernel-parameters.txt
  11. +2 -1    Documentation/mutex-design.txt
  12. +1 -1    Documentation/power/regulator/overview.txt
  13. +1 -0    Documentation/sound/alsa/HD-Audio-Models.txt
  14. +380 -0  Documentation/workqueue.txt
  15. +47 -24  MAINTAINERS
  16. +8 -1    Makefile
  17. +2 -2    arch/Kconfig
  18. +0 -1    arch/alpha/include/asm/cache.h
  19. +2 -0    arch/alpha/include/asm/cacheflush.h
  20. +5 -1    arch/alpha/include/asm/unistd.h
  21. +29 -52  arch/alpha/kernel/entry.S
  22. +7 -5    arch/alpha/kernel/err_ev6.c
  23. +20 -19  arch/alpha/kernel/err_marvel.c
  24. +20 -15  arch/alpha/kernel/err_titan.c
  25. +2 -7    arch/alpha/kernel/osf_sys.c
  26. +1 -1    arch/alpha/kernel/pci-sysfs.c
  27. +9 -9    arch/alpha/kernel/perf_event.c
  28. +1 -1    arch/alpha/kernel/process.c
  29. +0 -3    arch/alpha/kernel/proto.h
  30. +18 -79  arch/alpha/kernel/signal.c
  31. +1 -1    arch/alpha/kernel/srm_env.c
  32. +16 -3   arch/alpha/kernel/sys_cabriolet.c
  33. +9 -2    arch/alpha/kernel/sys_takara.c
  34. +4 -1    arch/alpha/kernel/systbls.S
  35. +5 -5    arch/alpha/kernel/time.c
  36. +0 -3    arch/alpha/kernel/traps.c
  37. +53 -102 arch/arm/Kconfig
  38. +5 -0    arch/arm/Kconfig.debug
  39. +5 -3    arch/arm/boot/Makefile
  40. +5 -1    arch/arm/boot/compressed/Makefile
  41. +1 -1    arch/arm/boot/compressed/head.S
  42. +16 -0   arch/arm/common/it8152.c
  43. +3 -4    arch/arm/common/pl330.c
  44. +1 -1    arch/arm/common/sa1111.c
  45. +13 -2   arch/arm/configs/realview-smp_defconfig
  46. +13 -2   arch/arm/configs/realview_defconfig
  47. +2 -35   arch/arm/configs/u300_defconfig
  48. +25 -2   arch/arm/include/asm/assembler.h
  49. +44 -21  arch/arm/include/asm/cacheflush.h
  50. +6 -2    arch/arm/include/asm/cachetype.h
  51. +0 -8    arch/arm/include/asm/dma-mapping.h
  52. +19 -1   arch/arm/include/asm/ftrace.h
  53. +11 -23  arch/arm/include/asm/hardware/coresight.h
  54. +133 -0  arch/arm/include/asm/hw_breakpoint.h
  55. +19 -12  arch/arm/include/asm/module.h
  56. +1 -1    arch/arm/include/asm/perf_event.h
  57. +27 -3   arch/arm/include/asm/pgtable.h
  58. +4 -0    arch/arm/include/asm/processor.h
  59. +2 -0    arch/arm/include/asm/ptrace.h
  60. +17 -0   arch/arm/include/asm/smp_mpidr.h
  61. +25 -0   arch/arm/include/asm/smp_plat.h
  62. +6 -0    arch/arm/include/asm/system.h
  63. +27 -9   arch/arm/include/asm/tlbflush.h
  64. +3 -0    arch/arm/include/asm/unistd.h
  65. +1 -0    arch/arm/kernel/Makefile
  66. +2 -0    arch/arm/kernel/armksyms.c
  67. +3 -0    arch/arm/kernel/calls.S
  68. +5 -6    arch/arm/kernel/entry-armv.S
  69. +55 -12  arch/arm/kernel/entry-common.S
  70. +15 -0   arch/arm/kernel/etm.c
  71. +140 -48 arch/arm/kernel/ftrace.c
  72. +1 -1    arch/arm/kernel/head-common.S
  73. +50 -0   arch/arm/kernel/head.S
  74. +849 -0  arch/arm/kernel/hw_breakpoint.c
  75. +35 -33  arch/arm/kernel/module.c
  76. +6 -6    arch/arm/kernel/perf_event.c
  77. +24 -0   arch/arm/kernel/process.c
  78. +239 -0  arch/arm/kernel/ptrace.c
  79. +41 -5   arch/arm/kernel/setup.c
  80. +2 -1    arch/arm/kernel/smp.c
  81. +2 -0    arch/arm/kernel/unwind.c
  82. +13 -2   arch/arm/kernel/vmlinux.lds.S
  83. +12 -3   arch/arm/mach-at91/at91sam9g45.c
  84. +5 -5    arch/arm/mach-at91/at91sam9g45_devices.c
  85. +19 -12  arch/arm/mach-at91/board-sam9261ek.c
  86. +2 -1    arch/arm/mach-at91/clock.c
  87. +1 -2    arch/arm/mach-davinci/dm355.c
  88. +1 -2    arch/arm/mach-davinci/dm365.c
  89. +1 -2    arch/arm/mach-davinci/dm644x.c
  90. +1 -2    arch/arm/mach-davinci/dm646x.c
  91. +3 -3    arch/arm/mach-dove/include/mach/io.h
  92. +1 -1    arch/arm/mach-ep93xx/clock.c
  93. +8 -0    arch/arm/mach-ixp4xx/common-pci.c
  94. +2 -0    arch/arm/mach-ixp4xx/include/mach/hardware.h
  95. +1 -1    arch/arm/mach-kirkwood/include/mach/kirkwood.h
  96. +2 -2    arch/arm/mach-kirkwood/pcie.c
  97. +6 -1    arch/arm/mach-mmp/include/mach/system.h
  98. +1 -1    arch/arm/mach-mx25/eukrea_mbimxsd-baseboard.c
  99. +2 -2    arch/arm/mach-mx25/mach-cpuimx25.c
  100. +55 -22  arch/arm/mach-mx3/clock-imx35.c

+ 0 - 1
Documentation/DocBook/device-drivers.tmpl

@@ -46,7 +46,6 @@
 
      <sect1><title>Atomic and pointer manipulation</title>
 !Iarch/x86/include/asm/atomic.h
-!Iarch/x86/include/asm/unaligned.h
      </sect1>
 
      <sect1><title>Delaying, scheduling, and timer routines</title>

+ 0 - 1
Documentation/DocBook/kernel-api.tmpl

@@ -57,7 +57,6 @@
      </para>
 
      <sect1><title>String Conversions</title>
-!Ilib/vsprintf.c
 !Elib/vsprintf.c
      </sect1>
      <sect1><title>String Manipulation</title>

+ 6 - 0
Documentation/DocBook/kernel-locking.tmpl

@@ -1961,6 +1961,12 @@ machines due to caching.
    </sect1>
   </chapter>
 
+  <chapter id="apiref">
+   <title>Mutex API reference</title>
+!Iinclude/linux/mutex.h
+!Ekernel/mutex.c
+  </chapter>
+
   <chapter id="references">
    <title>Further reading</title>
 

+ 5 - 0
Documentation/DocBook/tracepoint.tmpl

@@ -104,4 +104,9 @@
    <title>Block IO</title>
 !Iinclude/trace/events/block.h
   </chapter>
+
+  <chapter id="workqueue">
+   <title>Workqueue</title>
+!Iinclude/trace/events/workqueue.h
+  </chapter>
 </book>

+ 45 - 0
Documentation/block/cfq-iosched.txt

@@ -0,0 +1,45 @@
+CFQ ioscheduler tunables
+========================
+
+slice_idle
+----------
+This specifies how long CFQ should idle for next request on certain cfq queues
+(for sequential workloads) and service trees (for random workloads) before
+queue is expired and CFQ selects next queue to dispatch from.
+
+By default slice_idle is a non-zero value. That means by default we idle on
+queues/service trees. This can be very helpful on highly seeky media like
+single spindle SATA/SAS disks where we can cut down on overall number of
+seeks and see improved throughput.
+
+Setting slice_idle to 0 will remove all the idling on queues/service tree
+level and one should see an overall improved throughput on faster storage
+devices like multiple SATA/SAS disks in hardware RAID configuration. The down
+side is that isolation provided from WRITES also goes down and notion of
+IO priority becomes weaker.
+
+So depending on storage and workload, it might be useful to set slice_idle=0.
+In general I think for SATA/SAS disks and software RAID of SATA/SAS disks
+keeping slice_idle enabled should be useful. For any configurations where
+there are multiple spindles behind single LUN (Host based hardware RAID
+controller or for storage arrays), setting slice_idle=0 might end up in better
+throughput and acceptable latencies.
+
+CFQ IOPS Mode for group scheduling
+===================================
+Basic CFQ design is to provide priority based time slices. Higher priority
+process gets bigger time slice and lower priority process gets smaller time
+slice. Measuring time becomes harder if storage is fast and supports NCQ and
+it would be better to dispatch multiple requests from multiple cfq queues in
+request queue at a time. In such scenario, it is not possible to measure time
+consumed by single queue accurately.
+
+What is possible though is to measure number of requests dispatched from a
+single queue and also allow dispatch from multiple cfq queue at the same time.
+This effectively becomes the fairness in terms of IOPS (IO operations per
+second).
+
+If one sets slice_idle=0 and if storage supports NCQ, CFQ internally switches
+to IOPS mode and starts providing fairness in terms of number of requests
+dispatched. Note that this mode switching takes effect only for group
+scheduling. For non-cgroup users nothing should change.
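
A minimal userspace sketch of flipping the tunable (the disk name "sda" is
illustrative, not part of the patch):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/block/sda/queue/iosched/slice_idle", "w");

		if (!f) {
			perror("slice_idle");
			return 1;
		}
		/* 0 disables idling; on NCQ hardware CFQ then arbitrates
		 * by IOPS, as described above. */
		fprintf(f, "0\n");
		return fclose(f) ? 1 : 0;
	}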

+ 28 - 0
Documentation/cgroups/blkio-controller.txt

@@ -217,6 +217,7 @@ Details of cgroup files
 CFQ sysfs tunable
 =================
 /sys/block/<disk>/queue/iosched/group_isolation
+-----------------------------------------------
 
 If group_isolation=1, it provides stronger isolation between groups at the
 expense of throughput. By default group_isolation is 0. In general that
@@ -243,6 +244,33 @@ By default one should run with group_isolation=0. If that is not sufficient
 and one wants stronger isolation between groups, then set group_isolation=1
 but this will come at cost of reduced throughput.
 
+/sys/block/<disk>/queue/iosched/slice_idle
+------------------------------------------
+On a faster hardware CFQ can be slow, especially with sequential workload.
+This happens because CFQ idles on a single queue and single queue might not
+drive deeper request queue depths to keep the storage busy. In such scenarios
+one can try setting slice_idle=0 and that would switch CFQ to IOPS
+(IO operations per second) mode on NCQ supporting hardware.
+
+That means CFQ will not idle between cfq queues of a cfq group and hence be
+able to driver higher queue depth and achieve better throughput. That also
+means that cfq provides fairness among groups in terms of IOPS and not in
+terms of disk time.
+
+/sys/block/<disk>/queue/iosched/group_idle
+------------------------------------------
+If one disables idling on individual cfq queues and cfq service trees by
+setting slice_idle=0, group_idle kicks in. That means CFQ will still idle
+on the group in an attempt to provide fairness among groups.
+
+By default group_idle is same as slice_idle and does not do anything if
+slice_idle is enabled.
+
+One can experience an overall throughput drop if you have created multiple
+groups and put applications in that group which are not driving enough
+IO to keep disk busy. In that case set group_idle=0, and CFQ will not idle
+on individual groups and throughput should improve.
+
 What works
 ==========
 - Currently only sync IO queues are support. All the buffered writes are

+ 14 - 8
Documentation/gpio.txt

@@ -109,17 +109,19 @@ use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders.
 
 If you want to initialize a structure with an invalid GPIO number, use
 some negative number (perhaps "-EINVAL"); that will never be valid.  To
-test if a number could reference a GPIO, you may use this predicate:
+test if such number from such a structure could reference a GPIO, you
+may use this predicate:
 
 	int gpio_is_valid(int number);
 
 A number that's not valid will be rejected by calls which may request
 or free GPIOs (see below).  Other numbers may also be rejected; for
-example, a number might be valid but unused on a given board.
-
-Whether a platform supports multiple GPIO controllers is currently a
-platform-specific implementation issue.
+example, a number might be valid but temporarily unused on a given board.
 
+Whether a platform supports multiple GPIO controllers is a platform-specific
+implementation issue, as are whether that support can leave "holes" in the space
+of GPIO numbers, and whether new controllers can be added at runtime.  Such issues
+can affect things including whether adjacent GPIO numbers are both valid.
 
 Using GPIOs
 -----------
@@ -480,12 +482,16 @@ To support this framework, a platform's Kconfig will "select" either
 ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB
 and arrange that its <asm/gpio.h> includes <asm-generic/gpio.h> and defines
 three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep().
-They may also want to provide a custom value for ARCH_NR_GPIOS.
 
-ARCH_REQUIRE_GPIOLIB means that the gpio-lib code will always get compiled
+It may also provide a custom value for ARCH_NR_GPIOS, so that it better
+reflects the number of GPIOs in actual use on that platform, without
+wasting static table space.  (It should count both built-in/SoC GPIOs and
+also ones on GPIO expanders.
+
+ARCH_REQUIRE_GPIOLIB means that the gpiolib code will always get compiled
 into the kernel on that architecture.
 
-ARCH_WANT_OPTIONAL_GPIOLIB means the gpio-lib code defaults to off and the user
+ARCH_WANT_OPTIONAL_GPIOLIB means the gpiolib code defaults to off and the user
 can enable it and build it into the kernel optionally.
 
 If neither of these options are selected, the platform does not support
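
A minimal sketch of the pattern this predicate enables (the structure and
names are hypothetical):

	#include <linux/gpio.h>

	struct my_board_info {
		int reset_gpio;		/* may hold -EINVAL if not wired */
	};

	static int my_claim_reset(struct my_board_info *info)
	{
		int err;

		if (!gpio_is_valid(info->reset_gpio))
			return 0;	/* optional signal; skip it */

		err = gpio_request(info->reset_gpio, "my-reset");
		if (err)
			return err;

		err = gpio_direction_output(info->reset_gpio, 1);
		if (err)
			gpio_free(info->reset_gpio);
		return err;
	}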

+ 3 - 4
Documentation/hwmon/sysfs-interface

@@ -91,12 +91,11 @@ name		The chip name.
 		I2C devices get this attribute created automatically.
 		RO
 
-update_rate	The rate at which the chip will update readings.
+update_interval	The interval at which the chip will update readings.
 		Unit: millisecond
 		RW
-		Some devices have a variable update rate. This attribute
-		can be used to change the update rate to the desired
-		frequency.
+		Some devices have a variable update rate or interval.
+		This attribute can be used to change it to the desired value.
 
 
 ************

+ 5 - 0
Documentation/kernel-doc-nano-HOWTO.txt

@@ -345,5 +345,10 @@ documentation, in <filename>, for the functions listed.
 section titled <section title> from <filename>.
 Spaces are allowed in <section title>; do not quote the <section title>.
 
+!C<filename> is replaced by nothing, but makes the tools check that
+all DOC: sections and documented functions, symbols, etc. are used.
+This makes sense to use when you use !F/!P only and want to verify
+that all documentation is included.
+
 Tim.
 */ <twaugh@redhat.com>
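
As a usage sketch, a template that pulls in selected functions with !F could
enable the completeness check like this (path and function names are
illustrative):

	!Cdrivers/foo/bar.c
	!Fdrivers/foo/bar.c foo_init foo_exit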

+ 10 - 7
Documentation/kernel-parameters.txt

@@ -1974,15 +1974,18 @@ and is between 256 and 4096 characters. It is defined in the file
 		force	Enable ASPM even on devices that claim not to support it.
 			WARNING: Forcing ASPM on may cause system lockups.
 
+	pcie_ports=	[PCIE] PCIe ports handling:
+		auto	Ask the BIOS whether or not to use native PCIe services
+			associated with PCIe ports (PME, hot-plug, AER).  Use
+			them only if that is allowed by the BIOS.
+		native	Use native PCIe services associated with PCIe ports
+			unconditionally.
+		compat	Treat PCIe ports as PCI-to-PCI bridges, disable the PCIe
+			ports driver.
+
 	pcie_pme=	[PCIE,PM] Native PCIe PME signaling options:
-			Format: {auto|force}[,nomsi]
-		auto	Use native PCIe PME signaling if the BIOS allows the
-			kernel to control PCIe config registers of root ports.
-		force	Use native PCIe PME signaling even if the BIOS refuses
-			to allow the kernel to control the relevant PCIe config
-			registers.
 		nomsi	Do not use MSI for native PCIe PME signaling (this makes
-			all PCIe root ports use INTx for everything).
+			all PCIe root ports use INTx for all services).
 
 	pcmv=		[HW,PCMCIA] BadgePAD 4
 

+ 2 - 1
Documentation/mutex-design.txt

@@ -9,7 +9,7 @@ firstly, there's nothing wrong with semaphores. But if the simpler
 mutex semantics are sufficient for your code, then there are a couple
 of advantages of mutexes:
 
- - 'struct mutex' is smaller on most architectures: .e.g on x86,
+ - 'struct mutex' is smaller on most architectures: E.g. on x86,
    'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes.
    A smaller structure size means less RAM footprint, and better
    CPU-cache utilization.
@@ -136,3 +136,4 @@ the APIs of 'struct mutex' have been streamlined:
  void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
  int  mutex_lock_interruptible_nested(struct mutex *lock,
                                       unsigned int subclass);
+ int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
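
The new entry, atomic_dec_and_mutex_lock(), decrements the counter and
returns 1 with the mutex held only when the count reaches zero.  A minimal
sketch of the resulting "last put" pattern (structure and names are
hypothetical):

	#include <linux/mutex.h>
	#include <linux/list.h>
	#include <linux/slab.h>
	#include <asm/atomic.h>

	struct my_obj {
		atomic_t		refcnt;
		struct list_head	node;
	};

	static DEFINE_MUTEX(obj_mutex);		/* protects the object list */

	static void my_obj_put(struct my_obj *obj)
	{
		/* Only the final put takes the mutex. */
		if (atomic_dec_and_mutex_lock(&obj->refcnt, &obj_mutex)) {
			list_del(&obj->node);
			mutex_unlock(&obj_mutex);  /* held on a return of 1 */
			kfree(obj);
		}
	}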

+ 1 - 1
Documentation/power/regulator/overview.txt

@@ -13,7 +13,7 @@ regulators (where voltage output is controllable) and current sinks (where
 current limit is controllable).
 
 (C) 2008  Wolfson Microelectronics PLC.
-Author: Liam Girdwood <lg@opensource.wolfsonmicro.com>
+Author: Liam Girdwood <lrg@slimlogic.co.uk>
 
 
 Nomenclature

+ 1 - 0
Documentation/sound/alsa/HD-Audio-Models.txt

@@ -296,6 +296,7 @@ Conexant 5051
 Conexant 5066
 =============
   laptop	Basic Laptop config (default)
+  hp-laptop	HP laptops, e g G60
   dell-laptop	Dell laptops
   dell-vostro	Dell Vostro
   olpc-xo-1_5	OLPC XO 1.5

+ 380 - 0
Documentation/workqueue.txt

@@ -0,0 +1,380 @@
+
+Concurrency Managed Workqueue (cmwq)
+
+September, 2010		Tejun Heo <tj@kernel.org>
+			Florian Mickler <florian@mickler.org>
+
+CONTENTS
+
+1. Introduction
+2. Why cmwq?
+3. The Design
+4. Application Programming Interface (API)
+5. Example Execution Scenarios
+6. Guidelines
+
+
+1. Introduction
+
+There are many cases where an asynchronous process execution context
+is needed and the workqueue (wq) API is the most commonly used
+mechanism for such cases.
+
+When such an asynchronous execution context is needed, a work item
+describing which function to execute is put on a queue.  An
+independent thread serves as the asynchronous execution context.  The
+queue is called workqueue and the thread is called worker.
+
+While there are work items on the workqueue the worker executes the
+functions associated with the work items one after the other.  When
+there is no work item left on the workqueue the worker becomes idle.
+When a new work item gets queued, the worker begins executing again.
+
+
+2. Why cmwq?
+
+In the original wq implementation, a multi threaded (MT) wq had one
+worker thread per CPU and a single threaded (ST) wq had one worker
+thread system-wide.  A single MT wq needed to keep around the same
+number of workers as the number of CPUs.  The kernel grew a lot of MT
+wq users over the years and with the number of CPU cores continuously
+rising, some systems saturated the default 32k PID space just booting
+up.
+
+Although MT wq wasted a lot of resource, the level of concurrency
+provided was unsatisfactory.  The limitation was common to both ST and
+MT wq albeit less severe on MT.  Each wq maintained its own separate
+worker pool.  A MT wq could provide only one execution context per CPU
+while a ST wq one for the whole system.  Work items had to compete for
+those very limited execution contexts leading to various problems
+including proneness to deadlocks around the single execution context.
+
+The tension between the provided level of concurrency and resource
+usage also forced its users to make unnecessary tradeoffs like libata
+choosing to use ST wq for polling PIOs and accepting an unnecessary
+limitation that no two polling PIOs can progress at the same time.  As
+MT wq don't provide much better concurrency, users which require
+higher level of concurrency, like async or fscache, had to implement
+their own thread pool.
+
+Concurrency Managed Workqueue (cmwq) is a reimplementation of wq with
+focus on the following goals.
+
+* Maintain compatibility with the original workqueue API.
+
+* Use per-CPU unified worker pools shared by all wq to provide
+  flexible level of concurrency on demand without wasting a lot of
+  resource.
+
+* Automatically regulate worker pool and level of concurrency so that
+  the API users don't need to worry about such details.
+
+
+3. The Design
+
+In order to ease the asynchronous execution of functions a new
+abstraction, the work item, is introduced.
+
+A work item is a simple struct that holds a pointer to the function
+that is to be executed asynchronously.  Whenever a driver or subsystem
+wants a function to be executed asynchronously it has to set up a work
+item pointing to that function and queue that work item on a
+workqueue.
+
+Special purpose threads, called worker threads, execute the functions
+off of the queue, one after the other.  If no work is queued, the
+worker threads become idle.  These worker threads are managed in so
+called thread-pools.
+
+The cmwq design differentiates between the user-facing workqueues that
+subsystems and drivers queue work items on and the backend mechanism
+which manages thread-pool and processes the queued work items.
+
+The backend is called gcwq.  There is one gcwq for each possible CPU
+and one gcwq to serve work items queued on unbound workqueues.
+
+Subsystems and drivers can create and queue work items through special
+workqueue API functions as they see fit. They can influence some
+aspects of the way the work items are executed by setting flags on the
+workqueue they are putting the work item on. These flags include
+things like CPU locality, reentrancy, concurrency limits and more. To
+get a detailed overview refer to the API description of
+alloc_workqueue() below.
+
+When a work item is queued to a workqueue, the target gcwq is
+determined according to the queue parameters and workqueue attributes
+and appended on the shared worklist of the gcwq.  For example, unless
+specifically overridden, a work item of a bound workqueue will be
+queued on the worklist of exactly that gcwq that is associated to the
+CPU the issuer is running on.
+
+For any worker pool implementation, managing the concurrency level
+(how many execution contexts are active) is an important issue.  cmwq
+tries to keep the concurrency at a minimal but sufficient level.
+Minimal to save resources and sufficient in that the system is used at
+its full capacity.
+
+Each gcwq bound to an actual CPU implements concurrency management by
+hooking into the scheduler.  The gcwq is notified whenever an active
+worker wakes up or sleeps and keeps track of the number of the
+currently runnable workers.  Generally, work items are not expected to
+hog a CPU and consume many cycles.  That means maintaining just enough
+concurrency to prevent work processing from stalling should be
+optimal.  As long as there are one or more runnable workers on the
+CPU, the gcwq doesn't start execution of a new work, but, when the
+last running worker goes to sleep, it immediately schedules a new
+worker so that the CPU doesn't sit idle while there are pending work
+items.  This allows using a minimal number of workers without losing
+execution bandwidth.
+
+Keeping idle workers around doesn't cost other than the memory space
+for kthreads, so cmwq holds onto idle ones for a while before killing
+them.
+
+For an unbound wq, the above concurrency management doesn't apply and
+the gcwq for the pseudo unbound CPU tries to start executing all work
+items as soon as possible.  The responsibility of regulating
+concurrency level is on the users.  There is also a flag to mark a
+bound wq to ignore the concurrency management.  Please refer to the
+API section for details.
+
+Forward progress guarantee relies on that workers can be created when
+more execution contexts are necessary, which in turn is guaranteed
+through the use of rescue workers.  All work items which might be used
+on code paths that handle memory reclaim are required to be queued on
+wq's that have a rescue-worker reserved for execution under memory
+pressure.  Else it is possible that the thread-pool deadlocks waiting
+for execution contexts to free up.
+
+
+4. Application Programming Interface (API)
+
+alloc_workqueue() allocates a wq.  The original create_*workqueue()
+functions are deprecated and scheduled for removal.  alloc_workqueue()
+takes three arguments - @name, @flags and @max_active.  @name is the
+name of the wq and also used as the name of the rescuer thread if
+there is one.
+
+A wq no longer manages execution resources but serves as a domain for
+forward progress guarantee, flush and work item attributes.  @flags
+and @max_active control how work items are assigned execution
+resources, scheduled and executed.
+
+@flags:
+
+  WQ_NON_REENTRANT
+
+	By default, a wq guarantees non-reentrance only on the same
+	CPU.  A work item may not be executed concurrently on the same
+	CPU by multiple workers but is allowed to be executed
+	concurrently on multiple CPUs.  This flag makes sure
+	non-reentrance is enforced across all CPUs.  Work items queued
+	to a non-reentrant wq are guaranteed to be executed by at most
+	one worker system-wide at any given time.
+
+  WQ_UNBOUND
+
+	Work items queued to an unbound wq are served by a special
+	gcwq which hosts workers which are not bound to any specific
+	CPU.  This makes the wq behave as a simple execution context
+	provider without concurrency management.  The unbound gcwq
+	tries to start execution of work items as soon as possible.
+	Unbound wq sacrifices locality but is useful for the following
+	cases.
+
+	* Wide fluctuation in the concurrency level requirement is
+	  expected and using bound wq may end up creating large number
+	  of mostly unused workers across different CPUs as the issuer
+	  hops through different CPUs.
+
+	* Long running CPU intensive workloads which can be better
+	  managed by the system scheduler.
+
+  WQ_FREEZEABLE
+
+	A freezeable wq participates in the freeze phase of the system
+	suspend operations.  Work items on the wq are drained and no
+	new work item starts execution until thawed.
+
+  WQ_RESCUER
+
+	All wq which might be used in the memory reclaim paths _MUST_
+	have this flag set.  This reserves one worker exclusively for
+	the execution of this wq under memory pressure.
+
+  WQ_HIGHPRI
+
+	Work items of a highpri wq are queued at the head of the
+	worklist of the target gcwq and start execution regardless of
+	the current concurrency level.  In other words, highpri work
+	items will always start execution as soon as execution
+	resource is available.
+
+	Ordering among highpri work items is preserved - a highpri
+	work item queued after another highpri work item will start
+	execution after the earlier highpri work item starts.
+
+	Although highpri work items are not held back by other
+	runnable work items, they still contribute to the concurrency
+	level.  Highpri work items in runnable state will prevent
+	non-highpri work items from starting execution.
+
+	This flag is meaningless for unbound wq.
+
+  WQ_CPU_INTENSIVE
+
+	Work items of a CPU intensive wq do not contribute to the
+	concurrency level.  In other words, runnable CPU intensive
+	work items will not prevent other work items from starting
+	execution.  This is useful for bound work items which are
+	expected to hog CPU cycles so that their execution is
+	regulated by the system scheduler.
+
+	Although CPU intensive work items don't contribute to the
+	concurrency level, start of their executions is still
+	regulated by the concurrency management and runnable
+	non-CPU-intensive work items can delay execution of CPU
+	intensive work items.
+
+	This flag is meaningless for unbound wq.
+
+  WQ_HIGHPRI | WQ_CPU_INTENSIVE
+
+	This combination makes the wq avoid interaction with
+	concurrency management completely and behave as a simple
+	per-CPU execution context provider.  Work items queued on a
+	highpri CPU-intensive wq start execution as soon as resources
+	are available and don't affect execution of other work items.
+
+@max_active:
+
+@max_active determines the maximum number of execution contexts per
+CPU which can be assigned to the work items of a wq.  For example,
+with @max_active of 16, at most 16 work items of the wq can be
+executing at the same time per CPU.
+
+Currently, for a bound wq, the maximum limit for @max_active is 512
+and the default value used when 0 is specified is 256.  For an unbound
+wq, the limit is higher of 512 and 4 * num_possible_cpus().  These
+values are chosen sufficiently high such that they are not the
+limiting factor while providing protection in runaway cases.
+
+The number of active work items of a wq is usually regulated by the
+users of the wq, more specifically, by how many work items the users
+may queue at the same time.  Unless there is a specific need for
+throttling the number of active work items, specifying '0' is
+recommended.
+
+Some users depend on the strict execution ordering of ST wq.  The
+combination of @max_active of 1 and WQ_UNBOUND is used to achieve this
+behavior.  Work items on such wq are always queued to the unbound gcwq
+and only one work item can be active at any given time thus achieving
+the same ordering property as ST wq.
+
+
+5. Example Execution Scenarios
+
+The following example execution scenarios try to illustrate how cmwq
+behave under different configurations.
+
+ Work items w0, w1, w2 are queued to a bound wq q0 on the same CPU.
+ w0 burns CPU for 5ms then sleeps for 10ms then burns CPU for 5ms
+ again before finishing.  w1 and w2 burn CPU for 5ms then sleep for
+ 10ms.
+
+Ignoring all other tasks, works and processing overhead, and assuming
+simple FIFO scheduling, the following is one highly simplified version
+of possible sequences of events with the original wq.
+
+ TIME IN MSECS	EVENT
+ 0		w0 starts and burns CPU
+ 5		w0 sleeps
+ 15		w0 wakes up and burns CPU
+ 20		w0 finishes
+ 20		w1 starts and burns CPU
+ 25		w1 sleeps
+ 35		w1 wakes up and finishes
+ 35		w2 starts and burns CPU
+ 40		w2 sleeps
+ 50		w2 wakes up and finishes
+
+And with cmwq with @max_active >= 3,
+
+ TIME IN MSECS	EVENT
+ 0		w0 starts and burns CPU
+ 5		w0 sleeps
+ 5		w1 starts and burns CPU
+ 10		w1 sleeps
+ 10		w2 starts and burns CPU
+ 15		w2 sleeps
+ 15		w0 wakes up and burns CPU
+ 20		w0 finishes
+ 20		w1 wakes up and finishes
+ 25		w2 wakes up and finishes
+
+If @max_active == 2,
+
+ TIME IN MSECS	EVENT
+ 0		w0 starts and burns CPU
+ 5		w0 sleeps
+ 5		w1 starts and burns CPU
+ 10		w1 sleeps
+ 15		w0 wakes up and burns CPU
+ 20		w0 finishes
+ 20		w1 wakes up and finishes
+ 20		w2 starts and burns CPU
+ 25		w2 sleeps
+ 35		w2 wakes up and finishes
+
+Now, let's assume w1 and w2 are queued to a different wq q1 which has
+WQ_HIGHPRI set,
+
+ TIME IN MSECS	EVENT
+ 0		w1 and w2 start and burn CPU
+ 5		w1 sleeps
+ 10		w2 sleeps
+ 10		w0 starts and burns CPU
+ 15		w0 sleeps
+ 15		w1 wakes up and finishes
+ 20		w2 wakes up and finishes
+ 25		w0 wakes up and burns CPU
+ 30		w0 finishes
+
+If q1 has WQ_CPU_INTENSIVE set,
+
+ TIME IN MSECS	EVENT
+ 0		w0 starts and burns CPU
+ 5		w0 sleeps
+ 5		w1 and w2 start and burn CPU
+ 10		w1 sleeps
+ 15		w2 sleeps
+ 15		w0 wakes up and burns CPU
+ 20		w0 finishes
+ 20		w1 wakes up and finishes
+ 25		w2 wakes up and finishes
+
+
+6. Guidelines
+
+* Do not forget to use WQ_RESCUER if a wq may process work items which
+  are used during memory reclaim.  Each wq with WQ_RESCUER set has one
+  rescuer thread reserved for it.  If there is dependency among
+  multiple work items used during memory reclaim, they should be
+  queued to separate wq each with WQ_RESCUER.
+
+* Unless strict ordering is required, there is no need to use ST wq.
+
+* Unless there is a specific need, using 0 for @max_active is
+  recommended.  In most use cases, concurrency level usually stays
+  well under the default limit.
+
+* A wq serves as a domain for forward progress guarantee (WQ_RESCUER),
+  flush and work item attributes.  Work items which are not involved
+  in memory reclaim and don't need to be flushed as a part of a group
+  of work items, and don't require any special attribute, can use one
+  of the system wq.  There is no difference in execution
+  characteristics between using a dedicated wq and a system wq.
+
+* Unless work items are expected to consume a huge amount of CPU
+  cycles, using a bound wq is usually beneficial due to the increased
+  level of locality in wq operations and work item execution.
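
A minimal sketch of the API described above ("my_wq" and the work function
are hypothetical; a reclaim-safe, strictly ordered queue would instead pass
WQ_UNBOUND | WQ_RESCUER with @max_active of 1):

	#include <linux/init.h>
	#include <linux/errno.h>
	#include <linux/workqueue.h>

	static void my_work_fn(struct work_struct *work)
	{
		/* Runs asynchronously in a worker thread; may sleep. */
	}

	static DECLARE_WORK(my_work, my_work_fn);
	static struct workqueue_struct *my_wq;

	static int __init my_init(void)
	{
		/* @max_active of 0 selects the default (currently 256). */
		my_wq = alloc_workqueue("my_wq", WQ_RESCUER, 0);
		if (!my_wq)
			return -ENOMEM;

		queue_work(my_wq, &my_work);
		flush_workqueue(my_wq);		/* wait for my_work */
		destroy_workqueue(my_wq);
		return 0;
	}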

+ 47 - 24
MAINTAINERS

@@ -1135,7 +1135,7 @@ ATLX ETHERNET DRIVERS
 M:	Jay Cliburn <jcliburn@gmail.com>
 M:	Chris Snook <chris.snook@gmail.com>
 M:	Jie Yang <jie.yang@atheros.com>
-L:	atl1-devel@lists.sourceforge.net
+L:	netdev@vger.kernel.org
 W:	http://sourceforge.net/projects/atl1
 W:	http://atl1.sourceforge.net
 S:	Maintained
@@ -1220,7 +1220,7 @@ F:	drivers/auxdisplay/
 F:	include/linux/cfag12864b.h
 
 AVR32 ARCHITECTURE
-M:	Haavard Skinnemoen <hskinnemoen@atmel.com>
+M:	Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
 W:	http://www.atmel.com/products/AVR32/
 W:	http://avr32linux.org/
 W:	http://avrfreaks.net/
@@ -1228,7 +1228,7 @@ S:	Supported
 F:	arch/avr32/
 
 AVR32/AT32AP MACHINE SUPPORT
-M:	Haavard Skinnemoen <hskinnemoen@atmel.com>
+M:	Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
 S:	Supported
 F:	arch/avr32/mach-at32ap/
 
@@ -1445,6 +1445,16 @@ S:	Maintained
 F:	Documentation/video4linux/cafe_ccic
 F:	drivers/media/video/cafe_ccic*
 
+CAIF NETWORK LAYER
+M:	Sjur Braendeland <sjur.brandeland@stericsson.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	Documentation/networking/caif/
+F:	drivers/net/caif/
+F:	include/linux/caif/
+F:	include/net/caif/
+F:	net/caif/
+
 CALGARY x86-64 IOMMU
 M:	Muli Ben-Yehuda <muli@il.ibm.com>
 M:	"Jon D. Mason" <jdmason@kudzu.us>
@@ -2189,6 +2199,12 @@ W:	http://acpi4asus.sf.net
 S:	Maintained
 F:	drivers/platform/x86/eeepc-laptop.c
 
+EFIFB FRAMEBUFFER DRIVER
+L:	linux-fbdev@vger.kernel.org
+M:	Peter Jones <pjones@redhat.com>
+S:	Maintained
+F:	drivers/video/efifb.c
+
 EFS FILESYSTEM
 W:	http://aeschi.ch.eu.org/efs/
 S:	Orphan
@@ -2201,6 +2217,12 @@ L:	linux-rdma@vger.kernel.org
 S:	Supported
 F:	drivers/infiniband/hw/ehca/
 
+EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER
+M:	Breno Leitao <leitao@linux.vnet.ibm.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/ehea/
+
 EMBEDDED LINUX
 M:	Paul Gortmaker <paul.gortmaker@windriver.com>
 M:	Matt Mackall <mpm@selenic.com>
@@ -2641,9 +2663,14 @@ S:	Maintained
 F:	drivers/media/video/gspca/
 
 HARDWARE MONITORING
+M:	Jean Delvare <khali@linux-fr.org>
+M:	Guenter Roeck <guenter.roeck@ericsson.com>
 L:	lm-sensors@lm-sensors.org
 W:	http://www.lm-sensors.org/
-S:	Orphan
+T:	quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-hwmon/
+T:	quilt kernel.org/pub/linux/kernel/people/groeck/linux-staging/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git
+S:	Maintained
 F:	Documentation/hwmon/
 F:	drivers/hwmon/
 F:	include/linux/hwmon*.h
@@ -2781,11 +2808,6 @@ S:	Maintained
 F:	arch/x86/kernel/hpet.c
 F:	arch/x86/include/asm/hpet.h
 
-HPET:	ACPI
-M:	Bob Picco <bob.picco@hp.com>
-S:	Maintained
-F:	drivers/char/hpet.c
-
 HPFS FILESYSTEM
 M:	Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz>
 W:	http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi
@@ -3398,7 +3420,7 @@ F:	drivers/s390/kvm/
 
 KEXEC
 M:	Eric Biederman <ebiederm@xmission.com>
-W:	http://ftp.kernel.org/pub/linux/kernel/people/horms/kexec-tools/
+W:	http://kernel.org/pub/linux/utils/kernel/kexec/
 L:	kexec@lists.infradead.org
 S:	Maintained
 F:	include/linux/kexec.h
@@ -3885,10 +3907,8 @@ F:	Documentation/serial/moxa-smartio
 F:	drivers/char/mxser.*
 
 MSI LAPTOP SUPPORT
-M:	Lennart Poettering <mzxreary@0pointer.de>
+M:	Lee, Chun-Yi <jlee@novell.com>
 L:	platform-driver-x86@vger.kernel.org
-W:	https://tango.0pointer.de/mailman/listinfo/s270-linux
-W:	http://0pointer.de/lennart/tchibo.html
 S:	Maintained
 F:	drivers/platform/x86/msi-laptop.c
 
@@ -3905,8 +3925,10 @@ S:	Supported
 F:	drivers/mfd/
 
 MULTIMEDIA CARD (MMC), SECURE DIGITAL (SD) AND SDIO SUBSYSTEM
-S:	Orphan
+M:	Chris Ball <cjb@laptop.org>
 L:	linux-mmc@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc.git
+S:	Maintained
 F:	drivers/mmc/
 F:	include/linux/mmc/
 
@@ -3923,13 +3945,12 @@ F:	Documentation/sound/oss/MultiSound
 F:	sound/oss/msnd*
 
 MULTITECH MULTIPORT CARD (ISICOM)
-M:	Jiri Slaby <jirislaby@gmail.com>
-S:	Maintained
+S:	Orphan
 F:	drivers/char/isicom.c
 F:	include/linux/isicom.h
 
 MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER
-M:	Felipe Balbi <felipe.balbi@nokia.com>
+M:	Felipe Balbi <balbi@ti.com>
 L:	linux-usb@vger.kernel.org
 T:	git git://gitorious.org/usb/usb.git
 S:	Maintained
@@ -4227,7 +4248,7 @@ S:	Maintained
 F:	drivers/char/hw_random/omap-rng.c
 
 OMAP USB SUPPORT
-M:	Felipe Balbi <felipe.balbi@nokia.com>
+M:	Felipe Balbi <balbi@ti.com>
 M:	David Brownell <dbrownell@users.sourceforge.net>
 L:	linux-usb@vger.kernel.org
 L:	linux-omap@vger.kernel.org
@@ -4604,7 +4625,7 @@ F:	include/linux/preempt.h
 PRISM54 WIRELESS DRIVER
 M:	"Luis R. Rodriguez" <mcgrof@gmail.com>
 L:	linux-wireless@vger.kernel.org
-W:	http://prism54.org
+W:	http://wireless.kernel.org/en/users/Drivers/p54
 S:	Obsolete
 F:	drivers/net/wireless/prism54/
 
@@ -4805,6 +4826,7 @@ RCUTORTURE MODULE
 M:	Josh Triplett <josh@freedesktop.org>
 M:	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
 S:	Supported
+T:	git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-2.6-rcu.git
 F:	Documentation/RCU/torture.txt
 F:	kernel/rcutorture.c
 
@@ -4829,6 +4851,7 @@ M:	Dipankar Sarma <dipankar@in.ibm.com>
 M:	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
 W:	http://www.rdrop.com/users/paulmck/rclock/
 S:	Supported
+T:	git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-2.6-rcu.git
 F:	Documentation/RCU/
 F:	include/linux/rcu*
 F:	include/linux/srcu*
@@ -4836,12 +4859,10 @@ F:	kernel/rcu*
 F:	kernel/srcu*
 X:	kernel/rcutorture.c
 
-REAL TIME CLOCK DRIVER
+REAL TIME CLOCK DRIVER (LEGACY)
 M:	Paul Gortmaker <p_gortmaker@yahoo.com>
 S:	Maintained
-F:	Documentation/rtc.txt
-F:	drivers/rtc/
-F:	include/linux/rtc.h
+F:	drivers/char/rtc.c
 
 REAL TIME CLOCK (RTC) SUBSYSTEM
 M:	Alessandro Zummo <a.zummo@towertech.it>
@@ -5078,8 +5099,10 @@ S:	Maintained
 F:	drivers/mmc/host/sdricoh_cs.c
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) DRIVER
-S:	Orphan
+M:	Chris Ball <cjb@laptop.org>
 L:	linux-mmc@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc.git
+S:	Maintained
 F:	drivers/mmc/host/sdhci.*
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF)

+ 8 - 1
Makefile

@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 36
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc6
 NAME = Sheep on Meth
 
 # *DOCUMENTATION*
@@ -554,8 +554,15 @@ endif
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS	+= -fno-omit-frame-pointer -fno-optimize-sibling-calls
 else
+# Some targets (ARM with Thumb2, for example), can't be built with frame
+# pointers.  For those, we don't have FUNCTION_TRACER automatically
+# select FRAME_POINTER.  However, FUNCTION_TRACER adds -pg, and this is
+# incompatible with -fomit-frame-pointer with current GCC, so we don't use
+# -fomit-frame-pointer with FUNCTION_TRACER.
+ifndef CONFIG_FUNCTION_TRACER
 KBUILD_CFLAGS	+= -fomit-frame-pointer
 endif
+endif
 
 ifdef CONFIG_DEBUG_INFO
 KBUILD_CFLAGS	+= -g

+ 2 - 2
arch/Kconfig

@@ -32,8 +32,9 @@ config HAVE_OPROFILE
 
 config KPROBES
 	bool "Kprobes"
-	depends on KALLSYMS && MODULES
+	depends on MODULES
 	depends on HAVE_KPROBES
+	select KALLSYMS
 	help
 	  Kprobes allows you to trap at almost any kernel address and
 	  execute a callback function.  register_kprobe() establishes
@@ -45,7 +46,6 @@ config OPTPROBES
 	def_bool y
 	depends on KPROBES && HAVE_OPTPROBES
 	depends on !PREEMPT
-	select KALLSYMS_ALL
 
 config HAVE_EFFICIENT_UNALIGNED_ACCESS
 	bool
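
The swap from "depends on KALLSYMS" to "select KALLSYMS" changes how the
option is offered: with select, enabling KPROBES forces KALLSYMS on
automatically, while with depends the KPROBES prompt stays hidden until the
user has enabled KALLSYMS by hand.  A generic sketch of the two semantics
(symbol names illustrative):

	config FOO
		bool "Foo"
		select BAR	# turning FOO on forces BAR on as well

	config BAZ
		bool "Baz"
		depends on BAR	# BAZ is not offered until BAR is enabled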

+ 0 - 1
arch/alpha/include/asm/cache.h

@@ -17,7 +17,6 @@
 # define L1_CACHE_SHIFT     5
 #endif
 
-#define L1_CACHE_ALIGN(x)  (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
 #define SMP_CACHE_BYTES    L1_CACHE_BYTES
 
 #endif

+ 2 - 0
arch/alpha/include/asm/cacheflush.h

@@ -43,6 +43,8 @@ extern void smp_imb(void);
 /* ??? Ought to use this in arch/alpha/kernel/signal.c too.  */
 
 #ifndef CONFIG_SMP
+#include <linux/sched.h>
+
 extern void __load_new_mm_context(struct mm_struct *);
 static inline void
 flush_icache_user_range(struct vm_area_struct *vma, struct page *page,

+ 5 - 1
arch/alpha/include/asm/unistd.h

@@ -449,10 +449,13 @@
 #define __NR_pwritev			491
 #define __NR_rt_tgsigqueueinfo		492
 #define __NR_perf_event_open		493
+#define __NR_fanotify_init		494
+#define __NR_fanotify_mark		495
+#define __NR_prlimit64			496
 
 #ifdef __KERNEL__
 
-#define NR_SYSCALLS			494
+#define NR_SYSCALLS			497
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
@@ -463,6 +466,7 @@
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
+#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 
 /* "Conditional" syscalls.  What we want is
 

+ 29 - 52
arch/alpha/kernel/entry.S

@@ -73,8 +73,6 @@
 	ldq	$20, HAE_REG($19);	\
 	stq	$21, HAE_CACHE($19);	\
 	stq	$21, 0($20);		\
-	ldq	$0, 0($sp);		\
-	ldq	$1, 8($sp);		\
 99:;					\
 	ldq	$19, 72($sp);		\
 	ldq	$20, 80($sp);		\
@@ -316,19 +314,24 @@ ret_from_sys_call:
 	cmovne	$26, 0, $19		/* $19 = 0 => non-restartable */
 	ldq	$0, SP_OFF($sp)
 	and	$0, 8, $0
-	beq	$0, restore_all
-ret_from_reschedule:
+	beq	$0, ret_to_kernel
+ret_to_user:
 	/* Make sure need_resched and sigpending don't change between
 		sampling and the rti.  */
 	lda	$16, 7
 	call_pal PAL_swpipl
 	ldl	$5, TI_FLAGS($8)
 	and	$5, _TIF_WORK_MASK, $2
-	bne	$5, work_pending
+	bne	$2, work_pending
 restore_all:
 	RESTORE_ALL
 	call_pal PAL_rti
 
+ret_to_kernel:
+	lda	$16, 7
+	call_pal PAL_swpipl
+	br restore_all
+
 	.align 3
 $syscall_error:
 	/*
@@ -363,7 +366,7 @@ $ret_success:
  *       $8: current.
  *      $19: The old syscall number, or zero if this is not a return
  *           from a syscall that errored and is possibly restartable.
- *      $20: Error indication.
+ *      $20: The old a3 value
  */
 
 	.align	4
@@ -392,12 +395,18 @@ $work_resched:
 
 $work_notifysig:
 	mov	$sp, $16
-	br	$1, do_switch_stack
+	bsr	$1, do_switch_stack
 	mov	$sp, $17
 	mov	$5, $18
+	mov	$19, $9		/* save old syscall number */
+	mov	$20, $10	/* save old a3 */
+	and	$5, _TIF_SIGPENDING, $2
+	cmovne	$2, 0, $9	/* we don't want double syscall restarts */
 	jsr	$26, do_notify_resume
+	mov	$9, $19
+	mov	$10, $20
 	bsr	$1, undo_switch_stack
-	br	restore_all
+	br	ret_to_user
 .end work_pending
 
 /*
@@ -430,6 +439,7 @@ strace:
 	beq	$1, 1f
 	ldq	$27, 0($2)
 1:	jsr	$26, ($27), sys_gettimeofday
+ret_from_straced:
 	ldgp	$gp, 0($26)
 
 	/* check return.. */
@@ -650,7 +660,7 @@ kernel_thread:
 	/* We don't actually care for a3 success widgetry in the kernel.
 	   Not for positive errno values.  */
 	stq	$0, 0($sp)		/* $0 */
-	br	restore_all
+	br	ret_to_kernel
 .end kernel_thread
 
 /*
@@ -757,11 +767,15 @@ sys_vfork:
 	.ent	sys_sigreturn
 sys_sigreturn:
 	.prologue 0
+	lda	$9, ret_from_straced
+	cmpult	$26, $9, $9
 	mov	$sp, $17
 	lda	$18, -SWITCH_STACK_SIZE($sp)
 	lda	$sp, -SWITCH_STACK_SIZE($sp)
 	jsr	$26, do_sigreturn
-	br	$1, undo_switch_stack
+	bne	$9, 1f
+	jsr	$26, syscall_trace
+1:	br	$1, undo_switch_stack
 	br	ret_from_sys_call
 .end sys_sigreturn
 
@@ -770,46 +784,18 @@ sys_sigreturn:
 	.ent	sys_rt_sigreturn
 sys_rt_sigreturn:
 	.prologue 0
+	lda	$9, ret_from_straced
+	cmpult	$26, $9, $9
 	mov	$sp, $17
 	lda	$18, -SWITCH_STACK_SIZE($sp)
 	lda	$sp, -SWITCH_STACK_SIZE($sp)
 	jsr	$26, do_rt_sigreturn
-	br	$1, undo_switch_stack
+	bne	$9, 1f
+	jsr	$26, syscall_trace
+1:	br	$1, undo_switch_stack
 	br	ret_from_sys_call
 .end sys_rt_sigreturn
 
-	.align	4
-	.globl	sys_sigsuspend
-	.ent	sys_sigsuspend
-sys_sigsuspend:
-	.prologue 0
-	mov	$sp, $17
-	br	$1, do_switch_stack
-	mov	$sp, $18
-	subq	$sp, 16, $sp
-	stq	$26, 0($sp)
-	jsr	$26, do_sigsuspend
-	ldq	$26, 0($sp)
-	lda	$sp, SWITCH_STACK_SIZE+16($sp)
-	ret
-.end sys_sigsuspend
-
-	.align	4
-	.globl	sys_rt_sigsuspend
-	.ent	sys_rt_sigsuspend
-sys_rt_sigsuspend:
-	.prologue 0
-	mov	$sp, $18
-	br	$1, do_switch_stack
-	mov	$sp, $19
-	subq	$sp, 16, $sp
-	stq	$26, 0($sp)
-	jsr	$26, do_rt_sigsuspend
-	ldq	$26, 0($sp)
-	lda	$sp, SWITCH_STACK_SIZE+16($sp)
-	ret
-.end sys_rt_sigsuspend
-
 	.align	4
 	.globl	sys_sethae
 	.ent	sys_sethae
@@ -928,15 +914,6 @@ sys_execve:
 	jmp	$31, do_sys_execve
 .end sys_execve
 
-	.align	4
-	.globl	osf_sigprocmask
-	.ent	osf_sigprocmask
-osf_sigprocmask:
-	.prologue 0
-	mov	$sp, $18
-	jmp	$31, sys_osf_sigprocmask
-.end osf_sigprocmask
-
 	.align	4
 	.globl	alpha_ni_syscall
 	.ent	alpha_ni_syscall

+ 7 - 5
arch/alpha/kernel/err_ev6.c

@@ -90,11 +90,13 @@ static int
 ev6_parse_cbox(u64 c_addr, u64 c1_syn, u64 c2_syn, 
 	       u64 c_stat, u64 c_sts, int print)
 {
-	char *sourcename[] = { "UNKNOWN", "UNKNOWN", "UNKNOWN",
-			       "MEMORY", "BCACHE", "DCACHE", 
-			       "BCACHE PROBE", "BCACHE PROBE" };
-	char *streamname[] = { "D", "I" };
-	char *bitsname[] = { "SINGLE", "DOUBLE" };
+	static const char * const sourcename[] = {
+		"UNKNOWN", "UNKNOWN", "UNKNOWN",
+		"MEMORY", "BCACHE", "DCACHE",
+		"BCACHE PROBE", "BCACHE PROBE"
+	};
+	static const char * const streamname[] = { "D", "I" };
+	static const char * const bitsname[] = { "SINGLE", "DOUBLE" };
 	int status = MCHK_DISPOSITION_REPORT;
 	int source = -1, stream = -1, bits = -1;
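
This change (and the matching ones in err_marvel.c and err_titan.c below)
applies a common idiom: an automatic array of string pointers is rebuilt on
the stack on every call, whereas a "static const char * const" table is
emitted once into read-only data.  A generic sketch (names illustrative):

	#include <linux/kernel.h>

	static void print_state(unsigned int s)
	{
		/* One immutable table in .rodata, no per-call setup code. */
		static const char * const name[] = { "idle", "busy" };

		printk("state: %s\n", name[s & 1]);
	}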
 

+ 20 - 19
arch/alpha/kernel/err_marvel.c

@@ -109,7 +109,7 @@ marvel_print_err_cyc(u64 err_cyc)
 #define IO7__ERR_CYC__CYCLE__M	(0x7)
 
 	printk("%s        Packet In Error: %s\n"
-	       "%s        Error in %s, cycle %ld%s%s\n",
+	       "%s        Error in %s, cycle %lld%s%s\n",
 	       err_print_prefix, 
 	       packet_desc[EXTRACT(err_cyc, IO7__ERR_CYC__PACKET)],
 	       err_print_prefix,
@@ -313,7 +313,7 @@ marvel_print_po7_ugbge_sym(u64 ugbge_sym)
 	}
 
 	printk("%s      Up Hose Garbage Symptom:\n"
-	       "%s        Source Port: %ld - Dest PID: %ld - OpCode: %s\n", 
+	       "%s        Source Port: %lld - Dest PID: %lld - OpCode: %s\n",
 	       err_print_prefix,
 	       err_print_prefix, 
 	       EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_SRC_PORT),
@@ -552,7 +552,7 @@ marvel_print_pox_spl_cmplt(u64 spl_cmplt)
 #define IO7__POX_SPLCMPLT__REM_BYTE_COUNT__M	(0xfff)
 
 	printk("%s      Split Completion Error:\n"	
-	       "%s         Source (Bus:Dev:Func): %ld:%ld:%ld\n",
+	       "%s         Source (Bus:Dev:Func): %lld:%lld:%lld\n",
 	       err_print_prefix,
 	       err_print_prefix,
 	       EXTRACT(spl_cmplt, IO7__POX_SPLCMPLT__SOURCE_BUS),
@@ -589,22 +589,23 @@ marvel_print_pox_spl_cmplt(u64 spl_cmplt)
 static void
 marvel_print_pox_trans_sum(u64 trans_sum)
 {
-	char *pcix_cmd[] = { "Interrupt Acknowledge",
-			     "Special Cycle",
-			     "I/O Read",
-			     "I/O Write",
-			     "Reserved",
-			     "Reserved / Device ID Message",
-			     "Memory Read",
-			     "Memory Write",
-			     "Reserved / Alias to Memory Read Block",
-			     "Reserved / Alias to Memory Write Block",
-			     "Configuration Read",
-			     "Configuration Write",
-			     "Memory Read Multiple / Split Completion",
-			     "Dual Address Cycle",
-			     "Memory Read Line / Memory Read Block",
-			     "Memory Write and Invalidate / Memory Write Block"
+	static const char * const pcix_cmd[] = {
+		"Interrupt Acknowledge",
+		"Special Cycle",
+		"I/O Read",
+		"I/O Write",
+		"Reserved",
+		"Reserved / Device ID Message",
+		"Memory Read",
+		"Memory Write",
+		"Reserved / Alias to Memory Read Block",
+		"Reserved / Alias to Memory Write Block",
+		"Configuration Read",
+		"Configuration Write",
+		"Memory Read Multiple / Split Completion",
+		"Dual Address Cycle",
+		"Memory Read Line / Memory Read Block",
+		"Memory Write and Invalidate / Memory Write Block"
 	};
 
 #define IO7__POX_TRANSUM__PCI_ADDR__S		(0)

+ 20 - 15
arch/alpha/kernel/err_titan.c

@@ -75,8 +75,12 @@ titan_parse_p_serror(int which, u64 serror, int print)
 	int status = MCHK_DISPOSITION_REPORT;
 
 #ifdef CONFIG_VERBOSE_MCHECK
-	char *serror_src[] = {"GPCI", "APCI", "AGP HP", "AGP LP"};
-	char *serror_cmd[] = {"DMA Read", "DMA RMW", "SGTE Read", "Reserved"};
+	static const char * const serror_src[] = {
+		"GPCI", "APCI", "AGP HP", "AGP LP"
+	};
+	static const char * const serror_cmd[] = {
+		"DMA Read", "DMA RMW", "SGTE Read", "Reserved"
+	};
 #endif /* CONFIG_VERBOSE_MCHECK */
 
 #define TITAN__PCHIP_SERROR__LOST_UECC	(1UL << 0)
@@ -140,14 +144,15 @@ titan_parse_p_perror(int which, int port, u64 perror, int print)
 	int status = MCHK_DISPOSITION_REPORT;
 
 #ifdef CONFIG_VERBOSE_MCHECK
-	char *perror_cmd[] = { "Interrupt Acknowledge", "Special Cycle",
-			       "I/O Read",	       	"I/O Write",
-			       "Reserved",	       	"Reserved",
-			       "Memory Read",		"Memory Write",
-			       "Reserved",		"Reserved",
-			       "Configuration Read",	"Configuration Write",
-			       "Memory Read Multiple",	"Dual Address Cycle",
-			       "Memory Read Line","Memory Write and Invalidate"
+	static const char * const perror_cmd[] = {
+		"Interrupt Acknowledge", "Special Cycle",
+		"I/O Read",		"I/O Write",
+		"Reserved",		"Reserved",
+		"Memory Read",		"Memory Write",
+		"Reserved",		"Reserved",
+		"Configuration Read",	"Configuration Write",
+		"Memory Read Multiple",	"Dual Address Cycle",
+		"Memory Read Line",	"Memory Write and Invalidate"
 	};
 #endif /* CONFIG_VERBOSE_MCHECK */
 
@@ -273,11 +278,11 @@ titan_parse_p_agperror(int which, u64 agperror, int print)
 	int cmd, len;
 	unsigned long addr;
 
-	char *agperror_cmd[] = { "Read (low-priority)",	"Read (high-priority)",
-				 "Write (low-priority)",
-				 "Write (high-priority)",
-				 "Reserved",		"Reserved",
-				 "Flush",		"Fence"
+	static const char * const agperror_cmd[] = {
+		"Read (low-priority)",	"Read (high-priority)",
+		"Write (low-priority)",	"Write (high-priority)",
+		"Reserved",		"Reserved",
+		"Flush",		"Fence"
 	};
 #endif /* CONFIG_VERBOSE_MCHECK */
 

+ 2 - 7
arch/alpha/kernel/osf_sys.c

@@ -15,7 +15,6 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/syscalls.h>
 #include <linux/unistd.h>
@@ -69,7 +68,6 @@ SYSCALL_DEFINE4(osf_set_program_attributes, unsigned long, text_start,
 {
 	struct mm_struct *mm;
 
-	lock_kernel();
 	mm = current->mm;
 	mm->end_code = bss_start + bss_len;
 	mm->start_brk = bss_start + bss_len;
@@ -78,7 +76,6 @@ SYSCALL_DEFINE4(osf_set_program_attributes, unsigned long, text_start,
 	printk("set_program_attributes(%lx %lx %lx %lx)\n",
 		text_start, text_len, bss_start, bss_len);
 #endif
-	unlock_kernel();
 	return 0;
 }
 
@@ -517,7 +514,6 @@ SYSCALL_DEFINE2(osf_proplist_syscall, enum pl_code, code,
 	long error;
 	int __user *min_buf_size_ptr;
 
-	lock_kernel();
 	switch (code) {
 	case PL_SET:
 		if (get_user(error, &args->set.nbytes))
@@ -547,7 +543,6 @@ SYSCALL_DEFINE2(osf_proplist_syscall, enum pl_code, code,
 		error = -EOPNOTSUPP;
 		break;
 	};
-	unlock_kernel();
 	return error;
 }
 
@@ -594,7 +589,7 @@ SYSCALL_DEFINE2(osf_sigstack, struct sigstack __user *, uss,
 
 SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count)
 {
-	char *sysinfo_table[] = {
+	const char *sysinfo_table[] = {
 		utsname()->sysname,
 		utsname()->nodename,
 		utsname()->release,
@@ -606,7 +601,7 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count)
 		"dummy",	/* secure RPC domain */
 	};
 	unsigned long offset;
-	char *res;
+	const char *res;
 	long len, err = -EINVAL;
 
 	offset = command-1;

+ 1 - 1
arch/alpha/kernel/pci-sysfs.c

@@ -66,7 +66,7 @@ static int pci_mmap_resource(struct kobject *kobj,
 {
 	struct pci_dev *pdev = to_pci_dev(container_of(kobj,
 						       struct device, kobj));
-	struct resource *res = (struct resource *)attr->private;
+	struct resource *res = attr->private;
 	enum pci_mmap_state mmap_type;
 	struct pci_bus_region bar;
 	int i;

+ 9 - 9
arch/alpha/kernel/perf_event.c

@@ -241,20 +241,20 @@ static inline unsigned long alpha_read_pmc(int idx)
 static int alpha_perf_event_set_period(struct perf_event *event,
 				struct hw_perf_event *hwc, int idx)
 {
-	long left = atomic64_read(&hwc->period_left);
+	long left = local64_read(&hwc->period_left);
 	long period = hwc->sample_period;
 	int ret = 0;
 
 	if (unlikely(left <= -period)) {
 		left = period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
 
 	if (unlikely(left <= 0)) {
 		left += period;
-		atomic64_set(&hwc->period_left, left);
+		local64_set(&hwc->period_left, left);
 		hwc->last_period = period;
 		ret = 1;
 	}
@@ -269,7 +269,7 @@ static int alpha_perf_event_set_period(struct perf_event *event,
 	if (left > (long)alpha_pmu->pmc_max_period[idx])
 		left = alpha_pmu->pmc_max_period[idx];
 
-	atomic64_set(&hwc->prev_count, (unsigned long)(-left));
+	local64_set(&hwc->prev_count, (unsigned long)(-left));
 
 	alpha_write_pmc(idx, (unsigned long)(-left));
 
@@ -300,10 +300,10 @@ static unsigned long alpha_perf_event_update(struct perf_event *event,
 	long delta;
 
 again:
-	prev_raw_count = atomic64_read(&hwc->prev_count);
+	prev_raw_count = local64_read(&hwc->prev_count);
 	new_raw_count = alpha_read_pmc(idx);
 
-	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
@@ -316,8 +316,8 @@ again:
 		delta += alpha_pmu->pmc_max_period[idx] + 1;
 	}
 
-	atomic64_add(delta, &event->count);
-	atomic64_sub(delta, &hwc->period_left);
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
 
 	return new_raw_count;
 }
@@ -636,7 +636,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if (!hwc->sample_period) {
 		hwc->sample_period = alpha_pmu->pmc_max_period[0];
 		hwc->last_period = hwc->sample_period;
-		atomic64_set(&hwc->period_left, hwc->sample_period);
+		local64_set(&hwc->period_left, hwc->sample_period);
 	}
 
 	return 0;

+ 1 - 1
arch/alpha/kernel/process.c

@@ -356,7 +356,7 @@ dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, struct thread_info *ti)
 	dest[27] = pt->r27;
 	dest[28] = pt->r28;
 	dest[29] = pt->gp;
-	dest[30] = rdusp();
+	dest[30] = ti == current_thread_info() ? rdusp() : ti->pcb.usp;
 	dest[31] = pt->pc;
 
 	/* Once upon a time this was the PS value.  Which is stupid

+ 0 - 3
arch/alpha/kernel/proto.h

@@ -156,9 +156,6 @@ extern void SMC669_Init(int);
 /* es1888.c */
 extern void es1888_init(void);
 
-/* ns87312.c */
-extern void ns87312_enable_ide(long ide_base);
-
 /* ../lib/fpreg.c */
 extern void alpha_write_fp_reg (unsigned long reg, unsigned long val);
 extern unsigned long alpha_read_fp_reg (unsigned long reg);

+ 18 - 79
arch/alpha/kernel/signal.c

@@ -41,46 +41,20 @@ static void do_signal(struct pt_regs *, struct switch_stack *,
 /*
  * The OSF/1 sigprocmask calling sequence is different from the
  * C sigprocmask() sequence..
- *
- * how:
- * 1 - SIG_BLOCK
- * 2 - SIG_UNBLOCK
- * 3 - SIG_SETMASK
- *
- * We change the range to -1 .. 1 in order to let gcc easily
- * use the conditional move instructions.
- *
- * Note that we don't need to acquire the kernel lock for SMP
- * operation, as all of this is local to this thread.
  */
-SYSCALL_DEFINE3(osf_sigprocmask, int, how, unsigned long, newmask,
-		struct pt_regs *, regs)
+SYSCALL_DEFINE2(osf_sigprocmask, int, how, unsigned long, newmask)
 {
-	unsigned long oldmask = -EINVAL;
-
-	if ((unsigned long)how-1 <= 2) {
-		long sign = how-2;		/* -1 .. 1 */
-		unsigned long block, unblock;
-
-		newmask &= _BLOCKABLE;
-		spin_lock_irq(&current->sighand->siglock);
-		oldmask = current->blocked.sig[0];
-
-		unblock = oldmask & ~newmask;
-		block = oldmask | newmask;
-		if (!sign)
-			block = unblock;
-		if (sign <= 0)
-			newmask = block;
-		if (_NSIG_WORDS > 1 && sign > 0)
-			sigemptyset(&current->blocked);
-		current->blocked.sig[0] = newmask;
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-
-		regs->r0 = 0;		/* special no error return */
+	sigset_t oldmask;
+	sigset_t mask;
+	unsigned long res;
+
+	siginitset(&mask, newmask & ~_BLOCKABLE);
+	res = sigprocmask(how, &mask, &oldmask);
+	if (!res) {
+		force_successful_syscall_return();
+		res = oldmask.sig[0];
 	}
-	return oldmask;
+	return res;
 }
 
 SYSCALL_DEFINE3(osf_sigaction, int, sig,
@@ -94,9 +68,9 @@ SYSCALL_DEFINE3(osf_sigaction, int, sig,
 		old_sigset_t mask;
 		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
 		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
-		    __get_user(new_ka.sa.sa_flags, &act->sa_flags))
+		    __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
+		    __get_user(mask, &act->sa_mask))
 			return -EFAULT;
-		__get_user(mask, &act->sa_mask);
 		siginitset(&new_ka.sa.sa_mask, mask);
 		new_ka.ka_restorer = NULL;
 	}
@@ -106,9 +80,9 @@ SYSCALL_DEFINE3(osf_sigaction, int, sig,
 	if (!ret && oact) {
 		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
 		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
-		    __put_user(old_ka.sa.sa_flags, &oact->sa_flags))
+		    __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+		    __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
 			return -EFAULT;
-		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
 	}
 
 	return ret;
@@ -144,8 +118,7 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act,
 /*
  * Atomically swap in the new signal mask, and wait for a signal.
  */
-asmlinkage int
-do_sigsuspend(old_sigset_t mask, struct pt_regs *regs, struct switch_stack *sw)
+SYSCALL_DEFINE1(sigsuspend, old_sigset_t, mask)
 {
 	mask &= _BLOCKABLE;
 	spin_lock_irq(&current->sighand->siglock);
@@ -154,41 +127,6 @@ do_sigsuspend(old_sigset_t mask, struct pt_regs *regs, struct switch_stack *sw)
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	/* Indicate EINTR on return from any possible signal handler,
-	   which will not come back through here, but via sigreturn.  */
-	regs->r0 = EINTR;
-	regs->r19 = 1;
-
-	current->state = TASK_INTERRUPTIBLE;
-	schedule();
-	set_thread_flag(TIF_RESTORE_SIGMASK);
-	return -ERESTARTNOHAND;
-}
-
-asmlinkage int
-do_rt_sigsuspend(sigset_t __user *uset, size_t sigsetsize,
-		 struct pt_regs *regs, struct switch_stack *sw)
-{
-	sigset_t set;
-
-	/* XXX: Don't preclude handling different sized sigset_t's.  */
-	if (sigsetsize != sizeof(sigset_t))
-		return -EINVAL;
-	if (copy_from_user(&set, uset, sizeof(set)))
-		return -EFAULT;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->saved_sigmask = current->blocked;
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	/* Indicate EINTR on return from any possible signal handler,
-	   which will not come back through here, but via sigreturn.  */
-	regs->r0 = EINTR;
-	regs->r19 = 1;
-
 	current->state = TASK_INTERRUPTIBLE;
 	schedule();
 	set_thread_flag(TIF_RESTORE_SIGMASK);
@@ -239,6 +177,8 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
 	unsigned long usp;
 	long i, err = __get_user(regs->pc, &sc->sc_pc);
 
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
 	sw->r26 = (unsigned long) ret_from_sys_call;
 
 	err |= __get_user(regs->r0, sc->sc_regs+0);
@@ -591,7 +531,6 @@ syscall_restart(unsigned long r0, unsigned long r19,
 		regs->pc -= 4;
 		break;
 	case ERESTART_RESTARTBLOCK:
-		current_thread_info()->restart_block.fn = do_no_restart_syscall;
 		regs->r0 = EINTR;
 		break;
 	}
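
Note on the error convention in the hunks above: Alpha reports syscall failure out of band, by setting r19 (a3), rather than by returning a negative errno in r0, which is why the deleted paths set regs->r0 = EINTR and regs->r19 = 1 by hand. The rewritten sys_osf_sigprocmask() returns the raw old mask, whose bit pattern could be mistaken for an error value, so it calls force_successful_syscall_return() to suppress the error indication. A minimal sketch of the pattern (illustrative, not the exact exit-path code):

	res = oldmask.sig[0];			/* may look like a -errno value */
	force_successful_syscall_return();	/* keep r19/a3 clear on exit */
	return res;				/* delivered to userspace in r0 as-is */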

+ 1 - 1
arch/alpha/kernel/srm_env.c

@@ -87,7 +87,7 @@ static int srm_env_proc_show(struct seq_file *m, void *v)
 	srm_env_t	*entry;
 	char		*page;
 
-	entry = (srm_env_t *)m->private;
+	entry = m->private;
 	page = (char *)__get_free_page(GFP_USER);
 	if (!page)
 		return -ENOMEM;

+ 16 - 3
arch/alpha/kernel/sys_cabriolet.c

@@ -33,7 +33,7 @@
 #include "irq_impl.h"
 #include "pci_impl.h"
 #include "machvec_impl.h"
-
+#include "pc873xx.h"
 
 /* Note mask bit is true for DISABLED irqs.  */
 static unsigned long cached_irq_mask = ~0UL;
@@ -235,18 +235,31 @@ cabriolet_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
 	return COMMON_TABLE_LOOKUP;
 }
 
+static inline void __init
+cabriolet_enable_ide(void)
+{
+	if (pc873xx_probe() == -1) {
+		printk(KERN_ERR "Probing for PC873xx Super IO chip failed.\n");
+	} else {
+		printk(KERN_INFO "Found %s Super IO chip at 0x%x\n",
+			pc873xx_get_model(), pc873xx_get_base());
+
+		pc873xx_enable_ide();
+	}
+}
+
 static inline void __init
 cabriolet_init_pci(void)
 {
 	common_init_pci();
-	ns87312_enable_ide(0x398);
+	cabriolet_enable_ide();
 }
 
 static inline void __init
 cia_cab_init_pci(void)
 {
 	cia_init_pci();
-	ns87312_enable_ide(0x398);
+	cabriolet_enable_ide();
 }
 
 /*

+ 9 - 2
arch/alpha/kernel/sys_takara.c

@@ -29,7 +29,7 @@
 #include "irq_impl.h"
 #include "pci_impl.h"
 #include "machvec_impl.h"
-
+#include "pc873xx.h"
 
 /* Note mask bit is true for DISABLED irqs.  */
 static unsigned long cached_irq_mask[2] = { -1, -1 };
@@ -264,7 +264,14 @@ takara_init_pci(void)
 		alpha_mv.pci_map_irq = takara_map_irq_srm;
 
 	cia_init_pci();
-	ns87312_enable_ide(0x26e);
+
+	if (pc873xx_probe() == -1) {
+		printk(KERN_ERR "Probing for PC873xx Super IO chip failed.\n");
+	} else {
+		printk(KERN_INFO "Found %s Super IO chip at 0x%x\n",
+			pc873xx_get_model(), pc873xx_get_base());
+		pc873xx_enable_ide();
+	}
 }
 
 

+ 4 - 1
arch/alpha/kernel/systbls.S

@@ -58,7 +58,7 @@ sys_call_table:
 	.quad sys_open				/* 45 */
 	.quad alpha_ni_syscall
 	.quad sys_getxgid
-	.quad osf_sigprocmask
+	.quad sys_osf_sigprocmask
 	.quad alpha_ni_syscall
 	.quad alpha_ni_syscall			/* 50 */
 	.quad sys_acct
@@ -512,6 +512,9 @@ sys_call_table:
 	.quad sys_pwritev
 	.quad sys_rt_tgsigqueueinfo
 	.quad sys_perf_event_open
+	.quad sys_fanotify_init
+	.quad sys_fanotify_mark				/* 495 */
+	.quad sys_prlimit64
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object

+ 5 - 5
arch/alpha/kernel/time.c

@@ -191,16 +191,16 @@ irqreturn_t timer_interrupt(int irq, void *dev)
 
 	write_sequnlock(&xtime_lock);
 
-#ifndef CONFIG_SMP
-	while (nticks--)
-		update_process_times(user_mode(get_irq_regs()));
-#endif
-
 	if (test_perf_event_pending()) {
 		clear_perf_event_pending();
 		perf_event_do_pending();
 	}
 
+#ifndef CONFIG_SMP
+	while (nticks--)
+		update_process_times(user_mode(get_irq_regs()));
+#endif
+
 	return IRQ_HANDLED;
 }
 

+ 0 - 3
arch/alpha/kernel/traps.c

@@ -13,7 +13,6 @@
 #include <linux/sched.h>
 #include <linux/tty.h>
 #include <linux/delay.h>
-#include <linux/smp_lock.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
@@ -623,7 +622,6 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg,
 		return;
 	}
 
-	lock_kernel();
 	printk("Bad unaligned kernel access at %016lx: %p %lx %lu\n",
 		pc, va, opcode, reg);
 	do_exit(SIGSEGV);
@@ -646,7 +644,6 @@ got_exception:
 	 * Yikes!  No one to forward the exception to.
 	 * Since the registers are in a weird format, dump them ourselves.
  	 */
-	lock_kernel();
 
 	printk("%s(%d): unhandled unaligned exception\n",
 	       current->comm, task_pid_nr(current));

+ 53 - 102
arch/arm/Kconfig

@@ -19,6 +19,8 @@ config ARM
 	select HAVE_KPROBES if (!XIP_KERNEL)
 	select HAVE_KRETPROBES if (HAVE_KPROBES)
 	select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
+	select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
+	select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL)
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZO
@@ -26,6 +28,7 @@ config ARM
 	select HAVE_PERF_EVENTS
 	select PERF_USE_VMALLOC
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V7))
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
@@ -145,6 +148,9 @@ config ARCH_HAS_CPUFREQ
 	  and that the relevant menu configurations are displayed for
 	  it.
 
+config ARCH_HAS_CPU_IDLE_WAIT
+       def_bool y
+
 config GENERIC_HWEIGHT
 	bool
 	default y
@@ -271,7 +277,6 @@ config ARCH_AT91
 	bool "Atmel AT91"
 	select ARCH_REQUIRE_GPIOLIB
 	select HAVE_CLK
-	select ARCH_USES_GETTIMEOFFSET
 	help
 	  This enables support for systems based on the Atmel AT91RM9200,
 	  AT91SAM9 and AT91CAP9 processors.
@@ -1003,7 +1008,7 @@ endif
 
 config ARM_ERRATA_411920
 	bool "ARM errata: Invalidation of the Instruction Cache operation can fail"
-	depends on CPU_V6 && !SMP
+	depends on CPU_V6
 	help
 	  Invalidation of the Instruction Cache operation can
 	  fail. This erratum is present in 1136 (before r1p4), 1156 and 1176.
@@ -1051,6 +1056,32 @@ config ARM_ERRATA_460075
 	  ACTLR register. Note that setting specific bits in the ACTLR register
 	  may not be available in non-secure mode.
 
+config ARM_ERRATA_742230
+	bool "ARM errata: DMB operation may be faulty"
+	depends on CPU_V7 && SMP
+	help
+	  This option enables the workaround for the 742230 Cortex-A9
+	  (r1p0..r2p2) erratum. Under rare circumstances, a DMB instruction
+	  between two write operations may not ensure the correct visibility
+	  ordering of the two writes. This workaround sets a specific bit in
+	  the diagnostic register of the Cortex-A9 which causes the DMB
+	  instruction to behave as a DSB, ensuring the correct behaviour of
+	  the two writes.
+
+config ARM_ERRATA_742231
+	bool "ARM errata: Incorrect hazard handling in the SCU may lead to data corruption"
+	depends on CPU_V7 && SMP
+	help
+	  This option enables the workaround for the 742231 Cortex-A9
+	  (r2p0..r2p2) erratum. Under certain conditions, specific to the
+	  Cortex-A9 MPCore micro-architecture, two CPUs working in SMP mode,
+	  accessing some data located in the same cache line, may get corrupted
+	  data due to bad handling of the address hazard when the line gets
+	  replaced from one of the CPUs at the same time as another CPU is
+	  accessing it. This workaround sets specific bits in the diagnostic
+	  register of the Cortex-A9 which reduces the linefill issuing
+	  capabilities of the processor.
+
 config PL310_ERRATA_588369
 	bool "Clean & Invalidate maintenance operations do not invalidate clean lines"
 	depends on CACHE_L2X0 && ARCH_OMAP4
@@ -1142,13 +1173,13 @@ source "kernel/time/Kconfig"
 
 config SMP
 	bool "Symmetric Multi-Processing (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && (REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP ||\
-		 MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 ||\
-		 ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4)
+	depends on EXPERIMENTAL
 	depends on GENERIC_CLOCKEVENTS
+	depends on REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || \
+		 MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 ||\
+		 ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4
 	select USE_GENERIC_SMP_HELPERS
-	select HAVE_ARM_SCU if ARCH_REALVIEW || ARCH_OMAP4 || ARCH_S5PV310 ||\
-		 ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4
+	select HAVE_ARM_SCU
 	help
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
@@ -1166,6 +1197,19 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
+config SMP_ON_UP
+	bool "Allow booting SMP kernel on uniprocessor systems (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	depends on SMP && !XIP && !THUMB2_KERNEL
+	default y
+	help
+	  SMP kernels contain instructions which fail on non-SMP processors.
+	  Enabling this option allows the kernel to modify itself to make
+	  these instructions safe.  Disabling it allows about 1K of space
+	  savings.
+
+	  If you don't know what to do here, say Y.
+
 config HAVE_ARM_SCU
 	bool
 	depends on SMP
@@ -1216,12 +1260,9 @@ config HOTPLUG_CPU
 
 config LOCAL_TIMERS
 	bool "Use local timer interrupts"
-	depends on SMP && (REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || \
-		REALVIEW_EB_A9MP || MACH_REALVIEW_PBX || ARCH_OMAP4 || \
-		ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4)
+	depends on SMP
 	default y
-	select HAVE_ARM_TWD if ARCH_REALVIEW || ARCH_OMAP4 || ARCH_S5PV310 || \
-		ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS
+	select HAVE_ARM_TWD
 	help
	  Enable support for local timers on SMP platforms, rather than the
	  legacy IPI broadcast method.  Local timers allow the system
@@ -1576,96 +1617,6 @@ config AUTO_ZRELADDR
	  0xf8000000. This assumes the zImage is placed in the first 128MB
	  from the start of memory.
 
-config ZRELADDR
-	hex "Physical address of the decompressed kernel image"
-	depends on !AUTO_ZRELADDR
-	default 0x00008000 if ARCH_BCMRING ||\
-		ARCH_CNS3XXX ||\
-		ARCH_DOVE ||\
-		ARCH_EBSA110 ||\
-		ARCH_FOOTBRIDGE ||\
-		ARCH_INTEGRATOR ||\
-		ARCH_IOP13XX ||\
-		ARCH_IOP33X ||\
-		ARCH_IXP2000 ||\
-		ARCH_IXP23XX ||\
-		ARCH_IXP4XX ||\
-		ARCH_KIRKWOOD ||\
-		ARCH_KS8695 ||\
-		ARCH_LOKI ||\
-		ARCH_MMP ||\
-		ARCH_MV78XX0 ||\
-		ARCH_NOMADIK ||\
-		ARCH_NUC93X ||\
-		ARCH_NS9XXX ||\
-		ARCH_ORION5X ||\
-		ARCH_SPEAR3XX ||\
-		ARCH_SPEAR6XX ||\
-		ARCH_U8500 ||\
-		ARCH_VERSATILE ||\
-		ARCH_W90X900
-	default 0x08008000 if ARCH_MX1 ||\
-		ARCH_SHARK
-	default 0x10008000 if ARCH_MSM ||\
-		ARCH_OMAP1 ||\
-		ARCH_RPC
-	default 0x20008000 if ARCH_S5P6440 ||\
-		ARCH_S5P6442 ||\
-		ARCH_S5PC100 ||\
-		ARCH_S5PV210
-	default 0x30008000 if ARCH_S3C2410 ||\
-		ARCH_S3C2400 ||\
-		ARCH_S3C2412 ||\
-		ARCH_S3C2416 ||\
-		ARCH_S3C2440 ||\
-		ARCH_S3C2443
-	default 0x40008000 if ARCH_STMP378X ||\
-		ARCH_STMP37XX ||\
-		ARCH_SH7372 ||\
-		ARCH_SH7377 ||\
-		ARCH_S5PV310
-	default 0x50008000 if ARCH_S3C64XX ||\
-		ARCH_SH7367
-	default 0x60008000 if ARCH_VEXPRESS
-	default 0x80008000 if ARCH_MX25 ||\
-		ARCH_MX3 ||\
-		ARCH_NETX ||\
-		ARCH_OMAP2PLUS ||\
-		ARCH_PNX4008
-	default 0x90008000 if ARCH_MX5 ||\
-		ARCH_MX91231
-	default 0xa0008000 if ARCH_IOP32X ||\
-		ARCH_PXA ||\
-		MACH_MX27
-	default 0xc0008000 if ARCH_LH7A40X ||\
-		MACH_MX21
-	default 0xf0008000 if ARCH_AAEC2000 ||\
-		ARCH_L7200
-	default 0xc0028000 if ARCH_CLPS711X
-	default 0x70008000 if ARCH_AT91 && (ARCH_AT91CAP9 || ARCH_AT91SAM9G45)
-	default 0x20008000 if ARCH_AT91 && !(ARCH_AT91CAP9 || ARCH_AT91SAM9G45)
-	default 0xc0008000 if ARCH_DAVINCI && ARCH_DAVINCI_DA8XX
-	default 0x80008000 if ARCH_DAVINCI && !ARCH_DAVINCI_DA8XX
-	default 0x00008000 if ARCH_EP93XX && EP93XX_SDCE3_SYNC_PHYS_OFFSET
-	default 0xc0008000 if ARCH_EP93XX && EP93XX_SDCE0_PHYS_OFFSET
-	default 0xd0008000 if ARCH_EP93XX && EP93XX_SDCE1_PHYS_OFFSET
-	default 0xe0008000 if ARCH_EP93XX && EP93XX_SDCE2_PHYS_OFFSET
-	default 0xf0008000 if ARCH_EP93XX && EP93XX_SDCE3_ASYNC_PHYS_OFFSET
-	default 0x00008000 if ARCH_GEMINI && GEMINI_MEM_SWAP
-	default 0x10008000 if ARCH_GEMINI && !GEMINI_MEM_SWAP
-	default 0x70008000 if ARCH_REALVIEW && REALVIEW_HIGH_PHYS_OFFSET
-	default 0x00008000 if ARCH_REALVIEW && !REALVIEW_HIGH_PHYS_OFFSET
-	default 0xc0208000 if ARCH_SA1100 && SA1111
-	default 0xc0008000 if ARCH_SA1100 && !SA1111
-	default 0x30108000 if ARCH_S3C2410 && PM_H1940
-	default 0x28E08000 if ARCH_U300 && MACH_U300_SINGLE_RAM
-	default 0x48008000 if ARCH_U300 && !MACH_U300_SINGLE_RAM
-	help
-	  ZRELADDR is the physical address where the decompressed kernel
-	  image will be placed. ZRELADDR has to be specified when the
-	  assumption of AUTO_ZRELADDR is not valid, or when ZBOOT_ROM is
-	  selected.
-
 endmenu
 
 menu "CPU Power Management"

+ 5 - 0
arch/arm/Kconfig.debug

@@ -27,6 +27,11 @@ config ARM_UNWIND
 	  the performance is not affected. Currently, this feature
 	  only works with EABI compilers. If unsure say Y.
 
+config OLD_MCOUNT
+	bool
+	depends on FUNCTION_TRACER && FRAME_POINTER
+	default y
+
 config DEBUG_USER
 	bool "Verbose user fault messages"
 	help

+ 5 - 3
arch/arm/boot/Makefile

@@ -14,16 +14,18 @@
 MKIMAGE         := $(srctree)/scripts/mkuboot.sh
 
 ifneq ($(MACHINE),)
--include $(srctree)/$(MACHINE)/Makefile.boot
+include $(srctree)/$(MACHINE)/Makefile.boot
 endif
 
 # Note: the following conditions must always be true:
+#   ZRELADDR == virt_to_phys(PAGE_OFFSET + TEXT_OFFSET)
 #   PARAMS_PHYS must be within 4MB of ZRELADDR
 #   INITRD_PHYS must be in RAM
+ZRELADDR    := $(zreladdr-y)
 PARAMS_PHYS := $(params_phys-y)
 INITRD_PHYS := $(initrd_phys-y)
 
-export INITRD_PHYS PARAMS_PHYS
+export ZRELADDR INITRD_PHYS PARAMS_PHYS
 
 targets := Image zImage xipImage bootpImage uImage
 
@@ -65,7 +67,7 @@ quiet_cmd_uimage = UIMAGE  $@
 ifeq ($(CONFIG_ZBOOT_ROM),y)
 $(obj)/uImage: LOADADDR=$(CONFIG_ZBOOT_ROM_TEXT)
 else
-$(obj)/uImage: LOADADDR=$(CONFIG_ZRELADDR)
+$(obj)/uImage: LOADADDR=$(ZRELADDR)
 endif
 
 ifeq ($(CONFIG_THUMB2_KERNEL),y)
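
As a worked example of the ZRELADDR identity noted above (values assumed for illustration): on a platform with PAGE_OFFSET = 0xC0000000, PHYS_OFFSET = 0x00000000 and TEXT_OFFSET = 0x00008000, virt_to_phys(PAGE_OFFSET + TEXT_OFFSET) = 0x00008000, which is the zreladdr-y value the corresponding mach/*/Makefile.boot would supply.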

+ 5 - 1
arch/arm/boot/compressed/Makefile

@@ -79,6 +79,10 @@ endif
 EXTRA_CFLAGS  := -fpic -fno-builtin
 EXTRA_AFLAGS  := -Wa,-march=all
 
+# Supply ZRELADDR to the decompressor via a linker symbol.
+ifneq ($(CONFIG_AUTO_ZRELADDR),y)
+LDFLAGS_vmlinux := --defsym zreladdr=$(ZRELADDR)
+endif
 ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
 LDFLAGS_vmlinux += --be8
 endif
@@ -112,5 +116,5 @@ CFLAGS_font.o := -Dstatic=
 $(obj)/font.c: $(FONTC)
 	$(call cmd,shipped)
 
-$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile .config
+$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile $(KCONFIG_CONFIG)
 	@sed "$(SEDFLAGS)" < $< > $@

+ 1 - 1
arch/arm/boot/compressed/head.S

@@ -177,7 +177,7 @@ not_angel:
 		and	r4, pc, #0xf8000000
 		add	r4, r4, #TEXT_OFFSET
 #else
-		ldr	r4, =CONFIG_ZRELADDR
+		ldr	r4, =zreladdr
 #endif
 		subs	r0, r0, r1		@ calculate the delta offset
 

+ 16 - 0
arch/arm/common/it8152.c

@@ -263,6 +263,22 @@ static int it8152_pci_platform_notify_remove(struct device *dev)
 	return 0;
 }
 
+int dma_needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size)
+{
+	dev_dbg(dev, "%s: dma_addr %08x, size %08x\n",
+		__func__, dma_addr, size);
+	return (dev->bus == &pci_bus_type) &&
+		((dma_addr + size - PHYS_OFFSET) >= SZ_64M);
+}
+
+int dma_set_coherent_mask(struct device *dev, u64 mask)
+{
+	if (mask >= PHYS_OFFSET + SZ_64M - 1)
+		return 0;
+
+	return -EIO;
+}
+
 int __init it8152_pci_setup(int nr, struct pci_sys_data *sys)
 {
 	it8152_io.start = IT8152_IO_BASE + 0x12000;
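
To make the bounce test above concrete (addresses assumed for illustration): with PHYS_OFFSET at 0xA0000000, a PCI buffer at dma_addr = 0xA5000000 of size 0x1000 gives 0xA5000000 + 0x1000 - 0xA0000000 = 0x05001000, which is >= SZ_64M, so dma_needs_bounce() returns 1 and the transfer is bounced into the first 64MB that the IT8152 can address. A buffer at 0xA1000000 stays below the 64MB limit and is used directly.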

+ 3 - 4
arch/arm/common/pl330.c

@@ -146,8 +146,7 @@
 #define DESIGNER	0x41
 #define REVISION	0x0
 #define INTEG_CFG	0x0
-#define PERIPH_ID_VAL	((PART << 0) | (DESIGNER << 12) \
-			  | (REVISION << 20) | (INTEG_CFG << 24))
+#define PERIPH_ID_VAL	((PART << 0) | (DESIGNER << 12))
 
 #define PCELL_ID_VAL	0xb105f00d
 
@@ -1859,10 +1858,10 @@ int pl330_add(struct pl330_info *pi)
 	regs = pi->base;
 
 	/* Check if we can handle this DMAC */
-	if (get_id(pi, PERIPH_ID) != PERIPH_ID_VAL
+	if ((get_id(pi, PERIPH_ID) & 0xfffff) != PERIPH_ID_VAL
 	   || get_id(pi, PCELL_ID) != PCELL_ID_VAL) {
 		dev_err(pi->dev, "PERIPH_ID 0x%x, PCELL_ID 0x%x !\n",
-			readl(regs + PERIPH_ID), readl(regs + PCELL_ID));
+			get_id(pi, PERIPH_ID), get_id(pi, PCELL_ID));
 		return -EINVAL;
 	}
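
Worked example of the relaxed ID check above: with DESIGNER = 0x41 (defined above) and PART = 0x330 (the PL330 part number, assumed here), PERIPH_ID_VAL = (0x41 << 12) | 0x330 = 0x41330. A DMAC reporting PERIPH_ID = 0x00241330 (an assumed r2pX revision) previously failed the exact comparison because of the nonzero revision field; masking with 0xfffff strips the revision and integration bits, so 0x00241330 & 0xfffff = 0x41330 now matches.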
 

+ 1 - 1
arch/arm/common/sa1111.c

@@ -678,7 +678,7 @@ out:
  *	%-EBUSY		physical address already marked in-use.
  *	%0		successful.
  */
-static int
+static int __devinit
 __sa1111_probe(struct device *me, struct resource *mem, int irq)
 {
 	struct sa1111 *sachip;

+ 13 - 2
arch/arm/configs/realview-smp_defconfig

@@ -39,6 +39,7 @@ CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_ARM_INTEGRATOR=y
+CONFIG_ARM_CHARLCD=y
 CONFIG_NETDEVICES=y
 CONFIG_SMSC_PHY=y
 CONFIG_NET_ETHERNET=y
@@ -52,10 +53,13 @@ CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
 CONFIG_LEGACY_PTY_COUNT=16
 # CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_VERSATILE=y
+CONFIG_SPI=y
+CONFIG_GPIOLIB=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FB_ARMCLCD=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
@@ -70,7 +74,13 @@ CONFIG_SND_ARMAACI=y
 # CONFIG_USB_SUPPORT is not set
 CONFIG_MMC=y
 CONFIG_MMC_ARMMMCI=y
-CONFIG_INOTIFY=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_RTC_DRV_PL031=y
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
 CONFIG_CRAMFS=y
@@ -80,6 +90,7 @@ CONFIG_ROOT_NFS=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set

+ 13 - 2
arch/arm/configs/realview_defconfig

@@ -38,6 +38,7 @@ CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_ARM_INTEGRATOR=y
+CONFIG_ARM_CHARLCD=y
 CONFIG_NETDEVICES=y
 CONFIG_SMSC_PHY=y
 CONFIG_NET_ETHERNET=y
@@ -51,10 +52,13 @@ CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
 CONFIG_LEGACY_PTY_COUNT=16
 # CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_VERSATILE=y
+CONFIG_SPI=y
+CONFIG_GPIOLIB=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FB_ARMCLCD=y
-# CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
@@ -69,7 +73,13 @@ CONFIG_SND_ARMAACI=y
 # CONFIG_USB_SUPPORT is not set
 CONFIG_MMC=y
 CONFIG_MMC_ARMMMCI=y
-CONFIG_INOTIFY=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_RTC_DRV_PL031=y
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
 CONFIG_CRAMFS=y
@@ -79,6 +89,7 @@ CONFIG_ROOT_NFS=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set

+ 2 - 35
arch/arm/configs/u300_defconfig

@@ -28,26 +28,9 @@ CONFIG_CPU_IDLE=y
 CONFIG_FPE_NWFPE=y
 CONFIG_PM=y
 # CONFIG_SUSPEND is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_INET_DIAG is not set
-# CONFIG_IPV6 is not set
-# CONFIG_WIRELESS is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
-CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_ECC_SMC=y
+# CONFIG_MISC_DEVICES is not set
 # CONFIG_INPUT_MOUSEDEV is not set
 CONFIG_INPUT_EVDEV=y
 # CONFIG_KEYBOARD_ATKBD is not set
@@ -58,7 +41,6 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
 CONFIG_LEGACY_PTY_COUNT=16
 # CONFIG_HW_RANDOM is not set
 CONFIG_I2C=y
-CONFIG_POWER_SUPPLY=y
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
 CONFIG_REGULATOR=y
@@ -66,24 +48,10 @@ CONFIG_FB=y
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
 # CONFIG_LCD_CLASS_DEVICE is not set
 CONFIG_BACKLIGHT_CLASS_DEVICE=y
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_SOUND=y
-CONFIG_SND=y
-# CONFIG_SND_SUPPORT_OLD_API is not set
-# CONFIG_SND_VERBOSE_PROCFS is not set
-# CONFIG_SND_DRIVERS is not set
-# CONFIG_SND_ARM is not set
-# CONFIG_SND_SPI is not set
-CONFIG_SND_SOC=y
 # CONFIG_HID_SUPPORT is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_MMC=y
-CONFIG_MMC_DEBUG=y
 CONFIG_MMC_ARMMMCI=y
-CONFIG_NEW_LEDS=y
-CONFIG_LEDS_CLASS=y
-CONFIG_LEDS_TRIGGERS=y
-CONFIG_LEDS_TRIGGER_BACKLIGHT=y
 CONFIG_RTC_CLASS=y
 # CONFIG_RTC_HCTOSYS is not set
 CONFIG_RTC_DRV_COH901331=y
@@ -93,12 +61,11 @@ CONFIG_COH901318=y
 CONFIG_FUSE_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
-# CONFIG_NETWORK_FILESYSTEMS is not set
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
-# CONFIG_DETECT_SOFTLOCKUP is not set
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_TIMER_STATS=y
 # CONFIG_DEBUG_PREEMPT is not set

+ 25 - 2
arch/arm/include/asm/assembler.h

@@ -154,16 +154,39 @@
 	.long	9999b,9001f;			\
 	.popsection
 
+#ifdef CONFIG_SMP
+#define ALT_SMP(instr...)					\
+9998:	instr
+#define ALT_UP(instr...)					\
+	.pushsection ".alt.smp.init", "a"			;\
+	.long	9998b						;\
+	instr							;\
+	.popsection
+#define ALT_UP_B(label)					\
+	.equ	up_b_offset, label - 9998b			;\
+	.pushsection ".alt.smp.init", "a"			;\
+	.long	9998b						;\
+	b	. + up_b_offset					;\
+	.popsection
+#else
+#define ALT_SMP(instr...)
+#define ALT_UP(instr...) instr
+#define ALT_UP_B(label) b label
+#endif
+
 /*
  * SMP data memory barrier
  */
 	.macro	smp_dmb
 #ifdef CONFIG_SMP
 #if __LINUX_ARM_ARCH__ >= 7
-	dmb
+	ALT_SMP(dmb)
 #elif __LINUX_ARM_ARCH__ == 6
-	mcr	p15, 0, r0, c7, c10, 5	@ dmb
+	ALT_SMP(mcr	p15, 0, r0, c7, c10, 5)	@ dmb
+#else
+#error Incompatible SMP platform
 #endif
+	ALT_UP(nop)
 #endif
 	.endm
 

+ 44 - 21
arch/arm/include/asm/cacheflush.h

@@ -137,10 +137,10 @@
 #endif
 
 /*
- * This flag is used to indicate that the page pointed to by a pte
- * is dirty and requires cleaning before returning it to the user.
+ * This flag is used to indicate that the page pointed to by a pte is clean
+ * and does not require cleaning before returning it to the user.
  */
-#define PG_dcache_dirty PG_arch_1
+#define PG_dcache_clean PG_arch_1
 
 /*
  *	MM Cache Management
@@ -156,6 +156,12 @@
  *	Please note that the implementation of these, and the required
  *	effects are cache-type (VIVT/VIPT/PIPT) specific.
  *
+ *	flush_icache_all()
+ *
+ *		Unconditionally clean and invalidate the entire icache.
+ *		Currently only needed for cache-v6.S and cache-v7.S, see
+ *		__flush_icache_all for the generic implementation.
+ *
  *	flush_kern_all()
  *
  *		Unconditionally clean and invalidate the entire cache.
@@ -206,6 +212,7 @@
  */
 
 struct cpu_cache_fns {
+	void (*flush_icache_all)(void);
 	void (*flush_kern_all)(void);
 	void (*flush_user_all)(void);
 	void (*flush_user_range)(unsigned long, unsigned long, unsigned int);
@@ -227,6 +234,7 @@ struct cpu_cache_fns {
 
 extern struct cpu_cache_fns cpu_cache;
 
+#define __cpuc_flush_icache_all		cpu_cache.flush_icache_all
 #define __cpuc_flush_kern_all		cpu_cache.flush_kern_all
 #define __cpuc_flush_user_all		cpu_cache.flush_user_all
 #define __cpuc_flush_user_range		cpu_cache.flush_user_range
@@ -246,6 +254,7 @@ extern struct cpu_cache_fns cpu_cache;
 
 #else
 
+#define __cpuc_flush_icache_all		__glue(_CACHE,_flush_icache_all)
 #define __cpuc_flush_kern_all		__glue(_CACHE,_flush_kern_cache_all)
 #define __cpuc_flush_user_all		__glue(_CACHE,_flush_user_cache_all)
 #define __cpuc_flush_user_range		__glue(_CACHE,_flush_user_cache_range)
@@ -253,6 +262,7 @@ extern struct cpu_cache_fns cpu_cache;
 #define __cpuc_coherent_user_range	__glue(_CACHE,_coherent_user_range)
 #define __cpuc_flush_dcache_area	__glue(_CACHE,_flush_kern_dcache_area)
 
+extern void __cpuc_flush_icache_all(void);
 extern void __cpuc_flush_kern_all(void);
 extern void __cpuc_flush_user_all(void);
 extern void __cpuc_flush_user_range(unsigned long, unsigned long, unsigned int);
@@ -291,6 +301,37 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
 /*
  * Convert calls to our calling convention.
  */
+
+/* Invalidate I-cache */
+#define __flush_icache_all_generic()					\
+	asm("mcr	p15, 0, %0, c7, c5, 0"				\
+	    : : "r" (0));
+
+/* Invalidate I-cache inner shareable */
+#define __flush_icache_all_v7_smp()					\
+	asm("mcr	p15, 0, %0, c7, c1, 0"				\
+	    : : "r" (0));
+
+/*
+ * Optimized __flush_icache_all for the common cases. Note that UP ARMv7
+ * will fall through to use __flush_icache_all_generic.
+ */
+#if (defined(CONFIG_CPU_V7) && defined(CONFIG_CPU_V6)) ||		\
+	defined(CONFIG_SMP_ON_UP)
+#define __flush_icache_preferred	__cpuc_flush_icache_all
+#elif __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
+#define __flush_icache_preferred	__flush_icache_all_v7_smp
+#elif __LINUX_ARM_ARCH__ == 6 && defined(CONFIG_ARM_ERRATA_411920)
+#define __flush_icache_preferred	__cpuc_flush_icache_all
+#else
+#define __flush_icache_preferred	__flush_icache_all_generic
+#endif
+
+static inline void __flush_icache_all(void)
+{
+	__flush_icache_preferred();
+}
+
 #define flush_cache_all()		__cpuc_flush_kern_all()
 
 static inline void vivt_flush_cache_mm(struct mm_struct *mm)
@@ -366,21 +407,6 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
 
-static inline void __flush_icache_all(void)
-{
-#ifdef CONFIG_ARM_ERRATA_411920
-	extern void v6_icache_inval_all(void);
-	v6_icache_inval_all();
-#elif defined(CONFIG_SMP) && __LINUX_ARM_ARCH__ >= 7
-	asm("mcr	p15, 0, %0, c7, c1, 0	@ invalidate I-cache inner shareable\n"
-	    :
-	    : "r" (0));
-#else
-	asm("mcr	p15, 0, %0, c7, c5, 0	@ invalidate I-cache\n"
-	    :
-	    : "r" (0));
-#endif
-}
 static inline void flush_kernel_vmap_range(void *addr, int size)
 {
 	if ((cache_is_vivt() || cache_is_vipt_aliasing()))
@@ -405,9 +431,6 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
 #define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
 static inline void flush_kernel_dcache_page(struct page *page)
 {
-	/* highmem pages are always flushed upon kunmap already */
-	if ((cache_is_vivt() || cache_is_vipt_aliasing()) && !PageHighMem(page))
-		__cpuc_flush_dcache_area(page_address(page), PAGE_SIZE);
 }
 
 #define flush_dcache_mmap_lock(mapping) \

+ 6 - 2
arch/arm/include/asm/cachetype.h

@@ -6,6 +6,7 @@
 #define CACHEID_VIPT_ALIASING		(1 << 2)
 #define CACHEID_VIPT			(CACHEID_VIPT_ALIASING|CACHEID_VIPT_NONALIASING)
 #define CACHEID_ASID_TAGGED		(1 << 3)
+#define CACHEID_VIPT_I_ALIASING		(1 << 4)
 
 extern unsigned int cacheid;
 
@@ -14,15 +15,18 @@ extern unsigned int cacheid;
 #define cache_is_vipt_nonaliasing()	cacheid_is(CACHEID_VIPT_NONALIASING)
 #define cache_is_vipt_aliasing()	cacheid_is(CACHEID_VIPT_ALIASING)
 #define icache_is_vivt_asid_tagged()	cacheid_is(CACHEID_ASID_TAGGED)
+#define icache_is_vipt_aliasing()	cacheid_is(CACHEID_VIPT_I_ALIASING)
 
 /*
  * __LINUX_ARM_ARCH__ is the minimum supported CPU architecture
  * Mask out support which will never be present on newer CPUs.
  * - v6+ is never VIVT
- * - v7+ VIPT never aliases
+ * - v7+ VIPT never aliases on D-side
  */
 #if __LINUX_ARM_ARCH__ >= 7
-#define __CACHEID_ARCH_MIN	(CACHEID_VIPT_NONALIASING | CACHEID_ASID_TAGGED)
+#define __CACHEID_ARCH_MIN	(CACHEID_VIPT_NONALIASING |\
+				 CACHEID_ASID_TAGGED |\
+				 CACHEID_VIPT_I_ALIASING)
 #elif __LINUX_ARM_ARCH__ >= 6
 #define	__CACHEID_ARCH_MIN	(~CACHEID_VIVT)
 #else

+ 0 - 8
arch/arm/include/asm/dma-mapping.h

@@ -288,15 +288,7 @@ extern void dmabounce_unregister_dev(struct device *);
  * DMA access and 1 if the buffer needs to be bounced.
  *
  */
-#ifdef CONFIG_SA1111
 extern int dma_needs_bounce(struct device*, dma_addr_t, size_t);
-#else
-static inline int dma_needs_bounce(struct device *dev, dma_addr_t addr,
-				   size_t size)
-{
-	return 0;
-}
-#endif
 
 /*
  * The DMA API, implemented by dmabounce.c.  See below for descriptions.

+ 19 - 1
arch/arm/include/asm/ftrace.h

@@ -2,12 +2,30 @@
 #define _ASM_ARM_FTRACE
 
 #ifdef CONFIG_FUNCTION_TRACER
-#define MCOUNT_ADDR		((long)(mcount))
+#define MCOUNT_ADDR		((unsigned long)(__gnu_mcount_nc))
 #define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
 
 #ifndef __ASSEMBLY__
 extern void mcount(void);
 extern void __gnu_mcount_nc(void);
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+struct dyn_arch_ftrace {
+#ifdef CONFIG_OLD_MCOUNT
+	bool	old_mcount;
+#endif
+};
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/* With Thumb-2, the recorded addresses have the lsb set */
+	return addr & ~1;
+}
+
+extern void ftrace_caller_old(void);
+extern void ftrace_call_old(void);
+#endif
+
 #endif
 
 #endif

+ 11 - 23
arch/arm/include/asm/hardware/coresight.h

@@ -21,18 +21,6 @@
 #define TRACER_RUNNING		BIT(TRACER_RUNNING_BIT)
 #define TRACER_CYCLE_ACC	BIT(TRACER_CYCLE_ACC_BIT)
 
-struct tracectx {
-	unsigned int	etb_bufsz;
-	void __iomem	*etb_regs;
-	void __iomem	*etm_regs;
-	unsigned long	flags;
-	int		ncmppairs;
-	int		etm_portsz;
-	struct device	*dev;
-	struct clk	*emu_clk;
-	struct mutex	mutex;
-};
-
 #define TRACER_TIMEOUT 10000
 
 #define etm_writel(t, v, x) \
@@ -112,10 +100,10 @@ struct tracectx {
 
 /* ETM status register, "ETM Architecture", 3.3.2 */
 #define ETMR_STATUS		(0x10)
-#define ETMST_OVERFLOW		(1 << 0)
-#define ETMST_PROGBIT		(1 << 1)
-#define ETMST_STARTSTOP		(1 << 2)
-#define ETMST_TRIGGER		(1 << 3)
+#define ETMST_OVERFLOW		BIT(0)
+#define ETMST_PROGBIT		BIT(1)
+#define ETMST_STARTSTOP		BIT(2)
+#define ETMST_TRIGGER		BIT(3)
 
 #define etm_progbit(t)		(etm_readl((t), ETMR_STATUS) & ETMST_PROGBIT)
 #define etm_started(t)		(etm_readl((t), ETMR_STATUS) & ETMST_STARTSTOP)
@@ -123,7 +111,7 @@ struct tracectx {
 
 #define ETMR_TRACEENCTRL2	0x1c
 #define ETMR_TRACEENCTRL	0x24
-#define ETMTE_INCLEXCL		(1 << 24)
+#define ETMTE_INCLEXCL		BIT(24)
 #define ETMR_TRACEENEVT		0x20
 #define ETMCTRL_OPTS		(ETMCTRL_DO_CPRT | \
 				ETMCTRL_DATA_DO_ADDR | \
@@ -146,12 +134,12 @@ struct tracectx {
 #define ETBR_CTRL		0x20
 #define ETBR_FORMATTERCTRL	0x304
 #define ETBFF_ENFTC		1
-#define ETBFF_ENFCONT		(1 << 1)
-#define ETBFF_FONFLIN		(1 << 4)
-#define ETBFF_MANUAL_FLUSH	(1 << 6)
-#define ETBFF_TRIGIN		(1 << 8)
-#define ETBFF_TRIGEVT		(1 << 9)
-#define ETBFF_TRIGFL		(1 << 10)
+#define ETBFF_ENFCONT		BIT(1)
+#define ETBFF_FONFLIN		BIT(4)
+#define ETBFF_MANUAL_FLUSH	BIT(6)
+#define ETBFF_TRIGIN		BIT(8)
+#define ETBFF_TRIGEVT		BIT(9)
+#define ETBFF_TRIGFL		BIT(10)
 
 #define etb_writel(t, v, x) \
 	(__raw_writel((v), (t)->etb_regs + (x)))

+ 133 - 0
arch/arm/include/asm/hw_breakpoint.h

@@ -0,0 +1,133 @@
+#ifndef _ARM_HW_BREAKPOINT_H
+#define _ARM_HW_BREAKPOINT_H
+
+#ifdef __KERNEL__
+
+struct task_struct;
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+
+struct arch_hw_breakpoint_ctrl {
+		u32 __reserved	: 9,
+		mismatch	: 1,
+				: 9,
+		len		: 8,
+		type		: 2,
+		privilege	: 2,
+		enabled		: 1;
+};
+
+struct arch_hw_breakpoint {
+	u32	address;
+	u32	trigger;
+	struct perf_event *suspended_wp;
+	struct arch_hw_breakpoint_ctrl ctrl;
+};
+
+static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl)
+{
+	return (ctrl.mismatch << 22) | (ctrl.len << 5) | (ctrl.type << 3) |
+		(ctrl.privilege << 1) | ctrl.enabled;
+}
+
+static inline void decode_ctrl_reg(u32 reg,
+				   struct arch_hw_breakpoint_ctrl *ctrl)
+{
+	ctrl->enabled	= reg & 0x1;
+	reg >>= 1;
+	ctrl->privilege	= reg & 0x3;
+	reg >>= 2;
+	ctrl->type	= reg & 0x3;
+	reg >>= 2;
+	ctrl->len	= reg & 0xff;
+	reg >>= 17;
+	ctrl->mismatch	= reg & 0x1;
+}
+
+/* Debug architecture numbers. */
+#define ARM_DEBUG_ARCH_RESERVED	0	/* In case of ptrace ABI updates. */
+#define ARM_DEBUG_ARCH_V6	1
+#define ARM_DEBUG_ARCH_V6_1	2
+#define ARM_DEBUG_ARCH_V7_ECP14	3
+#define ARM_DEBUG_ARCH_V7_MM	4
+
+/* Breakpoint */
+#define ARM_BREAKPOINT_EXECUTE	0
+
+/* Watchpoints */
+#define ARM_BREAKPOINT_LOAD	1
+#define ARM_BREAKPOINT_STORE	2
+
+/* Privilege Levels */
+#define ARM_BREAKPOINT_PRIV	1
+#define ARM_BREAKPOINT_USER	2
+
+/* Lengths */
+#define ARM_BREAKPOINT_LEN_1	0x1
+#define ARM_BREAKPOINT_LEN_2	0x3
+#define ARM_BREAKPOINT_LEN_4	0xf
+#define ARM_BREAKPOINT_LEN_8	0xff
+
+/* Limits */
+#define ARM_MAX_BRP		16
+#define ARM_MAX_WRP		16
+#define ARM_MAX_HBP_SLOTS	(ARM_MAX_BRP + ARM_MAX_WRP)
+
+/* DSCR method of entry bits. */
+#define ARM_DSCR_MOE(x)			((x >> 2) & 0xf)
+#define ARM_ENTRY_BREAKPOINT		0x1
+#define ARM_ENTRY_ASYNC_WATCHPOINT	0x2
+#define ARM_ENTRY_SYNC_WATCHPOINT	0xa
+
+/* DSCR monitor/halting bits. */
+#define ARM_DSCR_HDBGEN		(1 << 14)
+#define ARM_DSCR_MDBGEN		(1 << 15)
+
+/* opcode2 numbers for the co-processor instructions. */
+#define ARM_OP2_BVR		4
+#define ARM_OP2_BCR		5
+#define ARM_OP2_WVR		6
+#define ARM_OP2_WCR		7
+
+/* Base register numbers for the debug registers. */
+#define ARM_BASE_BVR		64
+#define ARM_BASE_BCR		80
+#define ARM_BASE_WVR		96
+#define ARM_BASE_WCR		112
+
+/* Accessor macros for the debug registers. */
+#define ARM_DBG_READ(M, OP2, VAL) do {\
+	asm volatile("mrc p14, 0, %0, c0," #M ", " #OP2 : "=r" (VAL));\
+} while (0)
+
+#define ARM_DBG_WRITE(M, OP2, VAL) do {\
+	asm volatile("mcr p14, 0, %0, c0," #M ", " #OP2 : : "r" (VAL));\
+} while (0)
+
+struct notifier_block;
+struct perf_event;
+struct pmu;
+
+extern struct pmu perf_ops_bp;
+extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
+				  int *gen_len, int *gen_type);
+extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+					   unsigned long val, void *data);
+
+extern u8 arch_get_debug_arch(void);
+extern u8 arch_get_max_wp_len(void);
+extern void clear_ptrace_hw_breakpoint(struct task_struct *tsk);
+
+int arch_install_hw_breakpoint(struct perf_event *bp);
+void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void hw_breakpoint_pmu_read(struct perf_event *bp);
+int hw_breakpoint_slots(int type);
+
+#else
+static inline void clear_ptrace_hw_breakpoint(struct task_struct *tsk) {}
+
+#endif	/* CONFIG_HAVE_HW_BREAKPOINT */
+#endif	/* __KERNEL__ */
+#endif	/* _ARM_HW_BREAKPOINT_H */
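
The two control-register helpers above are symmetric. A minimal usage sketch (field values chosen for illustration, not taken from kernel code):

	struct arch_hw_breakpoint_ctrl ctrl = {
		.len		= ARM_BREAKPOINT_LEN_4,	/* watch 4 bytes */
		.type		= ARM_BREAKPOINT_STORE,	/* trigger on writes */
		.privilege	= ARM_BREAKPOINT_USER,	/* user mode only */
		.enabled	= 1,
	};
	u32 reg = encode_ctrl_reg(ctrl);	/* packs to 0x1f5 in WCR layout */
	struct arch_hw_breakpoint_ctrl out;

	decode_ctrl_reg(reg, &out);		/* out now mirrors ctrl */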

+ 19 - 12
arch/arm/include/asm/module.h

@@ -7,20 +7,27 @@
 
 struct unwind_table;
 
-struct mod_arch_specific
-{
 #ifdef CONFIG_ARM_UNWIND
-	Elf_Shdr *unw_sec_init;
-	Elf_Shdr *unw_sec_devinit;
-	Elf_Shdr *unw_sec_core;
-	Elf_Shdr *sec_init_text;
-	Elf_Shdr *sec_devinit_text;
-	Elf_Shdr *sec_core_text;
-	struct unwind_table *unwind_init;
-	struct unwind_table *unwind_devinit;
-	struct unwind_table *unwind_core;
-#endif
+struct arm_unwind_mapping {
+	Elf_Shdr *unw_sec;
+	Elf_Shdr *sec_text;
+	struct unwind_table *unwind;
+};
+enum {
+	ARM_SEC_INIT,
+	ARM_SEC_DEVINIT,
+	ARM_SEC_CORE,
+	ARM_SEC_EXIT,
+	ARM_SEC_DEVEXIT,
+	ARM_SEC_MAX,
+};
+struct mod_arch_specific {
+	struct arm_unwind_mapping map[ARM_SEC_MAX];
 };
+#else
+struct mod_arch_specific {
+};
+#endif
 
 /*
  * Include the ARM architecture version.

+ 1 - 1
arch/arm/include/asm/perf_event.h

@@ -17,7 +17,7 @@
  * counter interrupts are regular interrupts and not an NMI. This
  * means that when we receive the interrupt we can call
  * perf_event_do_pending() that handles all of the work with
- * interrupts enabled.
+ * interrupts disabled.
  */
 static inline void
 set_perf_event_pending(void)

+ 27 - 3
arch/arm/include/asm/pgtable.h

@@ -278,9 +278,24 @@ extern struct page *empty_zero_page;
 
 #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext)
 
-#define set_pte_at(mm,addr,ptep,pteval) do { \
-	set_pte_ext(ptep, pteval, (addr) >= TASK_SIZE ? 0 : PTE_EXT_NG); \
- } while (0)
+#if __LINUX_ARM_ARCH__ < 6
+static inline void __sync_icache_dcache(pte_t pteval)
+{
+}
+#else
+extern void __sync_icache_dcache(pte_t pteval);
+#endif
+
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t pteval)
+{
+	if (addr >= TASK_SIZE)
+		set_pte_ext(ptep, pteval, 0);
+	else {
+		__sync_icache_dcache(pteval);
+		set_pte_ext(ptep, pteval, PTE_EXT_NG);
+	}
+}
 
 /*
  * The following only work if pte_present() is true.
@@ -290,8 +305,13 @@ extern struct page *empty_zero_page;
 #define pte_write(pte)		(pte_val(pte) & L_PTE_WRITE)
 #define pte_dirty(pte)		(pte_val(pte) & L_PTE_DIRTY)
 #define pte_young(pte)		(pte_val(pte) & L_PTE_YOUNG)
+#define pte_exec(pte)		(pte_val(pte) & L_PTE_EXEC)
 #define pte_special(pte)	(0)
 
+#define pte_present_user(pte) \
+	((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \
+	 (L_PTE_PRESENT | L_PTE_USER))
+
 #define PTE_BIT_FUNC(fn,op) \
 static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
 
@@ -317,6 +337,10 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
 #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
 #define pgprot_dmacoherent(prot) \
 	__pgprot_modify(prot, L_PTE_MT_MASK|L_PTE_EXEC, L_PTE_MT_BUFFERABLE)
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+struct file;
+extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				     unsigned long size, pgprot_t vma_prot);
 #else
 #define pgprot_dmacoherent(prot) \
 	__pgprot_modify(prot, L_PTE_MT_MASK|L_PTE_EXEC, L_PTE_MT_UNCACHED)
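
For ARMv6 and later, the __sync_icache_dcache() called from set_pte_at() lives in arch/arm/mm/flush.c and is not shown in this diff. A rough sketch of its logic, assuming the PG_dcache_clean flag and the pte_present_user()/pte_exec() helpers introduced elsewhere in this merge (the real implementation also copes with aliasing caches and invalid pfns):

	void __sync_icache_dcache(pte_t pteval)
	{
		struct page *page;

		if (!pte_present_user(pteval))
			return;			/* kernel mapping: nothing to do */

		page = pfn_to_page(pte_pfn(pteval));
		if (!test_and_set_bit(PG_dcache_clean, &page->flags))
			__cpuc_flush_dcache_area(page_address(page), PAGE_SIZE);

		if (pte_exec(pteval))
			__flush_icache_all();	/* keep I-cache coherent for exec pages */
	}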

+ 4 - 0
arch/arm/include/asm/processor.h

@@ -19,6 +19,7 @@
 
 #ifdef __KERNEL__
 
+#include <asm/hw_breakpoint.h>
 #include <asm/ptrace.h>
 #include <asm/types.h>
 
@@ -41,6 +42,9 @@ struct debug_entry {
 struct debug_info {
 	int			nsaved;
 	struct debug_entry	bp[2];
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	struct perf_event	*hbp[ARM_MAX_HBP_SLOTS];
+#endif
 };
 
 struct thread_struct {

+ 2 - 0
arch/arm/include/asm/ptrace.h

@@ -29,6 +29,8 @@
 #define PTRACE_SETCRUNCHREGS	26
 #define PTRACE_GETVFPREGS	27
 #define PTRACE_SETVFPREGS	28
+#define PTRACE_GETHBPREGS	29
+#define PTRACE_SETHBPREGS	30
 
 /*
  * PSR bits

+ 17 - 0
arch/arm/include/asm/smp_mpidr.h

@@ -0,0 +1,17 @@
+#ifndef ASMARM_SMP_MIDR_H
+#define ASMARM_SMP_MIDR_H
+
+#define hard_smp_processor_id()						\
+	({								\
+		unsigned int cpunum;					\
+		__asm__("\n"						\
+			"1:	mrc p15, 0, %0, c0, c0, 5\n"		\
+			"	.pushsection \".alt.smp.init\", \"a\"\n"\
+			"	.long	1b\n"				\
+			"	mov	%0, #0\n"			\
+			"	.popsection"				\
+			: "=r" (cpunum));				\
+		cpunum &= 0x0F;						\
+	})
+
+#endif

+ 25 - 0
arch/arm/include/asm/smp_plat.h

@@ -7,15 +7,40 @@
 
 #include <asm/cputype.h>
 
+/*
+ * Return true if we are running on a SMP platform
+ */
+static inline bool is_smp(void)
+{
+#ifndef CONFIG_SMP
+	return false;
+#elif defined(CONFIG_SMP_ON_UP)
+	extern unsigned int smp_on_up;
+	return !!smp_on_up;
+#else
+	return true;
+#endif
+}
+
 /* all SMP configurations have the extended CPUID registers */
 static inline int tlb_ops_need_broadcast(void)
 {
+	if (!is_smp())
+		return 0;
+
 	return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 2;
 }
 
+#if !defined(CONFIG_SMP) || __LINUX_ARM_ARCH__ >= 7
+#define cache_ops_need_broadcast()	0
+#else
 static inline int cache_ops_need_broadcast(void)
 {
+	if (!is_smp())
+		return 0;
+
 	return ((read_cpuid_ext(CPUID_EXT_MMFR3) >> 12) & 0xf) < 1;
 }
+#endif
 
 #endif

+ 6 - 0
arch/arm/include/asm/system.h

@@ -85,6 +85,10 @@ void hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
 				       struct pt_regs *),
 		     int sig, int code, const char *name);
 
+void hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int,
+				       struct pt_regs *),
+		     int sig, int code, const char *name);
+
 #define xchg(ptr,x) \
 	((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
 
@@ -325,6 +329,8 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 extern void disable_hlt(void);
 extern void enable_hlt(void);
 
+void cpu_idle_wait(void);
+
 #include <asm-generic/cmpxchg-local.h>
 
 #if __LINUX_ARM_ARCH__ < 6

+ 27 - 9
arch/arm/include/asm/tlbflush.h

@@ -70,6 +70,10 @@
 #undef _TLB
 #undef MULTI_TLB
 
+#ifdef CONFIG_SMP_ON_UP
+#define MULTI_TLB 1
+#endif
+
 #define v3_tlb_flags	(TLB_V3_FULL | TLB_V3_PAGE)
 
 #ifdef CONFIG_CPU_TLB_V3
@@ -185,17 +189,23 @@
 # define v6wbi_always_flags	(-1UL)
 #endif
 
-#ifdef CONFIG_SMP
-#define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_V7_IS_BTB | \
+#define v7wbi_tlb_flags_smp	(TLB_WB | TLB_DCLEAN | TLB_V7_IS_BTB | \
 			 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID)
-#else
-#define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \
+#define v7wbi_tlb_flags_up	(TLB_WB | TLB_DCLEAN | TLB_BTB | \
 			 TLB_V6_U_FULL | TLB_V6_U_PAGE | TLB_V6_U_ASID)
-#endif
 
 #ifdef CONFIG_CPU_TLB_V7
-# define v7wbi_possible_flags	v7wbi_tlb_flags
-# define v7wbi_always_flags	v7wbi_tlb_flags
+
+# ifdef CONFIG_SMP_ON_UP
+#  define v7wbi_possible_flags	(v7wbi_tlb_flags_smp | v7wbi_tlb_flags_up)
+#  define v7wbi_always_flags	(v7wbi_tlb_flags_smp & v7wbi_tlb_flags_up)
+# elif defined(CONFIG_SMP)
+#  define v7wbi_possible_flags	v7wbi_tlb_flags_smp
+#  define v7wbi_always_flags	v7wbi_tlb_flags_smp
+# else
+#  define v7wbi_possible_flags	v7wbi_tlb_flags_up
+#  define v7wbi_always_flags	v7wbi_tlb_flags_up
+# endif
 # ifdef _TLB
 #  define MULTI_TLB 1
 # else
@@ -560,12 +570,20 @@ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 #endif
 
 /*
- * if PG_dcache_dirty is set for the page, we need to ensure that any
+ * If PG_dcache_clean is not set for the page, we need to ensure that any
  * cache entries for the kernel's virtual memory range are written
- * back to the page.
+ * back to the page. On ARMv6 and later, the cache coherency is handled via
+ * the set_pte_at() function.
  */
+#if __LINUX_ARM_ARCH__ < 6
 extern void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
 	pte_t *ptep);
+#else
+static inline void update_mmu_cache(struct vm_area_struct *vma,
+				    unsigned long addr, pte_t *ptep)
+{
+}
+#endif
 
 #endif
 

+ 3 - 0
arch/arm/include/asm/unistd.h

@@ -393,6 +393,9 @@
 #define __NR_perf_event_open		(__NR_SYSCALL_BASE+364)
 #define __NR_recvmmsg			(__NR_SYSCALL_BASE+365)
 #define __NR_accept4			(__NR_SYSCALL_BASE+366)
+#define __NR_fanotify_init		(__NR_SYSCALL_BASE+367)
+#define __NR_fanotify_mark		(__NR_SYSCALL_BASE+368)
+#define __NR_prlimit64			(__NR_SYSCALL_BASE+369)
 
 /*
  * The following SWIs are ARM private.

+ 1 - 0
arch/arm/kernel/Makefile

@@ -42,6 +42,7 @@ obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_ARM_UNWIND)	+= unwind.o
 obj-$(CONFIG_HAVE_TCM)		+= tcm.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 
 obj-$(CONFIG_CRUNCH)		+= crunch.o crunch-bits.o
 AFLAGS_crunch-bits.o		:= -Wa,-mcpu=ep9312

+ 2 - 0
arch/arm/kernel/armksyms.c

@@ -165,6 +165,8 @@ EXPORT_SYMBOL(_find_next_bit_be);
 #endif
 
 #ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_OLD_MCOUNT
 EXPORT_SYMBOL(mcount);
+#endif
 EXPORT_SYMBOL(__gnu_mcount_nc);
 #endif

+ 3 - 0
arch/arm/kernel/calls.S

@@ -376,6 +376,9 @@
 		CALL(sys_perf_event_open)
 /* 365 */	CALL(sys_recvmmsg)
 		CALL(sys_accept4)
+		CALL(sys_fanotify_init)
+		CALL(sys_fanotify_mark)
+		CALL(sys_prlimit64)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted

+ 5 - 6
arch/arm/kernel/entry-armv.S

@@ -46,7 +46,8 @@
 	 * this macro assumes that irqstat (r6) and base (r5) are
 	 * preserved from get_irqnr_and_base above
 	 */
-	test_for_ipi r0, r6, r5, lr
+	ALT_SMP(test_for_ipi r0, r6, r5, lr)
+	ALT_UP_B(9997f)
 	movne	r0, sp
 	adrne	lr, BSYM(1b)
 	bne	do_IPI
@@ -57,6 +58,7 @@
 	adrne	lr, BSYM(1b)
 	bne	do_local_timer
 #endif
+9997:
 #endif
 
 	.endm
@@ -965,11 +967,8 @@ kuser_cmpxchg_fixup:
 	beq	1b
 	rsbs	r0, r3, #0
 	/* beware -- each __kuser slot must be 8 instructions max */
-#ifdef CONFIG_SMP
-	b	__kuser_memory_barrier
-#else
-	usr_ret	lr
-#endif
+	ALT_SMP(b	__kuser_memory_barrier)
+	ALT_UP(usr_ret	lr)
 
 #endif
 

+ 55 - 12
arch/arm/kernel/entry-common.S

@@ -48,6 +48,8 @@ work_pending:
 	beq	no_work_pending
 	mov	r0, sp				@ 'regs'
 	mov	r2, why				@ 'syscall'
+	tst	r1, #_TIF_SIGPENDING		@ delivering a signal?
+	movne	why, #0				@ prevent further restarts
 	bl	do_notify_resume
 	b	ret_slow_syscall		@ Check work again
 
@@ -127,30 +129,58 @@ ENDPROC(ret_from_fork)
  * clobber the ip register.  This is OK because the ARM calling convention
  * allows it to be clobbered in subroutines and doesn't use it to hold
  * parameters.)
+ *
+ * When using dynamic ftrace, we patch out the mcount call by a "mov r0, r0"
+ * for the mcount case, and a "pop {lr}" for the __gnu_mcount_nc case (see
+ * arch/arm/kernel/ftrace.c).
  */
+
+#ifndef CONFIG_OLD_MCOUNT
+#if (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
+#error Ftrace requires CONFIG_FRAME_POINTER=y with GCC older than 4.4.0.
+#endif
+#endif
+
 #ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(mcount)
+ENTRY(__gnu_mcount_nc)
+	mov	ip, lr
+	ldmia	sp!, {lr}
+	mov	pc, ip
+ENDPROC(__gnu_mcount_nc)
+
+ENTRY(ftrace_caller)
 	stmdb	sp!, {r0-r3, lr}
 	mov	r0, lr
 	sub	r0, r0, #MCOUNT_INSN_SIZE
+	ldr	r1, [sp, #20]
 
-	.globl mcount_call
-mcount_call:
+	.global	ftrace_call
+ftrace_call:
 	bl	ftrace_stub
-	ldr	lr, [fp, #-4]			@ restore lr
-	ldmia	sp!, {r0-r3, pc}
+	ldmia	sp!, {r0-r3, ip, lr}
+	mov	pc, ip
+ENDPROC(ftrace_caller)
 
-ENTRY(ftrace_caller)
+#ifdef CONFIG_OLD_MCOUNT
+ENTRY(mcount)
+	stmdb	sp!, {lr}
+	ldr	lr, [fp, #-4]
+	ldmia	sp!, {pc}
+ENDPROC(mcount)
+
+ENTRY(ftrace_caller_old)
 	stmdb	sp!, {r0-r3, lr}
 	ldr	r1, [fp, #-4]
 	mov	r0, lr
 	sub	r0, r0, #MCOUNT_INSN_SIZE
 
-	.globl ftrace_call
-ftrace_call:
+	.globl ftrace_call_old
+ftrace_call_old:
 	bl	ftrace_stub
 	ldr	lr, [fp, #-4]			@ restore lr
 	ldmia	sp!, {r0-r3, pc}
+ENDPROC(ftrace_caller_old)
+#endif
 
 #else
 
@@ -158,7 +188,7 @@ ENTRY(__gnu_mcount_nc)
 	stmdb	sp!, {r0-r3, lr}
 	ldr	r0, =ftrace_trace_function
 	ldr	r2, [r0]
-	adr	r0, ftrace_stub
+	adr	r0, .Lftrace_stub
 	cmp	r0, r2
 	bne	gnu_trace
 	ldmia	sp!, {r0-r3, ip, lr}
@@ -168,11 +198,19 @@ gnu_trace:
 	ldr	r1, [sp, #20]			@ lr of instrumented routine
 	mov	r0, lr
 	sub	r0, r0, #MCOUNT_INSN_SIZE
-	mov	lr, pc
+	adr	lr, BSYM(1f)
 	mov	pc, r2
+1:
 	ldmia	sp!, {r0-r3, ip, lr}
 	mov	pc, ip
+ENDPROC(__gnu_mcount_nc)
 
+#ifdef CONFIG_OLD_MCOUNT
+/*
+ * This is under an ifdef in order to force link-time errors for people trying
+ * to build with !FRAME_POINTER with a GCC which doesn't use the new-style
+ * mcount.
+ */
 ENTRY(mcount)
 	stmdb	sp!, {r0-r3, lr}
 	ldr	r0, =ftrace_trace_function
@@ -191,12 +229,15 @@ trace:
 	mov	pc, r2
 	ldr	lr, [fp, #-4]			@ restore lr
 	ldmia	sp!, {r0-r3, pc}
+ENDPROC(mcount)
+#endif
 
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
-	.globl ftrace_stub
-ftrace_stub:
+ENTRY(ftrace_stub)
+.Lftrace_stub:
 	mov	pc, lr
+ENDPROC(ftrace_stub)
 
 #endif /* CONFIG_FUNCTION_TRACER */
 
@@ -418,11 +459,13 @@ ENDPROC(sys_clone_wrapper)
 
 sys_sigreturn_wrapper:
 		add	r0, sp, #S_OFF
+		mov	why, #0		@ prevent syscall restart handling
 		b	sys_sigreturn
 ENDPROC(sys_sigreturn_wrapper)
 
 sys_rt_sigreturn_wrapper:
 		add	r0, sp, #S_OFF
+		mov	why, #0		@ prevent syscall restart handling
 		b	sys_rt_sigreturn
 ENDPROC(sys_rt_sigreturn_wrapper)
 

+ 15 - 0
arch/arm/kernel/etm.c

@@ -30,6 +30,21 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Alexander Shishkin");
 
+/*
+ * ETM tracer state
+ */
+struct tracectx {
+	unsigned int	etb_bufsz;
+	void __iomem	*etb_regs;
+	void __iomem	*etm_regs;
+	unsigned long	flags;
+	int		ncmppairs;
+	int		etm_portsz;
+	struct device	*dev;
+	struct clk	*emu_clk;
+	struct mutex	mutex;
+};
+
 static struct tracectx tracer;
 
 static inline bool trace_isrunning(struct tracectx *t)

+ 140 - 48
arch/arm/kernel/ftrace.c

@@ -2,102 +2,194 @@
  * Dynamic function tracing support.
  *
  * Copyright (C) 2008 Abhishek Sagar <sagar.abhishek@gmail.com>
+ * Copyright (C) 2010 Rabin Vincent <rabin@rab.in>
  *
  * For licencing details, see COPYING.
  *
  * Defines low-level handling of mcount calls when the kernel
  * is compiled with the -pg flag. When using dynamic ftrace, the
- * mcount call-sites get patched lazily with NOP till they are
- * enabled. All code mutation routines here take effect atomically.
+ * mcount call-sites get patched with NOP till they are enabled.
+ * All code mutation routines here are called under stop_machine().
  */
 
 #include <linux/ftrace.h>
+#include <linux/uaccess.h>
 
 #include <asm/cacheflush.h>
 #include <asm/ftrace.h>
 
-#define PC_OFFSET      8
-#define BL_OPCODE      0xeb000000
-#define BL_OFFSET_MASK 0x00ffffff
+#ifdef CONFIG_THUMB2_KERNEL
+#define	NOP		0xeb04f85d	/* pop.w {lr} */
+#else
+#define	NOP		0xe8bd4000	/* pop {lr} */
+#endif
 
-static unsigned long bl_insn;
-static const unsigned long NOP = 0xe1a00000; /* mov r0, r0 */
+#ifdef CONFIG_OLD_MCOUNT
+#define OLD_MCOUNT_ADDR	((unsigned long) mcount)
+#define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old)
 
-unsigned char *ftrace_nop_replace(void)
+#define	OLD_NOP		0xe1a00000	/* mov r0, r0 */
+
+static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec)
 {
-	return (char *)&NOP;
+	return rec->arch.old_mcount ? OLD_NOP : NOP;
 }
 
+static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
+{
+	if (!rec->arch.old_mcount)
+		return addr;
+
+	if (addr == MCOUNT_ADDR)
+		addr = OLD_MCOUNT_ADDR;
+	else if (addr == FTRACE_ADDR)
+		addr = OLD_FTRACE_ADDR;
+
+	return addr;
+}
+#else
+static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec)
+{
+	return NOP;
+}
+
+static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
+{
+	return addr;
+}
+#endif
+
 /* construct a branch (BL) instruction to addr */
-unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr)
+#ifdef CONFIG_THUMB2_KERNEL
+static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
 {
+	unsigned long s, j1, j2, i1, i2, imm10, imm11;
+	unsigned long first, second;
 	long offset;
 
-	offset = (long)addr - (long)(pc + PC_OFFSET);
+	offset = (long)addr - (long)(pc + 4);
+	if (offset < -16777216 || offset > 16777214) {
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+
+	s	= (offset >> 24) & 0x1;
+	i1	= (offset >> 23) & 0x1;
+	i2	= (offset >> 22) & 0x1;
+	imm10	= (offset >> 12) & 0x3ff;
+	imm11	= (offset >>  1) & 0x7ff;
+
+	j1 = (!i1) ^ s;
+	j2 = (!i2) ^ s;
+
+	first = 0xf000 | (s << 10) | imm10;
+	second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11;
+
+	return (second << 16) | first;
+}
+#else
+static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+{
+	long offset;
+
+	offset = (long)addr - (long)(pc + 8);
 	if (unlikely(offset < -33554432 || offset > 33554428)) {
 		/* Can't generate branches that far (from ARM ARM). Ftrace
 		 * doesn't generate branches outside of kernel text.
 		 */
 		WARN_ON_ONCE(1);
-		return NULL;
+		return 0;
 	}
-	offset = (offset >> 2) & BL_OFFSET_MASK;
-	bl_insn = BL_OPCODE | offset;
-	return (unsigned char *)&bl_insn;
-}
 
-int ftrace_modify_code(unsigned long pc, unsigned char *old_code,
-		       unsigned char *new_code)
-{
-	unsigned long err = 0, replaced = 0, old, new;
+	offset = (offset >> 2) & 0x00ffffff;
 
-	old = *(unsigned long *)old_code;
-	new = *(unsigned long *)new_code;
+	return 0xeb000000 | offset;
+}
+#endif
 
-	__asm__ __volatile__ (
-		"1:  ldr    %1, [%2]  \n"
-		"    cmp    %1, %4    \n"
-		"2:  streq  %3, [%2]  \n"
-		"    cmpne  %1, %3    \n"
-		"    movne  %0, #2    \n"
-		"3:\n"
+static int ftrace_modify_code(unsigned long pc, unsigned long old,
+			      unsigned long new)
+{
+	unsigned long replaced;
 
-		".pushsection .fixup, \"ax\"\n"
-		"4:  mov  %0, #1  \n"
-		"    b    3b      \n"
-		".popsection\n"
+	if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE))
+		return -EFAULT;
 
-		".pushsection __ex_table, \"a\"\n"
-		"    .long 1b, 4b \n"
-		"    .long 2b, 4b \n"
-		".popsection\n"
+	if (replaced != old)
+		return -EINVAL;
 
-		: "=r"(err), "=r"(replaced)
-		: "r"(pc), "r"(new), "r"(old), "0"(err), "1"(replaced)
-		: "memory");
+	if (probe_kernel_write((void *)pc, &new, MCOUNT_INSN_SIZE))
+		return -EPERM;
 
-	if (!err && (replaced == old))
-		flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+	flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
 
-	return err;
+	return 0;
 }
 
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
-	int ret;
 	unsigned long pc, old;
-	unsigned char *new;
+	unsigned long new;
+	int ret;
 
 	pc = (unsigned long)&ftrace_call;
 	memcpy(&old, &ftrace_call, MCOUNT_INSN_SIZE);
 	new = ftrace_call_replace(pc, (unsigned long)func);
-	ret = ftrace_modify_code(pc, (unsigned char *)&old, new);
+
+	ret = ftrace_modify_code(pc, old, new);
+
+#ifdef CONFIG_OLD_MCOUNT
+	if (!ret) {
+		pc = (unsigned long)&ftrace_call_old;
+		memcpy(&old, &ftrace_call_old, MCOUNT_INSN_SIZE);
+		new = ftrace_call_replace(pc, (unsigned long)func);
+
+		ret = ftrace_modify_code(pc, old, new);
+	}
+#endif
+
+	return ret;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long new, old;
+	unsigned long ip = rec->ip;
+
+	old = ftrace_nop_replace(rec);
+	new = ftrace_call_replace(ip, adjust_address(rec, addr));
+
+	return ftrace_modify_code(rec->ip, old, new);
+}
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned long old;
+	unsigned long new;
+	int ret;
+
+	old = ftrace_call_replace(ip, adjust_address(rec, addr));
+	new = ftrace_nop_replace(rec);
+	ret = ftrace_modify_code(ip, old, new);
+
+#ifdef CONFIG_OLD_MCOUNT
+	if (ret == -EINVAL && addr == MCOUNT_ADDR) {
+		rec->arch.old_mcount = true;
+
+		old = ftrace_call_replace(ip, adjust_address(rec, addr));
+		new = ftrace_nop_replace(rec);
+		ret = ftrace_modify_code(ip, old, new);
+	}
+#endif
+
 	return ret;
 }
 
-/* run from ftrace_init with irqs disabled */
 int __init ftrace_dyn_arch_init(void *data)
 {
-	ftrace_mcount_set(data);
+	*(unsigned long *)data = 0;
+
 	return 0;
 }
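
Worked example of the ARM-mode ftrace_call_replace() above (addresses made up for illustration): patching a call site at pc = 0xc0008000 to branch to addr = 0xc0010000 gives offset = 0xc0010000 - (0xc0008000 + 8) = 0x7ff8; shifting right by 2 yields 0x1ffe, and OR-ing that into the BL opcode produces 0xeb001ffe. ftrace_modify_code() then verifies the existing instruction with probe_kernel_read(), writes the new word with probe_kernel_write(), and flushes the I-cache range over the patched site.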

+ 1 - 1
arch/arm/kernel/head-common.S

@@ -20,7 +20,7 @@
 __switch_data:
 	.long	__mmap_switched
 	.long	__data_loc			@ r4
-	.long	_data				@ r5
+	.long	_sdata				@ r5
 	.long	__bss_start			@ r6
 	.long	_end				@ r7
 	.long	processor_id			@ r4

+ 50 - 0
arch/arm/kernel/head.S

@@ -86,6 +86,9 @@ ENTRY(stext)
 	movs	r8, r5				@ invalid machine (r5=0)?
 	beq	__error_a			@ yes, error 'a'
 	bl	__vet_atags
+#ifdef CONFIG_SMP_ON_UP
+	bl	__fixup_smp
+#endif
 	bl	__create_page_tables
 
 	/*
@@ -333,4 +336,51 @@ __create_page_tables:
 ENDPROC(__create_page_tables)
 	.ltorg
 
+#ifdef CONFIG_SMP_ON_UP
+__fixup_smp:
+	mov	r7, #0x00070000
+	orr	r6, r7, #0xff000000	@ mask 0xff070000
+	orr	r7, r7, #0x41000000	@ val 0x41070000
+	and	r0, r9, r6
+	teq	r0, r7			@ ARM CPU and ARMv6/v7?
+	bne	__fixup_smp_on_up	@ no, assume UP
+
+	orr	r6, r6, #0x0000ff00
+	orr	r6, r6, #0x000000f0	@ mask 0xff07fff0
+	orr	r7, r7, #0x0000b000
+	orr	r7, r7, #0x00000020	@ val 0x4107b020
+	and	r0, r9, r6
+	teq	r0, r7			@ ARM 11MPCore?
+	moveq	pc, lr			@ yes, assume SMP
+
+	mrc	p15, 0, r0, c0, c0, 5	@ read MPIDR
+	tst	r0, #1 << 31
+	movne	pc, lr			@ bit 31 => SMP
+
+__fixup_smp_on_up:
+	adr	r0, 1f
+	ldmia	r0, {r3, r6, r7}
+	sub	r3, r0, r3
+	add	r6, r6, r3
+	add	r7, r7, r3
+2:	cmp	r6, r7
+	ldmia	r6!, {r0, r4}
+	strlo	r4, [r0, r3]
+	blo	2b
+	mov	pc, lr
+ENDPROC(__fixup_smp)
+
+1:	.word	.
+	.word	__smpalt_begin
+	.word	__smpalt_end
+
+	.pushsection .data
+	.globl	smp_on_up
+smp_on_up:
+	ALT_SMP(.long	1)
+	ALT_UP(.long	0)
+	.popsection
+
+#endif
+
 #include "head-common.S"

+ 849 - 0
arch/arm/kernel/hw_breakpoint.c

@@ -0,0 +1,849 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2009, 2010 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ */
+#define pr_fmt(fmt) "hw-breakpoint: " fmt
+
+#include <linux/errno.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/current.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/kdebug.h>
+#include <asm/system.h>
+#include <asm/traps.h>
+
+/* Breakpoint currently in use for each BRP. */
+static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
+
+/* Watchpoint currently in use for each WRP. */
+static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]);
+
+/* Number of BRP/WRP registers on this CPU. */
+static int core_num_brps;
+static int core_num_wrps;
+
+/* Debug architecture version. */
+static u8 debug_arch;
+
+/* Maximum supported watchpoint length. */
+static u8 max_watchpoint_len;
+
+/* Determine number of BRP registers available. */
+static int get_num_brps(void)
+{
+	u32 didr;
+	ARM_DBG_READ(c0, 0, didr);
+	return ((didr >> 24) & 0xf) + 1;
+}
+
+/* Determine number of WRP registers available. */
+static int get_num_wrps(void)
+{
+	/*
+	 * FIXME: When a watchpoint fires, the only way to work out which
+	 * watchpoint it was is by disassembling the faulting instruction
+	 * and working out the address of the memory access.
+	 *
+	 * Furthermore, we can only do this if the watchpoint was precise
+	 * since imprecise watchpoints prevent us from calculating register
+	 * based addresses.
+	 *
+	 * For the time being, we only report 1 watchpoint register so we
+	 * always know which watchpoint fired. In the future we can either
+	 * add a disassembler and address generation emulator, or we can
+	 * insert a check to see if the DFAR is set on watchpoint exception
+	 * entry [the ARM ARM states that the DFAR is UNKNOWN, but
+	 * experience shows that it is set on some implementations].
+	 */
+
+#if 0
+	u32 didr, wrps;
+	ARM_DBG_READ(c0, 0, didr);
+	return ((didr >> 28) & 0xf) + 1;
+#endif
+
+	return 1;
+}
+
+int hw_breakpoint_slots(int type)
+{
+	/*
+	 * We can be called early, so don't rely on
+	 * our static variables being initialised.
+	 */
+	switch (type) {
+	case TYPE_INST:
+		return get_num_brps();
+	case TYPE_DATA:
+		return get_num_wrps();
+	default:
+		pr_warning("unknown slot type: %d\n", type);
+		return 0;
+	}
+}
+
+/* Determine debug architecture. */
+static u8 get_debug_arch(void)
+{
+	u32 didr;
+
+	/* Do we implement the extended CPUID interface? */
+	if (((read_cpuid_id() >> 16) & 0xf) != 0xf) {
+		pr_warning("CPUID feature registers not supported. "
+				"Assuming v6 debug is present.\n");
+		return ARM_DEBUG_ARCH_V6;
+	}
+
+	ARM_DBG_READ(c0, 0, didr);
+	return (didr >> 16) & 0xf;
+}
+
+/* Does this core support mismatch breakpoints? */
+static int core_has_mismatch_bps(void)
+{
+	return debug_arch >= ARM_DEBUG_ARCH_V7_ECP14 && core_num_brps > 1;
+}
+
+u8 arch_get_debug_arch(void)
+{
+	return debug_arch;
+}
+
+#define READ_WB_REG_CASE(OP2, M, VAL)		\
+	case ((OP2 << 4) + M):			\
+		ARM_DBG_READ(c ## M, OP2, VAL); \
+		break
+
+#define WRITE_WB_REG_CASE(OP2, M, VAL)		\
+	case ((OP2 << 4) + M):			\
+		ARM_DBG_WRITE(c ## M, OP2, VAL);\
+		break
+
+#define GEN_READ_WB_REG_CASES(OP2, VAL)		\
+	READ_WB_REG_CASE(OP2, 0, VAL);		\
+	READ_WB_REG_CASE(OP2, 1, VAL);		\
+	READ_WB_REG_CASE(OP2, 2, VAL);		\
+	READ_WB_REG_CASE(OP2, 3, VAL);		\
+	READ_WB_REG_CASE(OP2, 4, VAL);		\
+	READ_WB_REG_CASE(OP2, 5, VAL);		\
+	READ_WB_REG_CASE(OP2, 6, VAL);		\
+	READ_WB_REG_CASE(OP2, 7, VAL);		\
+	READ_WB_REG_CASE(OP2, 8, VAL);		\
+	READ_WB_REG_CASE(OP2, 9, VAL);		\
+	READ_WB_REG_CASE(OP2, 10, VAL);		\
+	READ_WB_REG_CASE(OP2, 11, VAL);		\
+	READ_WB_REG_CASE(OP2, 12, VAL);		\
+	READ_WB_REG_CASE(OP2, 13, VAL);		\
+	READ_WB_REG_CASE(OP2, 14, VAL);		\
+	READ_WB_REG_CASE(OP2, 15, VAL)
+
+#define GEN_WRITE_WB_REG_CASES(OP2, VAL)	\
+	WRITE_WB_REG_CASE(OP2, 0, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 1, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 2, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 3, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 4, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 5, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 6, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 7, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 8, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 9, VAL);		\
+	WRITE_WB_REG_CASE(OP2, 10, VAL);	\
+	WRITE_WB_REG_CASE(OP2, 11, VAL);	\
+	WRITE_WB_REG_CASE(OP2, 12, VAL);	\
+	WRITE_WB_REG_CASE(OP2, 13, VAL);	\
+	WRITE_WB_REG_CASE(OP2, 14, VAL);	\
+	WRITE_WB_REG_CASE(OP2, 15, VAL)
+
+static u32 read_wb_reg(int n)
+{
+	u32 val = 0;
+
+	switch (n) {
+	GEN_READ_WB_REG_CASES(ARM_OP2_BVR, val);
+	GEN_READ_WB_REG_CASES(ARM_OP2_BCR, val);
+	GEN_READ_WB_REG_CASES(ARM_OP2_WVR, val);
+	GEN_READ_WB_REG_CASES(ARM_OP2_WCR, val);
+	default:
+		pr_warning("attempt to read from unknown breakpoint "
+				"register %d\n", n);
+	}
+
+	return val;
+}
+
+static void write_wb_reg(int n, u32 val)
+{
+	switch (n) {
+	GEN_WRITE_WB_REG_CASES(ARM_OP2_BVR, val);
+	GEN_WRITE_WB_REG_CASES(ARM_OP2_BCR, val);
+	GEN_WRITE_WB_REG_CASES(ARM_OP2_WVR, val);
+	GEN_WRITE_WB_REG_CASES(ARM_OP2_WCR, val);
+	default:
+		pr_warning("attempt to write to unknown breakpoint "
+				"register %d\n", n);
+	}
+	isb();
+}
+
+/*
+ * In order to access the breakpoint/watchpoint control registers,
+ * we must be running in debug monitor mode. Unfortunately, we can
+ * be put into halting debug mode at any time by an external debugger
+ * but there is nothing we can do to prevent that.
+ */
+static int enable_monitor_mode(void)
+{
+	u32 dscr;
+	int ret = 0;
+
+	ARM_DBG_READ(c1, 0, dscr);
+
+	/* Ensure that halting mode is disabled. */
+	if (WARN_ONCE(dscr & ARM_DSCR_HDBGEN, "halting debug mode enabled. "
+				"Unable to access hardware resources.")) {
+		ret = -EPERM;
+		goto out;
+	}
+
+	/* Write to the corresponding DSCR. */
+	switch (debug_arch) {
+	case ARM_DEBUG_ARCH_V6:
+	case ARM_DEBUG_ARCH_V6_1:
+		ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN));
+		break;
+	case ARM_DEBUG_ARCH_V7_ECP14:
+		ARM_DBG_WRITE(c2, 2, (dscr | ARM_DSCR_MDBGEN));
+		break;
+	default:
+		ret = -ENODEV;
+		goto out;
+	}
+
+	/* Check that the write made it through. */
+	ARM_DBG_READ(c1, 0, dscr);
+	if (WARN_ONCE(!(dscr & ARM_DSCR_MDBGEN),
+				"failed to enable monitor mode.")) {
+		ret = -EPERM;
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Check if 8-bit byte-address select is available.
+ * This clobbers WRP 0.
+ */
+static u8 get_max_wp_len(void)
+{
+	u32 ctrl_reg;
+	struct arch_hw_breakpoint_ctrl ctrl;
+	u8 size = 4;
+
+	if (debug_arch < ARM_DEBUG_ARCH_V7_ECP14)
+		goto out;
+
+	if (enable_monitor_mode())
+		goto out;
+
+	memset(&ctrl, 0, sizeof(ctrl));
+	ctrl.len = ARM_BREAKPOINT_LEN_8;
+	ctrl_reg = encode_ctrl_reg(ctrl);
+
+	write_wb_reg(ARM_BASE_WVR, 0);
+	write_wb_reg(ARM_BASE_WCR, ctrl_reg);
+	if ((read_wb_reg(ARM_BASE_WCR) & ctrl_reg) == ctrl_reg)
+		size = 8;
+
+out:
+	return size;
+}
+
+u8 arch_get_max_wp_len(void)
+{
+	return max_watchpoint_len;
+}
+
+/*
+ * Handler for reactivating a suspended watchpoint when the single
+ * step `mismatch' breakpoint is triggered.
+ */
+static void wp_single_step_handler(struct perf_event *bp, int unused,
+				   struct perf_sample_data *data,
+				   struct pt_regs *regs)
+{
+	perf_event_enable(counter_arch_bp(bp)->suspended_wp);
+	unregister_hw_breakpoint(bp);
+}
+
+static int bp_is_single_step(struct perf_event *bp)
+{
+	return bp->overflow_handler == wp_single_step_handler;
+}
+
+/*
+ * Install a perf counter breakpoint.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	struct perf_event **slot, **slots;
+	int i, max_slots, ctrl_base, val_base, ret = 0;
+
+	/* Ensure that we are in monitor mode and halting mode is disabled. */
+	ret = enable_monitor_mode();
+	if (ret)
+		goto out;
+
+	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
+		/* Breakpoint */
+		ctrl_base = ARM_BASE_BCR;
+		val_base = ARM_BASE_BVR;
+		slots = __get_cpu_var(bp_on_reg);
+		max_slots = core_num_brps - 1;
+
+		if (bp_is_single_step(bp)) {
+			info->ctrl.mismatch = 1;
+			i = max_slots;
+			slots[i] = bp;
+			goto setup;
+		}
+	} else {
+		/* Watchpoint */
+		ctrl_base = ARM_BASE_WCR;
+		val_base = ARM_BASE_WVR;
+		slots = __get_cpu_var(wp_on_reg);
+		max_slots = core_num_wrps;
+	}
+
+	for (i = 0; i < max_slots; ++i) {
+		slot = &slots[i];
+
+		if (!*slot) {
+			*slot = bp;
+			break;
+		}
+	}
+
+	if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+setup:
+	/* Setup the address register. */
+	write_wb_reg(val_base + i, info->address);
+
+	/* Setup the control register. */
+	write_wb_reg(ctrl_base + i, encode_ctrl_reg(info->ctrl) | 0x1);
+
+out:
+	return ret;
+}
+
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	struct perf_event **slot, **slots;
+	int i, max_slots, base;
+
+	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
+		/* Breakpoint */
+		base = ARM_BASE_BCR;
+		slots = __get_cpu_var(bp_on_reg);
+		max_slots = core_num_brps - 1;
+
+		if (bp_is_single_step(bp)) {
+			i = max_slots;
+			slots[i] = NULL;
+			goto reset;
+		}
+	} else {
+		/* Watchpoint */
+		base = ARM_BASE_WCR;
+		slots = __get_cpu_var(wp_on_reg);
+		max_slots = core_num_wrps;
+	}
+
+	/* Remove the breakpoint. */
+	for (i = 0; i < max_slots; ++i) {
+		slot = &slots[i];
+
+		if (*slot == bp) {
+			*slot = NULL;
+			break;
+		}
+	}
+
+	if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot"))
+		return;
+
+reset:
+	/* Reset the control register. */
+	write_wb_reg(base + i, 0);
+}
+
+static int get_hbp_len(u8 hbp_len)
+{
+	unsigned int len_in_bytes = 0;
+
+	switch (hbp_len) {
+	case ARM_BREAKPOINT_LEN_1:
+		len_in_bytes = 1;
+		break;
+	case ARM_BREAKPOINT_LEN_2:
+		len_in_bytes = 2;
+		break;
+	case ARM_BREAKPOINT_LEN_4:
+		len_in_bytes = 4;
+		break;
+	case ARM_BREAKPOINT_LEN_8:
+		len_in_bytes = 8;
+		break;
+	}
+
+	return len_in_bytes;
+}
+
+/*
+ * Check whether bp virtual address is in kernel space.
+ */
+int arch_check_bp_in_kernelspace(struct perf_event *bp)
+{
+	unsigned int len;
+	unsigned long va;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	va = info->address;
+	len = get_hbp_len(info->ctrl.len);
+
+	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
+}
+
+/*
+ * Extract generic type and length encodings from an arch_hw_breakpoint_ctrl.
+ * Hopefully this will disappear when ptrace can bypass the conversion
+ * to generic breakpoint descriptions.
+ */
+int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
+			   int *gen_len, int *gen_type)
+{
+	/* Type */
+	switch (ctrl.type) {
+	case ARM_BREAKPOINT_EXECUTE:
+		*gen_type = HW_BREAKPOINT_X;
+		break;
+	case ARM_BREAKPOINT_LOAD:
+		*gen_type = HW_BREAKPOINT_R;
+		break;
+	case ARM_BREAKPOINT_STORE:
+		*gen_type = HW_BREAKPOINT_W;
+		break;
+	case ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE:
+		*gen_type = HW_BREAKPOINT_RW;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Len */
+	switch (ctrl.len) {
+	case ARM_BREAKPOINT_LEN_1:
+		*gen_len = HW_BREAKPOINT_LEN_1;
+		break;
+	case ARM_BREAKPOINT_LEN_2:
+		*gen_len = HW_BREAKPOINT_LEN_2;
+		break;
+	case ARM_BREAKPOINT_LEN_4:
+		*gen_len = HW_BREAKPOINT_LEN_4;
+		break;
+	case ARM_BREAKPOINT_LEN_8:
+		*gen_len = HW_BREAKPOINT_LEN_8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Construct an arch_hw_breakpoint from a perf_event.
+ */
+static int arch_build_bp_info(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	/* Type */
+	switch (bp->attr.bp_type) {
+	case HW_BREAKPOINT_X:
+		info->ctrl.type = ARM_BREAKPOINT_EXECUTE;
+		break;
+	case HW_BREAKPOINT_R:
+		info->ctrl.type = ARM_BREAKPOINT_LOAD;
+		break;
+	case HW_BREAKPOINT_W:
+		info->ctrl.type = ARM_BREAKPOINT_STORE;
+		break;
+	case HW_BREAKPOINT_RW:
+		info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Len */
+	switch (bp->attr.bp_len) {
+	case HW_BREAKPOINT_LEN_1:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_1;
+		break;
+	case HW_BREAKPOINT_LEN_2:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_2;
+		break;
+	case HW_BREAKPOINT_LEN_4:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_4;
+		break;
+	case HW_BREAKPOINT_LEN_8:
+		info->ctrl.len = ARM_BREAKPOINT_LEN_8;
+		if ((info->ctrl.type != ARM_BREAKPOINT_EXECUTE)
+			&& max_watchpoint_len >= 8)
+			break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Address */
+	info->address = bp->attr.bp_addr;
+
+	/* Privilege */
+	info->ctrl.privilege = ARM_BREAKPOINT_USER;
+	if (arch_check_bp_in_kernelspace(bp) && !bp_is_single_step(bp))
+		info->ctrl.privilege |= ARM_BREAKPOINT_PRIV;
+
+	/* Enabled? */
+	info->ctrl.enabled = !bp->attr.disabled;
+
+	/* Mismatch */
+	info->ctrl.mismatch = 0;
+
+	return 0;
+}
+
+/*
+ * Validate the arch-specific HW Breakpoint register settings.
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	int ret = 0;
+	u32 bytelen, max_len, offset, alignment_mask = 0x3;
+
+	/* Build the arch_hw_breakpoint. */
+	ret = arch_build_bp_info(bp);
+	if (ret)
+		goto out;
+
+	/* Check address alignment. */
+	if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
+		alignment_mask = 0x7;
+	if (info->address & alignment_mask) {
+		/*
+		 * Try to fix the alignment. This may result in a length
+		 * that is too large, so we must check for that.
+		 */
+		bytelen = get_hbp_len(info->ctrl.len);
+		max_len = info->ctrl.type == ARM_BREAKPOINT_EXECUTE ? 4 :
+				max_watchpoint_len;
+
+		if (max_len >= 8)
+			offset = info->address & 0x7;
+		else
+			offset = info->address & 0x3;
+
+		if (bytelen > (1 << ((max_len - (offset + 1)) >> 1))) {
+			ret = -EFBIG;
+			goto out;
+		}
+
+		info->ctrl.len <<= offset;
+		info->address &= ~offset;
+
+		pr_debug("breakpoint alignment fixup: length = 0x%x, "
+			"address = 0x%x\n", info->ctrl.len, info->address);
+	}
+
+	/*
+	 * Currently we rely on an overflow handler to take
+	 * care of single-stepping the breakpoint when it fires.
+	 * In the case of userspace breakpoints on a core with V7 debug,
+	 * we can use the mismatch feature as a poor-man's hardware single-step.
+	 */
+	if (WARN_ONCE(!bp->overflow_handler &&
+		(arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_bps()),
+			"overflow handler required but none found")) {
+		ret = -EINVAL;
+		goto out;
+	}
+out:
+	return ret;
+}
+
+static void update_mismatch_flag(int idx, int flag)
+{
+	struct perf_event *bp = __get_cpu_var(bp_on_reg[idx]);
+	struct arch_hw_breakpoint *info;
+
+	if (bp == NULL)
+		return;
+
+	info = counter_arch_bp(bp);
+
+	/* Update the mismatch field to enter/exit `single-step' mode */
+	if (!bp->overflow_handler && info->ctrl.mismatch != flag) {
+		info->ctrl.mismatch = flag;
+		write_wb_reg(ARM_BASE_BCR + idx, encode_ctrl_reg(info->ctrl) | 0x1);
+	}
+}
+
+static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
+{
+	int i;
+	struct perf_event *bp, **slots = __get_cpu_var(wp_on_reg);
+	struct arch_hw_breakpoint *info;
+	struct perf_event_attr attr;
+
+	/* Without a disassembler, we can only handle 1 watchpoint. */
+	BUG_ON(core_num_wrps > 1);
+
+	hw_breakpoint_init(&attr);
+	attr.bp_addr	= regs->ARM_pc & ~0x3;
+	attr.bp_len	= HW_BREAKPOINT_LEN_4;
+	attr.bp_type	= HW_BREAKPOINT_X;
+
+	for (i = 0; i < core_num_wrps; ++i) {
+		rcu_read_lock();
+
+		if (slots[i] == NULL) {
+			rcu_read_unlock();
+			continue;
+		}
+
+		/*
+		 * The DFAR is an unknown value. Since we only allow a
+		 * single watchpoint, we can set the trigger to the lowest
+		 * possible faulting address.
+		 */
+		info = counter_arch_bp(slots[i]);
+		info->trigger = slots[i]->attr.bp_addr;
+		pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
+		perf_bp_event(slots[i], regs);
+
+		/*
+		 * If no overflow handler is present, insert a temporary
+		 * mismatch breakpoint so we can single-step over the
+		 * watchpoint trigger.
+		 */
+		if (!slots[i]->overflow_handler) {
+			bp = register_user_hw_breakpoint(&attr,
+							 wp_single_step_handler,
+							 current);
+			counter_arch_bp(bp)->suspended_wp = slots[i];
+			perf_event_disable(slots[i]);
+		}
+
+		rcu_read_unlock();
+	}
+}
+
+static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs)
+{
+	int i;
+	int mismatch;
+	u32 ctrl_reg, val, addr;
+	struct perf_event *bp, **slots = __get_cpu_var(bp_on_reg);
+	struct arch_hw_breakpoint *info;
+	struct arch_hw_breakpoint_ctrl ctrl;
+
+	/* The exception entry code places the amended lr in the PC. */
+	addr = regs->ARM_pc;
+
+	for (i = 0; i < core_num_brps; ++i) {
+		rcu_read_lock();
+
+		bp = slots[i];
+
+		if (bp == NULL) {
+			rcu_read_unlock();
+			continue;
+		}
+
+		mismatch = 0;
+
+		/* Check if the breakpoint value matches. */
+		val = read_wb_reg(ARM_BASE_BVR + i);
+		if (val != (addr & ~0x3))
+			goto unlock;
+
+		/* Possible match, check the byte address select to confirm. */
+		ctrl_reg = read_wb_reg(ARM_BASE_BCR + i);
+		decode_ctrl_reg(ctrl_reg, &ctrl);
+		if ((1 << (addr & 0x3)) & ctrl.len) {
+			mismatch = 1;
+			info = counter_arch_bp(bp);
+			info->trigger = addr;
+		}
+
+unlock:
+		if ((mismatch && !info->ctrl.mismatch) || bp_is_single_step(bp)) {
+			pr_debug("breakpoint fired: address = 0x%x\n", addr);
+			perf_bp_event(bp, regs);
+		}
+
+		update_mismatch_flag(i, mismatch);
+		rcu_read_unlock();
+	}
+}
+
+/*
+ * Called from either the Data Abort Handler [watchpoint] or the
+ * Prefetch Abort Handler [breakpoint].
+ */
+static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
+				 struct pt_regs *regs)
+{
+	int ret = 1; /* Unhandled fault. */
+	u32 dscr;
+
+	/* We only handle watchpoints and hardware breakpoints. */
+	ARM_DBG_READ(c1, 0, dscr);
+
+	/* Perform perf callbacks. */
+	switch (ARM_DSCR_MOE(dscr)) {
+	case ARM_ENTRY_BREAKPOINT:
+		breakpoint_handler(addr, regs);
+		break;
+	case ARM_ENTRY_ASYNC_WATCHPOINT:
+		WARN_ON("Asynchronous watchpoint exception taken. "
+			"Debugging results may be unreliable");
+	case ARM_ENTRY_SYNC_WATCHPOINT:
+		watchpoint_handler(addr, regs);
+		break;
+	default:
+		goto out;
+	}
+
+	ret = 0;
+out:
+	return ret;
+}
+
+/*
+ * One-time initialisation.
+ */
+static void __init reset_ctrl_regs(void *unused)
+{
+	int i;
+
+	if (enable_monitor_mode())
+		return;
+
+	for (i = 0; i < core_num_brps; ++i) {
+		write_wb_reg(ARM_BASE_BCR + i, 0UL);
+		write_wb_reg(ARM_BASE_BVR + i, 0UL);
+	}
+
+	for (i = 0; i < core_num_wrps; ++i) {
+		write_wb_reg(ARM_BASE_WCR + i, 0UL);
+		write_wb_reg(ARM_BASE_WVR + i, 0UL);
+	}
+}
+
+static int __init arch_hw_breakpoint_init(void)
+{
+	int ret = 0;
+	u32 dscr;
+
+	debug_arch = get_debug_arch();
+
+	if (debug_arch > ARM_DEBUG_ARCH_V7_ECP14) {
+		pr_info("debug architecture 0x%x unsupported.\n", debug_arch);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	/* Determine how many BRPs/WRPs are available. */
+	core_num_brps = get_num_brps();
+	core_num_wrps = get_num_wrps();
+
+	pr_info("found %d breakpoint and %d watchpoint registers.\n",
+			core_num_brps, core_num_wrps);
+
+	if (core_has_mismatch_bps())
+		pr_info("1 breakpoint reserved for watchpoint single-step.\n");
+
+	ARM_DBG_READ(c1, 0, dscr);
+	if (dscr & ARM_DSCR_HDBGEN) {
+		pr_warning("halting debug mode enabled. Assuming maximum "
+				"watchpoint size of 4 bytes.");
+	} else {
+		/* Work out the maximum supported watchpoint length. */
+		max_watchpoint_len = get_max_wp_len();
+		pr_info("maximum watchpoint size is %u bytes.\n",
+				max_watchpoint_len);
+
+		/*
+		 * Reset the breakpoint resources. We assume that a halting
+		 * debugger will leave the world in a nice state for us.
+		 */
+		smp_call_function(reset_ctrl_regs, NULL, 1);
+		reset_ctrl_regs(NULL);
+	}
+
+	/* Register debug fault handler. */
+	hook_fault_code(2, hw_breakpoint_pending, SIGTRAP, TRAP_HWBKPT,
+			"watchpoint debug exception");
+	hook_ifault_code(2, hw_breakpoint_pending, SIGTRAP, TRAP_HWBKPT,
+			"breakpoint debug exception");
+
+out:
+	return ret;
+}
+arch_initcall(arch_hw_breakpoint_init);
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+}
+
+/*
+ * Dummy function to register with die_notifier.
+ */
+int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+					unsigned long val, void *data)
+{
+	return NOTIFY_DONE;
+}
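
The alignment fixup in arch_validate_hwbkpt_settings() works because
the control register's len field is a byte-address-select mask over
the naturally aligned word, so an unaligned request can be rewritten
against the aligned address. A minimal sketch with assumed numbers:

static u32 select_bytes(u32 addr, u32 len_mask)
{
	u32 offset = addr & 0x3;	/* byte offset within the word */

	/*
	 * E.g. a 1-byte watchpoint at the hypothetical address
	 * 0x80040002: offset = 2, so ARM_BREAKPOINT_LEN_1 (0b0001)
	 * becomes 0b0100 (watch byte 2) and the register is programmed
	 * with the aligned address 0x80040000.
	 */
	return len_mask << offset;
}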

+ 35 - 33
arch/arm/kernel/module.c

@@ -69,20 +69,31 @@ int module_frob_arch_sections(Elf_Ehdr *hdr,
 {
 #ifdef CONFIG_ARM_UNWIND
 	Elf_Shdr *s, *sechdrs_end = sechdrs + hdr->e_shnum;
+	struct arm_unwind_mapping *maps = mod->arch.map;
 
 	for (s = sechdrs; s < sechdrs_end; s++) {
-		if (strcmp(".ARM.exidx.init.text", secstrings + s->sh_name) == 0)
-			mod->arch.unw_sec_init = s;
-		else if (strcmp(".ARM.exidx.devinit.text", secstrings + s->sh_name) == 0)
-			mod->arch.unw_sec_devinit = s;
-		else if (strcmp(".ARM.exidx", secstrings + s->sh_name) == 0)
-			mod->arch.unw_sec_core = s;
-		else if (strcmp(".init.text", secstrings + s->sh_name) == 0)
-			mod->arch.sec_init_text = s;
-		else if (strcmp(".devinit.text", secstrings + s->sh_name) == 0)
-			mod->arch.sec_devinit_text = s;
-		else if (strcmp(".text", secstrings + s->sh_name) == 0)
-			mod->arch.sec_core_text = s;
+		char const *secname = secstrings + s->sh_name;
+
+		if (strcmp(".ARM.exidx.init.text", secname) == 0)
+			maps[ARM_SEC_INIT].unw_sec = s;
+		else if (strcmp(".ARM.exidx.devinit.text", secname) == 0)
+			maps[ARM_SEC_DEVINIT].unw_sec = s;
+		else if (strcmp(".ARM.exidx", secname) == 0)
+			maps[ARM_SEC_CORE].unw_sec = s;
+		else if (strcmp(".ARM.exidx.exit.text", secname) == 0)
+			maps[ARM_SEC_EXIT].unw_sec = s;
+		else if (strcmp(".ARM.exidx.devexit.text", secname) == 0)
+			maps[ARM_SEC_DEVEXIT].unw_sec = s;
+		else if (strcmp(".init.text", secname) == 0)
+			maps[ARM_SEC_INIT].sec_text = s;
+		else if (strcmp(".devinit.text", secname) == 0)
+			maps[ARM_SEC_DEVINIT].sec_text = s;
+		else if (strcmp(".text", secname) == 0)
+			maps[ARM_SEC_CORE].sec_text = s;
+		else if (strcmp(".exit.text", secname) == 0)
+			maps[ARM_SEC_EXIT].sec_text = s;
+		else if (strcmp(".devexit.text", secname) == 0)
+			maps[ARM_SEC_DEVEXIT].sec_text = s;
 	}
 #endif
 	return 0;
@@ -292,31 +303,22 @@ apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
 #ifdef CONFIG_ARM_UNWIND
 static void register_unwind_tables(struct module *mod)
 {
-	if (mod->arch.unw_sec_init && mod->arch.sec_init_text)
-		mod->arch.unwind_init =
-			unwind_table_add(mod->arch.unw_sec_init->sh_addr,
-					 mod->arch.unw_sec_init->sh_size,
-					 mod->arch.sec_init_text->sh_addr,
-					 mod->arch.sec_init_text->sh_size);
-	if (mod->arch.unw_sec_devinit && mod->arch.sec_devinit_text)
-		mod->arch.unwind_devinit =
-			unwind_table_add(mod->arch.unw_sec_devinit->sh_addr,
-					 mod->arch.unw_sec_devinit->sh_size,
-					 mod->arch.sec_devinit_text->sh_addr,
-					 mod->arch.sec_devinit_text->sh_size);
-	if (mod->arch.unw_sec_core && mod->arch.sec_core_text)
-		mod->arch.unwind_core =
-			unwind_table_add(mod->arch.unw_sec_core->sh_addr,
-					 mod->arch.unw_sec_core->sh_size,
-					 mod->arch.sec_core_text->sh_addr,
-					 mod->arch.sec_core_text->sh_size);
+	int i;
+	for (i = 0; i < ARM_SEC_MAX; ++i) {
+		struct arm_unwind_mapping *map = &mod->arch.map[i];
+		if (map->unw_sec && map->sec_text)
+			map->unwind = unwind_table_add(map->unw_sec->sh_addr,
+						       map->unw_sec->sh_size,
+						       map->sec_text->sh_addr,
+						       map->sec_text->sh_size);
+	}
 }
 
 static void unregister_unwind_tables(struct module *mod)
 {
-	unwind_table_del(mod->arch.unwind_init);
-	unwind_table_del(mod->arch.unwind_devinit);
-	unwind_table_del(mod->arch.unwind_core);
+	int i = ARM_SEC_MAX;
+	while (--i >= 0)
+		unwind_table_del(mod->arch.map[i].unwind);
 }
 #else
 static inline void register_unwind_tables(struct module *mod) { }
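
The per-section bookkeeping above depends on struct arm_unwind_mapping
and the ARM_SEC_* indices added to asm/module.h (changed by this merge
but not shown here); judging from how they are used, the layout is
presumably along these lines:

enum {
	ARM_SEC_INIT, ARM_SEC_DEVINIT, ARM_SEC_CORE,
	ARM_SEC_EXIT, ARM_SEC_DEVEXIT, ARM_SEC_MAX,
};

struct arm_unwind_mapping {
	Elf_Shdr *unw_sec;		/* .ARM.exidx* section header */
	Elf_Shdr *sec_text;		/* matching text section header */
	struct unwind_table *unwind;	/* handle from unwind_table_add() */
};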

+ 6 - 6
arch/arm/kernel/perf_event.c

@@ -319,8 +319,8 @@ validate_event(struct cpu_hw_events *cpuc,
 {
 	struct hw_perf_event fake_event = event->hw;
 
-	if (event->pmu && event->pmu != &pmu)
-		return 0;
+	if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
+		return 1;
 
 	return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
 }
@@ -1041,8 +1041,8 @@ armv6pmu_handle_irq(int irq_num,
 	/*
 	 * Handle the pending perf events.
 	 *
-	 * Note: this call *must* be run with interrupts enabled. For
-	 * platforms that can have the PMU interrupts raised as a PMI, this
+	 * Note: this call *must* be run with interrupts disabled. For
+	 * platforms that can have the PMU interrupts raised as an NMI, this
 	 * will not work.
 	 */
 	perf_event_do_pending();
@@ -2017,8 +2017,8 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 	/*
 	 * Handle the pending perf events.
 	 *
-	 * Note: this call *must* be run with interrupts enabled. For
-	 * platforms that can have the PMU interrupts raised as a PMI, this
+	 * Note: this call *must* be run with interrupts disabled. For
+	 * platforms that can have the PMU interrupts raised as an NMI, this
 	 * will not work.
 	 */
 	perf_event_do_pending();

+ 24 - 0
arch/arm/kernel/process.c

@@ -29,6 +29,7 @@
 #include <linux/utsname.h>
 #include <linux/uaccess.h>
 #include <linux/random.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/cacheflush.h>
 #include <asm/leds.h>
@@ -135,6 +136,25 @@ EXPORT_SYMBOL(pm_power_off);
 void (*arm_pm_restart)(char str, const char *cmd) = arm_machine_restart;
 EXPORT_SYMBOL_GPL(arm_pm_restart);
 
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
+void cpu_idle_wait(void)
+{
+	smp_mb();
+	/* kick all the CPUs so that they exit out of pm_idle */
+	smp_call_function(do_nothing, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
 /*
  * This is our default idle handler.  We need to disable
@@ -317,6 +337,8 @@ void flush_thread(void)
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = current;
 
+	flush_ptrace_hw_breakpoint(tsk);
+
 	memset(thread->used_cp, 0, sizeof(thread->used_cp));
 	memset(&tsk->thread.debug, 0, sizeof(struct debug_info));
 	memset(&thread->fpstate, 0, sizeof(union fp_state));
@@ -345,6 +367,8 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	thread->cpu_context.sp = (unsigned long)childregs;
 	thread->cpu_context.pc = (unsigned long)ret_from_fork;
 
+	clear_ptrace_hw_breakpoint(p);
+
 	if (clone_flags & CLONE_SETTLS)
 		thread->tp_value = regs->ARM_r3;
 
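
A hedged usage sketch for the new cpu_idle_wait(): publish the
replacement pm_idle first, then kick every CPU so none keeps running
the stale handler (my_low_power_idle is illustrative):

extern void (*pm_idle)(void);

static void my_low_power_idle(void)
{
	cpu_do_idle();
}

static void install_idle_handler(void)
{
	pm_idle = my_low_power_idle;	/* publish the new handler */
	cpu_idle_wait();		/* flush CPUs out of the old one */
}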

+ 239 - 0
arch/arm/kernel/ptrace.c

@@ -19,6 +19,8 @@
 #include <linux/init.h>
 #include <linux/signal.h>
 #include <linux/uaccess.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -847,6 +849,232 @@ static int ptrace_setvfpregs(struct task_struct *tsk, void __user *data)
 }
 #endif
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+/*
+ * Convert a virtual register number into an index for a thread_info
+ * breakpoint array. Breakpoints are identified using positive numbers
+ * whilst watchpoints are negative. The registers are laid out as pairs
+ * of (address, control), each pair mapping to a unique hw_breakpoint struct.
+ * Register 0 is reserved for describing resource information.
+ */
+static int ptrace_hbp_num_to_idx(long num)
+{
+	if (num < 0)
+		num = (ARM_MAX_BRP << 1) - num;
+	return (num - 1) >> 1;
+}
+
+/*
+ * Returns the virtual register number for the address of the
+ * breakpoint at index idx.
+ */
+static long ptrace_hbp_idx_to_num(int idx)
+{
+	long mid = ARM_MAX_BRP << 1;
+	long num = (idx << 1) + 1;
+	return num > mid ? mid - num : num;
+}
+
+/*
+ * Handle hitting a HW-breakpoint.
+ */
+static void ptrace_hbptriggered(struct perf_event *bp, int unused,
+				     struct perf_sample_data *data,
+				     struct pt_regs *regs)
+{
+	struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp);
+	long num;
+	int i;
+	siginfo_t info;
+
+	for (i = 0; i < ARM_MAX_HBP_SLOTS; ++i)
+		if (current->thread.debug.hbp[i] == bp)
+			break;
+
+	num = (i == ARM_MAX_HBP_SLOTS) ? 0 : ptrace_hbp_idx_to_num(i);
+
+	info.si_signo	= SIGTRAP;
+	info.si_errno	= (int)num;
+	info.si_code	= TRAP_HWBKPT;
+	info.si_addr	= (void __user *)(bkpt->trigger);
+
+	force_sig_info(SIGTRAP, &info, current);
+}
+
+/*
+ * Set ptrace breakpoint pointers to zero for this task.
+ * This is required in order to prevent child processes from unregistering
+ * breakpoints held by their parent.
+ */
+void clear_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+	memset(tsk->thread.debug.hbp, 0, sizeof(tsk->thread.debug.hbp));
+}
+
+/*
+ * Unregister breakpoints from this task and reset the pointers in
+ * the thread_struct.
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+	int i;
+	struct thread_struct *t = &tsk->thread;
+
+	for (i = 0; i < ARM_MAX_HBP_SLOTS; i++) {
+		if (t->debug.hbp[i]) {
+			unregister_hw_breakpoint(t->debug.hbp[i]);
+			t->debug.hbp[i] = NULL;
+		}
+	}
+}
+
+static u32 ptrace_get_hbp_resource_info(void)
+{
+	u8 num_brps, num_wrps, debug_arch, wp_len;
+	u32 reg = 0;
+
+	num_brps	= hw_breakpoint_slots(TYPE_INST);
+	num_wrps	= hw_breakpoint_slots(TYPE_DATA);
+	debug_arch	= arch_get_debug_arch();
+	wp_len		= arch_get_max_wp_len();
+
+	reg		|= debug_arch;
+	reg		<<= 8;
+	reg		|= wp_len;
+	reg		<<= 8;
+	reg		|= num_wrps;
+	reg		<<= 8;
+	reg		|= num_brps;
+
+	return reg;
+}
+
+static struct perf_event *ptrace_hbp_create(struct task_struct *tsk, int type)
+{
+	struct perf_event_attr attr;
+
+	ptrace_breakpoint_init(&attr);
+
+	/* Initialise fields to sane defaults. */
+	attr.bp_addr	= 0;
+	attr.bp_len	= HW_BREAKPOINT_LEN_4;
+	attr.bp_type	= type;
+	attr.disabled	= 1;
+
+	return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, tsk);
+}
+
+static int ptrace_gethbpregs(struct task_struct *tsk, long num,
+			     unsigned long  __user *data)
+{
+	u32 reg;
+	int idx, ret = 0;
+	struct perf_event *bp;
+	struct arch_hw_breakpoint_ctrl arch_ctrl;
+
+	if (num == 0) {
+		reg = ptrace_get_hbp_resource_info();
+	} else {
+		idx = ptrace_hbp_num_to_idx(num);
+		if (idx < 0 || idx >= ARM_MAX_HBP_SLOTS) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		bp = tsk->thread.debug.hbp[idx];
+		if (!bp) {
+			reg = 0;
+			goto put;
+		}
+
+		arch_ctrl = counter_arch_bp(bp)->ctrl;
+
+		/*
+		 * Fix up the len because we may have adjusted it
+		 * to compensate for an unaligned address.
+		 */
+		while (!(arch_ctrl.len & 0x1))
+			arch_ctrl.len >>= 1;
+
+		if (idx & 0x1)
+			reg = encode_ctrl_reg(arch_ctrl);
+		else
+			reg = bp->attr.bp_addr;
+	}
+
+put:
+	if (put_user(reg, data))
+		ret = -EFAULT;
+
+out:
+	return ret;
+}
+
+static int ptrace_sethbpregs(struct task_struct *tsk, long num,
+			     unsigned long __user *data)
+{
+	int idx, gen_len, gen_type, implied_type, ret = 0;
+	u32 user_val;
+	struct perf_event *bp;
+	struct arch_hw_breakpoint_ctrl ctrl;
+	struct perf_event_attr attr;
+
+	if (num == 0)
+		goto out;
+	else if (num < 0)
+		implied_type = HW_BREAKPOINT_RW;
+	else
+		implied_type = HW_BREAKPOINT_X;
+
+	idx = ptrace_hbp_num_to_idx(num);
+	if (idx < 0 || idx >= ARM_MAX_HBP_SLOTS) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (get_user(user_val, data)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	bp = tsk->thread.debug.hbp[idx];
+	if (!bp) {
+		bp = ptrace_hbp_create(tsk, implied_type);
+		if (IS_ERR(bp)) {
+			ret = PTR_ERR(bp);
+			goto out;
+		}
+		tsk->thread.debug.hbp[idx] = bp;
+	}
+
+	attr = bp->attr;
+
+	if (num & 0x1) {
+		/* Address */
+		attr.bp_addr	= user_val;
+	} else {
+		/* Control */
+		decode_ctrl_reg(user_val, &ctrl);
+		ret = arch_bp_generic_fields(ctrl, &gen_len, &gen_type);
+		if (ret)
+			goto out;
+
+		if ((gen_type & implied_type) != gen_type) {
+				ret = -EINVAL;
+				goto out;
+		}
+
+		attr.bp_len	= gen_len;
+		attr.bp_type	= gen_type;
+		attr.disabled	= !ctrl.enabled;
+	}
+
+	ret = modify_user_hw_breakpoint(bp, &attr);
+out:
+	return ret;
+}
+#endif
+
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
 	int ret;
@@ -916,6 +1144,17 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			break;
 #endif
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+		case PTRACE_GETHBPREGS:
+			ret = ptrace_gethbpregs(child, addr,
+						(unsigned long __user *)data);
+			break;
+		case PTRACE_SETHBPREGS:
+			ret = ptrace_sethbpregs(child, addr,
+						(unsigned long __user *)data);
+			break;
+#endif
+
 		default:
 			ret = ptrace_request(child, request, addr, data);
 			break;
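
A hedged user-space sketch of the virtual register map these handlers
implement: register 0 is read-only resource information, breakpoint n
occupies the pair (2n+1, 2n+2) = (address, control), and watchpoints
use the negated numbers. The register number travels in ptrace's addr
argument, and PTRACE_GETHBPREGS/PTRACE_SETHBPREGS come from the
updated asm/ptrace.h:

#include <sys/types.h>
#include <sys/ptrace.h>

static long set_first_breakpoint(pid_t child, unsigned long addr,
				 unsigned long ctrl)
{
	/* Register 1: address of breakpoint 0. */
	if (ptrace(PTRACE_SETHBPREGS, child, (void *)1, &addr) < 0)
		return -1;

	/* Register 2: control word of breakpoint 0. */
	return ptrace(PTRACE_SETHBPREGS, child, (void *)2, &ctrl);
}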

+ 41 - 5
arch/arm/kernel/setup.c

@@ -36,6 +36,7 @@
 #include <asm/procinfo.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
+#include <asm/smp_plat.h>
 #include <asm/mach-types.h>
 #include <asm/cacheflush.h>
 #include <asm/cachetype.h>
@@ -238,6 +239,35 @@ int cpu_architecture(void)
 	return cpu_arch;
 }
 
+static int cpu_has_aliasing_icache(unsigned int arch)
+{
+	int aliasing_icache;
+	unsigned int id_reg, num_sets, line_size;
+
+	/* arch specifies the register format */
+	switch (arch) {
+	case CPU_ARCH_ARMv7:
+		asm("mcr	p15, 2, %0, c0, c0, 0 @ set CSSELR"
+		    : /* No output operands */
+		    : "r" (1));
+		isb();
+		asm("mrc	p15, 1, %0, c0, c0, 0 @ read CCSIDR"
+		    : "=r" (id_reg));
+		line_size = 4 << ((id_reg & 0x7) + 2);
+		num_sets = ((id_reg >> 13) & 0x7fff) + 1;
+		aliasing_icache = (line_size * num_sets) > PAGE_SIZE;
+		break;
+	case CPU_ARCH_ARMv6:
+		aliasing_icache = read_cpuid_cachetype() & (1 << 11);
+		break;
+	default:
+		/* I-cache aliases will be handled by D-cache aliasing code */
+		aliasing_icache = 0;
+	}
+
+	return aliasing_icache;
+}
+
 static void __init cacheid_init(void)
 {
 	unsigned int cachetype = read_cpuid_cachetype();
@@ -249,10 +279,15 @@ static void __init cacheid_init(void)
 			cacheid = CACHEID_VIPT_NONALIASING;
 			if ((cachetype & (3 << 14)) == 1 << 14)
 				cacheid |= CACHEID_ASID_TAGGED;
-		} else if (cachetype & (1 << 23))
+			else if (cpu_has_aliasing_icache(CPU_ARCH_ARMv7))
+				cacheid |= CACHEID_VIPT_I_ALIASING;
+		} else if (cachetype & (1 << 23)) {
 			cacheid = CACHEID_VIPT_ALIASING;
-		else
+		} else {
 			cacheid = CACHEID_VIPT_NONALIASING;
+			if (cpu_has_aliasing_icache(CPU_ARCH_ARMv6))
+				cacheid |= CACHEID_VIPT_I_ALIASING;
+		}
 	} else {
 		cacheid = CACHEID_VIVT;
 	}
@@ -263,7 +298,7 @@ static void __init cacheid_init(void)
 		cache_is_vipt_nonaliasing() ? "VIPT nonaliasing" : "unknown",
 		cache_is_vivt() ? "VIVT" :
 		icache_is_vivt_asid_tagged() ? "VIVT ASID tagged" :
-		cache_is_vipt_aliasing() ? "VIPT aliasing" :
+		icache_is_vipt_aliasing() ? "VIPT aliasing" :
 		cache_is_vipt_nonaliasing() ? "VIPT nonaliasing" : "unknown");
 }
 
@@ -490,7 +525,7 @@ request_standard_resources(struct meminfo *mi, struct machine_desc *mdesc)
 
 	kernel_code.start   = virt_to_phys(_text);
 	kernel_code.end     = virt_to_phys(_etext - 1);
-	kernel_data.start   = virt_to_phys(_data);
+	kernel_data.start   = virt_to_phys(_sdata);
 	kernel_data.end     = virt_to_phys(_end - 1);
 
 	for (i = 0; i < mi->nr_banks; i++) {
@@ -825,7 +860,8 @@ void __init setup_arch(char **cmdline_p)
 	request_standard_resources(&meminfo, mdesc);
 
 #ifdef CONFIG_SMP
-	smp_init_cpus();
+	if (is_smp())
+		smp_init_cpus();
 #endif
 	reserve_crashkernel();
 
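
As a worked example of the cpu_has_aliasing_icache() test above: a
32 KiB 4-way v7 I-cache with 64-byte lines reports line_size = 64 and
num_sets = 128 via CCSIDR, so line_size * num_sets = 8192 bytes per
way. That exceeds the 4 KiB page size, so virtual aliases of one page
can land in different sets and the I-cache is flagged as VIPT aliasing.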

+ 2 - 1
arch/arm/kernel/smp.c

@@ -567,7 +567,8 @@ void smp_send_stop(void)
 {
 	cpumask_t mask = cpu_online_map;
 	cpu_clear(smp_processor_id(), mask);
-	send_ipi_message(&mask, IPI_CPU_STOP);
+	if (!cpus_empty(mask))
+		send_ipi_message(&mask, IPI_CPU_STOP);
 }
 
 /*

+ 2 - 0
arch/arm/kernel/unwind.c

@@ -146,6 +146,8 @@ static struct unwind_idx *unwind_find_idx(unsigned long addr)
 			    addr < table->end_addr) {
 				idx = search_index(addr, table->start,
 						   table->stop - 1);
+				/* Move-to-front to exploit common traces */
+				list_move(&table->list, &unwind_tables);
 				break;
 			}
 		}
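
The list_move() above is a move-to-front heuristic: a table that just
matched is spliced to the head of unwind_tables so the next lookup,
typically for a nearby PC in the same module, terminates early. A
minimal sketch of the lookup it accelerates (begin_addr is assumed;
end_addr, start and stop appear in the hunk):

struct unwind_table *find_table(unsigned long addr)
{
	struct unwind_table *table;

	list_for_each_entry(table, &unwind_tables, list) {
		if (addr >= table->begin_addr && addr < table->end_addr) {
			/* Hit: move to front for the next lookup. */
			list_move(&table->list, &unwind_tables);
			return table;
		}
	}
	return NULL;
}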

+ 13 - 2
arch/arm/kernel/vmlinux.lds.S

@@ -40,6 +40,11 @@ SECTIONS
 		__tagtable_begin = .;
 			*(.taglist.init)
 		__tagtable_end = .;
+#ifdef CONFIG_SMP_ON_UP
+		__smpalt_begin = .;
+			*(.alt.smp.init)
+		__smpalt_end = .;
+#endif
 
 		INIT_SETUP(16)
 
@@ -104,8 +109,6 @@ SECTIONS
 
 	RO_DATA(PAGE_SIZE)
 
-	_etext = .;			/* End of text and rodata section */
-
 #ifdef CONFIG_ARM_UNWIND
 	/*
 	 * Stack unwinding tables
@@ -123,6 +126,8 @@ SECTIONS
 	}
 #endif
 
+	_etext = .;			/* End of text and rodata section */
+
 #ifdef CONFIG_XIP_KERNEL
 	__data_loc = ALIGN(4);		/* location in binary */
 	. = PAGE_OFFSET + TEXT_OFFSET;
@@ -237,6 +242,12 @@ SECTIONS
 
 	/* Default discards */
 	DISCARDS
+
+#ifndef CONFIG_SMP_ON_UP
+	/DISCARD/ : {
+		*(.alt.smp.init)
+	}
+#endif
 }
 
 /*

+ 12 - 3
arch/arm/mach-at91/at91sam9g45.c

@@ -121,8 +121,8 @@ static struct clk ssc1_clk = {
 	.pmc_mask	= 1 << AT91SAM9G45_ID_SSC1,
 	.type		= CLK_TYPE_PERIPHERAL,
 };
-static struct clk tcb_clk = {
-	.name		= "tcb_clk",
+static struct clk tcb0_clk = {
+	.name		= "tcb0_clk",
 	.pmc_mask	= 1 << AT91SAM9G45_ID_TCB,
 	.type		= CLK_TYPE_PERIPHERAL,
 };
@@ -192,6 +192,14 @@ static struct clk ohci_clk = {
 	.parent		= &uhphs_clk,
 };
 
+/* One additional fake clock for second TC block */
+static struct clk tcb1_clk = {
+	.name		= "tcb1_clk",
+	.pmc_mask	= 0,
+	.type		= CLK_TYPE_PERIPHERAL,
+	.parent		= &tcb0_clk,
+};
+
 static struct clk *periph_clocks[] __initdata = {
 	&pioA_clk,
 	&pioB_clk,
@@ -208,7 +216,7 @@ static struct clk *periph_clocks[] __initdata = {
 	&spi1_clk,
 	&ssc0_clk,
 	&ssc1_clk,
-	&tcb_clk,
+	&tcb0_clk,
 	&pwm_clk,
 	&tsc_clk,
 	&dma_clk,
@@ -221,6 +229,7 @@ static struct clk *periph_clocks[] __initdata = {
 	&mmc1_clk,
 	// irq0
 	&ohci_clk,
+	&tcb1_clk,
 };
 
 /*

+ 5 - 5
arch/arm/mach-at91/at91sam9g45_devices.c

@@ -46,7 +46,7 @@ static struct resource hdmac_resources[] = {
 		.end	= AT91_BASE_SYS + AT91_DMA + SZ_512 - 1,
 		.flags	= IORESOURCE_MEM,
 	},
-	[2] = {
+	[1] = {
 		.start	= AT91SAM9G45_ID_DMA,
 		.end	= AT91SAM9G45_ID_DMA,
 		.flags	= IORESOURCE_IRQ,
@@ -426,7 +426,7 @@ static struct i2c_gpio_platform_data pdata_i2c0 = {
 	.sda_is_open_drain	= 1,
 	.scl_pin		= AT91_PIN_PA21,
 	.scl_is_open_drain	= 1,
-	.udelay			= 2,		/* ~100 kHz */
+	.udelay			= 5,		/* ~100 kHz */
 };
 
 static struct platform_device at91sam9g45_twi0_device = {
@@ -440,7 +440,7 @@ static struct i2c_gpio_platform_data pdata_i2c1 = {
 	.sda_is_open_drain	= 1,
 	.scl_pin		= AT91_PIN_PB11,
 	.scl_is_open_drain	= 1,
-	.udelay			= 2,		/* ~100 kHz */
+	.udelay			= 5,		/* ~100 kHz */
 };
 
 static struct platform_device at91sam9g45_twi1_device = {
@@ -835,9 +835,9 @@ static struct platform_device at91sam9g45_tcb1_device = {
 static void __init at91_add_device_tc(void)
 {
 	/* this chip has one clock and irq for all six TC channels */
-	at91_clock_associate("tcb_clk", &at91sam9g45_tcb0_device.dev, "t0_clk");
+	at91_clock_associate("tcb0_clk", &at91sam9g45_tcb0_device.dev, "t0_clk");
 	platform_device_register(&at91sam9g45_tcb0_device);
-	at91_clock_associate("tcb_clk", &at91sam9g45_tcb1_device.dev, "t0_clk");
+	at91_clock_associate("tcb1_clk", &at91sam9g45_tcb1_device.dev, "t0_clk");
 	platform_device_register(&at91sam9g45_tcb1_device);
 }
 #else

+ 19 - 12
arch/arm/mach-at91/board-sam9261ek.c

@@ -93,11 +93,12 @@ static struct resource dm9000_resource[] = {
 		.start	= AT91_PIN_PC11,
 		.end	= AT91_PIN_PC11,
 		.flags	= IORESOURCE_IRQ
+			| IORESOURCE_IRQ_LOWEDGE | IORESOURCE_IRQ_HIGHEDGE,
 	}
 };
 
 static struct dm9000_plat_data dm9000_platdata = {
-	.flags		= DM9000_PLATF_16BITONLY,
+	.flags		= DM9000_PLATF_16BITONLY | DM9000_PLATF_NO_EEPROM,
 };
 
 static struct platform_device dm9000_device = {
@@ -167,17 +168,6 @@ static struct at91_udc_data __initdata ek_udc_data = {
 };
 
 
-/*
- * MCI (SD/MMC)
- */
-static struct at91_mmc_data __initdata ek_mmc_data = {
-	.wire4		= 1,
-//	.det_pin	= ... not connected
-//	.wp_pin		= ... not connected
-//	.vcc_pin	= ... not connected
-};
-
-
 /*
  * NAND flash
  */
@@ -246,6 +236,10 @@ static void __init ek_add_device_nand(void)
 	at91_add_device_nand(&ek_nand_data);
 }
 
+/*
+ * SPI related devices
+ */
+#if defined(CONFIG_SPI_ATMEL) || defined(CONFIG_SPI_ATMEL_MODULE)
 
 /*
  * ADS7846 Touchscreen
@@ -356,6 +350,19 @@ static struct spi_board_info ek_spi_devices[] = {
 #endif
 };
 
+#else /* CONFIG_SPI_ATMEL_* */
+/* spi0 and mmc/sd share the same PIO pins: cannot be used at the same time */
+
+/*
+ * MCI (SD/MMC)
+ * det_pin, wp_pin and vcc_pin are not connected
+ */
+static struct at91_mmc_data __initdata ek_mmc_data = {
+	.wire4		= 1,
+};
+
+#endif /* CONFIG_SPI_ATMEL_* */
+
 
 /*
  * LCD Controller

+ 2 - 1
arch/arm/mach-at91/clock.c

@@ -501,7 +501,8 @@ postcore_initcall(at91_clk_debugfs_init);
 int __init clk_register(struct clk *clk)
 {
 	if (clk_is_peripheral(clk)) {
-		clk->parent = &mck;
+		if (!clk->parent)
+			clk->parent = &mck;
 		clk->mode = pmc_periph_mode;
 		list_add_tail(&clk->node, &clocks);
 	}

+ 1 - 2
arch/arm/mach-davinci/dm355.c

@@ -769,8 +769,7 @@ static struct map_desc dm355_io_desc[] = {
 		.virtual	= SRAM_VIRT,
 		.pfn		= __phys_to_pfn(0x00010000),
 		.length		= SZ_32K,
-		/* MT_MEMORY_NONCACHED requires supersection alignment */
-		.type		= MT_DEVICE,
+		.type		= MT_MEMORY_NONCACHED,
 	},
 };
 

+ 1 - 2
arch/arm/mach-davinci/dm365.c

@@ -969,8 +969,7 @@ static struct map_desc dm365_io_desc[] = {
 		.virtual	= SRAM_VIRT,
 		.pfn		= __phys_to_pfn(0x00010000),
 		.length		= SZ_32K,
-		/* MT_MEMORY_NONCACHED requires supersection alignment */
-		.type		= MT_DEVICE,
+		.type		= MT_MEMORY_NONCACHED,
 	},
 };
 

+ 1 - 2
arch/arm/mach-davinci/dm644x.c

@@ -653,8 +653,7 @@ static struct map_desc dm644x_io_desc[] = {
 		.virtual	= SRAM_VIRT,
 		.pfn		= __phys_to_pfn(0x00008000),
 		.length		= SZ_16K,
-		/* MT_MEMORY_NONCACHED requires supersection alignment */
-		.type		= MT_DEVICE,
+		.type		= MT_MEMORY_NONCACHED,
 	},
 };
 

+ 1 - 2
arch/arm/mach-davinci/dm646x.c

@@ -737,8 +737,7 @@ static struct map_desc dm646x_io_desc[] = {
 		.virtual	= SRAM_VIRT,
 		.pfn		= __phys_to_pfn(0x00010000),
 		.length		= SZ_32K,
-		/* MT_MEMORY_NONCACHED requires supersection alignment */
-		.type		= MT_DEVICE,
+		.type		= MT_MEMORY_NONCACHED,
 	},
 };
 

+ 3 - 3
arch/arm/mach-dove/include/mach/io.h

@@ -13,8 +13,8 @@
 
 #define IO_SPACE_LIMIT		0xffffffff
 
-#define __io(a)  ((void __iomem *)(((a) - DOVE_PCIE0_IO_PHYS_BASE) +\
-				   DOVE_PCIE0_IO_VIRT_BASE))
-#define __mem_pci(a)		(a)
+#define __io(a)  	((void __iomem *)(((a) - DOVE_PCIE0_IO_BUS_BASE) + \
+						 DOVE_PCIE0_IO_VIRT_BASE))
+#define __mem_pci(a)	(a)
 
 #endif

+ 1 - 1
arch/arm/mach-ep93xx/clock.c

@@ -560,4 +560,4 @@ static int __init ep93xx_clock_init(void)
 	clkdev_add_table(clocks, ARRAY_SIZE(clocks));
 	return 0;
 }
-arch_initcall(ep93xx_clock_init);
+postcore_initcall(ep93xx_clock_init);

+ 8 - 0
arch/arm/mach-ixp4xx/common-pci.c

@@ -503,6 +503,14 @@ struct pci_bus * __devinit ixp4xx_scan_bus(int nr, struct pci_sys_data *sys)
 	return pci_scan_bus(sys->busnr, &ixp4xx_ops, sys);
 }
 
+int dma_set_coherent_mask(struct device *dev, u64 mask)
+{
+	if (mask >= SZ_64M - 1)
+		return 0;
+
+	return -EIO;
+}
+
 EXPORT_SYMBOL(ixp4xx_pci_read);
 EXPORT_SYMBOL(ixp4xx_pci_write);
 
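
With ARCH_HAS_DMA_SET_COHERENT_MASK defined (next hunk), this override
accepts any coherent mask that covers the IXP4xx's DMA-capable first
64 MiB and rejects narrower ones with -EIO. A hedged usage sketch from
a driver's probe path, where pdev is illustrative:

	if (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)))
		dev_warn(&pdev->dev, "unusable coherent DMA mask\n");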

+ 2 - 0
arch/arm/mach-ixp4xx/include/mach/hardware.h

@@ -26,6 +26,8 @@
 #define PCIBIOS_MAX_MEM		0x4BFFFFFF
 #endif
 
+#define ARCH_HAS_DMA_SET_COHERENT_MASK
+
 #define pcibios_assign_all_busses()	1
 
 /* Register locations and bits */

+ 1 - 1
arch/arm/mach-kirkwood/include/mach/kirkwood.h

@@ -38,7 +38,7 @@
 
 #define KIRKWOOD_PCIE1_IO_PHYS_BASE	0xf3000000
 #define KIRKWOOD_PCIE1_IO_VIRT_BASE	0xfef00000
-#define KIRKWOOD_PCIE1_IO_BUS_BASE	0x00000000
+#define KIRKWOOD_PCIE1_IO_BUS_BASE	0x00100000
 #define KIRKWOOD_PCIE1_IO_SIZE		SZ_1M
 
 #define KIRKWOOD_PCIE_IO_PHYS_BASE	0xf2000000

+ 2 - 2
arch/arm/mach-kirkwood/pcie.c

@@ -117,7 +117,7 @@ static void __init pcie0_ioresources_init(struct pcie_port *pp)
 	 * IORESOURCE_IO
 	 */
 	pp->res[0].name = "PCIe 0 I/O Space";
-	pp->res[0].start = KIRKWOOD_PCIE_IO_PHYS_BASE;
+	pp->res[0].start = KIRKWOOD_PCIE_IO_BUS_BASE;
 	pp->res[0].end = pp->res[0].start + KIRKWOOD_PCIE_IO_SIZE - 1;
 	pp->res[0].flags = IORESOURCE_IO;
 
@@ -139,7 +139,7 @@ static void __init pcie1_ioresources_init(struct pcie_port *pp)
 	 * IORESOURCE_IO
 	 */
 	pp->res[0].name = "PCIe 1 I/O Space";
-	pp->res[0].start = KIRKWOOD_PCIE1_IO_PHYS_BASE;
+	pp->res[0].start = KIRKWOOD_PCIE1_IO_BUS_BASE;
 	pp->res[0].end = pp->res[0].start + KIRKWOOD_PCIE1_IO_SIZE - 1;
 	pp->res[0].flags = IORESOURCE_IO;
 

+ 6 - 1
arch/arm/mach-mmp/include/mach/system.h

@@ -9,6 +9,8 @@
 #ifndef __ASM_MACH_SYSTEM_H
 #define __ASM_MACH_SYSTEM_H
 
+#include <mach/cputype.h>
+
 static inline void arch_idle(void)
 {
 	cpu_do_idle();
@@ -16,6 +18,9 @@ static inline void arch_idle(void)
 
 static inline void arch_reset(char mode, const char *cmd)
 {
-	cpu_reset(0);
+	if (cpu_is_pxa168())
+		cpu_reset(0xffff0000);
+	else
+		cpu_reset(0);
 }
 #endif /* __ASM_MACH_SYSTEM_H */

+ 1 - 1
arch/arm/mach-mx25/eukrea_mbimxsd-baseboard.c

@@ -215,7 +215,7 @@ struct imx_ssi_platform_data eukrea_mbimxsd_ssi_pdata = {
  * Add platform devices present on this baseboard and init
  * them from CPU side as far as required to use them later on
  */
-void __init eukrea_mbimxsd_baseboard_init(void)
+void __init eukrea_mbimxsd25_baseboard_init(void)
 {
 	if (mxc_iomux_v3_setup_multiple_pads(eukrea_mbimxsd_pads,
 			ARRAY_SIZE(eukrea_mbimxsd_pads)))

+ 2 - 2
arch/arm/mach-mx25/mach-cpuimx25.c

@@ -147,8 +147,8 @@ static void __init eukrea_cpuimx25_init(void)
 	if (!otg_mode_host)
 		mxc_register_device(&otg_udc_device, &otg_device_pdata);
 
-#ifdef CONFIG_MACH_EUKREA_MBIMXSD_BASEBOARD
-	eukrea_mbimxsd_baseboard_init();
+#ifdef CONFIG_MACH_EUKREA_MBIMXSD25_BASEBOARD
+	eukrea_mbimxsd25_baseboard_init();
 #endif
 }
 

+ 55 - 22
arch/arm/mach-mx3/clock-imx35.c

@@ -155,7 +155,7 @@ static unsigned long get_rate_arm(void)
 
 	aad = &clk_consumer[(pdr0 >> 16) & 0xf];
 	if (aad->sel)
-		fref = fref * 2 / 3;
+		fref = fref * 3 / 4;
 
 	return fref / aad->arm;
 }
@@ -164,7 +164,7 @@ static unsigned long get_rate_ahb(struct clk *clk)
 {
 	unsigned long pdr0 = __raw_readl(CCM_BASE + CCM_PDR0);
 	struct arm_ahb_div *aad;
-	unsigned long fref = get_rate_mpll();
+	unsigned long fref = get_rate_arm();
 
 	aad = &clk_consumer[(pdr0 >> 16) & 0xf];
 
@@ -176,16 +176,11 @@ static unsigned long get_rate_ipg(struct clk *clk)
 	return get_rate_ahb(NULL) >> 1;
 }
 
-static unsigned long get_3_3_div(unsigned long in)
-{
-	return (((in >> 3) & 0x7) + 1) * ((in & 0x7) + 1);
-}
-
 static unsigned long get_rate_uart(struct clk *clk)
 {
 	unsigned long pdr3 = __raw_readl(CCM_BASE + CCM_PDR3);
 	unsigned long pdr4 = __raw_readl(CCM_BASE + CCM_PDR4);
-	unsigned long div = get_3_3_div(pdr4 >> 10);
+	unsigned long div = ((pdr4 >> 10) & 0x3f) + 1;
 
 	if (pdr3 & (1 << 14))
 		return get_rate_arm() / div;
@@ -216,7 +211,7 @@ static unsigned long get_rate_sdhc(struct clk *clk)
 		break;
 	}
 
-	return rate / get_3_3_div(div);
+	return rate / (div + 1);
 }
 
 static unsigned long get_rate_mshc(struct clk *clk)
@@ -270,7 +265,7 @@ static unsigned long get_rate_csi(struct clk *clk)
 	else
 		rate = get_rate_ppll();
 
-	return rate / get_3_3_div((pdr2 >> 16) & 0x3f);
+	return rate / (((pdr2 >> 16) & 0x3f) + 1);
 }
 
 static unsigned long get_rate_otg(struct clk *clk)
@@ -283,25 +278,51 @@ static unsigned long get_rate_otg(struct clk *clk)
 	else
 		rate = get_rate_ppll();
 
-	return rate / get_3_3_div((pdr4 >> 22) & 0x3f);
+	return rate / (((pdr4 >> 22) & 0x3f) + 1);
 }
 
 static unsigned long get_rate_ipg_per(struct clk *clk)
 {
 	unsigned long pdr0 = __raw_readl(CCM_BASE + CCM_PDR0);
 	unsigned long pdr4 = __raw_readl(CCM_BASE + CCM_PDR4);
-	unsigned long div1, div2;
+	unsigned long div;
 
 	if (pdr0 & (1 << 26)) {
-		div1 = (pdr4 >> 19) & 0x7;
-		div2 = (pdr4 >> 16) & 0x7;
-		return get_rate_arm() / ((div1 + 1) * (div2 + 1));
+		div = (pdr4 >> 16) & 0x3f;
+		return get_rate_arm() / (div + 1);
 	} else {
-		div1 = (pdr0 >> 12) & 0x7;
-		return get_rate_ahb(NULL) / div1;
+		div = (pdr0 >> 12) & 0x7;
+		return get_rate_ahb(NULL) / (div + 1);
 	}
 }
 
+static unsigned long get_rate_hsp(struct clk *clk)
+{
+	unsigned long hsp_podf = (__raw_readl(CCM_BASE + CCM_PDR0) >> 20) & 0x03;
+	unsigned long fref = get_rate_mpll();
+
+	if (fref > 400 * 1000 * 1000) {
+		switch (hsp_podf) {
+		case 0:
+			return fref >> 2;
+		case 1:
+			return fref >> 3;
+		case 2:
+			return fref / 3;
+		}
+	} else {
+		switch (hsp_podf) {
+		case 0:
+		case 2:
+			return fref / 3;
+		case 1:
+			return fref / 6;
+		}
+	}
+
+	return 0;
+}
+
 static int clk_cgr_enable(struct clk *clk)
 {
 	u32 reg;
@@ -359,7 +380,7 @@ DEFINE_CLOCK(i2c1_clk,   0, CCM_CGR1, 10, get_rate_ipg_per, NULL);
 DEFINE_CLOCK(i2c2_clk,   1, CCM_CGR1, 12, get_rate_ipg_per, NULL);
 DEFINE_CLOCK(i2c3_clk,   2, CCM_CGR1, 14, get_rate_ipg_per, NULL);
 DEFINE_CLOCK(iomuxc_clk, 0, CCM_CGR1, 16, NULL, NULL);
-DEFINE_CLOCK(ipu_clk,    0, CCM_CGR1, 18, get_rate_ahb, NULL);
+DEFINE_CLOCK(ipu_clk,    0, CCM_CGR1, 18, get_rate_hsp, NULL);
 DEFINE_CLOCK(kpp_clk,    0, CCM_CGR1, 20, get_rate_ipg, NULL);
 DEFINE_CLOCK(mlb_clk,    0, CCM_CGR1, 22, get_rate_ahb, NULL);
 DEFINE_CLOCK(mshc_clk,   0, CCM_CGR1, 24, get_rate_mshc, NULL);
@@ -485,10 +506,10 @@ static struct clk_lookup lookups[] = {
 
 int __init mx35_clocks_init()
 {
-	unsigned int ll = 0;
+	unsigned int cgr2 = 3 << 26, cgr3 = 0;
 
 #if defined(CONFIG_DEBUG_LL) && !defined(CONFIG_DEBUG_ICEDCC)
-	ll = (3 << 16);
+	cgr2 |= 3 << 16;
 #endif
 
 	clkdev_add_table(lookups, ARRAY_SIZE(lookups));
@@ -499,8 +520,20 @@ int __init mx35_clocks_init()
 	__raw_writel((3 << 18), CCM_BASE + CCM_CGR0);
 	__raw_writel((3 << 2) | (3 << 4) | (3 << 6) | (3 << 8) | (3 << 16),
 			CCM_BASE + CCM_CGR1);
-	__raw_writel((3 << 26) | ll, CCM_BASE + CCM_CGR2);
-	__raw_writel(0, CCM_BASE + CCM_CGR3);
+
+	/*
+	 * Check if we came up in internal boot mode. If yes, we need some
+	 * extra clocks turned on, otherwise the MX35 boot ROM code will
+	 * hang after a watchdog reset.
+	 */
+	if (!(__raw_readl(CCM_BASE + CCM_RCSR) & (3 << 10))) {
+		/* Additionally turn on UART1, SCC, and IIM clocks */
+		cgr2 |= 3 << 16 | 3 << 4;
+		cgr3 |= 3 << 2;
+	}
+
+	__raw_writel(cgr2, CCM_BASE + CCM_CGR2);
+	__raw_writel(cgr3, CCM_BASE + CCM_CGR3);
 
 	mxc_timer_init(&gpt_clk,
 			MX35_IO_ADDRESS(MX35_GPT1_BASE_ADDR), MX35_INT_GPT);
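
A worked example under the corrected dividers: with a 532 MHz MPLL and
the consumer table's prescaler selected, get_rate_arm() now yields
532 * 3 / 4 = 399 MHz rather than the old 2/3 result, and because
fref > 400 MHz the new get_rate_hsp() maps hsp_podf = 0 to
532 / 4 = 133 MHz for the IPU clock.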

Some files were not shown because too many files changed in this diff