
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next-2.6

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next-2.6: (30 commits)
  sparc64: Update defconfig.
  sparc: Wire up sys_rt_tgsigqueueinfo().
  openprom: Squelch useless GCC warning.
  sparc: replace uses of CPU_MASK_ALL_PTR
  sparc64: Add proper dynamic ftrace support.
  sparc: Simplify code using is_power_of_2() routine.
  sparc: move of_device common code to of_device_common
  sparc: remove dma-mapping_{32|64}.h
  sparc: use dma_map_page instead of dma_map_single
  sparc: add sync_single_for_device and sync_sg_for_device to struct dma_ops
  sparc: move the duplication in dma-mapping_{32|64}.h to dma-mapping.h
  p9100: use standard fields for framebuffer physical address and length
  leo: use standard fields for framebuffer physical address and length
  cg6: use standard fields for framebuffer physical address and length
  cg3: use standard fields for framebuffer physical address and length
  cg14: use standard fields for framebuffer physical address and length
  bw2: use standard fields for framebuffer physical address and length
  sparc64: fix and optimize irq distribution
  sparc64: Use new dynamic per-cpu allocator.
  sparc64: Only allocate per-cpu areas for possible cpus.
  ...
Linus Torvalds, 16 years ago (commit 98523d4630)
45 changed files with 1751 additions and 1350 deletions
  1. arch/sparc/Kconfig (+5 -0)
  2. arch/sparc/configs/sparc64_defconfig (+45 -18)
  3. arch/sparc/include/asm/cpudata_64.h (+1 -196)
  4. arch/sparc/include/asm/dma-mapping.h (+163 -5)
  5. arch/sparc/include/asm/dma-mapping_32.h (+0 -60)
  6. arch/sparc/include/asm/dma-mapping_64.h (+0 -154)
  7. arch/sparc/include/asm/ftrace.h (+11 -0)
  8. arch/sparc/include/asm/mdesc.h (+2 -1)
  9. arch/sparc/include/asm/percpu_64.h (+2 -6)
  10. arch/sparc/include/asm/prom.h (+2 -0)
  11. arch/sparc/include/asm/trap_block.h (+207 -0)
  12. arch/sparc/include/asm/unistd.h (+2 -1)
  13. arch/sparc/kernel/Makefile (+2 -0)
  14. arch/sparc/kernel/cpumap.c (+431 -0)
  15. arch/sparc/kernel/cpumap.h (+16 -0)
  16. arch/sparc/kernel/dma.c (+39 -88)
  17. arch/sparc/kernel/ds.c (+2 -1)
  18. arch/sparc/kernel/ftrace.c (+32 -15)
  19. arch/sparc/kernel/head_64.S (+0 -22)
  20. arch/sparc/kernel/iommu.c (+8 -7)
  21. arch/sparc/kernel/irq_64.c (+4 -25)
  22. arch/sparc/kernel/mdesc.c (+88 -61)
  23. arch/sparc/kernel/of_device_32.c (+3 -192)
  24. arch/sparc/kernel/of_device_64.c (+2 -186)
  25. arch/sparc/kernel/of_device_common.c (+174 -0)
  26. arch/sparc/kernel/of_device_common.h (+36 -0)
  27. arch/sparc/kernel/pci_sun4v.c (+8 -7)
  28. arch/sparc/kernel/prom.h (+0 -1)
  29. arch/sparc/kernel/prom_64.c (+123 -109)
  30. arch/sparc/kernel/prom_common.c (+0 -2)
  31. arch/sparc/kernel/smp_64.c (+167 -29)
  32. arch/sparc/kernel/systbls_32.S (+3 -1)
  33. arch/sparc/kernel/systbls_64.S (+4 -2)
  34. arch/sparc/kernel/traps_64.c (+92 -78)
  35. arch/sparc/mm/init_32.c (+1 -0)
  36. arch/sparc/mm/init_64.c (+4 -12)
  37. arch/sparc/mm/srmmu.c (+2 -1)
  38. drivers/sbus/char/openprom.c (+1 -1)
  39. drivers/video/bw2.c (+8 -12)
  40. drivers/video/cg14.c (+9 -10)
  41. drivers/video/cg3.c (+8 -12)
  42. drivers/video/cg6.c (+11 -14)
  43. drivers/video/leo.c (+5 -9)
  44. drivers/video/p9100.c (+8 -12)
  45. scripts/recordmcount.pl (+20 -0)

+ 5 - 0
arch/sparc/Kconfig

@@ -37,6 +37,8 @@ config SPARC64
 	select HAVE_KPROBES
 	select HAVE_LMB
 	select HAVE_SYSCALL_WRAPPERS
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_FTRACE_MCOUNT_RECORD
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select RTC_DRV_CMOS
 	select RTC_DRV_BQ4802
@@ -93,6 +95,9 @@ config AUDIT_ARCH
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y if SPARC64
 
+config HAVE_DYNAMIC_PER_CPU_AREA
+	def_bool y if SPARC64
+
 config GENERIC_HARDIRQS_NO__DO_IRQ
 	bool
 	def_bool y if SPARC64

+ 45 - 18
arch/sparc/configs/sparc64_defconfig

@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.30-rc2
-# Fri Apr 17 02:03:07 2009
+# Linux kernel version: 2.6.30
+# Tue Jun 16 04:59:36 2009
 #
 CONFIG_64BIT=y
 CONFIG_SPARC=y
@@ -19,6 +19,7 @@ CONFIG_LOCKDEP_SUPPORT=y
 CONFIG_HAVE_LATENCYTOP_SUPPORT=y
 CONFIG_AUDIT_ARCH=y
 CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y
 CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_MMU=y
 CONFIG_ARCH_NO_VIRT_TO_BUS=y
@@ -82,7 +83,6 @@ CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
-# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
@@ -95,16 +95,21 @@ CONFIG_TIMERFD=y
 CONFIG_EVENTFD=y
 CONFIG_SHMEM=y
 CONFIG_AIO=y
+
+#
+# Performance Counters
+#
 CONFIG_VM_EVENT_COUNTERS=y
 CONFIG_PCI_QUIRKS=y
 CONFIG_SLUB_DEBUG=y
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_COMPAT_BRK is not set
 # CONFIG_SLAB is not set
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 CONFIG_PROFILING=y
 CONFIG_TRACEPOINTS=y
-# CONFIG_MARKERS is not set
+CONFIG_MARKERS=y
 CONFIG_OPROFILE=m
 CONFIG_HAVE_OPROFILE=y
 CONFIG_KPROBES=y
@@ -202,6 +207,7 @@ CONFIG_NR_QUICK=1
 CONFIG_UNEVICTABLE_LRU=y
 CONFIG_HAVE_MLOCK=y
 CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+CONFIG_DEFAULT_MMAP_MIN_ADDR=8192
 CONFIG_SCHED_SMT=y
 CONFIG_SCHED_MC=y
 # CONFIG_PREEMPT_NONE is not set
@@ -321,6 +327,7 @@ CONFIG_VLAN_8021Q=m
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
 # CONFIG_PHONET is not set
+# CONFIG_IEEE802154 is not set
 # CONFIG_NET_SCHED is not set
 # CONFIG_DCB is not set
 
@@ -340,7 +347,11 @@ CONFIG_WIRELESS=y
 CONFIG_WIRELESS_OLD_REGULATORY=y
 # CONFIG_WIRELESS_EXT is not set
 # CONFIG_LIB80211 is not set
-# CONFIG_MAC80211 is not set
+
+#
+# CFG80211 needs to be enabled for MAC80211
+#
+CONFIG_MAC80211_DEFAULT_PS_VALUE=0
 # CONFIG_WIMAX is not set
 # CONFIG_RFKILL is not set
 # CONFIG_NET_9P is not set
@@ -364,6 +375,7 @@ CONFIG_EXTRA_FIRMWARE=""
 CONFIG_CONNECTOR=m
 # CONFIG_MTD is not set
 CONFIG_OF_DEVICE=y
+CONFIG_OF_MDIO=m
 # CONFIG_PARPORT is not set
 CONFIG_BLK_DEV=y
 # CONFIG_BLK_DEV_FD is not set
@@ -399,6 +411,7 @@ CONFIG_MISC_DEVICES=y
 # CONFIG_EEPROM_AT24 is not set
 # CONFIG_EEPROM_LEGACY is not set
 # CONFIG_EEPROM_93CX6 is not set
+# CONFIG_CB710_CORE is not set
 CONFIG_HAVE_IDE=y
 CONFIG_IDE=y
 
@@ -477,10 +490,6 @@ CONFIG_BLK_DEV_SR=m
 CONFIG_BLK_DEV_SR_VENDOR=y
 CONFIG_CHR_DEV_SG=m
 # CONFIG_CHR_DEV_SCH is not set
-
-#
-# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
-#
 CONFIG_SCSI_MULTI_LUN=y
 CONFIG_SCSI_CONSTANTS=y
 # CONFIG_SCSI_LOGGING is not set
@@ -499,6 +508,7 @@ CONFIG_SCSI_FC_ATTRS=y
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_SCSI_CXGB3_ISCSI is not set
+# CONFIG_SCSI_BNX2_ISCSI is not set
 # CONFIG_BLK_DEV_3W_XXXX_RAID is not set
 # CONFIG_SCSI_3W_9XXX is not set
 # CONFIG_SCSI_ACARD is not set
@@ -507,6 +517,7 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_AIC7XXX_OLD is not set
 # CONFIG_SCSI_AIC79XX is not set
 # CONFIG_SCSI_AIC94XX is not set
+# CONFIG_SCSI_MVSAS is not set
 # CONFIG_SCSI_ARCMSR is not set
 # CONFIG_MEGARAID_NEWGEN is not set
 # CONFIG_MEGARAID_LEGACY is not set
@@ -521,7 +532,6 @@ CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_SCSI_IPS is not set
 # CONFIG_SCSI_INITIO is not set
 # CONFIG_SCSI_INIA100 is not set
-# CONFIG_SCSI_MVSAS is not set
 # CONFIG_SCSI_STEX is not set
 # CONFIG_SCSI_SYM53C8XX_2 is not set
 # CONFIG_SCSI_QLOGIC_1280 is not set
@@ -569,7 +579,6 @@ CONFIG_DM_ZERO=m
 # CONFIG_IEEE1394 is not set
 # CONFIG_I2O is not set
 CONFIG_NETDEVICES=y
-CONFIG_COMPAT_NET_DEV_OPS=y
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
 # CONFIG_MACVLAN is not set
@@ -635,6 +644,7 @@ CONFIG_NET_PCI=y
 # CONFIG_SMSC9420 is not set
 # CONFIG_SUNDANCE is not set
 # CONFIG_TLAN is not set
+# CONFIG_KS8842 is not set
 # CONFIG_VIA_RHINE is not set
 # CONFIG_SC92031 is not set
 # CONFIG_ATL2 is not set
@@ -1127,6 +1137,11 @@ CONFIG_SND_VERBOSE_PROCFS=y
 # CONFIG_SND_VERBOSE_PRINTK is not set
 # CONFIG_SND_DEBUG is not set
 CONFIG_SND_VMASTER=y
+CONFIG_SND_RAWMIDI_SEQ=m
+# CONFIG_SND_OPL3_LIB_SEQ is not set
+# CONFIG_SND_OPL4_LIB_SEQ is not set
+# CONFIG_SND_SBAWE_SEQ is not set
+# CONFIG_SND_EMU10K1_SEQ is not set
 CONFIG_SND_MPU401_UART=m
 CONFIG_SND_AC97_CODEC=m
 CONFIG_SND_DRIVERS=y
@@ -1153,6 +1168,7 @@ CONFIG_SND_ALI5451=m
 # CONFIG_SND_OXYGEN is not set
 # CONFIG_SND_CS4281 is not set
 # CONFIG_SND_CS46XX is not set
+# CONFIG_SND_CTXFI is not set
 # CONFIG_SND_DARLA20 is not set
 # CONFIG_SND_GINA20 is not set
 # CONFIG_SND_LAYLA20 is not set
@@ -1183,6 +1199,7 @@ CONFIG_SND_ALI5451=m
 # CONFIG_SND_INTEL8X0 is not set
 # CONFIG_SND_INTEL8X0M is not set
 # CONFIG_SND_KORG1212 is not set
+# CONFIG_SND_LX6464ES is not set
 # CONFIG_SND_MAESTRO3 is not set
 # CONFIG_SND_MIXART is not set
 # CONFIG_SND_NM256 is not set
@@ -1229,6 +1246,7 @@ CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
 # CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
 CONFIG_HID_KYE=y
@@ -1246,9 +1264,14 @@ CONFIG_HID_PETALYNX=y
 CONFIG_HID_SAMSUNG=y
 CONFIG_HID_SONY=y
 CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
 # CONFIG_GREENASIA_FF is not set
+CONFIG_HID_SMARTJOYPLUS=y
+# CONFIG_SMARTJOYPLUS_FF is not set
 CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
 # CONFIG_THRUSTMASTER_FF is not set
+CONFIG_HID_ZEROPLUS=y
 # CONFIG_ZEROPLUS_FF is not set
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
@@ -1462,6 +1485,7 @@ CONFIG_FILE_LOCKING=y
 # CONFIG_GFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
 # CONFIG_BTRFS_FS is not set
+CONFIG_FSNOTIFY=y
 CONFIG_DNOTIFY=y
 CONFIG_INOTIFY=y
 CONFIG_INOTIFY_USER=y
@@ -1636,25 +1660,28 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y
 # CONFIG_DEBUG_PAGEALLOC is not set
 CONFIG_NOP_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_RING_BUFFER=y
+CONFIG_EVENT_TRACING=y
+CONFIG_CONTEXT_SWITCH_TRACER=y
 CONFIG_TRACING=y
+CONFIG_GENERIC_TRACER=y
 CONFIG_TRACING_SUPPORT=y
-
-#
-# Tracers
-#
+CONFIG_FTRACE=y
 # CONFIG_FUNCTION_TRACER is not set
 # CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
-# CONFIG_CONTEXT_SWITCH_TRACER is not set
-# CONFIG_EVENT_TRACER is not set
 # CONFIG_BOOT_TRACER is not set
-# CONFIG_TRACE_BRANCH_PROFILING is not set
+CONFIG_BRANCH_PROFILE_NONE=y
+# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set
+# CONFIG_PROFILE_ALL_BRANCHES is not set
 # CONFIG_STACK_TRACER is not set
 # CONFIG_KMEMTRACE is not set
 # CONFIG_WORKQUEUE_TRACER is not set
 CONFIG_BLK_DEV_IO_TRACE=y
 # CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_RING_BUFFER_BENCHMARK is not set
 # CONFIG_DYNAMIC_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y

+ 1 - 196
arch/sparc/include/asm/cpudata_64.h

@@ -6,9 +6,6 @@
 #ifndef _SPARC64_CPUDATA_H
 #define _SPARC64_CPUDATA_H
 
-#include <asm/hypervisor.h>
-#include <asm/asi.h>
-
 #ifndef __ASSEMBLY__
 
 #include <linux/percpu.h>
@@ -38,202 +35,10 @@ DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
 #define cpu_data(__cpu)		per_cpu(__cpu_data, (__cpu))
 #define local_cpu_data()	__get_cpu_var(__cpu_data)
 
-/* Trap handling code needs to get at a few critical values upon
- * trap entry and to process TSB misses.  These cannot be in the
- * per_cpu() area as we really need to lock them into the TLB and
- * thus make them part of the main kernel image.  As a result we
- * try to make this as small as possible.
- *
- * This is padded out and aligned to 64-bytes to avoid false sharing
- * on SMP.
- */
-
-/* If you modify the size of this structure, please update
- * TRAP_BLOCK_SZ_SHIFT below.
- */
-struct thread_info;
-struct trap_per_cpu {
-/* D-cache line 1: Basic thread information, cpu and device mondo queues */
-	struct thread_info	*thread;
-	unsigned long		pgd_paddr;
-	unsigned long		cpu_mondo_pa;
-	unsigned long		dev_mondo_pa;
-
-/* D-cache line 2: Error Mondo Queue and kernel buffer pointers */
-	unsigned long		resum_mondo_pa;
-	unsigned long		resum_kernel_buf_pa;
-	unsigned long		nonresum_mondo_pa;
-	unsigned long		nonresum_kernel_buf_pa;
-
-/* Dcache lines 3, 4, 5, and 6: Hypervisor Fault Status */
-	struct hv_fault_status	fault_info;
-
-/* Dcache line 7: Physical addresses of CPU send mondo block and CPU list.  */
-	unsigned long		cpu_mondo_block_pa;
-	unsigned long		cpu_list_pa;
-	unsigned long		tsb_huge;
-	unsigned long		tsb_huge_temp;
-
-/* Dcache line 8: IRQ work list, and keep trap_block a power-of-2 in size.  */
-	unsigned long		irq_worklist_pa;
-	unsigned int		cpu_mondo_qmask;
-	unsigned int		dev_mondo_qmask;
-	unsigned int		resum_qmask;
-	unsigned int		nonresum_qmask;
-	void			*hdesc;
-} __attribute__((aligned(64)));
-extern struct trap_per_cpu trap_block[NR_CPUS];
-extern void init_cur_cpu_trap(struct thread_info *);
-extern void setup_tba(void);
-extern int ncpus_probed;
 extern const struct seq_operations cpuinfo_op;
 
-extern unsigned long real_hard_smp_processor_id(void);
-
-struct cpuid_patch_entry {
-	unsigned int	addr;
-	unsigned int	cheetah_safari[4];
-	unsigned int	cheetah_jbus[4];
-	unsigned int	starfire[4];
-	unsigned int	sun4v[4];
-};
-extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end;
-
-struct sun4v_1insn_patch_entry {
-	unsigned int	addr;
-	unsigned int	insn;
-};
-extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch,
-	__sun4v_1insn_patch_end;
-
-struct sun4v_2insn_patch_entry {
-	unsigned int	addr;
-	unsigned int	insns[2];
-};
-extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
-	__sun4v_2insn_patch_end;
-
 #endif /* !(__ASSEMBLY__) */
 
-#define TRAP_PER_CPU_THREAD		0x00
-#define TRAP_PER_CPU_PGD_PADDR		0x08
-#define TRAP_PER_CPU_CPU_MONDO_PA	0x10
-#define TRAP_PER_CPU_DEV_MONDO_PA	0x18
-#define TRAP_PER_CPU_RESUM_MONDO_PA	0x20
-#define TRAP_PER_CPU_RESUM_KBUF_PA	0x28
-#define TRAP_PER_CPU_NONRESUM_MONDO_PA	0x30
-#define TRAP_PER_CPU_NONRESUM_KBUF_PA	0x38
-#define TRAP_PER_CPU_FAULT_INFO		0x40
-#define TRAP_PER_CPU_CPU_MONDO_BLOCK_PA	0xc0
-#define TRAP_PER_CPU_CPU_LIST_PA	0xc8
-#define TRAP_PER_CPU_TSB_HUGE		0xd0
-#define TRAP_PER_CPU_TSB_HUGE_TEMP	0xd8
-#define TRAP_PER_CPU_IRQ_WORKLIST_PA	0xe0
-#define TRAP_PER_CPU_CPU_MONDO_QMASK	0xe8
-#define TRAP_PER_CPU_DEV_MONDO_QMASK	0xec
-#define TRAP_PER_CPU_RESUM_QMASK	0xf0
-#define TRAP_PER_CPU_NONRESUM_QMASK	0xf4
-
-#define TRAP_BLOCK_SZ_SHIFT		8
-
-#include <asm/scratchpad.h>
-
-#define __GET_CPUID(REG)				\
-	/* Spitfire implementation (default). */	\
-661:	ldxa		[%g0] ASI_UPA_CONFIG, REG;	\
-	srlx		REG, 17, REG;			\
-	 and		REG, 0x1f, REG;			\
-	nop;						\
-	.section	.cpuid_patch, "ax";		\
-	/* Instruction location. */			\
-	.word		661b;				\
-	/* Cheetah Safari implementation. */		\
-	ldxa		[%g0] ASI_SAFARI_CONFIG, REG;	\
-	srlx		REG, 17, REG;			\
-	and		REG, 0x3ff, REG;		\
-	nop;						\
-	/* Cheetah JBUS implementation. */		\
-	ldxa		[%g0] ASI_JBUS_CONFIG, REG;	\
-	srlx		REG, 17, REG;			\
-	and		REG, 0x1f, REG;			\
-	nop;						\
-	/* Starfire implementation. */			\
-	sethi		%hi(0x1fff40000d0 >> 9), REG;	\
-	sllx		REG, 9, REG;			\
-	or		REG, 0xd0, REG;			\
-	lduwa		[REG] ASI_PHYS_BYPASS_EC_E, REG;\
-	/* sun4v implementation. */			\
-	mov		SCRATCHPAD_CPUID, REG;		\
-	ldxa		[REG] ASI_SCRATCHPAD, REG;	\
-	nop;						\
-	nop;						\
-	.previous;
-
-#ifdef CONFIG_SMP
-
-#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
-	__GET_CPUID(TMP)			\
-	sethi	%hi(trap_block), DEST;		\
-	sllx	TMP, TRAP_BLOCK_SZ_SHIFT, TMP;	\
-	or	DEST, %lo(trap_block), DEST;	\
-	add	DEST, TMP, DEST;		\
-
-/* Clobbers TMP, current address space PGD phys address into DEST.  */
-#define TRAP_LOAD_PGD_PHYS(DEST, TMP)		\
-	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
-	ldx	[DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
-
-/* Clobbers TMP, loads local processor's IRQ work area into DEST.  */
-#define TRAP_LOAD_IRQ_WORK_PA(DEST, TMP)	\
-	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
-	add	DEST, TRAP_PER_CPU_IRQ_WORKLIST_PA, DEST;
-
-/* Clobbers TMP, loads DEST with current thread info pointer.  */
-#define TRAP_LOAD_THREAD_REG(DEST, TMP)		\
-	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
-	ldx	[DEST + TRAP_PER_CPU_THREAD], DEST;
-
-/* Given the current thread info pointer in THR, load the per-cpu
- * area base of the current processor into DEST.  REG1, REG2, and REG3 are
- * clobbered.
- *
- * You absolutely cannot use DEST as a temporary in this code.  The
- * reason is that traps can happen during execution, and return from
- * trap will load the fully resolved DEST per-cpu base.  This can corrupt
- * the calculations done by the macro mid-stream.
- */
-#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)	\
-	lduh	[THR + TI_CPU], REG1;			\
-	sethi	%hi(__per_cpu_shift), REG3;		\
-	sethi	%hi(__per_cpu_base), REG2;		\
-	ldx	[REG3 + %lo(__per_cpu_shift)], REG3;	\
-	ldx	[REG2 + %lo(__per_cpu_base)], REG2;	\
-	sllx	REG1, REG3, REG3;			\
-	add	REG3, REG2, DEST;
-
-#else
-
-#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
-	sethi	%hi(trap_block), DEST;		\
-	or	DEST, %lo(trap_block), DEST;	\
-
-/* Uniprocessor versions, we know the cpuid is zero.  */
-#define TRAP_LOAD_PGD_PHYS(DEST, TMP)		\
-	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
-	ldx	[DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
-
-/* Clobbers TMP, loads local processor's IRQ work area into DEST.  */
-#define TRAP_LOAD_IRQ_WORK_PA(DEST, TMP)	\
-	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
-	add	DEST, TRAP_PER_CPU_IRQ_WORKLIST_PA, DEST;
-
-#define TRAP_LOAD_THREAD_REG(DEST, TMP)		\
-	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
-	ldx	[DEST + TRAP_PER_CPU_THREAD], DEST;
-
-/* No per-cpu areas on uniprocessor, so no need to load DEST.  */
-#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)
-
-#endif /* !(CONFIG_SMP) */
+#include <asm/trap_block.h>
 
 #endif /* _SPARC64_CPUDATA_H */

+ 163 - 5
arch/sparc/include/asm/dma-mapping.h

@@ -1,8 +1,166 @@
 #ifndef ___ASM_SPARC_DMA_MAPPING_H
 #define ___ASM_SPARC_DMA_MAPPING_H
-#if defined(__sparc__) && defined(__arch64__)
-#include <asm/dma-mapping_64.h>
-#else
-#include <asm/dma-mapping_32.h>
-#endif
+
+#include <linux/scatterlist.h>
+#include <linux/mm.h>
+
+#define DMA_ERROR_CODE	(~(dma_addr_t)0x0)
+
+extern int dma_supported(struct device *dev, u64 mask);
+extern int dma_set_mask(struct device *dev, u64 dma_mask);
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+#define dma_is_consistent(d, h)	(1)
+
+struct dma_ops {
+	void *(*alloc_coherent)(struct device *dev, size_t size,
+				dma_addr_t *dma_handle, gfp_t flag);
+	void (*free_coherent)(struct device *dev, size_t size,
+			      void *cpu_addr, dma_addr_t dma_handle);
+	dma_addr_t (*map_page)(struct device *dev, struct page *page,
+			       unsigned long offset, size_t size,
+			       enum dma_data_direction direction);
+	void (*unmap_page)(struct device *dev, dma_addr_t dma_addr,
+			   size_t size,
+			   enum dma_data_direction direction);
+	int (*map_sg)(struct device *dev, struct scatterlist *sg, int nents,
+		      enum dma_data_direction direction);
+	void (*unmap_sg)(struct device *dev, struct scatterlist *sg,
+			 int nhwentries,
+			 enum dma_data_direction direction);
+	void (*sync_single_for_cpu)(struct device *dev,
+				    dma_addr_t dma_handle, size_t size,
+				    enum dma_data_direction direction);
+	void (*sync_single_for_device)(struct device *dev,
+				       dma_addr_t dma_handle, size_t size,
+				       enum dma_data_direction direction);
+	void (*sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg,
+				int nelems,
+				enum dma_data_direction direction);
+	void (*sync_sg_for_device)(struct device *dev,
+				   struct scatterlist *sg, int nents,
+				   enum dma_data_direction dir);
+};
+extern const struct dma_ops *dma_ops;
+
+static inline void *dma_alloc_coherent(struct device *dev, size_t size,
+				       dma_addr_t *dma_handle, gfp_t flag)
+{
+	return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
+}
+
+static inline void dma_free_coherent(struct device *dev, size_t size,
+				     void *cpu_addr, dma_addr_t dma_handle)
+{
+	dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
+}
+
+static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
+					size_t size,
+					enum dma_data_direction direction)
+{
+	return dma_ops->map_page(dev, virt_to_page(cpu_addr),
+				 (unsigned long)cpu_addr & ~PAGE_MASK, size,
+				 direction);
+}
+
+static inline void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
+				    size_t size,
+				    enum dma_data_direction direction)
+{
+	dma_ops->unmap_page(dev, dma_addr, size, direction);
+}
+
+static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
+				      unsigned long offset, size_t size,
+				      enum dma_data_direction direction)
+{
+	return dma_ops->map_page(dev, page, offset, size, direction);
+}
+
+static inline void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
+				  size_t size,
+				  enum dma_data_direction direction)
+{
+	dma_ops->unmap_page(dev, dma_address, size, direction);
+}
+
+static inline int dma_map_sg(struct device *dev, struct scatterlist *sg,
+			     int nents, enum dma_data_direction direction)
+{
+	return dma_ops->map_sg(dev, sg, nents, direction);
+}
+
+static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+				int nents, enum dma_data_direction direction)
+{
+	dma_ops->unmap_sg(dev, sg, nents, direction);
+}
+
+static inline void dma_sync_single_for_cpu(struct device *dev,
+					   dma_addr_t dma_handle, size_t size,
+					   enum dma_data_direction direction)
+{
+	dma_ops->sync_single_for_cpu(dev, dma_handle, size, direction);
+}
+
+static inline void dma_sync_single_for_device(struct device *dev,
+					      dma_addr_t dma_handle,
+					      size_t size,
+					      enum dma_data_direction direction)
+{
+	if (dma_ops->sync_single_for_device)
+		dma_ops->sync_single_for_device(dev, dma_handle, size,
+						direction);
+}
+
+static inline void dma_sync_sg_for_cpu(struct device *dev,
+				       struct scatterlist *sg, int nelems,
+				       enum dma_data_direction direction)
+{
+	dma_ops->sync_sg_for_cpu(dev, sg, nelems, direction);
+}
+
+static inline void dma_sync_sg_for_device(struct device *dev,
+					  struct scatterlist *sg, int nelems,
+					  enum dma_data_direction direction)
+{
+	if (dma_ops->sync_sg_for_device)
+		dma_ops->sync_sg_for_device(dev, sg, nelems, direction);
+}
+
+static inline void dma_sync_single_range_for_cpu(struct device *dev,
+						 dma_addr_t dma_handle,
+						 unsigned long offset,
+						 size_t size,
+						 enum dma_data_direction dir)
+{
+	dma_sync_single_for_cpu(dev, dma_handle+offset, size, dir);
+}
+
+static inline void dma_sync_single_range_for_device(struct device *dev,
+						    dma_addr_t dma_handle,
+						    unsigned long offset,
+						    size_t size,
+						    enum dma_data_direction dir)
+{
+	dma_sync_single_for_device(dev, dma_handle+offset, size, dir);
+}
+
+
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return (dma_addr == DMA_ERROR_CODE);
+}
+
+static inline int dma_get_cache_alignment(void)
+{
+	/*
+	 * no easy way to get cache size on all processors, so return
+	 * the maximum possible, to be safe
+	 */
+	return (1 << INTERNODE_CACHE_SHIFT);
+}
+
 #endif
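
The consolidated header gives sparc32 and sparc64 a single indirection layer through dma_ops. Below is a minimal sketch of how a driver would go through this API; dev, buf, and len are hypothetical placeholders, not identifiers from this commit:

	dma_addr_t handle;

	/* Map a driver buffer for device-to-memory DMA; per the header
	 * above, this routes through dma_ops->map_page() via
	 * virt_to_page(buf) plus the in-page offset.
	 */
	handle = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
	if (dma_mapping_error(dev, handle))
		return -ENOMEM;

	/* ... device writes into the buffer ... */

	/* Hand ownership back to the CPU before reading the data. */
	dma_sync_single_for_cpu(dev, handle, len, DMA_FROM_DEVICE);

	dma_unmap_single(dev, handle, len, DMA_FROM_DEVICE);

Note that sync_single_for_device and sync_sg_for_device may be left NULL by a backend (sparc64 needs no flush for CPU-to-device writes), which is why only those two inline wrappers test the hook before calling it.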

+ 0 - 60
arch/sparc/include/asm/dma-mapping_32.h

@@ -1,60 +0,0 @@
-#ifndef _ASM_SPARC_DMA_MAPPING_H
-#define _ASM_SPARC_DMA_MAPPING_H
-
-#include <linux/types.h>
-
-struct device;
-struct scatterlist;
-struct page;
-
-#define DMA_ERROR_CODE	(~(dma_addr_t)0x0)
-
-extern int dma_supported(struct device *dev, u64 mask);
-extern int dma_set_mask(struct device *dev, u64 dma_mask);
-extern void *dma_alloc_coherent(struct device *dev, size_t size,
-				dma_addr_t *dma_handle, gfp_t flag);
-extern void dma_free_coherent(struct device *dev, size_t size,
-			      void *cpu_addr, dma_addr_t dma_handle);
-extern dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
-				 size_t size,
-				 enum dma_data_direction direction);
-extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
-			     size_t size,
-			     enum dma_data_direction direction);
-extern dma_addr_t dma_map_page(struct device *dev, struct page *page,
-			       unsigned long offset, size_t size,
-			       enum dma_data_direction direction);
-extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
-			   size_t size, enum dma_data_direction direction);
-extern int dma_map_sg(struct device *dev, struct scatterlist *sg,
-		      int nents, enum dma_data_direction direction);
-extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-			 int nents, enum dma_data_direction direction);
-extern void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
-				    size_t size,
-				    enum dma_data_direction direction);
-extern void dma_sync_single_for_device(struct device *dev,
-				       dma_addr_t dma_handle,
-				       size_t size,
-				       enum dma_data_direction direction);
-extern void dma_sync_single_range_for_cpu(struct device *dev,
-					  dma_addr_t dma_handle,
-					  unsigned long offset,
-					  size_t size,
-					  enum dma_data_direction direction);
-extern void dma_sync_single_range_for_device(struct device *dev,
-					     dma_addr_t dma_handle,
-					     unsigned long offset, size_t size,
-					     enum dma_data_direction direction);
-extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
-				int nelems, enum dma_data_direction direction);
-extern void dma_sync_sg_for_device(struct device *dev,
-				   struct scatterlist *sg, int nelems,
-				   enum dma_data_direction direction);
-extern int dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
-extern int dma_get_cache_alignment(void);
-
-#define dma_alloc_noncoherent	dma_alloc_coherent
-#define dma_free_noncoherent	dma_free_coherent
-
-#endif /* _ASM_SPARC_DMA_MAPPING_H */

+ 0 - 154
arch/sparc/include/asm/dma-mapping_64.h

@@ -1,154 +0,0 @@
-#ifndef _ASM_SPARC64_DMA_MAPPING_H
-#define _ASM_SPARC64_DMA_MAPPING_H
-
-#include <linux/scatterlist.h>
-#include <linux/mm.h>
-
-#define DMA_ERROR_CODE	(~(dma_addr_t)0x0)
-
-struct dma_ops {
-	void *(*alloc_coherent)(struct device *dev, size_t size,
-				dma_addr_t *dma_handle, gfp_t flag);
-	void (*free_coherent)(struct device *dev, size_t size,
-			      void *cpu_addr, dma_addr_t dma_handle);
-	dma_addr_t (*map_single)(struct device *dev, void *cpu_addr,
-				 size_t size,
-				 enum dma_data_direction direction);
-	void (*unmap_single)(struct device *dev, dma_addr_t dma_addr,
-			     size_t size,
-			     enum dma_data_direction direction);
-	int (*map_sg)(struct device *dev, struct scatterlist *sg, int nents,
-		      enum dma_data_direction direction);
-	void (*unmap_sg)(struct device *dev, struct scatterlist *sg,
-			 int nhwentries,
-			 enum dma_data_direction direction);
-	void (*sync_single_for_cpu)(struct device *dev,
-				    dma_addr_t dma_handle, size_t size,
-				    enum dma_data_direction direction);
-	void (*sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg,
-				int nelems,
-				enum dma_data_direction direction);
-};
-extern const struct dma_ops *dma_ops;
-
-extern int dma_supported(struct device *dev, u64 mask);
-extern int dma_set_mask(struct device *dev, u64 dma_mask);
-
-static inline void *dma_alloc_coherent(struct device *dev, size_t size,
-				       dma_addr_t *dma_handle, gfp_t flag)
-{
-	return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
-}
-
-static inline void dma_free_coherent(struct device *dev, size_t size,
-				     void *cpu_addr, dma_addr_t dma_handle)
-{
-	dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
-}
-
-static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
-					size_t size,
-					enum dma_data_direction direction)
-{
-	return dma_ops->map_single(dev, cpu_addr, size, direction);
-}
-
-static inline void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
-				    size_t size,
-				    enum dma_data_direction direction)
-{
-	dma_ops->unmap_single(dev, dma_addr, size, direction);
-}
-
-static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
-				      unsigned long offset, size_t size,
-				      enum dma_data_direction direction)
-{
-	return dma_ops->map_single(dev, page_address(page) + offset,
-				   size, direction);
-}
-
-static inline void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
-				  size_t size,
-				  enum dma_data_direction direction)
-{
-	dma_ops->unmap_single(dev, dma_address, size, direction);
-}
-
-static inline int dma_map_sg(struct device *dev, struct scatterlist *sg,
-			     int nents, enum dma_data_direction direction)
-{
-	return dma_ops->map_sg(dev, sg, nents, direction);
-}
-
-static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-				int nents, enum dma_data_direction direction)
-{
-	dma_ops->unmap_sg(dev, sg, nents, direction);
-}
-
-static inline void dma_sync_single_for_cpu(struct device *dev,
-					   dma_addr_t dma_handle, size_t size,
-					   enum dma_data_direction direction)
-{
-	dma_ops->sync_single_for_cpu(dev, dma_handle, size, direction);
-}
-
-static inline void dma_sync_single_for_device(struct device *dev,
-					      dma_addr_t dma_handle,
-					      size_t size,
-					      enum dma_data_direction direction)
-{
-	/* No flushing needed to sync cpu writes to the device.  */
-}
-
-static inline void dma_sync_single_range_for_cpu(struct device *dev,
-						 dma_addr_t dma_handle,
-						 unsigned long offset,
-						 size_t size,
-						 enum dma_data_direction direction)
-{
-	dma_sync_single_for_cpu(dev, dma_handle+offset, size, direction);
-}
-
-static inline void dma_sync_single_range_for_device(struct device *dev,
-						    dma_addr_t dma_handle,
-						    unsigned long offset,
-						    size_t size,
-						    enum dma_data_direction direction)
-{
-	/* No flushing needed to sync cpu writes to the device.  */
-}
-
-
-static inline void dma_sync_sg_for_cpu(struct device *dev,
-				       struct scatterlist *sg, int nelems,
-				       enum dma_data_direction direction)
-{
-	dma_ops->sync_sg_for_cpu(dev, sg, nelems, direction);
-}
-
-static inline void dma_sync_sg_for_device(struct device *dev,
-					  struct scatterlist *sg, int nelems,
-					  enum dma_data_direction direction)
-{
-	/* No flushing needed to sync cpu writes to the device.  */
-}
-
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-	return (dma_addr == DMA_ERROR_CODE);
-}
-
-static inline int dma_get_cache_alignment(void)
-{
-	/* no easy way to get cache size on all processors, so return
-	 * the maximum possible, to be safe */
-	return (1 << INTERNODE_CACHE_SHIFT);
-}
-
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-#define dma_is_consistent(d, h)	(1)
-
-#endif /* _ASM_SPARC64_DMA_MAPPING_H */

+ 11 - 0
arch/sparc/include/asm/ftrace.h

@@ -11,4 +11,15 @@ extern void _mcount(void);
 
 #endif
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+/* relocation of mcount call site is the same as the address */
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif /*  CONFIG_DYNAMIC_FTRACE */
+
 #endif /* _ASM_SPARC64_FTRACE */

+ 2 - 1
arch/sparc/include/asm/mdesc.h

@@ -71,7 +71,8 @@ struct mdesc_notifier_client {
 
 extern void mdesc_register_notifier(struct mdesc_notifier_client *client);
 
-extern void mdesc_fill_in_cpu_data(cpumask_t mask);
+extern void mdesc_fill_in_cpu_data(cpumask_t *mask);
+extern void mdesc_populate_present_mask(cpumask_t *mask);
 
 extern void sun4v_mdesc_init(void);
 

+ 2 - 6
arch/sparc/include/asm/percpu_64.h

@@ -7,20 +7,16 @@ register unsigned long __local_per_cpu_offset asm("g5");
 
 #ifdef CONFIG_SMP
 
-extern void real_setup_per_cpu_areas(void);
+#include <asm/trap_block.h>
 
-extern unsigned long __per_cpu_base;
-extern unsigned long __per_cpu_shift;
 #define __per_cpu_offset(__cpu) \
-	(__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift))
+	(trap_block[(__cpu)].__per_cpu_base)
 #define per_cpu_offset(x) (__per_cpu_offset(x))
 
 #define __my_cpu_offset __local_per_cpu_offset
 
 #else /* ! SMP */
 
-#define real_setup_per_cpu_areas()		do { } while (0)
-
 #endif	/* SMP */
 
 #include <asm-generic/percpu.h>
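
With the per-cpu base folded into the trap block, a remote CPU's offset is now a single load from trap_block[cpu] instead of a base-plus-shift computation. A sketch of what this means at a call site, with example_counter as an illustrative variable rather than anything from this commit:

	DEFINE_PER_CPU(unsigned long, example_counter);

	static unsigned long read_counter_of(int cpu)
	{
		/* per_cpu() adds __per_cpu_offset(cpu), which after this
		 * change expands to trap_block[cpu].__per_cpu_base.
		 */
		return per_cpu(example_counter, cpu);
	}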

+ 2 - 0
arch/sparc/include/asm/prom.h

@@ -86,6 +86,8 @@ extern int of_node_to_nid(struct device_node *dp);
 #endif
 
 extern void prom_build_devicetree(void);
+extern void of_populate_present_mask(void);
+extern void of_fill_in_cpu_data(void);
 
 /* Dummy ref counting routines - to be implemented later */
 static inline struct device_node *of_node_get(struct device_node *node)

+ 207 - 0
arch/sparc/include/asm/trap_block.h

@@ -0,0 +1,207 @@
+#ifndef _SPARC_TRAP_BLOCK_H
+#define _SPARC_TRAP_BLOCK_H
+
+#include <asm/hypervisor.h>
+#include <asm/asi.h>
+
+#ifndef __ASSEMBLY__
+
+/* Trap handling code needs to get at a few critical values upon
+ * trap entry and to process TSB misses.  These cannot be in the
+ * per_cpu() area as we really need to lock them into the TLB and
+ * thus make them part of the main kernel image.  As a result we
+ * try to make this as small as possible.
+ *
+ * This is padded out and aligned to 64-bytes to avoid false sharing
+ * on SMP.
+ */
+
+/* If you modify the size of this structure, please update
+ * TRAP_BLOCK_SZ_SHIFT below.
+ */
+struct thread_info;
+struct trap_per_cpu {
+/* D-cache line 1: Basic thread information, cpu and device mondo queues */
+	struct thread_info	*thread;
+	unsigned long		pgd_paddr;
+	unsigned long		cpu_mondo_pa;
+	unsigned long		dev_mondo_pa;
+
+/* D-cache line 2: Error Mondo Queue and kernel buffer pointers */
+	unsigned long		resum_mondo_pa;
+	unsigned long		resum_kernel_buf_pa;
+	unsigned long		nonresum_mondo_pa;
+	unsigned long		nonresum_kernel_buf_pa;
+
+/* Dcache lines 3, 4, 5, and 6: Hypervisor Fault Status */
+	struct hv_fault_status	fault_info;
+
+/* Dcache line 7: Physical addresses of CPU send mondo block and CPU list.  */
+	unsigned long		cpu_mondo_block_pa;
+	unsigned long		cpu_list_pa;
+	unsigned long		tsb_huge;
+	unsigned long		tsb_huge_temp;
+
+/* Dcache line 8: IRQ work list, and keep trap_block a power-of-2 in size.  */
+	unsigned long		irq_worklist_pa;
+	unsigned int		cpu_mondo_qmask;
+	unsigned int		dev_mondo_qmask;
+	unsigned int		resum_qmask;
+	unsigned int		nonresum_qmask;
+	unsigned long		__per_cpu_base;
+} __attribute__((aligned(64)));
+extern struct trap_per_cpu trap_block[NR_CPUS];
+extern void init_cur_cpu_trap(struct thread_info *);
+extern void setup_tba(void);
+extern int ncpus_probed;
+
+extern unsigned long real_hard_smp_processor_id(void);
+
+struct cpuid_patch_entry {
+	unsigned int	addr;
+	unsigned int	cheetah_safari[4];
+	unsigned int	cheetah_jbus[4];
+	unsigned int	starfire[4];
+	unsigned int	sun4v[4];
+};
+extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end;
+
+struct sun4v_1insn_patch_entry {
+	unsigned int	addr;
+	unsigned int	insn;
+};
+extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch,
+	__sun4v_1insn_patch_end;
+
+struct sun4v_2insn_patch_entry {
+	unsigned int	addr;
+	unsigned int	insns[2];
+};
+extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
+	__sun4v_2insn_patch_end;
+
+
+#endif /* !(__ASSEMBLY__) */
+
+#define TRAP_PER_CPU_THREAD		0x00
+#define TRAP_PER_CPU_PGD_PADDR		0x08
+#define TRAP_PER_CPU_CPU_MONDO_PA	0x10
+#define TRAP_PER_CPU_DEV_MONDO_PA	0x18
+#define TRAP_PER_CPU_RESUM_MONDO_PA	0x20
+#define TRAP_PER_CPU_RESUM_KBUF_PA	0x28
+#define TRAP_PER_CPU_NONRESUM_MONDO_PA	0x30
+#define TRAP_PER_CPU_NONRESUM_KBUF_PA	0x38
+#define TRAP_PER_CPU_FAULT_INFO		0x40
+#define TRAP_PER_CPU_CPU_MONDO_BLOCK_PA	0xc0
+#define TRAP_PER_CPU_CPU_LIST_PA	0xc8
+#define TRAP_PER_CPU_TSB_HUGE		0xd0
+#define TRAP_PER_CPU_TSB_HUGE_TEMP	0xd8
+#define TRAP_PER_CPU_IRQ_WORKLIST_PA	0xe0
+#define TRAP_PER_CPU_CPU_MONDO_QMASK	0xe8
+#define TRAP_PER_CPU_DEV_MONDO_QMASK	0xec
+#define TRAP_PER_CPU_RESUM_QMASK	0xf0
+#define TRAP_PER_CPU_NONRESUM_QMASK	0xf4
+#define TRAP_PER_CPU_PER_CPU_BASE	0xf8
+
+#define TRAP_BLOCK_SZ_SHIFT		8
+
+#include <asm/scratchpad.h>
+
+#define __GET_CPUID(REG)				\
+	/* Spitfire implementation (default). */	\
+661:	ldxa		[%g0] ASI_UPA_CONFIG, REG;	\
+	srlx		REG, 17, REG;			\
+	 and		REG, 0x1f, REG;			\
+	nop;						\
+	.section	.cpuid_patch, "ax";		\
+	/* Instruction location. */			\
+	.word		661b;				\
+	/* Cheetah Safari implementation. */		\
+	ldxa		[%g0] ASI_SAFARI_CONFIG, REG;	\
+	srlx		REG, 17, REG;			\
+	and		REG, 0x3ff, REG;		\
+	nop;						\
+	/* Cheetah JBUS implementation. */		\
+	ldxa		[%g0] ASI_JBUS_CONFIG, REG;	\
+	srlx		REG, 17, REG;			\
+	and		REG, 0x1f, REG;			\
+	nop;						\
+	/* Starfire implementation. */			\
+	sethi		%hi(0x1fff40000d0 >> 9), REG;	\
+	sllx		REG, 9, REG;			\
+	or		REG, 0xd0, REG;			\
+	lduwa		[REG] ASI_PHYS_BYPASS_EC_E, REG;\
+	/* sun4v implementation. */			\
+	mov		SCRATCHPAD_CPUID, REG;		\
+	ldxa		[REG] ASI_SCRATCHPAD, REG;	\
+	nop;						\
+	nop;						\
+	.previous;
+
+#ifdef CONFIG_SMP
+
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	__GET_CPUID(TMP)			\
+	sethi	%hi(trap_block), DEST;		\
+	sllx	TMP, TRAP_BLOCK_SZ_SHIFT, TMP;	\
+	or	DEST, %lo(trap_block), DEST;	\
+	add	DEST, TMP, DEST;		\
+
+/* Clobbers TMP, current address space PGD phys address into DEST.  */
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP)		\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	ldx	[DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
+
+/* Clobbers TMP, loads local processor's IRQ work area into DEST.  */
+#define TRAP_LOAD_IRQ_WORK_PA(DEST, TMP)	\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	add	DEST, TRAP_PER_CPU_IRQ_WORKLIST_PA, DEST;
+
+/* Clobbers TMP, loads DEST with current thread info pointer.  */
+#define TRAP_LOAD_THREAD_REG(DEST, TMP)		\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	ldx	[DEST + TRAP_PER_CPU_THREAD], DEST;
+
+/* Given the current thread info pointer in THR, load the per-cpu
+ * area base of the current processor into DEST.  REG1, REG2, and REG3 are
+ * clobbered.
+ *
+ * You absolutely cannot use DEST as a temporary in this code.  The
+ * reason is that traps can happen during execution, and return from
+ * trap will load the fully resolved DEST per-cpu base.  This can corrupt
+ * the calculations done by the macro mid-stream.
+ */
+#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)	\
+	lduh	[THR + TI_CPU], REG1;			\
+	sethi	%hi(trap_block), REG2;			\
+	sllx	REG1, TRAP_BLOCK_SZ_SHIFT, REG1;	\
+	or	REG2, %lo(trap_block), REG2;		\
+	add	REG2, REG1, REG2;			\
+	ldx	[REG2 + TRAP_PER_CPU_PER_CPU_BASE], DEST;
+
+#else
+
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	sethi	%hi(trap_block), DEST;		\
+	or	DEST, %lo(trap_block), DEST;	\
+
+/* Uniprocessor versions, we know the cpuid is zero.  */
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP)		\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	ldx	[DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
+
+/* Clobbers TMP, loads local processor's IRQ work area into DEST.  */
+#define TRAP_LOAD_IRQ_WORK_PA(DEST, TMP)	\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	add	DEST, TRAP_PER_CPU_IRQ_WORKLIST_PA, DEST;
+
+#define TRAP_LOAD_THREAD_REG(DEST, TMP)		\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	ldx	[DEST + TRAP_PER_CPU_THREAD], DEST;
+
+/* No per-cpu areas on uniprocessor, so no need to load DEST.  */
+#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)
+
+#endif /* !(CONFIG_SMP) */
+
+#endif /* _SPARC_TRAP_BLOCK_H */

+ 2 - 1
arch/sparc/include/asm/unistd.h

@@ -394,8 +394,9 @@
 #define __NR_accept4		323
 #define __NR_preadv		324
 #define __NR_pwritev		325
+#define __NR_rt_tgsigqueueinfo	326
 
-#define NR_SYSCALLS		326
+#define NR_SYSCALLS		327
 
 #ifdef __32bit_syscall_numbers__
 /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
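
Once wired up, userspace can reach the new syscall directly. A sketch, assuming no libc wrapper (the case when this was merged), that queues a signal carrying a payload to one specific thread of a thread group:

	#include <signal.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int queue_to_thread(pid_t tgid, pid_t tid, int sig, int payload)
	{
		siginfo_t info = { 0 };

		info.si_signo = sig;
		info.si_code = SI_QUEUE;	/* negative, so the kernel allows it */
		info.si_value.sival_int = payload;

		return syscall(__NR_rt_tgsigqueueinfo, tgid, tid, sig, &info);
	}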

+ 2 - 0
arch/sparc/kernel/Makefile

@@ -37,6 +37,7 @@ obj-y                   += una_asm_$(BITS).o
 obj-$(CONFIG_SPARC32)   += muldiv.o
 obj-y                   += prom_common.o
 obj-y                   += prom_$(BITS).o
+obj-y                   += of_device_common.o
 obj-y                   += of_device_$(BITS).o
 obj-$(CONFIG_SPARC64)   += prom_irqtrans.o
 
@@ -54,6 +55,7 @@ obj-$(CONFIG_SPARC64)   += sstate.o
 obj-$(CONFIG_SPARC64)   += mdesc.o
 obj-$(CONFIG_SPARC64)	+= pcr.o
 obj-$(CONFIG_SPARC64)	+= nmi.o
+obj-$(CONFIG_SPARC64_SMP) += cpumap.o
 
 # sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation
 obj-$(CONFIG_SPARC32)     += devres.o

+ 431 - 0
arch/sparc/kernel/cpumap.c

@@ -0,0 +1,431 @@
+/* cpumap.c: used for optimizing CPU assignment
+ *
+ * Copyright (C) 2009 Hong H. Pham <hong.pham@windriver.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/cpumask.h>
+#include <linux/spinlock.h>
+#include <asm/cpudata.h>
+#include "cpumap.h"
+
+
+enum {
+	CPUINFO_LVL_ROOT = 0,
+	CPUINFO_LVL_NODE,
+	CPUINFO_LVL_CORE,
+	CPUINFO_LVL_PROC,
+	CPUINFO_LVL_MAX,
+};
+
+enum {
+	ROVER_NO_OP              = 0,
+	/* Increment rover every time level is visited */
+	ROVER_INC_ON_VISIT       = 1 << 0,
+	/* Increment parent's rover every time rover wraps around */
+	ROVER_INC_PARENT_ON_LOOP = 1 << 1,
+};
+
+struct cpuinfo_node {
+	int id;
+	int level;
+	int num_cpus;    /* Number of CPUs in this hierarchy */
+	int parent_index;
+	int child_start; /* Array index of the first child node */
+	int child_end;   /* Array index of the last child node */
+	int rover;       /* Child node iterator */
+};
+
+struct cpuinfo_level {
+	int start_index; /* Index of first node of a level in a cpuinfo tree */
+	int end_index;   /* Index of last node of a level in a cpuinfo tree */
+	int num_nodes;   /* Number of nodes in a level in a cpuinfo tree */
+};
+
+struct cpuinfo_tree {
+	int total_nodes;
+
+	/* Offsets into nodes[] for each level of the tree */
+	struct cpuinfo_level level[CPUINFO_LVL_MAX];
+	struct cpuinfo_node  nodes[0];
+};
+
+
+static struct cpuinfo_tree *cpuinfo_tree;
+
+static u16 cpu_distribution_map[NR_CPUS];
+static DEFINE_SPINLOCK(cpu_map_lock);
+
+
+/* Niagara optimized cpuinfo tree traversal. */
+static const int niagara_iterate_method[] = {
+	[CPUINFO_LVL_ROOT] = ROVER_NO_OP,
+
+	/* Strands (or virtual CPUs) within a core may not run concurrently
+	 * on the Niagara, as instruction pipeline(s) are shared.  Distribute
+	 * work to strands in different cores first for better concurrency.
+	 * Go to next NUMA node when all cores are used.
+	 */
+	[CPUINFO_LVL_NODE] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,
+
+	/* Strands are grouped together by proc_id in cpuinfo_sparc, i.e.
+	 * a proc_id represents an instruction pipeline.  Distribute work to
+	 * strands in different proc_id groups if the core has multiple
+	 * instruction pipelines (e.g. the Niagara 2/2+ has two).
+	 */
+	[CPUINFO_LVL_CORE] = ROVER_INC_ON_VISIT,
+
+	/* Pick the next strand in the proc_id group. */
+	[CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT,
+};
+
+/* Generic cpuinfo tree traversal.  Distribute work round robin across NUMA
+ * nodes.
+ */
+static const int generic_iterate_method[] = {
+	[CPUINFO_LVL_ROOT] = ROVER_INC_ON_VISIT,
+	[CPUINFO_LVL_NODE] = ROVER_NO_OP,
+	[CPUINFO_LVL_CORE] = ROVER_INC_PARENT_ON_LOOP,
+	[CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,
+};
+
+
+static int cpuinfo_id(int cpu, int level)
+{
+	int id;
+
+	switch (level) {
+	case CPUINFO_LVL_ROOT:
+		id = 0;
+		break;
+	case CPUINFO_LVL_NODE:
+		id = cpu_to_node(cpu);
+		break;
+	case CPUINFO_LVL_CORE:
+		id = cpu_data(cpu).core_id;
+		break;
+	case CPUINFO_LVL_PROC:
+		id = cpu_data(cpu).proc_id;
+		break;
+	default:
+		id = -EINVAL;
+	}
+	return id;
+}
+
+/*
+ * Enumerate the CPU information in __cpu_data to determine the start index,
+ * end index, and number of nodes for each level in the cpuinfo tree.  The
+ * total number of cpuinfo nodes required to build the tree is returned.
+ */
+static int enumerate_cpuinfo_nodes(struct cpuinfo_level *tree_level)
+{
+	int prev_id[CPUINFO_LVL_MAX];
+	int i, n, num_nodes;
+
+	for (i = CPUINFO_LVL_ROOT; i < CPUINFO_LVL_MAX; i++) {
+		struct cpuinfo_level *lv = &tree_level[i];
+
+		prev_id[i] = -1;
+		lv->start_index = lv->end_index = lv->num_nodes = 0;
+	}
+
+	num_nodes = 1; /* Include the root node */
+
+	for (i = 0; i < num_possible_cpus(); i++) {
+		if (!cpu_online(i))
+			continue;
+
+		n = cpuinfo_id(i, CPUINFO_LVL_NODE);
+		if (n > prev_id[CPUINFO_LVL_NODE]) {
+			tree_level[CPUINFO_LVL_NODE].num_nodes++;
+			prev_id[CPUINFO_LVL_NODE] = n;
+			num_nodes++;
+		}
+		n = cpuinfo_id(i, CPUINFO_LVL_CORE);
+		if (n > prev_id[CPUINFO_LVL_CORE]) {
+			tree_level[CPUINFO_LVL_CORE].num_nodes++;
+			prev_id[CPUINFO_LVL_CORE] = n;
+			num_nodes++;
+		}
+		n = cpuinfo_id(i, CPUINFO_LVL_PROC);
+		if (n > prev_id[CPUINFO_LVL_PROC]) {
+			tree_level[CPUINFO_LVL_PROC].num_nodes++;
+			prev_id[CPUINFO_LVL_PROC] = n;
+			num_nodes++;
+		}
+	}
+
+	tree_level[CPUINFO_LVL_ROOT].num_nodes = 1;
+
+	n = tree_level[CPUINFO_LVL_NODE].num_nodes;
+	tree_level[CPUINFO_LVL_NODE].start_index = 1;
+	tree_level[CPUINFO_LVL_NODE].end_index   = n;
+
+	n++;
+	tree_level[CPUINFO_LVL_CORE].start_index = n;
+	n += tree_level[CPUINFO_LVL_CORE].num_nodes;
+	tree_level[CPUINFO_LVL_CORE].end_index   = n - 1;
+
+	tree_level[CPUINFO_LVL_PROC].start_index = n;
+	n += tree_level[CPUINFO_LVL_PROC].num_nodes;
+	tree_level[CPUINFO_LVL_PROC].end_index   = n - 1;
+
+	return num_nodes;
+}
+
+/* Build a tree representation of the CPU hierarchy using the per CPU
+ * information in __cpu_data.  Entries in __cpu_data[0..NR_CPUS] are
+ * assumed to be sorted in ascending order based on node, core_id, and
+ * proc_id (in order of significance).
+ */
+static struct cpuinfo_tree *build_cpuinfo_tree(void)
+{
+	struct cpuinfo_tree *new_tree;
+	struct cpuinfo_node *node;
+	struct cpuinfo_level tmp_level[CPUINFO_LVL_MAX];
+	int num_cpus[CPUINFO_LVL_MAX];
+	int level_rover[CPUINFO_LVL_MAX];
+	int prev_id[CPUINFO_LVL_MAX];
+	int n, id, cpu, prev_cpu, last_cpu, level;
+
+	n = enumerate_cpuinfo_nodes(tmp_level);
+
+	new_tree = kzalloc(sizeof(struct cpuinfo_tree) +
+	                   (sizeof(struct cpuinfo_node) * n), GFP_ATOMIC);
+	if (!new_tree)
+		return NULL;
+
+	new_tree->total_nodes = n;
+	memcpy(&new_tree->level, tmp_level, sizeof(tmp_level));
+
+	prev_cpu = cpu = first_cpu(cpu_online_map);
+
+	/* Initialize all levels in the tree with the first CPU */
+	for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) {
+		n = new_tree->level[level].start_index;
+
+		level_rover[level] = n;
+		node = &new_tree->nodes[n];
+
+		id = cpuinfo_id(cpu, level);
+		if (unlikely(id < 0)) {
+			kfree(new_tree);
+			return NULL;
+		}
+		node->id = id;
+		node->level = level;
+		node->num_cpus = 1;
+
+		node->parent_index = (level > CPUINFO_LVL_ROOT)
+		    ? new_tree->level[level - 1].start_index : -1;
+
+		node->child_start = node->child_end = node->rover =
+		    (level == CPUINFO_LVL_PROC)
+		    ? cpu : new_tree->level[level + 1].start_index;
+
+		prev_id[level] = node->id;
+		num_cpus[level] = 1;
+	}
+
+	for (last_cpu = (num_possible_cpus() - 1); last_cpu >= 0; last_cpu--) {
+		if (cpu_online(last_cpu))
+			break;
+	}
+
+	while (++cpu <= last_cpu) {
+		if (!cpu_online(cpu))
+			continue;
+
+		for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT;
+		     level--) {
+			id = cpuinfo_id(cpu, level);
+			if (unlikely(id < 0)) {
+				kfree(new_tree);
+				return NULL;
+			}
+
+			if ((id != prev_id[level]) || (cpu == last_cpu)) {
+				prev_id[level] = id;
+				node = &new_tree->nodes[level_rover[level]];
+				node->num_cpus = num_cpus[level];
+				num_cpus[level] = 1;
+
+				if (cpu == last_cpu)
+					node->num_cpus++;
+
+				/* Connect tree node to parent */
+				if (level == CPUINFO_LVL_ROOT)
+					node->parent_index = -1;
+				else
+					node->parent_index =
+					    level_rover[level - 1];
+
+				if (level == CPUINFO_LVL_PROC) {
+					node->child_end =
+					    (cpu == last_cpu) ? cpu : prev_cpu;
+				} else {
+					node->child_end =
+					    level_rover[level + 1] - 1;
+				}
+
+				/* Initialize the next node in the same level */
+				n = ++level_rover[level];
+				if (n <= new_tree->level[level].end_index) {
+					node = &new_tree->nodes[n];
+					node->id = id;
+					node->level = level;
+
+					/* Connect node to child */
+					node->child_start = node->child_end =
+					node->rover =
+					    (level == CPUINFO_LVL_PROC)
+					    ? cpu : level_rover[level + 1];
+				}
+			} else
+				num_cpus[level]++;
+		}
+		prev_cpu = cpu;
+	}
+
+	return new_tree;
+}
+
+static void increment_rover(struct cpuinfo_tree *t, int node_index,
+                            int root_index, const int *rover_inc_table)
+{
+	struct cpuinfo_node *node = &t->nodes[node_index];
+	int top_level, level;
+
+	top_level = t->nodes[root_index].level;
+	for (level = node->level; level >= top_level; level--) {
+		node->rover++;
+		if (node->rover <= node->child_end)
+			return;
+
+		node->rover = node->child_start;
+		/* If parent's rover does not need to be adjusted, stop here. */
+		if ((level == top_level) ||
+		    !(rover_inc_table[level] & ROVER_INC_PARENT_ON_LOOP))
+			return;
+
+		node = &t->nodes[node->parent_index];
+	}
+}
+
+static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
+{
+	const int *rover_inc_table;
+	int level, new_index, index = root_index;
+
+	switch (sun4v_chip_type) {
+	case SUN4V_CHIP_NIAGARA1:
+	case SUN4V_CHIP_NIAGARA2:
+		rover_inc_table = niagara_iterate_method;
+		break;
+	default:
+		rover_inc_table = generic_iterate_method;
+	}
+
+	for (level = t->nodes[root_index].level; level < CPUINFO_LVL_MAX;
+	     level++) {
+		new_index = t->nodes[index].rover;
+		if (rover_inc_table[level] & ROVER_INC_ON_VISIT)
+			increment_rover(t, index, root_index, rover_inc_table);
+
+		index = new_index;
+	}
+	return index;
+}
+
+static void _cpu_map_rebuild(void)
+{
+	int i;
+
+	if (cpuinfo_tree) {
+		kfree(cpuinfo_tree);
+		cpuinfo_tree = NULL;
+	}
+
+	cpuinfo_tree = build_cpuinfo_tree();
+	if (!cpuinfo_tree)
+		return;
+
+	/* Build CPU distribution map that spans all online CPUs.  No need
+	 * to check if the CPU is online, as that is done when the cpuinfo
+	 * tree is being built.
+	 */
+	for (i = 0; i < cpuinfo_tree->nodes[0].num_cpus; i++)
+		cpu_distribution_map[i] = iterate_cpu(cpuinfo_tree, 0);
+}
+
+/* Fallback if the cpuinfo tree could not be built.  CPU mapping is linear
+ * round robin.
+ */
+static int simple_map_to_cpu(unsigned int index)
+{
+	int i, end, cpu_rover;
+
+	cpu_rover = 0;
+	end = index % num_online_cpus();
+	for (i = 0; i < num_possible_cpus(); i++) {
+		if (cpu_online(cpu_rover)) {
+			if (cpu_rover >= end)
+				return cpu_rover;
+
+			cpu_rover++;
+		}
+	}
+
+	/* Impossible, since num_online_cpus() <= num_possible_cpus() */
+	return first_cpu(cpu_online_map);
+}
+
+static int _map_to_cpu(unsigned int index)
+{
+	struct cpuinfo_node *root_node;
+
+	if (unlikely(!cpuinfo_tree)) {
+		_cpu_map_rebuild();
+		if (!cpuinfo_tree)
+			return simple_map_to_cpu(index);
+	}
+
+	root_node = &cpuinfo_tree->nodes[0];
+#ifdef CONFIG_HOTPLUG_CPU
+	if (unlikely(root_node->num_cpus != num_online_cpus())) {
+		_cpu_map_rebuild();
+		if (!cpuinfo_tree)
+			return simple_map_to_cpu(index);
+	}
+#endif
+	return cpu_distribution_map[index % root_node->num_cpus];
+}
+
+int map_to_cpu(unsigned int index)
+{
+	int mapped_cpu;
+	unsigned long flag;
+
+	spin_lock_irqsave(&cpu_map_lock, flag);
+	mapped_cpu = _map_to_cpu(index);
+
+#ifdef CONFIG_HOTPLUG_CPU
+	while (unlikely(!cpu_online(mapped_cpu)))
+		mapped_cpu = _map_to_cpu(index);
+#endif
+	spin_unlock_irqrestore(&cpu_map_lock, flag);
+	return mapped_cpu;
+}
+EXPORT_SYMBOL(map_to_cpu);
+
+void cpu_map_rebuild(void)
+{
+	unsigned long flag;
+
+	spin_lock_irqsave(&cpu_map_lock, flag);
+	_cpu_map_rebuild();
+	spin_unlock_irqrestore(&cpu_map_lock, flag);
+}

+ 16 - 0
arch/sparc/kernel/cpumap.h

@@ -0,0 +1,16 @@
+#ifndef _CPUMAP_H
+#define _CPUMAP_H
+
+#ifdef CONFIG_SMP
+extern void cpu_map_rebuild(void);
+extern int  map_to_cpu(unsigned int index);
+#define cpu_map_init() cpu_map_rebuild()
+#else
+#define cpu_map_init() do {} while (0)
+static inline int map_to_cpu(unsigned int index)
+{
+	return raw_smp_processor_id();
+}
+#endif
+
+#endif
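
Callers pass map_to_cpu() any distribution index and get back an online CPU chosen with topology awareness; the irq distribution rework in this series is the intended in-tree user. A hypothetical caller, for illustration only:

	#include "cpumap.h"

	/* Spread queue n's work across the machine. */
	static int target_cpu_for_queue(unsigned int n)
	{
		/* Round-robins across NUMA nodes, cores, and instruction
		 * pipelines on Niagara; falls back to a simple modulo
		 * round robin if the cpuinfo tree could not be built.
		 */
		return map_to_cpu(n);
	}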

+ 39 - 88
arch/sparc/kernel/dma.c

@@ -35,8 +35,8 @@ int dma_set_mask(struct device *dev, u64 dma_mask)
 }
 EXPORT_SYMBOL(dma_set_mask);
 
-void *dma_alloc_coherent(struct device *dev, size_t size,
-			 dma_addr_t *dma_handle, gfp_t flag)
+static void *dma32_alloc_coherent(struct device *dev, size_t size,
+				  dma_addr_t *dma_handle, gfp_t flag)
 {
 #ifdef CONFIG_PCI
 	if (dev->bus == &pci_bus_type)
@@ -44,10 +44,9 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 #endif
 	return sbus_alloc_consistent(dev, size, dma_handle);
 }
-EXPORT_SYMBOL(dma_alloc_coherent);
 
-void dma_free_coherent(struct device *dev, size_t size,
-		       void *cpu_addr, dma_addr_t dma_handle)
+static void dma32_free_coherent(struct device *dev, size_t size,
+				void *cpu_addr, dma_addr_t dma_handle)
 {
 #ifdef CONFIG_PCI
 	if (dev->bus == &pci_bus_type) {
@@ -58,38 +57,10 @@ void dma_free_coherent(struct device *dev, size_t size,
 #endif
 	sbus_free_consistent(dev, size, cpu_addr, dma_handle);
 }
-EXPORT_SYMBOL(dma_free_coherent);
 
-dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
-			  size_t size, enum dma_data_direction direction)
-{
-#ifdef CONFIG_PCI
-	if (dev->bus == &pci_bus_type)
-		return pci_map_single(to_pci_dev(dev), cpu_addr,
-				      size, (int)direction);
-#endif
-	return sbus_map_single(dev, cpu_addr, size, (int)direction);
-}
-EXPORT_SYMBOL(dma_map_single);
-
-void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
-		      size_t size,
-		      enum dma_data_direction direction)
-{
-#ifdef CONFIG_PCI
-	if (dev->bus == &pci_bus_type) {
-		pci_unmap_single(to_pci_dev(dev), dma_addr,
-				 size, (int)direction);
-		return;
-	}
-#endif
-	sbus_unmap_single(dev, dma_addr, size, (int)direction);
-}
-EXPORT_SYMBOL(dma_unmap_single);
-
-dma_addr_t dma_map_page(struct device *dev, struct page *page,
-			unsigned long offset, size_t size,
-			enum dma_data_direction direction)
+static dma_addr_t dma32_map_page(struct device *dev, struct page *page,
+				 unsigned long offset, size_t size,
+				 enum dma_data_direction direction)
 {
 #ifdef CONFIG_PCI
 	if (dev->bus == &pci_bus_type)
@@ -99,10 +70,9 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page,
 	return sbus_map_single(dev, page_address(page) + offset,
 			       size, (int)direction);
 }
-EXPORT_SYMBOL(dma_map_page);
 
-void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
-		    size_t size, enum dma_data_direction direction)
+static void dma32_unmap_page(struct device *dev, dma_addr_t dma_address,
+			     size_t size, enum dma_data_direction direction)
 {
 #ifdef CONFIG_PCI
 	if (dev->bus == &pci_bus_type) {
@@ -113,10 +83,9 @@ void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
 #endif
 	sbus_unmap_single(dev, dma_address, size, (int)direction);
 }
-EXPORT_SYMBOL(dma_unmap_page);
 
-int dma_map_sg(struct device *dev, struct scatterlist *sg,
-			     int nents, enum dma_data_direction direction)
+static int dma32_map_sg(struct device *dev, struct scatterlist *sg,
+			int nents, enum dma_data_direction direction)
 {
 #ifdef CONFIG_PCI
 	if (dev->bus == &pci_bus_type)
@@ -124,10 +93,9 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg,
 #endif
 	return sbus_map_sg(dev, sg, nents, direction);
 }
-EXPORT_SYMBOL(dma_map_sg);
 
-void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-		  int nents, enum dma_data_direction direction)
+void dma32_unmap_sg(struct device *dev, struct scatterlist *sg,
+		    int nents, enum dma_data_direction direction)
 {
 #ifdef CONFIG_PCI
 	if (dev->bus == &pci_bus_type) {
@@ -137,10 +105,10 @@ void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 #endif
 	sbus_unmap_sg(dev, sg, nents, (int)direction);
 }
-EXPORT_SYMBOL(dma_unmap_sg);
 
-void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
-			     size_t size, enum dma_data_direction direction)
+static void dma32_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+				      size_t size,
+				      enum dma_data_direction direction)
 {
 #ifdef CONFIG_PCI
 	if (dev->bus == &pci_bus_type) {
@@ -151,10 +119,10 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
 #endif
 	sbus_dma_sync_single_for_cpu(dev, dma_handle, size, (int) direction);
 }
-EXPORT_SYMBOL(dma_sync_single_for_cpu);
 
-void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
-				size_t size, enum dma_data_direction direction)
+static void dma32_sync_single_for_device(struct device *dev,
+					 dma_addr_t dma_handle, size_t size,
+					 enum dma_data_direction direction)
 {
 #ifdef CONFIG_PCI
 	if (dev->bus == &pci_bus_type) {
@@ -165,28 +133,9 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
 #endif
 	sbus_dma_sync_single_for_device(dev, dma_handle, size, (int) direction);
 }
-EXPORT_SYMBOL(dma_sync_single_for_device);
-
-void dma_sync_single_range_for_cpu(struct device *dev,
-				   dma_addr_t dma_handle,
-				   unsigned long offset,
-				   size_t size,
-				   enum dma_data_direction direction)
-{
-	dma_sync_single_for_cpu(dev, dma_handle+offset, size, direction);
-}
-EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
-
-void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
-				      unsigned long offset, size_t size,
-				      enum dma_data_direction direction)
-{
-	dma_sync_single_for_device(dev, dma_handle+offset, size, direction);
-}
-EXPORT_SYMBOL(dma_sync_single_range_for_device);
 
-void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
-			 int nelems, enum dma_data_direction direction)
+static void dma32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+				  int nelems, enum dma_data_direction direction)
 {
 #ifdef CONFIG_PCI
 	if (dev->bus == &pci_bus_type) {
@@ -197,11 +146,10 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
 #endif
 	BUG();
 }
-EXPORT_SYMBOL(dma_sync_sg_for_cpu);
 
-void dma_sync_sg_for_device(struct device *dev,
-			    struct scatterlist *sg, int nelems,
-			    enum dma_data_direction direction)
+static void dma32_sync_sg_for_device(struct device *dev,
+				     struct scatterlist *sg, int nelems,
+				     enum dma_data_direction direction)
 {
 #ifdef CONFIG_PCI
 	if (dev->bus == &pci_bus_type) {
@@ -212,16 +160,19 @@ void dma_sync_sg_for_device(struct device *dev,
 #endif
 	BUG();
 }
-EXPORT_SYMBOL(dma_sync_sg_for_device);
 
-int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-	return (dma_addr == DMA_ERROR_CODE);
-}
-EXPORT_SYMBOL(dma_mapping_error);
-
-int dma_get_cache_alignment(void)
-{
-	return 32;
-}
-EXPORT_SYMBOL(dma_get_cache_alignment);
+static const struct dma_ops dma32_dma_ops = {
+	.alloc_coherent		= dma32_alloc_coherent,
+	.free_coherent		= dma32_free_coherent,
+	.map_page		= dma32_map_page,
+	.unmap_page		= dma32_unmap_page,
+	.map_sg			= dma32_map_sg,
+	.unmap_sg		= dma32_unmap_sg,
+	.sync_single_for_cpu	= dma32_sync_single_for_cpu,
+	.sync_single_for_device	= dma32_sync_single_for_device,
+	.sync_sg_for_cpu	= dma32_sync_sg_for_cpu,
+	.sync_sg_for_device	= dma32_sync_sg_for_device,
+};
+
+const struct dma_ops *dma_ops = &dma32_dma_ops;
+EXPORT_SYMBOL(dma_ops);
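
With the exported dma_* entry points gone, everything funnels through the
dma_ops table above.  A minimal sketch of how a generic inline wrapper can
dispatch through it; the wrapper shown here is illustrative, not the actual
dma-mapping.h contents:

	static inline dma_addr_t dma_map_page(struct device *dev,
					      struct page *page,
					      unsigned long offset, size_t size,
					      enum dma_data_direction dir)
	{
		/* All bus-specific logic now hides behind dma_ops. */
		return dma_ops->map_page(dev, page, offset, size, dir);
	}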

+ 2 - 1
arch/sparc/kernel/ds.c

@@ -544,7 +544,8 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp,
 			     resp_len, ncpus, mask,
 			     DR_CPU_STAT_CONFIGURED);
 
-	mdesc_fill_in_cpu_data(*mask);
+	mdesc_populate_present_mask(mask);
+	mdesc_fill_in_cpu_data(mask);
 
 	for_each_cpu_mask(cpu, *mask) {
 		int err;

+ 32 - 15
arch/sparc/kernel/ftrace.c

@@ -7,14 +7,10 @@
 
 #include <asm/ftrace.h>
 
+#ifdef CONFIG_DYNAMIC_FTRACE
 static const u32 ftrace_nop = 0x01000000;
 
-unsigned char *ftrace_nop_replace(void)
-{
-	return (char *)&ftrace_nop;
-}
-
-unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+static u32 ftrace_call_replace(unsigned long ip, unsigned long addr)
 {
 	static u32 call;
 	s32 off;
@@ -22,15 +18,11 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 	off = ((s32)addr - (s32)ip);
 	call = 0x40000000 | ((u32)off >> 2);
 
-	return (unsigned char *) &call;
+	return call;
 }
 
-int
-ftrace_modify_code(unsigned long ip, unsigned char *old_code,
-		   unsigned char *new_code)
+static int ftrace_modify_code(unsigned long ip, u32 old, u32 new)
 {
-	u32 old = *(u32 *)old_code;
-	u32 new = *(u32 *)new_code;
 	u32 replaced;
 	int faulted;
 
@@ -59,18 +51,43 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	return faulted;
 }
 
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	u32 old, new;
+
+	old = ftrace_call_replace(ip, addr);
+	new = ftrace_nop;
+	return ftrace_modify_code(ip, old, new);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	u32 old, new;
+
+	old = ftrace_nop;
+	new = ftrace_call_replace(ip, addr);
+	return ftrace_modify_code(ip, old, new);
+}
+
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned long ip = (unsigned long)(&ftrace_call);
-	unsigned char old[MCOUNT_INSN_SIZE], *new;
+	u32 old, new;
 
-	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
+	old = *(u32 *) &ftrace_call;
 	new = ftrace_call_replace(ip, (unsigned long)func);
 	return ftrace_modify_code(ip, old, new);
 }
 
 int __init ftrace_dyn_arch_init(void *data)
 {
-	ftrace_mcount_set(data);
+	unsigned long *p = data;
+
+	*p = 0;
+
 	return 0;
 }
+#endif
+
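
ftrace_call_replace() assembles a SPARC CALL instruction by hand: the top
two bits 01 (0x40000000) select the CALL format, and the remaining 30 bits
hold the signed word displacement from the call site.  A worked example
with hypothetical addresses:

	/* ip = 0x1000, addr = 0x2000 (hypothetical values)
	 *
	 *   off      = addr - ip = 0x1000 bytes
	 *   off >> 2 = 0x400 words
	 *   insn     = 0x40000000 | 0x400 = 0x40000400   "call 0x2000"
	 *
	 * ftrace_nop (0x01000000) is "sethi %hi(0), %g0", the canonical
	 * SPARC no-op that the call site is patched back to.
	 */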

+ 0 - 22
arch/sparc/kernel/head_64.S

@@ -641,28 +641,6 @@ tlb_fixup_done:
 	/* Not reached... */
 
 1:
-	/* If we boot on a non-zero cpu, all of the per-cpu
-	 * variable references we make before setting up the
-	 * per-cpu areas will use a bogus offset.  Put a
-	 * compensating factor into __per_cpu_base to handle
-	 * this cleanly.
-	 *
-	 * What the per-cpu code calculates is:
-	 *
-	 *	__per_cpu_base + (cpu << __per_cpu_shift)
-	 *
-	 * These two variables are zero initially, so to
-	 * make it all cancel out to zero we need to put
-	 * "0 - (cpu << 0)" into __per_cpu_base so that the
-	 * above formula evaluates to zero.
-	 *
-	 * We cannot even perform a printk() until this stuff
-	 * is setup as that calls cpu_clock() which uses
-	 * per-cpu variables.
-	 */
-	sub	%g0, %o0, %o1
-	sethi	%hi(__per_cpu_base), %o2
-	stx	%o1, [%o2 + %lo(__per_cpu_base)]
 #else
 	mov	0, %o0
 #endif

+ 8 - 7
arch/sparc/kernel/iommu.c

@@ -351,8 +351,9 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
 		free_pages((unsigned long)cpu, order);
 }
 
-static dma_addr_t dma_4u_map_single(struct device *dev, void *ptr, size_t sz,
-				    enum dma_data_direction direction)
+static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
+				  unsigned long offset, size_t sz,
+				  enum dma_data_direction direction)
 {
 	struct iommu *iommu;
 	struct strbuf *strbuf;
@@ -368,7 +369,7 @@ static dma_addr_t dma_4u_map_single(struct device *dev, void *ptr, size_t sz,
 	if (unlikely(direction == DMA_NONE))
 		goto bad_no_ctx;
 
-	oaddr = (unsigned long)ptr;
+	oaddr = (unsigned long)(page_address(page) + offset);
 	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
 	npages >>= IO_PAGE_SHIFT;
 
@@ -472,8 +473,8 @@ do_flush_sync:
 		       vaddr, ctx, npages);
 }
 
-static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr,
-				size_t sz, enum dma_data_direction direction)
+static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
+			      size_t sz, enum dma_data_direction direction)
 {
 	struct iommu *iommu;
 	struct strbuf *strbuf;
@@ -824,8 +825,8 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
 static const struct dma_ops sun4u_dma_ops = {
 	.alloc_coherent		= dma_4u_alloc_coherent,
 	.free_coherent		= dma_4u_free_coherent,
-	.map_single		= dma_4u_map_single,
-	.unmap_single		= dma_4u_unmap_single,
+	.map_page		= dma_4u_map_page,
+	.unmap_page		= dma_4u_unmap_page,
 	.map_sg			= dma_4u_map_sg,
 	.unmap_sg		= dma_4u_unmap_sg,
 	.sync_single_for_cpu	= dma_4u_sync_single_for_cpu,
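
The page/offset pair carries the same information as the old kernel-virtual
pointer: for lowmem, ptr == page_address(page) + offset.  A hedged sketch of
how a map_single-style caller could sit on top of the new hook (the compat
helper is hypothetical, not part of this file); the sun4v variant in
pci_sun4v.c below gets the identical conversion:

	static dma_addr_t dma_4u_map_single_compat(struct device *dev,
						   void *ptr, size_t sz,
						   enum dma_data_direction dir)
	{
		/* virt_to_page()/offset_in_page() invert the
		 * page_address(page) + offset computation above. */
		return dma_4u_map_page(dev, virt_to_page(ptr),
				       offset_in_page(ptr), sz, dir);
	}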

+ 4 - 25
arch/sparc/kernel/irq_64.c

@@ -45,6 +45,7 @@
 #include <asm/cacheflush.h>
 
 #include "entry.h"
+#include "cpumap.h"
 
 #define NUM_IVECS	(IMAP_INR + 1)
 
@@ -256,35 +257,13 @@ static int irq_choose_cpu(unsigned int virt_irq)
 	int cpuid;
 
 	cpumask_copy(&mask, irq_desc[virt_irq].affinity);
-	if (cpus_equal(mask, CPU_MASK_ALL)) {
-		static int irq_rover;
-		static DEFINE_SPINLOCK(irq_rover_lock);
-		unsigned long flags;
-
-		/* Round-robin distribution... */
-	do_round_robin:
-		spin_lock_irqsave(&irq_rover_lock, flags);
-
-		while (!cpu_online(irq_rover)) {
-			if (++irq_rover >= nr_cpu_ids)
-				irq_rover = 0;
-		}
-		cpuid = irq_rover;
-		do {
-			if (++irq_rover >= nr_cpu_ids)
-				irq_rover = 0;
-		} while (!cpu_online(irq_rover));
-
-		spin_unlock_irqrestore(&irq_rover_lock, flags);
+	if (cpus_equal(mask, cpu_online_map)) {
+		cpuid = map_to_cpu(virt_irq);
 	} else {
 		cpumask_t tmp;
 
 		cpus_and(tmp, cpu_online_map, mask);
-
-		if (cpus_empty(tmp))
-			goto do_round_robin;
-
-		cpuid = first_cpu(tmp);
+		cpuid = cpus_empty(tmp) ? map_to_cpu(virt_irq) : first_cpu(tmp);
 	}
 
 	return cpuid;

+ 88 - 61
arch/sparc/kernel/mdesc.c

@@ -574,7 +574,7 @@ static void __init report_platform_properties(void)
 	mdesc_release(hp);
 }
 
-static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
+static void __cpuinit fill_in_one_cache(cpuinfo_sparc *c,
 					struct mdesc_handle *hp,
 					u64 mp)
 {
@@ -619,8 +619,7 @@ static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
 	}
 }
 
-static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp,
-				    int core_id)
+static void __cpuinit mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id)
 {
 	u64 a;
 
@@ -653,7 +652,7 @@ static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp,
 	}
 }
 
-static void __devinit set_core_ids(struct mdesc_handle *hp)
+static void __cpuinit set_core_ids(struct mdesc_handle *hp)
 {
 	int idx;
 	u64 mp;
@@ -678,8 +677,7 @@ static void __devinit set_core_ids(struct mdesc_handle *hp)
 	}
 }
 
-static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp,
-				    int proc_id)
+static void __cpuinit mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
 {
 	u64 a;
 
@@ -698,8 +696,7 @@ static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp,
 	}
 }
 
-static void __devinit __set_proc_ids(struct mdesc_handle *hp,
-				     const char *exec_unit_name)
+static void __cpuinit __set_proc_ids(struct mdesc_handle *hp, const char *exec_unit_name)
 {
 	int idx;
 	u64 mp;
@@ -720,13 +717,13 @@ static void __devinit __set_proc_ids(struct mdesc_handle *hp,
 	}
 }
 
-static void __devinit set_proc_ids(struct mdesc_handle *hp)
+static void __cpuinit set_proc_ids(struct mdesc_handle *hp)
 {
 	__set_proc_ids(hp, "exec_unit");
 	__set_proc_ids(hp, "exec-unit");
 }
 
-static void __devinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
+static void __cpuinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
 					 unsigned char def)
 {
 	u64 val;
@@ -745,7 +742,7 @@ use_default:
 	*mask = ((1U << def) * 64U) - 1U;
 }
 
-static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
+static void __cpuinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
 				     struct trap_per_cpu *tb)
 {
 	const u64 *val;
@@ -763,23 +760,15 @@ static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
 	get_one_mondo_bits(val, &tb->nonresum_qmask, 2);
 }
 
-void __cpuinit mdesc_fill_in_cpu_data(cpumask_t mask)
+static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handle *, u64, int, void *), void *arg, cpumask_t *mask)
 {
 	struct mdesc_handle *hp = mdesc_grab();
+	void *ret = NULL;
 	u64 mp;
 
-	ncpus_probed = 0;
 	mdesc_for_each_node_by_name(hp, mp, "cpu") {
 		const u64 *id = mdesc_get_property(hp, mp, "id", NULL);
-		const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);
-		struct trap_per_cpu *tb;
-		cpuinfo_sparc *c;
-		int cpuid;
-		u64 a;
-
-		ncpus_probed++;
-
-		cpuid = *id;
+		int cpuid = *id;
 
 #ifdef CONFIG_SMP
 		if (cpuid >= NR_CPUS) {
@@ -788,62 +777,104 @@ void __cpuinit mdesc_fill_in_cpu_data(cpumask_t mask)
 			       cpuid, NR_CPUS);
 			continue;
 		}
-		if (!cpu_isset(cpuid, mask))
+		if (!cpu_isset(cpuid, *mask))
 			continue;
-#else
-		/* On uniprocessor we only want the values for the
-		 * real physical cpu the kernel booted onto, however
-		 * cpu_data() only has one entry at index 0.
-		 */
-		if (cpuid != real_hard_smp_processor_id())
-			continue;
-		cpuid = 0;
 #endif
 
-		c = &cpu_data(cpuid);
-		c->clock_tick = *cfreq;
+		ret = func(hp, mp, cpuid, arg);
+		if (ret)
+			goto out;
+	}
+out:
+	mdesc_release(hp);
+	return ret;
+}
 
-		tb = &trap_block[cpuid];
-		get_mondo_data(hp, mp, tb);
+static void * __cpuinit record_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)
+{
+	ncpus_probed++;
+#ifdef CONFIG_SMP
+	set_cpu_present(cpuid, true);
+#endif
+	return NULL;
+}
 
-		mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
-			u64 j, t = mdesc_arc_target(hp, a);
-			const char *t_name;
+void __cpuinit mdesc_populate_present_mask(cpumask_t *mask)
+{
+	if (tlb_type != hypervisor)
+		return;
 
-			t_name = mdesc_node_name(hp, t);
-			if (!strcmp(t_name, "cache")) {
-				fill_in_one_cache(c, hp, t);
-				continue;
-			}
+	ncpus_probed = 0;
+	mdesc_iterate_over_cpus(record_one_cpu, NULL, mask);
+}
 
-			mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) {
-				u64 n = mdesc_arc_target(hp, j);
-				const char *n_name;
+static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)
+{
+	const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);
+	struct trap_per_cpu *tb;
+	cpuinfo_sparc *c;
+	u64 a;
 
-				n_name = mdesc_node_name(hp, n);
-				if (!strcmp(n_name, "cache"))
-					fill_in_one_cache(c, hp, n);
-			}
+#ifndef CONFIG_SMP
+	/* On uniprocessor we only want the values for the
+	 * real physical cpu the kernel booted onto, however
+	 * cpu_data() only has one entry at index 0.
+	 */
+	if (cpuid != real_hard_smp_processor_id())
+		return NULL;
+	cpuid = 0;
+#endif
+
+	c = &cpu_data(cpuid);
+	c->clock_tick = *cfreq;
+
+	tb = &trap_block[cpuid];
+	get_mondo_data(hp, mp, tb);
+
+	mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+		u64 j, t = mdesc_arc_target(hp, a);
+		const char *t_name;
+
+		t_name = mdesc_node_name(hp, t);
+		if (!strcmp(t_name, "cache")) {
+			fill_in_one_cache(c, hp, t);
+			continue;
 		}
 
-#ifdef CONFIG_SMP
-		cpu_set(cpuid, cpu_present_map);
-#endif
+		mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) {
+			u64 n = mdesc_arc_target(hp, j);
+			const char *n_name;
 
-		c->core_id = 0;
-		c->proc_id = -1;
+			n_name = mdesc_node_name(hp, n);
+			if (!strcmp(n_name, "cache"))
+				fill_in_one_cache(c, hp, n);
+		}
 	}
 
+	c->core_id = 0;
+	c->proc_id = -1;
+
+	return NULL;
+}
+
+void __cpuinit mdesc_fill_in_cpu_data(cpumask_t *mask)
+{
+	struct mdesc_handle *hp;
+
+	mdesc_iterate_over_cpus(fill_in_one_cpu, NULL, mask);
+
 #ifdef CONFIG_SMP
 	sparc64_multi_core = 1;
 #endif
 
+	hp = mdesc_grab();
+
 	set_core_ids(hp);
 	set_proc_ids(hp);
 
-	smp_fill_in_sib_core_maps();
-
 	mdesc_release(hp);
+
+	smp_fill_in_sib_core_maps();
 }
 
 static ssize_t mdesc_read(struct file *file, char __user *buf,
@@ -887,7 +918,6 @@ void __init sun4v_mdesc_init(void)
 {
 	struct mdesc_handle *hp;
 	unsigned long len, real_len, status;
-	cpumask_t mask;
 
 	(void) sun4v_mach_desc(0UL, 0UL, &len);
 
@@ -911,7 +941,4 @@ void __init sun4v_mdesc_init(void)
 	cur_mdesc = hp;
 
 	report_platform_properties();
-
-	cpus_setall(mask);
-	mdesc_fill_in_cpu_data(mask);
 }
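
The refactor splits one large loop into mdesc_iterate_over_cpus() plus
small callbacks, so a new per-cpu pass over the machine description only
has to supply a callback.  A hypothetical example that counts the MD cpu
nodes falling inside a mask:

	static void * __cpuinit count_one_cpu(struct mdesc_handle *hp, u64 mp,
					      int cpuid, void *arg)
	{
		int *count = arg;

		(*count)++;
		return NULL;	/* NULL keeps the iteration going */
	}

	static int __cpuinit count_mdesc_cpus(cpumask_t *mask)
	{
		int count = 0;

		mdesc_iterate_over_cpus(count_one_cpu, &count, mask);
		return count;
	}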

+ 3 - 192
arch/sparc/kernel/of_device_32.c

@@ -6,159 +6,11 @@
 #include <linux/mod_devicetable.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
+#include <linux/irq.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
 
-static int node_match(struct device *dev, void *data)
-{
-	struct of_device *op = to_of_device(dev);
-	struct device_node *dp = data;
-
-	return (op->node == dp);
-}
-
-struct of_device *of_find_device_by_node(struct device_node *dp)
-{
-	struct device *dev = bus_find_device(&of_platform_bus_type, NULL,
-					     dp, node_match);
-
-	if (dev)
-		return to_of_device(dev);
-
-	return NULL;
-}
-EXPORT_SYMBOL(of_find_device_by_node);
-
-unsigned int irq_of_parse_and_map(struct device_node *node, int index)
-{
-	struct of_device *op = of_find_device_by_node(node);
-
-	if (!op || index >= op->num_irqs)
-		return 0;
-
-	return op->irqs[index];
-}
-EXPORT_SYMBOL(irq_of_parse_and_map);
-
-/* Take the archdata values for IOMMU, STC, and HOSTDATA found in
- * BUS and propagate to all child of_device objects.
- */
-void of_propagate_archdata(struct of_device *bus)
-{
-	struct dev_archdata *bus_sd = &bus->dev.archdata;
-	struct device_node *bus_dp = bus->node;
-	struct device_node *dp;
-
-	for (dp = bus_dp->child; dp; dp = dp->sibling) {
-		struct of_device *op = of_find_device_by_node(dp);
-
-		op->dev.archdata.iommu = bus_sd->iommu;
-		op->dev.archdata.stc = bus_sd->stc;
-		op->dev.archdata.host_controller = bus_sd->host_controller;
-		op->dev.archdata.numa_node = bus_sd->numa_node;
-
-		if (dp->child)
-			of_propagate_archdata(op);
-	}
-}
-
-struct bus_type of_platform_bus_type;
-EXPORT_SYMBOL(of_platform_bus_type);
-
-static inline u64 of_read_addr(const u32 *cell, int size)
-{
-	u64 r = 0;
-	while (size--)
-		r = (r << 32) | *(cell++);
-	return r;
-}
-
-static void __init get_cells(struct device_node *dp,
-			     int *addrc, int *sizec)
-{
-	if (addrc)
-		*addrc = of_n_addr_cells(dp);
-	if (sizec)
-		*sizec = of_n_size_cells(dp);
-}
-
-/* Max address size we deal with */
-#define OF_MAX_ADDR_CELLS	4
-
-struct of_bus {
-	const char	*name;
-	const char	*addr_prop_name;
-	int		(*match)(struct device_node *parent);
-	void		(*count_cells)(struct device_node *child,
-				       int *addrc, int *sizec);
-	int		(*map)(u32 *addr, const u32 *range,
-			       int na, int ns, int pna);
-	unsigned long	(*get_flags)(const u32 *addr, unsigned long);
-};
-
-/*
- * Default translator (generic bus)
- */
-
-static void of_bus_default_count_cells(struct device_node *dev,
-				       int *addrc, int *sizec)
-{
-	get_cells(dev, addrc, sizec);
-}
-
-/* Make sure the least significant 64-bits are in-range.  Even
- * for 3 or 4 cell values it is a good enough approximation.
- */
-static int of_out_of_range(const u32 *addr, const u32 *base,
-			   const u32 *size, int na, int ns)
-{
-	u64 a = of_read_addr(addr, na);
-	u64 b = of_read_addr(base, na);
-
-	if (a < b)
-		return 1;
-
-	b += of_read_addr(size, ns);
-	if (a >= b)
-		return 1;
-
-	return 0;
-}
-
-static int of_bus_default_map(u32 *addr, const u32 *range,
-			      int na, int ns, int pna)
-{
-	u32 result[OF_MAX_ADDR_CELLS];
-	int i;
-
-	if (ns > 2) {
-		printk("of_device: Cannot handle size cells (%d) > 2.", ns);
-		return -EINVAL;
-	}
-
-	if (of_out_of_range(addr, range, range + na + pna, na, ns))
-		return -EINVAL;
-
-	/* Start with the parent range base.  */
-	memcpy(result, range + na, pna * 4);
-
-	/* Add in the child address offset.  */
-	for (i = 0; i < na; i++)
-		result[pna - 1 - i] +=
-			(addr[na - 1 - i] -
-			 range[na - 1 - i]);
-
-	memcpy(addr, result, pna * 4);
-
-	return 0;
-}
-
-static unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags)
-{
-	if (flags)
-		return flags;
-	return IORESOURCE_MEM;
-}
+#include "of_device_common.h"
 
 /*
  * PCI bus specific translator
@@ -240,47 +92,6 @@ static unsigned long of_bus_pci_get_flags(const u32 *addr, unsigned long flags)
 	return flags;
 }
 
-/*
- * SBUS bus specific translator
- */
-
-static int of_bus_sbus_match(struct device_node *np)
-{
-	struct device_node *dp = np;
-
-	while (dp) {
-		if (!strcmp(dp->name, "sbus") ||
-		    !strcmp(dp->name, "sbi"))
-			return 1;
-
-		/* Have a look at use_1to1_mapping().  We're trying
-		 * to match SBUS if that's the top-level bus and we
-		 * don't have some intervening real bus that provides
-		 * ranges based translations.
-		 */
-		if (of_find_property(dp, "ranges", NULL) != NULL)
-			break;
-
-		dp = dp->parent;
-	}
-
-	return 0;
-}
-
-static void of_bus_sbus_count_cells(struct device_node *child,
-				   int *addrc, int *sizec)
-{
-	if (addrc)
-		*addrc = 2;
-	if (sizec)
-		*sizec = 1;
-}
-
-static int of_bus_sbus_map(u32 *addr, const u32 *range, int na, int ns, int pna)
-{
-	return of_bus_default_map(addr, range, na, ns, pna);
-}
-
 static unsigned long of_bus_sbus_get_flags(const u32 *addr, unsigned long flags)
 {
 	return IORESOURCE_MEM;
@@ -307,7 +118,7 @@ static struct of_bus of_busses[] = {
 		.addr_prop_name = "reg",
 		.match = of_bus_sbus_match,
 		.count_cells = of_bus_sbus_count_cells,
-		.map = of_bus_sbus_map,
+		.map = of_bus_default_map,
 		.get_flags = of_bus_sbus_get_flags,
 	},
 	/* Default */

+ 2 - 186
arch/sparc/kernel/of_device_64.c

@@ -10,6 +10,8 @@
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
 
+#include "of_device_common.h"
+
 void __iomem *of_ioremap(struct resource *res, unsigned long offset, unsigned long size, char *name)
 {
 	unsigned long ret = res->start + offset;
@@ -35,156 +37,6 @@ void of_iounmap(struct resource *res, void __iomem *base, unsigned long size)
 }
 EXPORT_SYMBOL(of_iounmap);
 
-static int node_match(struct device *dev, void *data)
-{
-	struct of_device *op = to_of_device(dev);
-	struct device_node *dp = data;
-
-	return (op->node == dp);
-}
-
-struct of_device *of_find_device_by_node(struct device_node *dp)
-{
-	struct device *dev = bus_find_device(&of_platform_bus_type, NULL,
-					     dp, node_match);
-
-	if (dev)
-		return to_of_device(dev);
-
-	return NULL;
-}
-EXPORT_SYMBOL(of_find_device_by_node);
-
-unsigned int irq_of_parse_and_map(struct device_node *node, int index)
-{
-	struct of_device *op = of_find_device_by_node(node);
-
-	if (!op || index >= op->num_irqs)
-		return 0;
-
-	return op->irqs[index];
-}
-EXPORT_SYMBOL(irq_of_parse_and_map);
-
-/* Take the archdata values for IOMMU, STC, and HOSTDATA found in
- * BUS and propagate to all child of_device objects.
- */
-void of_propagate_archdata(struct of_device *bus)
-{
-	struct dev_archdata *bus_sd = &bus->dev.archdata;
-	struct device_node *bus_dp = bus->node;
-	struct device_node *dp;
-
-	for (dp = bus_dp->child; dp; dp = dp->sibling) {
-		struct of_device *op = of_find_device_by_node(dp);
-
-		op->dev.archdata.iommu = bus_sd->iommu;
-		op->dev.archdata.stc = bus_sd->stc;
-		op->dev.archdata.host_controller = bus_sd->host_controller;
-		op->dev.archdata.numa_node = bus_sd->numa_node;
-
-		if (dp->child)
-			of_propagate_archdata(op);
-	}
-}
-
-struct bus_type of_platform_bus_type;
-EXPORT_SYMBOL(of_platform_bus_type);
-
-static inline u64 of_read_addr(const u32 *cell, int size)
-{
-	u64 r = 0;
-	while (size--)
-		r = (r << 32) | *(cell++);
-	return r;
-}
-
-static void get_cells(struct device_node *dp, int *addrc, int *sizec)
-{
-	if (addrc)
-		*addrc = of_n_addr_cells(dp);
-	if (sizec)
-		*sizec = of_n_size_cells(dp);
-}
-
-/* Max address size we deal with */
-#define OF_MAX_ADDR_CELLS	4
-
-struct of_bus {
-	const char	*name;
-	const char	*addr_prop_name;
-	int		(*match)(struct device_node *parent);
-	void		(*count_cells)(struct device_node *child,
-				       int *addrc, int *sizec);
-	int		(*map)(u32 *addr, const u32 *range,
-			       int na, int ns, int pna);
-	unsigned long	(*get_flags)(const u32 *addr, unsigned long);
-};
-
-/*
- * Default translator (generic bus)
- */
-
-static void of_bus_default_count_cells(struct device_node *dev,
-				       int *addrc, int *sizec)
-{
-	get_cells(dev, addrc, sizec);
-}
-
-/* Make sure the least significant 64-bits are in-range.  Even
- * for 3 or 4 cell values it is a good enough approximation.
- */
-static int of_out_of_range(const u32 *addr, const u32 *base,
-			   const u32 *size, int na, int ns)
-{
-	u64 a = of_read_addr(addr, na);
-	u64 b = of_read_addr(base, na);
-
-	if (a < b)
-		return 1;
-
-	b += of_read_addr(size, ns);
-	if (a >= b)
-		return 1;
-
-	return 0;
-}
-
-static int of_bus_default_map(u32 *addr, const u32 *range,
-			      int na, int ns, int pna)
-{
-	u32 result[OF_MAX_ADDR_CELLS];
-	int i;
-
-	if (ns > 2) {
-		printk("of_device: Cannot handle size cells (%d) > 2.", ns);
-		return -EINVAL;
-	}
-
-	if (of_out_of_range(addr, range, range + na + pna, na, ns))
-		return -EINVAL;
-
-	/* Start with the parent range base.  */
-	memcpy(result, range + na, pna * 4);
-
-	/* Add in the child address offset.  */
-	for (i = 0; i < na; i++)
-		result[pna - 1 - i] +=
-			(addr[na - 1 - i] -
-			 range[na - 1 - i]);
-
-	memcpy(addr, result, pna * 4);
-
-	return 0;
-}
-
-static unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags)
-{
-	if (flags)
-		return flags;
-	return IORESOURCE_MEM;
-}
-
 /*
  * PCI bus specific translator
  */
@@ -294,42 +146,6 @@ static unsigned long of_bus_pci_get_flags(const u32 *addr, unsigned long flags)
 	return flags;
 }
 
-/*
- * SBUS bus specific translator
- */
-
-static int of_bus_sbus_match(struct device_node *np)
-{
-	struct device_node *dp = np;
-
-	while (dp) {
-		if (!strcmp(dp->name, "sbus") ||
-		    !strcmp(dp->name, "sbi"))
-			return 1;
-
-		/* Have a look at use_1to1_mapping().  We're trying
-		 * to match SBUS if that's the top-level bus and we
-		 * don't have some intervening real bus that provides
-		 * ranges based translations.
-		 */
-		if (of_find_property(dp, "ranges", NULL) != NULL)
-			break;
-
-		dp = dp->parent;
-	}
-
-	return 0;
-}
-
-static void of_bus_sbus_count_cells(struct device_node *child,
-				   int *addrc, int *sizec)
-{
-	if (addrc)
-		*addrc = 2;
-	if (sizec)
-		*sizec = 1;
-}
-
 /*
  * FHC/Central bus specific translator.
  *

+ 174 - 0
arch/sparc/kernel/of_device_common.c

@@ -0,0 +1,174 @@
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/irq.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+
+#include "of_device_common.h"
+
+static int node_match(struct device *dev, void *data)
+{
+	struct of_device *op = to_of_device(dev);
+	struct device_node *dp = data;
+
+	return (op->node == dp);
+}
+
+struct of_device *of_find_device_by_node(struct device_node *dp)
+{
+	struct device *dev = bus_find_device(&of_platform_bus_type, NULL,
+					     dp, node_match);
+
+	if (dev)
+		return to_of_device(dev);
+
+	return NULL;
+}
+EXPORT_SYMBOL(of_find_device_by_node);
+
+unsigned int irq_of_parse_and_map(struct device_node *node, int index)
+{
+	struct of_device *op = of_find_device_by_node(node);
+
+	if (!op || index >= op->num_irqs)
+		return 0;
+
+	return op->irqs[index];
+}
+EXPORT_SYMBOL(irq_of_parse_and_map);
+
+/* Take the archdata values for IOMMU, STC, and HOSTDATA found in
+ * BUS and propagate to all child of_device objects.
+ */
+void of_propagate_archdata(struct of_device *bus)
+{
+	struct dev_archdata *bus_sd = &bus->dev.archdata;
+	struct device_node *bus_dp = bus->node;
+	struct device_node *dp;
+
+	for (dp = bus_dp->child; dp; dp = dp->sibling) {
+		struct of_device *op = of_find_device_by_node(dp);
+
+		op->dev.archdata.iommu = bus_sd->iommu;
+		op->dev.archdata.stc = bus_sd->stc;
+		op->dev.archdata.host_controller = bus_sd->host_controller;
+		op->dev.archdata.numa_node = bus_sd->numa_node;
+
+		if (dp->child)
+			of_propagate_archdata(op);
+	}
+}
+
+struct bus_type of_platform_bus_type;
+EXPORT_SYMBOL(of_platform_bus_type);
+
+static void get_cells(struct device_node *dp, int *addrc, int *sizec)
+{
+	if (addrc)
+		*addrc = of_n_addr_cells(dp);
+	if (sizec)
+		*sizec = of_n_size_cells(dp);
+}
+
+/*
+ * Default translator (generic bus)
+ */
+
+void of_bus_default_count_cells(struct device_node *dev, int *addrc, int *sizec)
+{
+	get_cells(dev, addrc, sizec);
+}
+
+/* Make sure the least significant 64-bits are in-range.  Even
+ * for 3 or 4 cell values it is a good enough approximation.
+ */
+int of_out_of_range(const u32 *addr, const u32 *base,
+		    const u32 *size, int na, int ns)
+{
+	u64 a = of_read_addr(addr, na);
+	u64 b = of_read_addr(base, na);
+
+	if (a < b)
+		return 1;
+
+	b += of_read_addr(size, ns);
+	if (a >= b)
+		return 1;
+
+	return 0;
+}
+
+int of_bus_default_map(u32 *addr, const u32 *range, int na, int ns, int pna)
+{
+	u32 result[OF_MAX_ADDR_CELLS];
+	int i;
+
+	if (ns > 2) {
+		printk("of_device: Cannot handle size cells (%d) > 2.", ns);
+		return -EINVAL;
+	}
+
+	if (of_out_of_range(addr, range, range + na + pna, na, ns))
+		return -EINVAL;
+
+	/* Start with the parent range base.  */
+	memcpy(result, range + na, pna * 4);
+
+	/* Add in the child address offset.  */
+	for (i = 0; i < na; i++)
+		result[pna - 1 - i] +=
+			(addr[na - 1 - i] -
+			 range[na - 1 - i]);
+
+	memcpy(addr, result, pna * 4);
+
+	return 0;
+}
+
+unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags)
+{
+	if (flags)
+		return flags;
+	return IORESOURCE_MEM;
+}
+
+/*
+ * SBUS bus specific translator
+ */
+
+int of_bus_sbus_match(struct device_node *np)
+{
+	struct device_node *dp = np;
+
+	while (dp) {
+		if (!strcmp(dp->name, "sbus") ||
+		    !strcmp(dp->name, "sbi"))
+			return 1;
+
+		/* Have a look at use_1to1_mapping().  We're trying
+		 * to match SBUS if that's the top-level bus and we
+		 * don't have some intervening real bus that provides
+		 * ranges based translations.
+		 */
+		if (of_find_property(dp, "ranges", NULL) != NULL)
+			break;
+
+		dp = dp->parent;
+	}
+
+	return 0;
+}
+
+void of_bus_sbus_count_cells(struct device_node *child, int *addrc, int *sizec)
+{
+	if (addrc)
+		*addrc = 2;
+	if (sizec)
+		*sizec = 1;
+}
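
of_read_addr() (defined in of_device_common.h below) folds an array of
32-bit cells into one u64, most significant cell first, which is what the
range check and mapping above rely on.  A worked example with made-up
cell values:

	/* cell[] = { 0x00000001, 0x80000000 }, size = 2
	 *
	 *   r = (0 << 32) | 0x00000001   ->  0x0000000000000001
	 *   r = (r << 32) | 0x80000000   ->  0x0000000180000000
	 *
	 * so the two-cell value { 0x1, 0x80000000 } denotes the 64-bit
	 * address 0x0000000180000000. */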

+ 36 - 0
arch/sparc/kernel/of_device_common.h

@@ -0,0 +1,36 @@
+#ifndef _OF_DEVICE_COMMON_H
+#define _OF_DEVICE_COMMON_H
+
+static inline u64 of_read_addr(const u32 *cell, int size)
+{
+	u64 r = 0;
+	while (size--)
+		r = (r << 32) | *(cell++);
+	return r;
+}
+
+void of_bus_default_count_cells(struct device_node *dev, int *addrc,
+				int *sizec);
+int of_out_of_range(const u32 *addr, const u32 *base,
+		    const u32 *size, int na, int ns);
+int of_bus_default_map(u32 *addr, const u32 *range, int na, int ns, int pna);
+unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags);
+
+int of_bus_sbus_match(struct device_node *np);
+void of_bus_sbus_count_cells(struct device_node *child, int *addrc, int *sizec);
+
+/* Max address size we deal with */
+#define OF_MAX_ADDR_CELLS	4
+
+struct of_bus {
+	const char	*name;
+	const char	*addr_prop_name;
+	int		(*match)(struct device_node *parent);
+	void		(*count_cells)(struct device_node *child,
+				       int *addrc, int *sizec);
+	int		(*map)(u32 *addr, const u32 *range,
+			       int na, int ns, int pna);
+	unsigned long	(*get_flags)(const u32 *addr, unsigned long);
+};
+
+#endif /* _OF_DEVICE_COMMON_H */

+ 8 - 7
arch/sparc/kernel/pci_sun4v.c

@@ -230,8 +230,9 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
 		free_pages((unsigned long)cpu, order);
 }
 
-static dma_addr_t dma_4v_map_single(struct device *dev, void *ptr, size_t sz,
-				    enum dma_data_direction direction)
+static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
+				  unsigned long offset, size_t sz,
+				  enum dma_data_direction direction)
 {
 	struct iommu *iommu;
 	unsigned long flags, npages, oaddr;
@@ -245,7 +246,7 @@ static dma_addr_t dma_4v_map_single(struct device *dev, void *ptr, size_t sz,
 	if (unlikely(direction == DMA_NONE))
 		goto bad;
 
-	oaddr = (unsigned long)ptr;
+	oaddr = (unsigned long)(page_address(page) + offset);
 	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
 	npages >>= IO_PAGE_SHIFT;
 
@@ -294,8 +295,8 @@ iommu_map_fail:
 	return DMA_ERROR_CODE;
 }
 
-static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr,
-				size_t sz, enum dma_data_direction direction)
+static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
+			      size_t sz, enum dma_data_direction direction)
 {
 	struct pci_pbm_info *pbm;
 	struct iommu *iommu;
@@ -537,8 +538,8 @@ static void dma_4v_sync_sg_for_cpu(struct device *dev,
 static const struct dma_ops sun4v_dma_ops = {
 	.alloc_coherent			= dma_4v_alloc_coherent,
 	.free_coherent			= dma_4v_free_coherent,
-	.map_single			= dma_4v_map_single,
-	.unmap_single			= dma_4v_unmap_single,
+	.map_page			= dma_4v_map_page,
+	.unmap_page			= dma_4v_unmap_page,
 	.map_sg				= dma_4v_map_sg,
 	.unmap_sg			= dma_4v_unmap_sg,
 	.sync_single_for_cpu		= dma_4v_sync_single_for_cpu,

+ 0 - 1
arch/sparc/kernel/prom.h

@@ -22,7 +22,6 @@ static inline int is_root_node(const struct device_node *dp)
 
 extern char *build_path_component(struct device_node *dp);
 extern void of_console_init(void);
-extern void of_fill_in_cpu_data(void);
 
 extern unsigned int prom_early_allocated;
 

+ 123 - 109
arch/sparc/kernel/prom_64.c

@@ -374,75 +374,26 @@ static const char *get_mid_prop(void)
 	return (tlb_type == spitfire ? "upa-portid" : "portid");
 }
 
-struct device_node *of_find_node_by_cpuid(int cpuid)
-{
-	struct device_node *dp;
-	const char *mid_prop = get_mid_prop();
-
-	for_each_node_by_type(dp, "cpu") {
-		int id = of_getintprop_default(dp, mid_prop, -1);
-		const char *this_mid_prop = mid_prop;
-
-		if (id < 0) {
-			this_mid_prop = "cpuid";
-			id = of_getintprop_default(dp, this_mid_prop, -1);
-		}
-
-		if (id < 0) {
-			prom_printf("OF: Serious problem, cpu lacks "
-				    "%s property", this_mid_prop);
-			prom_halt();
-		}
-		if (cpuid == id)
-			return dp;
-	}
-	return NULL;
-}
-
-void __init of_fill_in_cpu_data(void)
+static void *of_iterate_over_cpus(void *(*func)(struct device_node *, int, int), int arg)
 {
 	struct device_node *dp;
 	const char *mid_prop;
 
-	if (tlb_type == hypervisor)
-		return;
-
 	mid_prop = get_mid_prop();
-	ncpus_probed = 0;
 	for_each_node_by_type(dp, "cpu") {
 		int cpuid = of_getintprop_default(dp, mid_prop, -1);
 		const char *this_mid_prop = mid_prop;
-		struct device_node *portid_parent;
-		int portid = -1;
+		void *ret;
 
-		portid_parent = NULL;
 		if (cpuid < 0) {
 			this_mid_prop = "cpuid";
 			cpuid = of_getintprop_default(dp, this_mid_prop, -1);
-			if (cpuid >= 0) {
-				int limit = 2;
-
-				portid_parent = dp;
-				while (limit--) {
-					portid_parent = portid_parent->parent;
-					if (!portid_parent)
-						break;
-					portid = of_getintprop_default(portid_parent,
-								       "portid", -1);
-					if (portid >= 0)
-						break;
-				}
-			}
 		}
-
 		if (cpuid < 0) {
 			prom_printf("OF: Serious problem, cpu lacks "
 				    "%s property", this_mid_prop);
 			prom_halt();
 		}
-
-		ncpus_probed++;
-
 #ifdef CONFIG_SMP
 		if (cpuid >= NR_CPUS) {
 			printk(KERN_WARNING "Ignoring CPU %d which is "
@@ -450,79 +401,142 @@ void __init of_fill_in_cpu_data(void)
 			       cpuid, NR_CPUS);
 			continue;
 		}
-#else
-		/* On uniprocessor we only want the values for the
-		 * real physical cpu the kernel booted onto, however
-		 * cpu_data() only has one entry at index 0.
-		 */
-		if (cpuid != real_hard_smp_processor_id())
-			continue;
-		cpuid = 0;
 #endif
+		ret = func(dp, cpuid, arg);
+		if (ret)
+			return ret;
+	}
+	return NULL;
+}
 
-		cpu_data(cpuid).clock_tick =
-			of_getintprop_default(dp, "clock-frequency", 0);
-
-		if (portid_parent) {
-			cpu_data(cpuid).dcache_size =
-				of_getintprop_default(dp, "l1-dcache-size",
-						      16 * 1024);
-			cpu_data(cpuid).dcache_line_size =
-				of_getintprop_default(dp, "l1-dcache-line-size",
-						      32);
-			cpu_data(cpuid).icache_size =
-				of_getintprop_default(dp, "l1-icache-size",
-						      8 * 1024);
-			cpu_data(cpuid).icache_line_size =
-				of_getintprop_default(dp, "l1-icache-line-size",
-						      32);
-			cpu_data(cpuid).ecache_size =
-				of_getintprop_default(dp, "l2-cache-size", 0);
-			cpu_data(cpuid).ecache_line_size =
-				of_getintprop_default(dp, "l2-cache-line-size", 0);
-			if (!cpu_data(cpuid).ecache_size ||
-			    !cpu_data(cpuid).ecache_line_size) {
-				cpu_data(cpuid).ecache_size =
-					of_getintprop_default(portid_parent,
-							      "l2-cache-size",
-							      (4 * 1024 * 1024));
-				cpu_data(cpuid).ecache_line_size =
-					of_getintprop_default(portid_parent,
-							      "l2-cache-line-size", 64);
-			}
-
-			cpu_data(cpuid).core_id = portid + 1;
-			cpu_data(cpuid).proc_id = portid;
+static void *check_cpu_node(struct device_node *dp, int cpuid, int id)
+{
+	if (id == cpuid)
+		return dp;
+	return NULL;
+}
+
+struct device_node *of_find_node_by_cpuid(int cpuid)
+{
+	return of_iterate_over_cpus(check_cpu_node, cpuid);
+}
+
+static void *record_one_cpu(struct device_node *dp, int cpuid, int arg)
+{
+	ncpus_probed++;
 #ifdef CONFIG_SMP
-			sparc64_multi_core = 1;
+	set_cpu_present(cpuid, true);
+	set_cpu_possible(cpuid, true);
 #endif
-		} else {
-			cpu_data(cpuid).dcache_size =
-				of_getintprop_default(dp, "dcache-size", 16 * 1024);
-			cpu_data(cpuid).dcache_line_size =
-				of_getintprop_default(dp, "dcache-line-size", 32);
+	return NULL;
+}
 
-			cpu_data(cpuid).icache_size =
-				of_getintprop_default(dp, "icache-size", 16 * 1024);
-			cpu_data(cpuid).icache_line_size =
-				of_getintprop_default(dp, "icache-line-size", 32);
+void __init of_populate_present_mask(void)
+{
+	if (tlb_type == hypervisor)
+		return;
+
+	ncpus_probed = 0;
+	of_iterate_over_cpus(record_one_cpu, 0);
+}
 
+static void *fill_in_one_cpu(struct device_node *dp, int cpuid, int arg)
+{
+	struct device_node *portid_parent = NULL;
+	int portid = -1;
+
+	if (of_find_property(dp, "cpuid", NULL)) {
+		int limit = 2;
+
+		portid_parent = dp;
+		while (limit--) {
+			portid_parent = portid_parent->parent;
+			if (!portid_parent)
+				break;
+			portid = of_getintprop_default(portid_parent,
+						       "portid", -1);
+			if (portid >= 0)
+				break;
+		}
+	}
+
+#ifndef CONFIG_SMP
+	/* On uniprocessor we only want the values for the
+	 * real physical cpu the kernel booted onto, however
+	 * cpu_data() only has one entry at index 0.
+	 */
+	if (cpuid != real_hard_smp_processor_id())
+		return NULL;
+	cpuid = 0;
+#endif
+
+	cpu_data(cpuid).clock_tick =
+		of_getintprop_default(dp, "clock-frequency", 0);
+
+	if (portid_parent) {
+		cpu_data(cpuid).dcache_size =
+			of_getintprop_default(dp, "l1-dcache-size",
+					      16 * 1024);
+		cpu_data(cpuid).dcache_line_size =
+			of_getintprop_default(dp, "l1-dcache-line-size",
+					      32);
+		cpu_data(cpuid).icache_size =
+			of_getintprop_default(dp, "l1-icache-size",
+					      8 * 1024);
+		cpu_data(cpuid).icache_line_size =
+			of_getintprop_default(dp, "l1-icache-line-size",
+					      32);
+		cpu_data(cpuid).ecache_size =
+			of_getintprop_default(dp, "l2-cache-size", 0);
+		cpu_data(cpuid).ecache_line_size =
+			of_getintprop_default(dp, "l2-cache-line-size", 0);
+		if (!cpu_data(cpuid).ecache_size ||
+		    !cpu_data(cpuid).ecache_line_size) {
 			cpu_data(cpuid).ecache_size =
-				of_getintprop_default(dp, "ecache-size",
+				of_getintprop_default(portid_parent,
+						      "l2-cache-size",
 						      (4 * 1024 * 1024));
 			cpu_data(cpuid).ecache_line_size =
-				of_getintprop_default(dp, "ecache-line-size", 64);
-
-			cpu_data(cpuid).core_id = 0;
-			cpu_data(cpuid).proc_id = -1;
+				of_getintprop_default(portid_parent,
+						      "l2-cache-line-size", 64);
 		}
 
+		cpu_data(cpuid).core_id = portid + 1;
+		cpu_data(cpuid).proc_id = portid;
 #ifdef CONFIG_SMP
-		set_cpu_present(cpuid, true);
-		set_cpu_possible(cpuid, true);
+		sparc64_multi_core = 1;
 #endif
+	} else {
+		cpu_data(cpuid).dcache_size =
+			of_getintprop_default(dp, "dcache-size", 16 * 1024);
+		cpu_data(cpuid).dcache_line_size =
+			of_getintprop_default(dp, "dcache-line-size", 32);
+
+		cpu_data(cpuid).icache_size =
+			of_getintprop_default(dp, "icache-size", 16 * 1024);
+		cpu_data(cpuid).icache_line_size =
+			of_getintprop_default(dp, "icache-line-size", 32);
+
+		cpu_data(cpuid).ecache_size =
+			of_getintprop_default(dp, "ecache-size",
+					      (4 * 1024 * 1024));
+		cpu_data(cpuid).ecache_line_size =
+			of_getintprop_default(dp, "ecache-line-size", 64);
+
+		cpu_data(cpuid).core_id = 0;
+		cpu_data(cpuid).proc_id = -1;
 	}
 
+	return NULL;
+}
+
+void __init of_fill_in_cpu_data(void)
+{
+	if (tlb_type == hypervisor)
+		return;
+
+	of_iterate_over_cpus(fill_in_one_cpu, 0);
+
 	smp_fill_in_sib_core_maps();
 }
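
The iterator's convention is that a non-NULL callback return value stops
the walk and becomes of_iterate_over_cpus()'s result, which is how
of_find_node_by_cpuid() shrinks to a two-line callback.  A hypothetical
further use, locating the boot cpu's device node:

	static void *match_boot_cpu(struct device_node *dp, int cpuid, int arg)
	{
		return cpuid == arg ? dp : NULL;
	}

	static struct device_node *boot_cpu_node(void)
	{
		return of_iterate_over_cpus(match_boot_cpu,
					    real_hard_smp_processor_id());
	}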
 

+ 0 - 2
arch/sparc/kernel/prom_common.c

@@ -313,6 +313,4 @@ void __init prom_build_devicetree(void)
 
 	printk("PROM: Built device tree with %u bytes of memory.\n",
 	       prom_early_allocated);
-
-	of_fill_in_cpu_data();
 }

+ 167 - 29
arch/sparc/kernel/smp_64.c

@@ -20,7 +20,8 @@
 #include <linux/cache.h>
 #include <linux/jiffies.h>
 #include <linux/profile.h>
-#include <linux/lmb.h>
+#include <linux/bootmem.h>
+#include <linux/vmalloc.h>
 #include <linux/cpu.h>
 
 #include <asm/head.h>
@@ -47,6 +48,8 @@
 #include <asm/ldc.h>
 #include <asm/hypervisor.h>
 
+#include "cpumap.h"
+
 int sparc64_multi_core __read_mostly;
 
 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
@@ -278,7 +281,7 @@ static unsigned long kimage_addr_to_ra(void *p)
 	return kern_base + (val - KERNBASE);
 }
 
-static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
+static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg, void **descrp)
 {
 	extern unsigned long sparc64_ttable_tl0;
 	extern unsigned long kern_locked_tte_data;
@@ -298,12 +301,12 @@ static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread
 		       "hvtramp_descr.\n");
 		return;
 	}
+	*descrp = hdesc;
 
 	hdesc->cpu = cpu;
 	hdesc->num_mappings = num_kernel_image_mappings;
 
 	tb = &trap_block[cpu];
-	tb->hdesc = hdesc;
 
 	hdesc->fault_info_va = (unsigned long) &tb->fault_info;
 	hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
@@ -341,12 +344,12 @@ static struct thread_info *cpu_new_thread = NULL;
 
 static int __cpuinit smp_boot_one_cpu(unsigned int cpu)
 {
-	struct trap_per_cpu *tb = &trap_block[cpu];
 	unsigned long entry =
 		(unsigned long)(&sparc64_cpu_startup);
 	unsigned long cookie =
 		(unsigned long)(&cpu_new_thread);
 	struct task_struct *p;
+	void *descr = NULL;
 	int timeout, ret;
 
 	p = fork_idle(cpu);
@@ -359,7 +362,8 @@ static int __cpuinit smp_boot_one_cpu(unsigned int cpu)
 #if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
 		if (ldom_domaining_enabled)
 			ldom_startcpu_cpuid(cpu,
-					    (unsigned long) cpu_new_thread);
+					    (unsigned long) cpu_new_thread,
+					    &descr);
 		else
 #endif
 			prom_startcpu_cpuid(cpu, entry, cookie);
@@ -383,10 +387,7 @@ static int __cpuinit smp_boot_one_cpu(unsigned int cpu)
 	}
 	cpu_new_thread = NULL;
 
-	if (tb->hdesc) {
-		kfree(tb->hdesc);
-		tb->hdesc = NULL;
-	}
+	kfree(descr);
 
 	return ret;
 }
@@ -1315,6 +1316,8 @@ int __cpu_disable(void)
 	cpu_clear(cpu, cpu_online_map);
 	ipi_call_unlock();
 
+	cpu_map_rebuild();
+
 	return 0;
 }
 
@@ -1373,36 +1376,171 @@ void smp_send_stop(void)
 {
 }
 
-unsigned long __per_cpu_base __read_mostly;
-unsigned long __per_cpu_shift __read_mostly;
+/**
+ * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
+ * @cpu: cpu to allocate for
+ * @size: size allocation in bytes
+ * @align: alignment
+ *
+ * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
+ * does the right thing for NUMA regardless of the current
+ * configuration.
+ *
+ * RETURNS:
+ * Pointer to the allocated area on success, NULL on failure.
+ */
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
+					unsigned long align)
+{
+	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	int node = cpu_to_node(cpu);
+	void *ptr;
+
+	if (!node_online(node) || !NODE_DATA(node)) {
+		ptr = __alloc_bootmem(size, align, goal);
+		pr_info("cpu %d has no node %d or node-local memory\n",
+			cpu, node);
+		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
+			 cpu, size, __pa(ptr));
+	} else {
+		ptr = __alloc_bootmem_node(NODE_DATA(node),
+					   size, align, goal);
+		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
+			 "%016lx\n", cpu, size, node, __pa(ptr));
+	}
+	return ptr;
+#else
+	return __alloc_bootmem(size, align, goal);
+#endif
+}
 
-EXPORT_SYMBOL(__per_cpu_base);
-EXPORT_SYMBOL(__per_cpu_shift);
+static size_t pcpur_size __initdata;
+static void **pcpur_ptrs __initdata;
 
-void __init real_setup_per_cpu_areas(void)
+static struct page * __init pcpur_get_page(unsigned int cpu, int pageno)
 {
-	unsigned long paddr, goal, size, i;
-	char *ptr;
+	size_t off = (size_t)pageno << PAGE_SHIFT;
 
-	/* Copy section for each CPU (we discard the original) */
-	goal = PERCPU_ENOUGH_ROOM;
+	if (off >= pcpur_size)
+		return NULL;
 
-	__per_cpu_shift = PAGE_SHIFT;
-	for (size = PAGE_SIZE; size < goal; size <<= 1UL)
-		__per_cpu_shift++;
+	return virt_to_page(pcpur_ptrs[cpu] + off);
+}
+
+#define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL)
+
+static void __init pcpu_map_range(unsigned long start, unsigned long end,
+				  struct page *page)
+{
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long pte_base;
+
+	BUG_ON((pfn<<PAGE_SHIFT)&(PCPU_CHUNK_SIZE - 1UL));
+
+	pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U |
+		    _PAGE_CP_4U | _PAGE_CV_4U |
+		    _PAGE_P_4U | _PAGE_W_4U);
+	if (tlb_type == hypervisor)
+		pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V |
+			    _PAGE_CP_4V | _PAGE_CV_4V |
+			    _PAGE_P_4V | _PAGE_W_4V);
+
+	while (start < end) {
+		pgd_t *pgd = pgd_offset_k(start);
+		unsigned long this_end;
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *pte;
+
+		pud = pud_offset(pgd, start);
+		if (pud_none(*pud)) {
+			pmd_t *new;
+
+			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+			pud_populate(&init_mm, pud, new);
+		}
+
+		pmd = pmd_offset(pud, start);
+		if (!pmd_present(*pmd)) {
+			pte_t *new;
+
+			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+			pmd_populate_kernel(&init_mm, pmd, new);
+		}
 
-	paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE);
-	if (!paddr) {
-		prom_printf("Cannot allocate per-cpu memory.\n");
-		prom_halt();
+		pte = pte_offset_kernel(pmd, start);
+		this_end = (start + PMD_SIZE) & PMD_MASK;
+		if (this_end > end)
+			this_end = end;
+
+		while (start < this_end) {
+			unsigned long paddr = pfn << PAGE_SHIFT;
+
+			pte_val(*pte) = (paddr | pte_base);
+
+			start += PAGE_SIZE;
+			pte++;
+			pfn++;
+		}
+	}
+}
+
+void __init setup_per_cpu_areas(void)
+{
+	size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start;
+	static struct vm_struct vm;
+	unsigned long delta, cpu;
+	size_t pcpu_unit_size;
+	size_t ptrs_size;
+
+	pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
+			       PERCPU_DYNAMIC_RESERVE);
+	dyn_size = pcpur_size - static_size - PERCPU_MODULE_RESERVE;
+
+
+	ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0]));
+	pcpur_ptrs = alloc_bootmem(ptrs_size);
+
+	for_each_possible_cpu(cpu) {
+		pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE,
+						     PCPU_CHUNK_SIZE);
+
+		free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size),
+			     PCPU_CHUNK_SIZE - pcpur_size);
+
+		memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size);
 	}
 
-	ptr = __va(paddr);
-	__per_cpu_base = ptr - __per_cpu_start;
+	/* allocate address and map */
+	vm.flags = VM_ALLOC;
+	vm.size = num_possible_cpus() * PCPU_CHUNK_SIZE;
+	vm_area_register_early(&vm, PCPU_CHUNK_SIZE);
+
+	for_each_possible_cpu(cpu) {
+		unsigned long start = (unsigned long) vm.addr;
+		unsigned long end;
+
+		start += cpu * PCPU_CHUNK_SIZE;
+		end = start + PCPU_CHUNK_SIZE;
+		pcpu_map_range(start, end, virt_to_page(pcpur_ptrs[cpu]));
+	}
 
-	for (i = 0; i < NR_CPUS; i++, ptr += size)
-		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+	pcpu_unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size,
+						PERCPU_MODULE_RESERVE, dyn_size,
+						PCPU_CHUNK_SIZE, vm.addr, NULL);
+
+	free_bootmem(__pa(pcpur_ptrs), ptrs_size);
+
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu) {
+		__per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size;
+	}
 
 	/* Setup %g5 for the boot cpu.  */
 	__local_per_cpu_offset = __per_cpu_offset(smp_processor_id());
+
+	of_fill_in_cpu_data();
+	if (tlb_type == hypervisor)
+		mdesc_fill_in_cpu_data(cpu_all_mask);
 }
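
Each cpu now receives a PCPU_CHUNK_SIZE (4MB) chunk, allocated 4MB-aligned
so that pcpu_map_range() can back it with a single 4MB TTE; that is what
the BUG_ON() on the pfn alignment enforces.  The closing offset arithmetic,
with hypothetical numbers:

	/* Say pcpu_base_addr = 0xfffff80000000000, __per_cpu_start =
	 * 0x0000000000800000, pcpu_unit_size = 0x400000 (4MB):
	 *
	 *   delta               = pcpu_base_addr - __per_cpu_start
	 *   __per_cpu_offset(2) = delta + 2 * 0x400000
	 *
	 * so adding __per_cpu_offset(cpu) to the link-time address of a
	 * static per-cpu symbol lands inside cpu 2's chunk, 8MB past
	 * the base of the vm area mapped above. */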

+ 3 - 1
arch/sparc/kernel/systbls_32.S

@@ -81,4 +81,6 @@ sys_call_table:
 /*305*/	.long sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
 /*310*/	.long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
 /*315*/	.long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
-/*320*/	.long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv, sys_pwritev
+/*320*/	.long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
+/*325*/	.long sys_pwritev, sys_rt_tgsigqueueinfo
+

+ 4 - 2
arch/sparc/kernel/systbls_64.S

@@ -82,7 +82,8 @@ sys_call_table32:
 	.word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait
 /*310*/	.word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
 	.word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
-/*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv, compat_sys_pwritev
+/*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
+	.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo
 
 #endif /* CONFIG_COMPAT */
 
@@ -156,4 +157,5 @@ sys_call_table:
 	.word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
 /*310*/	.word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
 	.word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
-/*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv, sys_pwritev
+/*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
+	.word sys_pwritev, sys_rt_tgsigqueueinfo

+ 92 - 78
arch/sparc/kernel/traps_64.c

@@ -2509,6 +2509,7 @@ void do_getpsr(struct pt_regs *regs)
 }
 
 struct trap_per_cpu trap_block[NR_CPUS];
+EXPORT_SYMBOL(trap_block);
 
 /* This can get invoked before sched_init() so play it super safe
  * and use hard_smp_processor_id().
@@ -2530,84 +2531,97 @@ extern void tsb_config_offsets_are_bolixed_dave(void);
 void __init trap_init(void)
 {
 	/* Compile time sanity check. */
-	if (TI_TASK != offsetof(struct thread_info, task) ||
-	    TI_FLAGS != offsetof(struct thread_info, flags) ||
-	    TI_CPU != offsetof(struct thread_info, cpu) ||
-	    TI_FPSAVED != offsetof(struct thread_info, fpsaved) ||
-	    TI_KSP != offsetof(struct thread_info, ksp) ||
-	    TI_FAULT_ADDR != offsetof(struct thread_info, fault_address) ||
-	    TI_KREGS != offsetof(struct thread_info, kregs) ||
-	    TI_UTRAPS != offsetof(struct thread_info, utraps) ||
-	    TI_EXEC_DOMAIN != offsetof(struct thread_info, exec_domain) ||
-	    TI_REG_WINDOW != offsetof(struct thread_info, reg_window) ||
-	    TI_RWIN_SPTRS != offsetof(struct thread_info, rwbuf_stkptrs) ||
-	    TI_GSR != offsetof(struct thread_info, gsr) ||
-	    TI_XFSR != offsetof(struct thread_info, xfsr) ||
-	    TI_USER_CNTD0 != offsetof(struct thread_info, user_cntd0) ||
-	    TI_USER_CNTD1 != offsetof(struct thread_info, user_cntd1) ||
-	    TI_KERN_CNTD0 != offsetof(struct thread_info, kernel_cntd0) ||
-	    TI_KERN_CNTD1 != offsetof(struct thread_info, kernel_cntd1) ||
-	    TI_PCR != offsetof(struct thread_info, pcr_reg) ||
-	    TI_PRE_COUNT != offsetof(struct thread_info, preempt_count) ||
-	    TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
-	    TI_SYS_NOERROR != offsetof(struct thread_info, syscall_noerror) ||
-	    TI_RESTART_BLOCK != offsetof(struct thread_info, restart_block) ||
-	    TI_KUNA_REGS != offsetof(struct thread_info, kern_una_regs) ||
-	    TI_KUNA_INSN != offsetof(struct thread_info, kern_una_insn) ||
-	    TI_FPREGS != offsetof(struct thread_info, fpregs) ||
-	    (TI_FPREGS & (64 - 1)))
-		thread_info_offsets_are_bolixed_dave();
-
-	if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) ||
-	    (TRAP_PER_CPU_PGD_PADDR !=
-	     offsetof(struct trap_per_cpu, pgd_paddr)) ||
-	    (TRAP_PER_CPU_CPU_MONDO_PA !=
-	     offsetof(struct trap_per_cpu, cpu_mondo_pa)) ||
-	    (TRAP_PER_CPU_DEV_MONDO_PA !=
-	     offsetof(struct trap_per_cpu, dev_mondo_pa)) ||
-	    (TRAP_PER_CPU_RESUM_MONDO_PA !=
-	     offsetof(struct trap_per_cpu, resum_mondo_pa)) ||
-	    (TRAP_PER_CPU_RESUM_KBUF_PA !=
-	     offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) ||
-	    (TRAP_PER_CPU_NONRESUM_MONDO_PA !=
-	     offsetof(struct trap_per_cpu, nonresum_mondo_pa)) ||
-	    (TRAP_PER_CPU_NONRESUM_KBUF_PA !=
-	     offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) ||
-	    (TRAP_PER_CPU_FAULT_INFO !=
-	     offsetof(struct trap_per_cpu, fault_info)) ||
-	    (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
-	     offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) ||
-	    (TRAP_PER_CPU_CPU_LIST_PA !=
-	     offsetof(struct trap_per_cpu, cpu_list_pa)) ||
-	    (TRAP_PER_CPU_TSB_HUGE !=
-	     offsetof(struct trap_per_cpu, tsb_huge)) ||
-	    (TRAP_PER_CPU_TSB_HUGE_TEMP !=
-	     offsetof(struct trap_per_cpu, tsb_huge_temp)) ||
-	    (TRAP_PER_CPU_IRQ_WORKLIST_PA !=
-	     offsetof(struct trap_per_cpu, irq_worklist_pa)) ||
-	    (TRAP_PER_CPU_CPU_MONDO_QMASK !=
-	     offsetof(struct trap_per_cpu, cpu_mondo_qmask)) ||
-	    (TRAP_PER_CPU_DEV_MONDO_QMASK !=
-	     offsetof(struct trap_per_cpu, dev_mondo_qmask)) ||
-	    (TRAP_PER_CPU_RESUM_QMASK !=
-	     offsetof(struct trap_per_cpu, resum_qmask)) ||
-	    (TRAP_PER_CPU_NONRESUM_QMASK !=
-	     offsetof(struct trap_per_cpu, nonresum_qmask)))
-		trap_per_cpu_offsets_are_bolixed_dave();
-
-	if ((TSB_CONFIG_TSB !=
-	     offsetof(struct tsb_config, tsb)) ||
-	    (TSB_CONFIG_RSS_LIMIT !=
-	     offsetof(struct tsb_config, tsb_rss_limit)) ||
-	    (TSB_CONFIG_NENTRIES !=
-	     offsetof(struct tsb_config, tsb_nentries)) ||
-	    (TSB_CONFIG_REG_VAL !=
-	     offsetof(struct tsb_config, tsb_reg_val)) ||
-	    (TSB_CONFIG_MAP_VADDR !=
-	     offsetof(struct tsb_config, tsb_map_vaddr)) ||
-	    (TSB_CONFIG_MAP_PTE !=
-	     offsetof(struct tsb_config, tsb_map_pte)))
-		tsb_config_offsets_are_bolixed_dave();
+	BUILD_BUG_ON(TI_TASK != offsetof(struct thread_info, task) ||
+		     TI_FLAGS != offsetof(struct thread_info, flags) ||
+		     TI_CPU != offsetof(struct thread_info, cpu) ||
+		     TI_FPSAVED != offsetof(struct thread_info, fpsaved) ||
+		     TI_KSP != offsetof(struct thread_info, ksp) ||
+		     TI_FAULT_ADDR != offsetof(struct thread_info,
+					       fault_address) ||
+		     TI_KREGS != offsetof(struct thread_info, kregs) ||
+		     TI_UTRAPS != offsetof(struct thread_info, utraps) ||
+		     TI_EXEC_DOMAIN != offsetof(struct thread_info,
+						exec_domain) ||
+		     TI_REG_WINDOW != offsetof(struct thread_info,
+					       reg_window) ||
+		     TI_RWIN_SPTRS != offsetof(struct thread_info,
+					       rwbuf_stkptrs) ||
+		     TI_GSR != offsetof(struct thread_info, gsr) ||
+		     TI_XFSR != offsetof(struct thread_info, xfsr) ||
+		     TI_USER_CNTD0 != offsetof(struct thread_info,
+					       user_cntd0) ||
+		     TI_USER_CNTD1 != offsetof(struct thread_info,
+					       user_cntd1) ||
+		     TI_KERN_CNTD0 != offsetof(struct thread_info,
+					       kernel_cntd0) ||
+		     TI_KERN_CNTD1 != offsetof(struct thread_info,
+					       kernel_cntd1) ||
+		     TI_PCR != offsetof(struct thread_info, pcr_reg) ||
+		     TI_PRE_COUNT != offsetof(struct thread_info,
+					      preempt_count) ||
+		     TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
+		     TI_SYS_NOERROR != offsetof(struct thread_info,
+						syscall_noerror) ||
+		     TI_RESTART_BLOCK != offsetof(struct thread_info,
+						  restart_block) ||
+		     TI_KUNA_REGS != offsetof(struct thread_info,
+					      kern_una_regs) ||
+		     TI_KUNA_INSN != offsetof(struct thread_info,
+					      kern_una_insn) ||
+		     TI_FPREGS != offsetof(struct thread_info, fpregs) ||
+		     (TI_FPREGS & (64 - 1)));
+
+	BUILD_BUG_ON(TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu,
+						     thread) ||
+		     (TRAP_PER_CPU_PGD_PADDR !=
+		      offsetof(struct trap_per_cpu, pgd_paddr)) ||
+		     (TRAP_PER_CPU_CPU_MONDO_PA !=
+		      offsetof(struct trap_per_cpu, cpu_mondo_pa)) ||
+		     (TRAP_PER_CPU_DEV_MONDO_PA !=
+		      offsetof(struct trap_per_cpu, dev_mondo_pa)) ||
+		     (TRAP_PER_CPU_RESUM_MONDO_PA !=
+		      offsetof(struct trap_per_cpu, resum_mondo_pa)) ||
+		     (TRAP_PER_CPU_RESUM_KBUF_PA !=
+		      offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) ||
+		     (TRAP_PER_CPU_NONRESUM_MONDO_PA !=
+		      offsetof(struct trap_per_cpu, nonresum_mondo_pa)) ||
+		     (TRAP_PER_CPU_NONRESUM_KBUF_PA !=
+		      offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) ||
+		     (TRAP_PER_CPU_FAULT_INFO !=
+		      offsetof(struct trap_per_cpu, fault_info)) ||
+		     (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
+		      offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) ||
+		     (TRAP_PER_CPU_CPU_LIST_PA !=
+		      offsetof(struct trap_per_cpu, cpu_list_pa)) ||
+		     (TRAP_PER_CPU_TSB_HUGE !=
+		      offsetof(struct trap_per_cpu, tsb_huge)) ||
+		     (TRAP_PER_CPU_TSB_HUGE_TEMP !=
+		      offsetof(struct trap_per_cpu, tsb_huge_temp)) ||
+		     (TRAP_PER_CPU_IRQ_WORKLIST_PA !=
+		      offsetof(struct trap_per_cpu, irq_worklist_pa)) ||
+		     (TRAP_PER_CPU_CPU_MONDO_QMASK !=
+		      offsetof(struct trap_per_cpu, cpu_mondo_qmask)) ||
+		     (TRAP_PER_CPU_DEV_MONDO_QMASK !=
+		      offsetof(struct trap_per_cpu, dev_mondo_qmask)) ||
+		     (TRAP_PER_CPU_RESUM_QMASK !=
+		      offsetof(struct trap_per_cpu, resum_qmask)) ||
+		     (TRAP_PER_CPU_NONRESUM_QMASK !=
+		      offsetof(struct trap_per_cpu, nonresum_qmask)) ||
+		     (TRAP_PER_CPU_PER_CPU_BASE !=
+		      offsetof(struct trap_per_cpu, __per_cpu_base)));
+
+	BUILD_BUG_ON((TSB_CONFIG_TSB !=
+		      offsetof(struct tsb_config, tsb)) ||
+		     (TSB_CONFIG_RSS_LIMIT !=
+		      offsetof(struct tsb_config, tsb_rss_limit)) ||
+		     (TSB_CONFIG_NENTRIES !=
+		      offsetof(struct tsb_config, tsb_nentries)) ||
+		     (TSB_CONFIG_REG_VAL !=
+		      offsetof(struct tsb_config, tsb_reg_val)) ||
+		     (TSB_CONFIG_MAP_VADDR !=
+		      offsetof(struct tsb_config, tsb_map_vaddr)) ||
+		     (TSB_CONFIG_MAP_PTE !=
+		      offsetof(struct tsb_config, tsb_map_pte)));
 
 	/* Attach to the address space of init_task.  On SMP we
 	 * do this in smp.c:smp_callin for other cpus.

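The hunk above swaps the old link-time trick — calls to deliberately undefined functions such as trap_per_cpu_offsets_are_bolixed_dave(), which rely on the optimizer deleting the call whenever every check passes — for BUILD_BUG_ON(), which rejects the build outright. A minimal userspace stand-in for the mechanism (the demo struct and DEMO_* constants are hypothetical; in the kernel the constants are generated for assembly code that hard-codes the offsets):

#include <stddef.h>

/* Stand-in for the kernel's BUILD_BUG_ON(): when cond is non-zero the
 * array size 1 - 2*!!(cond) goes negative and compilation fails, so a
 * stale offset is caught at build time instead of at boot. */
#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

struct demo {
        char pad[16];
        long val;               /* offset 16 on common ABIs */
};
#define DEMO_PAD 0
#define DEMO_VAL 16

int main(void)
{
        BUILD_BUG_ON(DEMO_PAD != offsetof(struct demo, pad));
        BUILD_BUG_ON(DEMO_VAL != offsetof(struct demo, val));
        /* Changing DEMO_VAL to 8 makes this translation unit fail to
         * compile, mirroring the TI_ and TRAP_PER_CPU_ checks. */
        return 0;
}

Note the final TI_FPREGS & (64 - 1) term in the hunk: the same macro is used there to assert 64-byte alignment, not just an offset value.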
+ 1 - 0
arch/sparc/mm/init_32.c

@@ -358,6 +358,7 @@ void __init paging_init(void)
 	protection_map[15] = PAGE_SHARED;
 	btfixup();
 	prom_build_devicetree();
+	of_fill_in_cpu_data();
 	device_scan();
 }
 

+ 4 - 12
arch/sparc/mm/init_64.c

@@ -1679,11 +1679,6 @@ pgd_t swapper_pg_dir[2048];
 static void sun4u_pgprot_init(void);
 static void sun4v_pgprot_init(void);
 
-/* Dummy function */
-void __init setup_per_cpu_areas(void)
-{
-}
-
 void __init paging_init(void)
 {
 	unsigned long end_pfn, shift, phys_base;
@@ -1799,16 +1794,13 @@ void __init paging_init(void)
 	if (tlb_type == hypervisor)
 		sun4v_ktsb_register();
 
-	/* We must setup the per-cpu areas before we pull in the
-	 * PROM and the MDESC.  The code there fills in cpu and
-	 * other information into per-cpu data structures.
-	 */
-	real_setup_per_cpu_areas();
-
 	prom_build_devicetree();
+	of_populate_present_mask();
 
-	if (tlb_type == hypervisor)
+	if (tlb_type == hypervisor) {
 		sun4v_mdesc_init();
+		mdesc_populate_present_mask(cpu_all_mask);
+	}
 
 	/* Once the OF device tree and MDESC have been setup, we know
 	 * the list of possible cpus.  Therefore we can allocate the

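The two init_64.c hunks invert the old ordering: per-cpu allocation used to happen before the PROM and MDESC were pulled in, and now the device tree and machine description are parsed first so the set of possible cpus is known before any per-cpu sizing happens (hence the deleted dummy setup_per_cpu_areas() as well). A mocked-up sketch of the resulting call order — every function body and cpu count below is invented for illustration, only the names come from the hunk:

#include <stdio.h>

static int possible_cpus;

/* Mocks of the kernel routines named in the hunk. */
static void prom_build_devicetree(void)       { puts("parse OF device tree"); }
static void of_populate_present_mask(void)    { possible_cpus = 2; }
static void sun4v_mdesc_init(void)            { puts("parse MDESC"); }
static void mdesc_populate_present_mask(void) { possible_cpus = 4; }

int main(void)
{
        int hypervisor = 1;     /* pretend tlb_type == hypervisor */

        prom_build_devicetree();
        of_populate_present_mask();
        if (hypervisor) {
                sun4v_mdesc_init();
                mdesc_populate_present_mask();
        }
        /* Only now is the possible-cpu set known, so per-cpu areas
         * can be allocated for exactly those cpus and no more. */
        printf("allocate per-cpu areas for %d cpus\n", possible_cpus);
        return 0;
}

The one-line init_32.c hunk above is the 32-bit half of the same idea: cpu data is filled in from the freshly built device tree.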
+ 2 - 1
arch/sparc/mm/srmmu.c

@@ -19,6 +19,7 @@
 #include <linux/fs.h>
 #include <linux/seq_file.h>
 #include <linux/kdebug.h>
+#include <linux/log2.h>
 
 #include <asm/bitext.h>
 #include <asm/page.h>
@@ -349,7 +350,7 @@ static void srmmu_free_nocache(unsigned long vaddr, int size)
 		    vaddr, srmmu_nocache_end);
 		BUG();
 	}
-	if (size & (size-1)) {
+	if (!is_power_of_2(size)) {
 		printk("Size 0x%x is not a power of 2\n", size);
 		BUG();
 	}

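is_power_of_2() lives in <linux/log2.h> (hence the new include) and is the usual n & (n - 1) trick with an explicit non-zero guard. That guard is a subtle behavioural tightening: the open-coded size & (size - 1) test it replaces evaluates to 0 for size == 0 and let zero through, whereas !is_power_of_2(0) now trips the BUG(). A standalone rendition:

#include <stdbool.h>
#include <stdio.h>

/* Userspace copy of the kernel helper: true only for non-zero values
 * with exactly one bit set. */
static bool is_power_of_2(unsigned long n)
{
        return n != 0 && ((n & (n - 1)) == 0);
}

int main(void)
{
        unsigned long sizes[] = { 0, 1, 64, 96, 4096 };

        for (int i = 0; i < 5; i++)
                printf("%4lu -> %s\n", sizes[i],
                       is_power_of_2(sizes[i]) ? "ok" : "reject");
        /* 0 reject, 1 ok, 64 ok, 96 reject, 4096 ok */
        return 0;
}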
+ 1 - 1
drivers/sbus/char/openprom.c

@@ -303,7 +303,7 @@ static int openprom_sunos_ioctl(struct inode * inode, struct file * file,
 				struct device_node *dp)
 {
 	DATA *data = file->private_data;
-	struct openpromio *opp;
+	struct openpromio *opp = NULL;
 	int bufsize, error = 0;
 	static int cnt;
 	void __user *argp = (void __user *)arg;

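The openprom change ("Squelch useless GCC warning" in the merge log) only adds a NULL initializer: every use of opp sits behind an assignment, but GCC's flow analysis cannot always prove that across the ioctl's branches and warns that opp "may be used uninitialized". A reduced, hypothetical example of the same pattern:

#include <stdio.h>
#include <stdlib.h>

/* 'buf' is assigned and used only under the same condition, which is
 * safe, yet some GCC versions still warn with -Wmaybe-uninitialized.
 * The NULL initializer quiets the warning without changing behaviour. */
int main(int argc, char **argv)
{
        char *buf = NULL;       /* the squelch */
        int want = argc > 1;

        if (want)
                buf = malloc(32);
        if (want && buf) {
                snprintf(buf, 32, "arg: %s", argv[1]);
                puts(buf);
        }
        free(buf);              /* free(NULL) is a defined no-op */
        return 0;
}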
+ 8 - 12
drivers/video/bw2.c

@@ -111,9 +111,7 @@ struct bw2_par {
 	u32			flags;
 #define BW2_FLAG_BLANKED	0x00000001
 
-	unsigned long		physbase;
 	unsigned long		which_io;
-	unsigned long		fbsize;
 };
 
 /**
@@ -167,17 +165,15 @@ static int bw2_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	struct bw2_par *par = (struct bw2_par *)info->par;
 
 	return sbusfb_mmap_helper(bw2_mmap_map,
-				  par->physbase, par->fbsize,
+				  info->fix.smem_start, info->fix.smem_len,
 				  par->which_io,
 				  vma);
 }
 
 static int bw2_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg)
 {
-	struct bw2_par *par = (struct bw2_par *) info->par;
-
 	return sbusfb_ioctl_helper(cmd, arg, info,
-				   FBTYPE_SUN2BW, 1, par->fbsize);
+				   FBTYPE_SUN2BW, 1, info->fix.smem_len);
 }
 
 /*
@@ -294,7 +290,7 @@ static int __devinit bw2_probe(struct of_device *op, const struct of_device_id *
 
 	spin_lock_init(&par->lock);
 
-	par->physbase = op->resource[0].start;
+	info->fix.smem_start = op->resource[0].start;
 	par->which_io = op->resource[0].flags & IORESOURCE_BITS;
 
 	sbusfb_fill_var(&info->var, dp, 1);
@@ -317,13 +313,13 @@ static int __devinit bw2_probe(struct of_device *op, const struct of_device_id *
 			goto out_unmap_regs;
 	}
 
-	par->fbsize = PAGE_ALIGN(linebytes * info->var.yres);
+	info->fix.smem_len = PAGE_ALIGN(linebytes * info->var.yres);
 
 	info->flags = FBINFO_DEFAULT;
 	info->fbops = &bw2_ops;
 
 	info->screen_base = of_ioremap(&op->resource[0], 0,
-				       par->fbsize, "bw2 ram");
+				       info->fix.smem_len, "bw2 ram");
 	if (!info->screen_base)
 		goto out_unmap_regs;
 
@@ -338,12 +334,12 @@ static int __devinit bw2_probe(struct of_device *op, const struct of_device_id *
 	dev_set_drvdata(&op->dev, info);
 
 	printk(KERN_INFO "%s: bwtwo at %lx:%lx\n",
-	       dp->full_name, par->which_io, par->physbase);
+	       dp->full_name, par->which_io, info->fix.smem_start);
 
 	return 0;
 
 out_unmap_screen:
-	of_iounmap(&op->resource[0], info->screen_base, par->fbsize);
+	of_iounmap(&op->resource[0], info->screen_base, info->fix.smem_len);
 
 out_unmap_regs:
 	of_iounmap(&op->resource[0], par->regs, sizeof(struct bw2_regs));
@@ -363,7 +359,7 @@ static int __devexit bw2_remove(struct of_device *op)
 	unregister_framebuffer(info);
 
 	of_iounmap(&op->resource[0], par->regs, sizeof(struct bw2_regs));
-	of_iounmap(&op->resource[0], info->screen_base, par->fbsize);
+	of_iounmap(&op->resource[0], info->screen_base, info->fix.smem_len);
 
 	framebuffer_release(info);
 

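This bw2 hunk establishes the pattern that the cg14, cg3, cg6, leo and p9100 hunks below repeat: drop the driver-private physbase/fbsize copies and keep the framebuffer physical address and length in the fb_fix_screeninfo fields every fbdev driver already carries, which userspace reads via FBIOGET_FSCREENINFO. A trimmed stand-in (struct shapes simplified; only the field names match the kernel's):

#include <stdio.h>

struct fb_fix_screeninfo {
        unsigned long smem_start;       /* framebuffer physical address */
        unsigned int  smem_len;         /* framebuffer length in bytes  */
};

struct fb_info {
        struct fb_fix_screeninfo fix;
};

/* Before this series, bw2_par also carried physbase and fbsize; now
 * only the genuinely bus-specific bit stays private. */
struct bw2_par {
        unsigned long which_io;
};

int main(void)
{
        struct fb_info info = {
                .fix = { .smem_start = 0x50000000UL, .smem_len = 0x100000 },
        };

        /* The mmap/ioctl helpers read the standard fields directly. */
        printf("fb at %#lx, %u bytes\n",
               info.fix.smem_start, info.fix.smem_len);
        return 0;
}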
+ 9 - 10
drivers/video/cg14.c

@@ -196,9 +196,7 @@ struct cg14_par {
 	u32			flags;
 #define CG14_FLAG_BLANKED	0x00000001
 
-	unsigned long		physbase;
 	unsigned long		iospace;
-	unsigned long		fbsize;
 
 	struct sbus_mmap_map	mmap_map[CG14_MMAP_ENTRIES];
 
@@ -271,7 +269,7 @@ static int cg14_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	struct cg14_par *par = (struct cg14_par *) info->par;
 
 	return sbusfb_mmap_helper(par->mmap_map,
-				  par->physbase, par->fbsize,
+				  info->fix.smem_start, info->fix.smem_len,
 				  par->iospace, vma);
 }
 
@@ -343,7 +341,8 @@ static int cg14_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg)
 
 	default:
 		ret = sbusfb_ioctl_helper(cmd, arg, info,
-					  FBTYPE_MDICOLOR, 8, par->fbsize);
+					  FBTYPE_MDICOLOR, 8,
+					  info->fix.smem_len);
 		break;
 	};
 
@@ -462,7 +461,7 @@ static void cg14_unmap_regs(struct of_device *op, struct fb_info *info,
 			   par->cursor, sizeof(struct cg14_cursor));
 	if (info->screen_base)
 		of_iounmap(&op->resource[1],
-			   info->screen_base, par->fbsize);
+			   info->screen_base, info->fix.smem_len);
 }
 
 static int __devinit cg14_probe(struct of_device *op, const struct of_device_id *match)
@@ -488,14 +487,14 @@ static int __devinit cg14_probe(struct of_device *op, const struct of_device_id
 
 	linebytes = of_getintprop_default(dp, "linebytes",
 					  info->var.xres);
-	par->fbsize = PAGE_ALIGN(linebytes * info->var.yres);
+	info->fix.smem_len = PAGE_ALIGN(linebytes * info->var.yres);
 
 	if (!strcmp(dp->parent->name, "sbus") ||
 	    !strcmp(dp->parent->name, "sbi")) {
-		par->physbase = op->resource[0].start;
+		info->fix.smem_start = op->resource[0].start;
 		par->iospace = op->resource[0].flags & IORESOURCE_BITS;
 	} else {
-		par->physbase = op->resource[1].start;
+		info->fix.smem_start = op->resource[1].start;
 		par->iospace = op->resource[0].flags & IORESOURCE_BITS;
 	}
 
@@ -507,7 +506,7 @@ static int __devinit cg14_probe(struct of_device *op, const struct of_device_id
 				 sizeof(struct cg14_cursor), "cg14 cursor");
 
 	info->screen_base = of_ioremap(&op->resource[1], 0,
-				       par->fbsize, "cg14 ram");
+				       info->fix.smem_len, "cg14 ram");
 
 	if (!par->regs || !par->clut || !par->cursor || !info->screen_base)
 		goto out_unmap_regs;
@@ -557,7 +556,7 @@ static int __devinit cg14_probe(struct of_device *op, const struct of_device_id
 
 	printk(KERN_INFO "%s: cgfourteen at %lx:%lx, %dMB\n",
 	       dp->full_name,
-	       par->iospace, par->physbase,
+	       par->iospace, info->fix.smem_start,
 	       par->ramsize >> 20);
 
 	return 0;

+ 8 - 12
drivers/video/cg3.c

@@ -118,9 +118,7 @@ struct cg3_par {
 #define CG3_FLAG_BLANKED	0x00000001
 #define CG3_FLAG_RDI		0x00000002
 
-	unsigned long		physbase;
 	unsigned long		which_io;
-	unsigned long		fbsize;
 };
 
 /**
@@ -231,17 +229,15 @@ static int cg3_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	struct cg3_par *par = (struct cg3_par *)info->par;
 
 	return sbusfb_mmap_helper(cg3_mmap_map,
-				  par->physbase, par->fbsize,
+				  info->fix.smem_start, info->fix.smem_len,
 				  par->which_io,
 				  vma);
 }
 
 static int cg3_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg)
 {
-	struct cg3_par *par = (struct cg3_par *) info->par;
-
 	return sbusfb_ioctl_helper(cmd, arg, info,
-				   FBTYPE_SUN3COLOR, 8, par->fbsize);
+				   FBTYPE_SUN3COLOR, 8, info->fix.smem_len);
 }
 
 /*
@@ -368,7 +364,7 @@ static int __devinit cg3_probe(struct of_device *op,
 
 	spin_lock_init(&par->lock);
 
-	par->physbase = op->resource[0].start;
+	info->fix.smem_start = op->resource[0].start;
 	par->which_io = op->resource[0].flags & IORESOURCE_BITS;
 
 	sbusfb_fill_var(&info->var, dp, 8);
@@ -382,7 +378,7 @@ static int __devinit cg3_probe(struct of_device *op,
 
 	linebytes = of_getintprop_default(dp, "linebytes",
 					  info->var.xres);
-	par->fbsize = PAGE_ALIGN(linebytes * info->var.yres);
+	info->fix.smem_len = PAGE_ALIGN(linebytes * info->var.yres);
 
 	par->regs = of_ioremap(&op->resource[0], CG3_REGS_OFFSET,
 			       sizeof(struct cg3_regs), "cg3 regs");
@@ -392,7 +388,7 @@ static int __devinit cg3_probe(struct of_device *op,
 	info->flags = FBINFO_DEFAULT;
 	info->fbops = &cg3_ops;
 	info->screen_base = of_ioremap(&op->resource[0], CG3_RAM_OFFSET,
-				       par->fbsize, "cg3 ram");
+				       info->fix.smem_len, "cg3 ram");
 	if (!info->screen_base)
 		goto out_unmap_regs;
 
@@ -418,7 +414,7 @@ static int __devinit cg3_probe(struct of_device *op,
 	dev_set_drvdata(&op->dev, info);
 
 	printk(KERN_INFO "%s: cg3 at %lx:%lx\n",
-	       dp->full_name, par->which_io, par->physbase);
+	       dp->full_name, par->which_io, info->fix.smem_start);
 
 	return 0;
 
@@ -426,7 +422,7 @@ out_dealloc_cmap:
 	fb_dealloc_cmap(&info->cmap);
 
 out_unmap_screen:
-	of_iounmap(&op->resource[0], info->screen_base, par->fbsize);
+	of_iounmap(&op->resource[0], info->screen_base, info->fix.smem_len);
 
 out_unmap_regs:
 	of_iounmap(&op->resource[0], par->regs, sizeof(struct cg3_regs));
@@ -447,7 +443,7 @@ static int __devexit cg3_remove(struct of_device *op)
 	fb_dealloc_cmap(&info->cmap);
 
 	of_iounmap(&op->resource[0], par->regs, sizeof(struct cg3_regs));
-	of_iounmap(&op->resource[0], info->screen_base, par->fbsize);
+	of_iounmap(&op->resource[0], info->screen_base, info->fix.smem_len);
 
 	framebuffer_release(info);
 

+ 11 - 14
drivers/video/cg6.c

@@ -263,9 +263,7 @@ struct cg6_par {
 	u32			flags;
 #define CG6_FLAG_BLANKED	0x00000001
 
-	unsigned long		physbase;
 	unsigned long		which_io;
-	unsigned long		fbsize;
 };
 
 static int cg6_sync(struct fb_info *info)
@@ -596,16 +594,14 @@ static int cg6_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	struct cg6_par *par = (struct cg6_par *)info->par;
 
 	return sbusfb_mmap_helper(cg6_mmap_map,
-				  par->physbase, par->fbsize,
+				  info->fix.smem_start, info->fix.smem_len,
 				  par->which_io, vma);
 }
 
 static int cg6_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg)
 {
-	struct cg6_par *par = (struct cg6_par *)info->par;
-
 	return sbusfb_ioctl_helper(cmd, arg, info,
-				   FBTYPE_SUNFAST_COLOR, 8, par->fbsize);
+				   FBTYPE_SUNFAST_COLOR, 8, info->fix.smem_len);
 }
 
 /*
@@ -631,12 +627,12 @@ static void __devinit cg6_init_fix(struct fb_info *info, int linebytes)
 		break;
 	};
 	if (((conf >> CG6_FHC_REV_SHIFT) & CG6_FHC_REV_MASK) >= 11) {
-		if (par->fbsize <= 0x100000)
+		if (info->fix.smem_len <= 0x100000)
 			cg6_card_name = "TGX";
 		else
 			cg6_card_name = "TGX+";
 	} else {
-		if (par->fbsize <= 0x100000)
+		if (info->fix.smem_len <= 0x100000)
 			cg6_card_name = "GX";
 		else
 			cg6_card_name = "GX+";
@@ -738,7 +734,8 @@ static void cg6_unmap_regs(struct of_device *op, struct fb_info *info,
 		of_iounmap(&op->resource[0], par->fhc, sizeof(u32));
 
 	if (info->screen_base)
-		of_iounmap(&op->resource[0], info->screen_base, par->fbsize);
+		of_iounmap(&op->resource[0], info->screen_base,
+			   info->fix.smem_len);
 }
 
 static int __devinit cg6_probe(struct of_device *op,
@@ -759,7 +756,7 @@ static int __devinit cg6_probe(struct of_device *op,
 
 	spin_lock_init(&par->lock);
 
-	par->physbase = op->resource[0].start;
+	info->fix.smem_start = op->resource[0].start;
 	par->which_io = op->resource[0].flags & IORESOURCE_BITS;
 
 	sbusfb_fill_var(&info->var, dp, 8);
@@ -769,11 +766,11 @@ static int __devinit cg6_probe(struct of_device *op,
 
 	linebytes = of_getintprop_default(dp, "linebytes",
 					  info->var.xres);
-	par->fbsize = PAGE_ALIGN(linebytes * info->var.yres);
+	info->fix.smem_len = PAGE_ALIGN(linebytes * info->var.yres);
 
 	dblbuf = of_getintprop_default(dp, "dblbuf", 0);
 	if (dblbuf)
-		par->fbsize *= 4;
+		info->fix.smem_len *= 4;
 
 	par->fbc = of_ioremap(&op->resource[0], CG6_FBC_OFFSET,
 				4096, "cgsix fbc");
@@ -792,7 +789,7 @@ static int __devinit cg6_probe(struct of_device *op,
 	info->fbops = &cg6_ops;
 
 	info->screen_base = of_ioremap(&op->resource[0], CG6_RAM_OFFSET,
-					par->fbsize, "cgsix ram");
+					info->fix.smem_len, "cgsix ram");
 	if (!par->fbc || !par->tec || !par->thc ||
 	    !par->bt || !par->fhc || !info->screen_base)
 		goto out_unmap_regs;
@@ -817,7 +814,7 @@ static int __devinit cg6_probe(struct of_device *op,
 
 	printk(KERN_INFO "%s: CGsix [%s] at %lx:%lx\n",
 	       dp->full_name, info->fix.id,
-	       par->which_io, par->physbase);
+	       par->which_io, info->fix.smem_start);
 
 	return 0;
 

+ 5 - 9
drivers/video/leo.c

@@ -191,9 +191,7 @@ struct leo_par {
 	u32			flags;
 #define LEO_FLAG_BLANKED	0x00000001
 
-	unsigned long		physbase;
 	unsigned long		which_io;
-	unsigned long		fbsize;
 };
 
 static void leo_wait(struct leo_lx_krn __iomem *lx_krn)
@@ -420,16 +418,14 @@ static int leo_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	struct leo_par *par = (struct leo_par *)info->par;
 
 	return sbusfb_mmap_helper(leo_mmap_map,
-				  par->physbase, par->fbsize,
+				  info->fix.smem_start, info->fix.smem_len,
 				  par->which_io, vma);
 }
 
 static int leo_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg)
 {
-	struct leo_par *par = (struct leo_par *) info->par;
-
 	return sbusfb_ioctl_helper(cmd, arg, info,
-				   FBTYPE_SUNLEO, 32, par->fbsize);
+				   FBTYPE_SUNLEO, 32, info->fix.smem_len);
 }
 
 /*
@@ -569,7 +565,7 @@ static int __devinit leo_probe(struct of_device *op,
 
 	spin_lock_init(&par->lock);
 
-	par->physbase = op->resource[0].start;
+	info->fix.smem_start = op->resource[0].start;
 	par->which_io = op->resource[0].flags & IORESOURCE_BITS;
 
 	sbusfb_fill_var(&info->var, dp, 32);
@@ -577,7 +573,7 @@ static int __devinit leo_probe(struct of_device *op,
 
 	linebytes = of_getintprop_default(dp, "linebytes",
 					  info->var.xres);
-	par->fbsize = PAGE_ALIGN(linebytes * info->var.yres);
+	info->fix.smem_len = PAGE_ALIGN(linebytes * info->var.yres);
 
 	par->lc_ss0_usr =
 		of_ioremap(&op->resource[0], LEO_OFF_LC_SS0_USR,
@@ -627,7 +623,7 @@ static int __devinit leo_probe(struct of_device *op,
 
 	printk(KERN_INFO "%s: leo at %lx:%lx\n",
 	       dp->full_name,
-	       par->which_io, par->physbase);
+	       par->which_io, info->fix.smem_start);
 
 	return 0;
 

+ 8 - 12
drivers/video/p9100.c

@@ -134,9 +134,7 @@ struct p9100_par {
 	u32			flags;
 #define P9100_FLAG_BLANKED	0x00000001
 
-	unsigned long		physbase;
 	unsigned long		which_io;
-	unsigned long		fbsize;
 };
 
 /**
@@ -224,18 +222,16 @@ static int p9100_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	struct p9100_par *par = (struct p9100_par *)info->par;
 
 	return sbusfb_mmap_helper(p9100_mmap_map,
-				  par->physbase, par->fbsize,
+				  info->fix.smem_start, info->fix.smem_len,
 				  par->which_io, vma);
 }
 
 static int p9100_ioctl(struct fb_info *info, unsigned int cmd,
 		       unsigned long arg)
 {
-	struct p9100_par *par = (struct p9100_par *) info->par;
-
 	/* Make it look like a cg3. */
 	return sbusfb_ioctl_helper(cmd, arg, info,
-				   FBTYPE_SUN3COLOR, 8, par->fbsize);
+				   FBTYPE_SUN3COLOR, 8, info->fix.smem_len);
 }
 
 /*
@@ -271,7 +267,7 @@ static int __devinit p9100_probe(struct of_device *op, const struct of_device_id
 	spin_lock_init(&par->lock);
 
 	/* This is the framebuffer and the only resource apps can mmap.  */
-	par->physbase = op->resource[2].start;
+	info->fix.smem_start = op->resource[2].start;
 	par->which_io = op->resource[2].flags & IORESOURCE_BITS;
 
 	sbusfb_fill_var(&info->var, dp, 8);
@@ -280,7 +276,7 @@ static int __devinit p9100_probe(struct of_device *op, const struct of_device_id
 	info->var.blue.length = 8;
 
 	linebytes = of_getintprop_default(dp, "linebytes", info->var.xres);
-	par->fbsize = PAGE_ALIGN(linebytes * info->var.yres);
+	info->fix.smem_len = PAGE_ALIGN(linebytes * info->var.yres);
 
 	par->regs = of_ioremap(&op->resource[0], 0,
 			       sizeof(struct p9100_regs), "p9100 regs");
@@ -290,7 +286,7 @@ static int __devinit p9100_probe(struct of_device *op, const struct of_device_id
 	info->flags = FBINFO_DEFAULT;
 	info->fbops = &p9100_ops;
 	info->screen_base = of_ioremap(&op->resource[2], 0,
-				       par->fbsize, "p9100 ram");
+				       info->fix.smem_len, "p9100 ram");
 	if (!info->screen_base)
 		goto out_unmap_regs;
 
@@ -311,7 +307,7 @@ static int __devinit p9100_probe(struct of_device *op, const struct of_device_id
 
 	printk(KERN_INFO "%s: p9100 at %lx:%lx\n",
 	       dp->full_name,
-	       par->which_io, par->physbase);
+	       par->which_io, info->fix.smem_start);
 
 	return 0;
 
@@ -319,7 +315,7 @@ out_dealloc_cmap:
 	fb_dealloc_cmap(&info->cmap);
 
 out_unmap_screen:
-	of_iounmap(&op->resource[2], info->screen_base, par->fbsize);
+	of_iounmap(&op->resource[2], info->screen_base, info->fix.smem_len);
 
 out_unmap_regs:
 	of_iounmap(&op->resource[0], par->regs, sizeof(struct p9100_regs));
@@ -340,7 +336,7 @@ static int __devexit p9100_remove(struct of_device *op)
 	fb_dealloc_cmap(&info->cmap);
 
 	of_iounmap(&op->resource[0], par->regs, sizeof(struct p9100_regs));
-	of_iounmap(&op->resource[2], info->screen_base, par->fbsize);
+	of_iounmap(&op->resource[2], info->screen_base, info->fix.smem_len);
 
 	framebuffer_release(info);
 

+ 20 - 0
scripts/recordmcount.pl

@@ -226,6 +226,26 @@ if ($arch eq "x86_64") {
     if ($is_module eq "0") {
         $cc .= " -mconstant-gp";
     }
+} elsif ($arch eq "sparc64") {
+    # In the objdump output there are giblets like:
+    # 0000000000000000 <igmp_net_exit-0x18>:
+    # Some data blobs get emitted into the text section before
+    # the first instructions and the first real symbols, and we
+    # don't want to match those.  To combat this we use '\w' so
+    # we'll match just plain symbol names, and not those that
+    # also include hex offsets inside the '<>' brackets.
+    # Actually the generic function_regex setting could safely
+    # use this too.
+    $function_regex = "^([0-9a-fA-F]+)\\s+<(\\w*?)>:";
+
+    # Sparc64 calls '_mcount' instead of plain 'mcount'.
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
+
+    $alignment = 8;
+    $type = ".xword";
+    $ld .= " -m elf64_sparc";
+    $cc .= " -m64";
+    $objcopy .= " -O elf64-sparc";
 } else {
     die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
 }
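
To see what the '\w' buys, here is a POSIX-ERE approximation of the sparc64 function_regex in C — [0-9A-Za-z_] standing in for \w, with an end anchor added so the non-greedy *? is unnecessary. The first sample line is the one quoted in the comment above; the second, well-formed symbol line is hypothetical:

#include <regex.h>
#include <stdio.h>

int main(void)
{
        const char *lines[] = {
                "0000000000000000 <igmp_net_exit-0x18>:",  /* data blob  */
                "0000000000000018 <igmp_net_exit>:",       /* real entry */
        };
        regex_t re;

        /* The '-' in the "-0x18" offset falls outside [0-9A-Za-z_],
         * so headers with offsets inside the <> brackets never match. */
        regcomp(&re, "^([0-9a-fA-F]+)[ \t]+<([0-9A-Za-z_]*)>:$",
                REG_EXTENDED);

        for (int i = 0; i < 2; i++)
                printf("%-40s  %s\n", lines[i],
                       regexec(&re, lines[i], 0, NULL, 0) == 0
                               ? "matches" : "skipped");
        regfree(&re);
        return 0;
}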