浏览代码

Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6

* 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6:
  [SPARC64]: Fill holes in hypervisor APIs and fix KTSB registry.
  [SPARC64]: Fix two bugs wrt. kernel 4MB TSB.
  [SPARC]: Mark as emulating cmpxchg, add appropriate depends for DRM.
  [SPARC]: Emulate cmpxchg like parisc
  [SPARC64]: Fix _PAGE_EXEC_4U check in sun4u I-TLB miss handler.
  [SPARC]: Linux always started with 9600 8N1
  [SPARC64]: arch/sparc64/time.c doesn't compile on Ultra 1 (no PCI)
  [SPARC64]: Eliminate NR_CPUS limitations.
  [SPARC64]: Use machine description and OBP properly for cpu probing.
  [SPARC64]: Negotiate hypervisor API for PCI services.
  [SPARC64]: Report proper system soft state to the hypervisor.
  [SPARC64]: Fix typo in sun4v_hvapi_register error handling.
  [SCSI] ESP: Kill SCSI_ESP_CORE and link directly just like jazz_esp
  [SCSI] jazz_esp: Converted to use esp_core.
  [SPARC64]: PCI device scan is way too verbose by default.
  [SERIAL] sunzilog: section mismatch fix
  [SPARC32]: Removes mismatch section warnigs in sparc time.c file
  [SPARC64]: Don't be picky about virtual-dma values on sun4v.
  [SPARC64]: Kill unused DIE_PAGE_FAULT enum value.
  [SCSI] pluto: Use wait_for_completion_timeout.
Linus Torvalds 18 年之前
父节点
当前提交
6e98ee75c3
共有 47 个文件被更改,包括 2728 次插入853 次删除
  1. 7 0
      arch/sparc/Kconfig
  2. 2 2
      arch/sparc/kernel/time.c
  3. 15 0
      arch/sparc/lib/atomic32.c
  4. 3 3
      arch/sparc64/Kconfig
  5. 2 2
      arch/sparc64/kernel/Makefile
  6. 0 196
      arch/sparc64/kernel/devices.c
  7. 568 7
      arch/sparc64/kernel/entry.S
  8. 26 5
      arch/sparc64/kernel/head.S
  9. 4 1
      arch/sparc64/kernel/hvapi.c
  10. 53 30
      arch/sparc64/kernel/irq.c
  11. 2 2
      arch/sparc64/kernel/itlb_miss.S
  12. 619 0
      arch/sparc64/kernel/mdesc.c
  13. 40 14
      arch/sparc64/kernel/pci.c
  14. 4 3
      arch/sparc64/kernel/pci_sabre.c
  15. 28 26
      arch/sparc64/kernel/pci_sun4v.c
  16. 2 0
      arch/sparc64/kernel/power.c
  17. 4 0
      arch/sparc64/kernel/process.c
  18. 148 0
      arch/sparc64/kernel/prom.c
  19. 7 11
      arch/sparc64/kernel/setup.c
  20. 50 105
      arch/sparc64/kernel/smp.c
  21. 104 0
      arch/sparc64/kernel/sstate.c
  22. 14 16
      arch/sparc64/kernel/sun4v_ivec.S
  23. 18 29
      arch/sparc64/kernel/time.c
  24. 18 9
      arch/sparc64/kernel/traps.c
  25. 49 41
      arch/sparc64/mm/init.c
  26. 19 0
      arch/sparc64/prom/misc.c
  27. 1 1
      drivers/char/drm/Kconfig
  28. 0 14
      drivers/scsi/Kconfig
  29. 2 3
      drivers/scsi/Makefile
  30. 183 246
      drivers/scsi/jazz_esp.c
  31. 4 14
      drivers/scsi/pluto.c
  32. 3 3
      drivers/serial/suncore.c
  33. 2 2
      drivers/serial/sunzilog.c
  34. 38 0
      include/asm-sparc/atomic.h
  35. 4 4
      include/asm-sparc64/bugs.h
  36. 17 7
      include/asm-sparc64/cpudata.h
  37. 600 43
      include/asm-sparc64/hypervisor.h
  38. 0 1
      include/asm-sparc64/kdebug.h
  39. 39 0
      include/asm-sparc64/mdesc.h
  40. 2 5
      include/asm-sparc64/oplib.h
  41. 3 1
      include/asm-sparc64/percpu.h
  42. 1 0
      include/asm-sparc64/prom.h
  43. 2 2
      include/asm-sparc64/smp.h
  44. 13 0
      include/asm-sparc64/sstate.h
  45. 4 4
      include/asm-sparc64/thread_info.h
  46. 3 0
      include/asm-sparc64/topology.h
  47. 1 1
      include/asm-sparc64/tsb.h

+ 7 - 0
arch/sparc/Kconfig

@@ -178,6 +178,13 @@ config ARCH_HAS_ILOG2_U64
 	bool
 	default n
 
+config EMULATED_CMPXCHG
+	bool
+	default y
+	help
+	  Sparc32 does not have a CAS instruction like sparc64. cmpxchg()
+	  is emulated, and therefore it is not completely atomic.
+
 config SUN_PM
 	bool
 	default y

+ 2 - 2
arch/sparc/kernel/time.c

@@ -148,7 +148,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
 }
 
 /* Kick start a stopped clock (procedure from the Sun NVRAM/hostid FAQ). */
-static void __init kick_start_clock(void)
+static void __devinit kick_start_clock(void)
 {
 	struct mostek48t02 *regs = (struct mostek48t02 *)mstk48t02_regs;
 	unsigned char sec;
@@ -223,7 +223,7 @@ static __inline__ int has_low_battery(void)
 	return (data1 == data2);	/* Was the write blocked? */
 }
 
-static void __init mostek_set_system_time(void)
+static void __devinit mostek_set_system_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
 	struct mostek48t02 *mregs;

+ 15 - 0
arch/sparc/lib/atomic32.c

@@ -2,6 +2,7 @@
  * atomic32.c: 32-bit atomic_t implementation
  *
  * Copyright (C) 2004 Keith M Wesolowski
+ * Copyright (C) 2007 Kyle McMartin
  * 
  * Based on asm-parisc/atomic.h Copyright (C) 2000 Philipp Rumpf
  */
@@ -117,3 +118,17 @@ unsigned long ___change_bit(unsigned long *addr, unsigned long mask)
 	return old & mask;
 }
 EXPORT_SYMBOL(___change_bit);
+
+unsigned long __cmpxchg_u32(volatile u32 *ptr, u32 old, u32 new)
+{
+	unsigned long flags;
+	u32 prev;
+
+	spin_lock_irqsave(ATOMIC_HASH(addr), flags);
+	if ((prev = *ptr) == old)
+		*ptr = new;
+	spin_unlock_irqrestore(ATOMIC_HASH(addr), flags);
+
+	return (unsigned long)prev;
+}
+EXPORT_SYMBOL(__cmpxchg_u32);

+ 3 - 3
arch/sparc64/Kconfig

@@ -147,10 +147,10 @@ config SMP
 	  If you don't know what to do here, say N.
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-64)"
-	range 2 64
+	int "Maximum number of CPUs (2-1024)"
+	range 2 1024
 	depends on SMP
-	default "32"
+	default "64"
 
 source "drivers/cpufreq/Kconfig"
 

+ 2 - 2
arch/sparc64/kernel/Makefile

@@ -8,11 +8,11 @@ EXTRA_CFLAGS := -Werror
 extra-y		:= head.o init_task.o vmlinux.lds
 
 obj-y		:= process.o setup.o cpu.o idprom.o \
-		   traps.o devices.o auxio.o una_asm.o \
+		   traps.o auxio.o una_asm.o \
 		   irq.o ptrace.o time.o sys_sparc.o signal.o \
 		   unaligned.o central.o pci.o starfire.o semaphore.o \
 		   power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o \
-		   visemul.o prom.o of_device.o hvapi.o
+		   visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-$(CONFIG_PCI)	 += ebus.o isa.o pci_common.o pci_iommu.o \

+ 0 - 196
arch/sparc64/kernel/devices.c

@@ -1,196 +0,0 @@
-/* devices.c: Initial scan of the prom device tree for important
- *            Sparc device nodes which we need to find.
- *
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
- */
-
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/init.h>
-#include <linux/ioport.h>
-#include <linux/string.h>
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/bootmem.h>
-
-#include <asm/page.h>
-#include <asm/oplib.h>
-#include <asm/system.h>
-#include <asm/smp.h>
-#include <asm/spitfire.h>
-#include <asm/timer.h>
-#include <asm/cpudata.h>
-
-/* Used to synchronize accesses to NatSemi SUPER I/O chip configure
- * operations in asm/ns87303.h
- */
-DEFINE_SPINLOCK(ns87303_lock);
-
-extern void cpu_probe(void);
-extern void central_probe(void);
-
-static const char *cpu_mid_prop(void)
-{
-	if (tlb_type == spitfire)
-		return "upa-portid";
-	return "portid";
-}
-
-static int get_cpu_mid(struct device_node *dp)
-{
-	struct property *prop;
-
-	if (tlb_type == hypervisor) {
-		struct linux_prom64_registers *reg;
-		int len;
-
-		prop = of_find_property(dp, "cpuid", &len);
-		if (prop && len == 4)
-			return *(int *) prop->value;
-
-		prop = of_find_property(dp, "reg", NULL);
-		reg = prop->value;
-		return (reg[0].phys_addr >> 32) & 0x0fffffffUL;
-	} else {
-		const char *prop_name = cpu_mid_prop();
-
-		prop = of_find_property(dp, prop_name, NULL);
-		if (prop)
-			return *(int *) prop->value;
-		return 0;
-	}
-}
-
-static int check_cpu_node(struct device_node *dp, int *cur_inst,
-			  int (*compare)(struct device_node *, int, void *),
-			  void *compare_arg,
-			  struct device_node **dev_node, int *mid)
-{
-	if (!compare(dp, *cur_inst, compare_arg)) {
-		if (dev_node)
-			*dev_node = dp;
-		if (mid)
-			*mid = get_cpu_mid(dp);
-		return 0;
-	}
-
-	(*cur_inst)++;
-
-	return -ENODEV;
-}
-
-static int __cpu_find_by(int (*compare)(struct device_node *, int, void *),
-			 void *compare_arg,
-			 struct device_node **dev_node, int *mid)
-{
-	struct device_node *dp;
-	int cur_inst;
-
-	cur_inst = 0;
-	for_each_node_by_type(dp, "cpu") {
-		int err = check_cpu_node(dp, &cur_inst,
-					 compare, compare_arg,
-					 dev_node, mid);
-		if (err == 0)
-			return 0;
-	}
-
-	return -ENODEV;
-}
-
-static int cpu_instance_compare(struct device_node *dp, int instance, void *_arg)
-{
-	int desired_instance = (int) (long) _arg;
-
-	if (instance == desired_instance)
-		return 0;
-	return -ENODEV;
-}
-
-int cpu_find_by_instance(int instance, struct device_node **dev_node, int *mid)
-{
-	return __cpu_find_by(cpu_instance_compare, (void *)(long)instance,
-			     dev_node, mid);
-}
-
-static int cpu_mid_compare(struct device_node *dp, int instance, void *_arg)
-{
-	int desired_mid = (int) (long) _arg;
-	int this_mid;
-
-	this_mid = get_cpu_mid(dp);
-	if (this_mid == desired_mid)
-		return 0;
-	return -ENODEV;
-}
-
-int cpu_find_by_mid(int mid, struct device_node **dev_node)
-{
-	return __cpu_find_by(cpu_mid_compare, (void *)(long)mid,
-			     dev_node, NULL);
-}
-
-void __init device_scan(void)
-{
-	/* FIX ME FAST... -DaveM */
-	ioport_resource.end = 0xffffffffffffffffUL;
-
-	prom_printf("Booting Linux...\n");
-
-#ifndef CONFIG_SMP
-	{
-		struct device_node *dp;
-		int err, def;
-
-		err = cpu_find_by_instance(0, &dp, NULL);
-		if (err) {
-			prom_printf("No cpu nodes, cannot continue\n");
-			prom_halt();
-		}
-		cpu_data(0).clock_tick =
-			of_getintprop_default(dp, "clock-frequency", 0);
-
-		def = ((tlb_type == hypervisor) ?
-		       (8 * 1024) :
-		       (16 * 1024));
-		cpu_data(0).dcache_size = of_getintprop_default(dp,
-								"dcache-size",
-								def);
-
-		def = 32;
-		cpu_data(0).dcache_line_size =
-			of_getintprop_default(dp, "dcache-line-size", def);
-
-		def = 16 * 1024;
-		cpu_data(0).icache_size = of_getintprop_default(dp,
-								"icache-size",
-								def);
-
-		def = 32;
-		cpu_data(0).icache_line_size =
-			of_getintprop_default(dp, "icache-line-size", def);
-
-		def = ((tlb_type == hypervisor) ?
-		       (3 * 1024 * 1024) :
-		       (4 * 1024 * 1024));
-		cpu_data(0).ecache_size = of_getintprop_default(dp,
-								"ecache-size",
-								def);
-
-		def = 64;
-		cpu_data(0).ecache_line_size =
-			of_getintprop_default(dp, "ecache-line-size", def);
-		printk("CPU[0]: Caches "
-		       "D[sz(%d):line_sz(%d)] "
-		       "I[sz(%d):line_sz(%d)] "
-		       "E[sz(%d):line_sz(%d)]\n",
-		       cpu_data(0).dcache_size, cpu_data(0).dcache_line_size,
-		       cpu_data(0).icache_size, cpu_data(0).icache_line_size,
-		       cpu_data(0).ecache_size, cpu_data(0).ecache_line_size);
-	}
-#endif
-
-	central_probe();
-
-	cpu_probe();
-}

+ 568 - 7
arch/sparc64/kernel/entry.S

@@ -1725,96 +1725,142 @@ real_hard_smp_processor_id:
 	 * returns %o0: sysino
 	 */
 	.globl	sun4v_devino_to_sysino
+	.type	sun4v_devino_to_sysino,#function
 sun4v_devino_to_sysino:
 	mov	HV_FAST_INTR_DEVINO2SYSINO, %o5
 	ta	HV_FAST_TRAP
 	retl
 	 mov	%o1, %o0
+	.size	sun4v_devino_to_sysino, .-sun4v_devino_to_sysino
 
 	/* %o0: sysino
 	 *
 	 * returns %o0: intr_enabled (HV_INTR_{DISABLED,ENABLED})
 	 */
 	.globl	sun4v_intr_getenabled
+	.type	sun4v_intr_getenabled,#function
 sun4v_intr_getenabled:
 	mov	HV_FAST_INTR_GETENABLED, %o5
 	ta	HV_FAST_TRAP
 	retl
 	 mov	%o1, %o0
+	.size	sun4v_intr_getenabled, .-sun4v_intr_getenabled
 
 	/* %o0: sysino
 	 * %o1: intr_enabled (HV_INTR_{DISABLED,ENABLED})
 	 */
 	.globl	sun4v_intr_setenabled
+	.type	sun4v_intr_setenabled,#function
 sun4v_intr_setenabled:
 	mov	HV_FAST_INTR_SETENABLED, %o5
 	ta	HV_FAST_TRAP
 	retl
 	 nop
+	.size	sun4v_intr_setenabled, .-sun4v_intr_setenabled
 
 	/* %o0: sysino
 	 *
 	 * returns %o0: intr_state (HV_INTR_STATE_*)
 	 */
 	.globl	sun4v_intr_getstate
+	.type	sun4v_intr_getstate,#function
 sun4v_intr_getstate:
 	mov	HV_FAST_INTR_GETSTATE, %o5
 	ta	HV_FAST_TRAP
 	retl
 	 mov	%o1, %o0
+	.size	sun4v_intr_getstate, .-sun4v_intr_getstate
 
 	/* %o0: sysino
 	 * %o1: intr_state (HV_INTR_STATE_*)
 	 */
 	.globl	sun4v_intr_setstate
+	.type	sun4v_intr_setstate,#function
 sun4v_intr_setstate:
 	mov	HV_FAST_INTR_SETSTATE, %o5
 	ta	HV_FAST_TRAP
 	retl
 	 nop
+	.size	sun4v_intr_setstate, .-sun4v_intr_setstate
 
 	/* %o0: sysino
 	 *
 	 * returns %o0: cpuid
 	 */
 	.globl	sun4v_intr_gettarget
+	.type	sun4v_intr_gettarget,#function
 sun4v_intr_gettarget:
 	mov	HV_FAST_INTR_GETTARGET, %o5
 	ta	HV_FAST_TRAP
 	retl
 	 mov	%o1, %o0
+	.size	sun4v_intr_gettarget, .-sun4v_intr_gettarget
 
 	/* %o0: sysino
 	 * %o1: cpuid
 	 */
 	.globl	sun4v_intr_settarget
+	.type	sun4v_intr_settarget,#function
 sun4v_intr_settarget:
 	mov	HV_FAST_INTR_SETTARGET, %o5
 	ta	HV_FAST_TRAP
 	retl
 	 nop
+	.size	sun4v_intr_settarget, .-sun4v_intr_settarget
 
-	/* %o0:	type
-	 * %o1:	queue paddr
-	 * %o2:	num queue entries
+	/* %o0:	cpuid
+	 * %o1: pc
+	 * %o2:	rtba
+	 * %o3:	arg0
 	 *
 	 * returns %o0:	status
 	 */
-	.globl	sun4v_cpu_qconf
-sun4v_cpu_qconf:
-	mov	HV_FAST_CPU_QCONF, %o5
+	.globl	sun4v_cpu_start
+	.type	sun4v_cpu_start,#function
+sun4v_cpu_start:
+	mov	HV_FAST_CPU_START, %o5
 	ta	HV_FAST_TRAP
 	retl
 	 nop
+	.size	sun4v_cpu_start, .-sun4v_cpu_start
 
-	/* returns %o0:	status
+	/* %o0:	cpuid
+	 *
+	 * returns %o0: status
 	 */
+	.globl	sun4v_cpu_stop
+	.type	sun4v_cpu_stop,#function
+sun4v_cpu_stop:
+	mov	HV_FAST_CPU_STOP, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_cpu_stop, .-sun4v_cpu_stop
+
+	/* returns %o0:	status  */
 	.globl	sun4v_cpu_yield
+	.type	sun4v_cpu_yield, #function
 sun4v_cpu_yield:
 	mov	HV_FAST_CPU_YIELD, %o5
 	ta	HV_FAST_TRAP
 	retl
 	 nop
+	.size	sun4v_cpu_yield, .-sun4v_cpu_yield
+
+	/* %o0:	type
+	 * %o1:	queue paddr
+	 * %o2:	num queue entries
+	 *
+	 * returns %o0:	status
+	 */
+	.globl	sun4v_cpu_qconf
+	.type	sun4v_cpu_qconf,#function
+sun4v_cpu_qconf:
+	mov	HV_FAST_CPU_QCONF, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_cpu_qconf, .-sun4v_cpu_qconf
 
 	/* %o0:	num cpus in cpu list
 	 * %o1:	cpu list paddr
@@ -1823,11 +1869,13 @@ sun4v_cpu_yield:
 	 * returns %o0: status
 	 */
 	.globl	sun4v_cpu_mondo_send
+	.type	sun4v_cpu_mondo_send,#function
 sun4v_cpu_mondo_send:
 	mov	HV_FAST_CPU_MONDO_SEND, %o5
 	ta	HV_FAST_TRAP
 	retl
 	 nop
+	.size	sun4v_cpu_mondo_send, .-sun4v_cpu_mondo_send
 
 	/* %o0:	CPU ID
 	 *
@@ -1835,6 +1883,7 @@ sun4v_cpu_mondo_send:
 	 *         %o0:	cpu state as HV_CPU_STATE_*
 	 */
 	.globl	sun4v_cpu_state
+	.type	sun4v_cpu_state,#function
 sun4v_cpu_state:
 	mov	HV_FAST_CPU_STATE, %o5
 	ta	HV_FAST_TRAP
@@ -1843,6 +1892,37 @@ sun4v_cpu_state:
 	mov	%o1, %o0
 1:	retl
 	 nop
+	.size	sun4v_cpu_state, .-sun4v_cpu_state
+
+	/* %o0: virtual address
+	 * %o1: must be zero
+	 * %o2: TTE
+	 * %o3: HV_MMU_* flags
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_mmu_map_perm_addr
+	.type	sun4v_mmu_map_perm_addr,#function
+sun4v_mmu_map_perm_addr:
+	mov	HV_FAST_MMU_MAP_PERM_ADDR, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_mmu_map_perm_addr, .-sun4v_mmu_map_perm_addr
+
+	/* %o0: number of TSB descriptions
+	 * %o1: TSB descriptions real address
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_mmu_tsb_ctx0
+	.type	sun4v_mmu_tsb_ctx0,#function
+sun4v_mmu_tsb_ctx0:
+	mov	HV_FAST_MMU_TSB_CTX0, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_mmu_tsb_ctx0, .-sun4v_mmu_tsb_ctx0
 
 	/* %o0:	API group number
 	 * %o1: pointer to unsigned long major number storage
@@ -1851,6 +1931,7 @@ sun4v_cpu_state:
 	 * returns %o0: status
 	 */
 	.globl	sun4v_get_version
+	.type	sun4v_get_version,#function
 sun4v_get_version:
 	mov	HV_CORE_GET_VER, %o5
 	mov	%o1, %o3
@@ -1859,6 +1940,7 @@ sun4v_get_version:
 	stx	%o1, [%o3]
 	retl
 	 stx	%o2, [%o4]
+	.size	sun4v_get_version, .-sun4v_get_version
 
 	/* %o0: API group number
 	 * %o1: desired major number
@@ -1868,18 +1950,49 @@ sun4v_get_version:
 	 * returns %o0: status
 	 */
 	.globl	sun4v_set_version
+	.type	sun4v_set_version,#function
 sun4v_set_version:
 	mov	HV_CORE_SET_VER, %o5
 	mov	%o3, %o4
 	ta	HV_CORE_TRAP
 	retl
 	 stx	%o1, [%o4]
+	.size	sun4v_set_version, .-sun4v_set_version
+
+	/* %o0: pointer to unsigned long time
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_tod_get
+	.type	sun4v_tod_get,#function
+sun4v_tod_get:
+	mov	%o0, %o4
+	mov	HV_FAST_TOD_GET, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+	.size	sun4v_tod_get, .-sun4v_tod_get
+
+	/* %o0: time
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_tod_set
+	.type	sun4v_tod_set,#function
+sun4v_tod_set:
+	mov	HV_FAST_TOD_SET, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_tod_set, .-sun4v_tod_set
 
 	/* %o0: pointer to unsigned long status
 	 *
 	 * returns %o0: signed character
 	 */
 	.globl	sun4v_con_getchar
+	.type	sun4v_con_getchar,#function
 sun4v_con_getchar:
 	mov	%o0, %o4
 	mov	HV_FAST_CONS_GETCHAR, %o5
@@ -1889,17 +2002,20 @@ sun4v_con_getchar:
 	stx	%o0, [%o4]
 	retl
 	 sra	%o1, 0, %o0
+	.size	sun4v_con_getchar, .-sun4v_con_getchar
 
 	/* %o0: signed long character
 	 *
 	 * returns %o0: status
 	 */
 	.globl	sun4v_con_putchar
+	.type	sun4v_con_putchar,#function
 sun4v_con_putchar:
 	mov	HV_FAST_CONS_PUTCHAR, %o5
 	ta	HV_FAST_TRAP
 	retl
 	 sra	%o0, 0, %o0
+	.size	sun4v_con_putchar, .-sun4v_con_putchar
 
 	/* %o0: buffer real address
 	 * %o1: buffer size
@@ -1908,6 +2024,7 @@ sun4v_con_putchar:
 	 * returns %o0: status
 	 */
 	.globl	sun4v_con_read
+	.type	sun4v_con_read,#function
 sun4v_con_read:
 	mov	%o2, %o4
 	mov	HV_FAST_CONS_READ, %o5
@@ -1922,6 +2039,7 @@ sun4v_con_read:
 	stx	%o1, [%o4]
 1:	retl
 	 nop
+	.size	sun4v_con_read, .-sun4v_con_read
 
 	/* %o0: buffer real address
 	 * %o1: buffer size
@@ -1930,6 +2048,7 @@ sun4v_con_read:
 	 * returns %o0: status
 	 */
 	.globl	sun4v_con_write
+	.type	sun4v_con_write,#function
 sun4v_con_write:
 	mov	%o2, %o4
 	mov	HV_FAST_CONS_WRITE, %o5
@@ -1937,3 +2056,445 @@ sun4v_con_write:
 	stx	%o1, [%o4]
 	retl
 	 nop
+	.size	sun4v_con_write, .-sun4v_con_write
+
+	/* %o0:	soft state
+	 * %o1:	address of description string
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_mach_set_soft_state
+	.type	sun4v_mach_set_soft_state,#function
+sun4v_mach_set_soft_state:
+	mov	HV_FAST_MACH_SET_SOFT_STATE, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_mach_set_soft_state, .-sun4v_mach_set_soft_state
+
+	/* %o0: exit code
+	 *
+	 * Does not return.
+	 */
+	.globl	sun4v_mach_exit
+	.type	sun4v_mach_exit,#function
+sun4v_mach_exit:
+	mov	HV_FAST_MACH_EXIT, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_mach_exit, .-sun4v_mach_exit
+
+	/* %o0: buffer real address
+	 * %o1: buffer length
+	 * %o2: pointer to unsigned long real_buf_len
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_mach_desc
+	.type	sun4v_mach_desc,#function
+sun4v_mach_desc:
+	mov	%o2, %o4
+	mov	HV_FAST_MACH_DESC, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+	.size	sun4v_mach_desc, .-sun4v_mach_desc
+
+	/* %o0: new timeout in milliseconds
+	 * %o1: pointer to unsigned long orig_timeout
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_mach_set_watchdog
+	.type	sun4v_mach_set_watchdog,#function
+sun4v_mach_set_watchdog:
+	mov	%o1, %o4
+	mov	HV_FAST_MACH_SET_WATCHDOG, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+	.size	sun4v_mach_set_watchdog, .-sun4v_mach_set_watchdog
+
+	/* No inputs and does not return.  */
+	.globl	sun4v_mach_sir
+	.type	sun4v_mach_sir,#function
+sun4v_mach_sir:
+	mov	%o1, %o4
+	mov	HV_FAST_MACH_SIR, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+	.size	sun4v_mach_sir, .-sun4v_mach_sir
+
+	/* %o0: channel
+	 * %o1:	ra
+	 * %o2:	num_entries
+	 *
+	 * returns %o0:	status
+	 */
+	.globl	sun4v_ldc_tx_qconf
+	.type	sun4v_ldc_tx_qconf,#function
+sun4v_ldc_tx_qconf:
+	mov	HV_FAST_LDC_TX_QCONF, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_ldc_tx_qconf, .-sun4v_ldc_tx_qconf
+
+	/* %o0: channel
+	 * %o1:	pointer to unsigned long ra
+	 * %o2:	pointer to unsigned long num_entries
+	 *
+	 * returns %o0:	status
+	 */
+	.globl	sun4v_ldc_tx_qinfo
+	.type	sun4v_ldc_tx_qinfo,#function
+sun4v_ldc_tx_qinfo:
+	mov	%o1, %g1
+	mov	%o2, %g2
+	mov	HV_FAST_LDC_TX_QINFO, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	stx	%o2, [%g2]
+	retl
+	 nop
+	.size	sun4v_ldc_tx_qinfo, .-sun4v_ldc_tx_qinfo
+
+	/* %o0: channel
+	 * %o1:	pointer to unsigned long head_off
+	 * %o2:	pointer to unsigned long tail_off
+	 * %o2:	pointer to unsigned long chan_state
+	 *
+	 * returns %o0:	status
+	 */
+	.globl	sun4v_ldc_tx_get_state
+	.type	sun4v_ldc_tx_get_state,#function
+sun4v_ldc_tx_get_state:
+	mov	%o1, %g1
+	mov	%o2, %g2
+	mov	%o3, %g3
+	mov	HV_FAST_LDC_TX_GET_STATE, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	stx	%o2, [%g2]
+	stx	%o3, [%g3]
+	retl
+	 nop
+	.size	sun4v_ldc_tx_get_state, .-sun4v_ldc_tx_get_state
+
+	/* %o0: channel
+	 * %o1:	tail_off
+	 *
+	 * returns %o0:	status
+	 */
+	.globl	sun4v_ldc_tx_set_qtail
+	.type	sun4v_ldc_tx_set_qtail,#function
+sun4v_ldc_tx_set_qtail:
+	mov	HV_FAST_LDC_TX_SET_QTAIL, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_ldc_tx_set_qtail, .-sun4v_ldc_tx_set_qtail
+
+	/* %o0: channel
+	 * %o1:	ra
+	 * %o2:	num_entries
+	 *
+	 * returns %o0:	status
+	 */
+	.globl	sun4v_ldc_rx_qconf
+	.type	sun4v_ldc_rx_qconf,#function
+sun4v_ldc_rx_qconf:
+	mov	HV_FAST_LDC_RX_QCONF, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_ldc_rx_qconf, .-sun4v_ldc_rx_qconf
+
+	/* %o0: channel
+	 * %o1:	pointer to unsigned long ra
+	 * %o2:	pointer to unsigned long num_entries
+	 *
+	 * returns %o0:	status
+	 */
+	.globl	sun4v_ldc_rx_qinfo
+	.type	sun4v_ldc_rx_qinfo,#function
+sun4v_ldc_rx_qinfo:
+	mov	%o1, %g1
+	mov	%o2, %g2
+	mov	HV_FAST_LDC_RX_QINFO, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	stx	%o2, [%g2]
+	retl
+	 nop
+	.size	sun4v_ldc_rx_qinfo, .-sun4v_ldc_rx_qinfo
+
+	/* %o0: channel
+	 * %o1:	pointer to unsigned long head_off
+	 * %o2:	pointer to unsigned long tail_off
+	 * %o2:	pointer to unsigned long chan_state
+	 *
+	 * returns %o0:	status
+	 */
+	.globl	sun4v_ldc_rx_get_state
+	.type	sun4v_ldc_rx_get_state,#function
+sun4v_ldc_rx_get_state:
+	mov	%o1, %g1
+	mov	%o2, %g2
+	mov	%o3, %g3
+	mov	HV_FAST_LDC_RX_GET_STATE, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	stx	%o2, [%g2]
+	stx	%o3, [%g3]
+	retl
+	 nop
+	.size	sun4v_ldc_rx_get_state, .-sun4v_ldc_rx_get_state
+
+	/* %o0: channel
+	 * %o1:	head_off
+	 *
+	 * returns %o0:	status
+	 */
+	.globl	sun4v_ldc_rx_set_qhead
+	.type	sun4v_ldc_rx_set_qhead,#function
+sun4v_ldc_rx_set_qhead:
+	mov	HV_FAST_LDC_RX_SET_QHEAD, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_ldc_rx_set_qhead, .-sun4v_ldc_rx_set_qhead
+
+	/* %o0: channel
+	 * %o1:	ra
+	 * %o2:	num_entries
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_ldc_set_map_table
+	.type	sun4v_ldc_set_map_table,#function
+sun4v_ldc_set_map_table:
+	mov	HV_FAST_LDC_SET_MAP_TABLE, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_ldc_set_map_table, .-sun4v_ldc_set_map_table
+
+	/* %o0: channel
+	 * %o1:	pointer to unsigned long ra
+	 * %o2:	pointer to unsigned long num_entries
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_ldc_get_map_table
+	.type	sun4v_ldc_get_map_table,#function
+sun4v_ldc_get_map_table:
+	mov	%o1, %g1
+	mov	%o2, %g2
+	mov	HV_FAST_LDC_GET_MAP_TABLE, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	stx	%o2, [%g2]
+	retl
+	 nop
+	.size	sun4v_ldc_get_map_table, .-sun4v_ldc_get_map_table
+
+	/* %o0:	channel
+	 * %o1:	dir_code
+	 * %o2:	tgt_raddr
+	 * %o3:	lcl_raddr
+	 * %o4:	len
+	 * %o5:	pointer to unsigned long actual_len
+	 *
+	 * returns %o0:	status
+	 */
+	.globl	sun4v_ldc_copy
+	.type	sun4v_ldc_copy,#function
+sun4v_ldc_copy:
+	mov	%o5, %g1
+	mov	HV_FAST_LDC_COPY, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	retl
+	 nop
+	.size	sun4v_ldc_copy, .-sun4v_ldc_copy
+
+	/* %o0:	channel
+	 * %o1:	cookie
+	 * %o2:	pointer to unsigned long ra
+	 * %o3:	pointer to unsigned long perm
+	 *
+	 * returns %o0:	status
+	 */
+	.globl	sun4v_ldc_mapin
+	.type	sun4v_ldc_mapin,#function
+sun4v_ldc_mapin:
+	mov	%o2, %g1
+	mov	%o3, %g2
+	mov	HV_FAST_LDC_MAPIN, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	stx	%o2, [%g2]
+	retl
+	 nop
+	.size	sun4v_ldc_mapin, .-sun4v_ldc_mapin
+
+	/* %o0:	ra
+	 *
+	 * returns %o0:	status
+	 */
+	.globl	sun4v_ldc_unmap
+	.type	sun4v_ldc_unmap,#function
+sun4v_ldc_unmap:
+	mov	HV_FAST_LDC_UNMAP, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_ldc_unmap, .-sun4v_ldc_unmap
+
+	/* %o0:	cookie
+	 * %o1:	mte_cookie
+	 *
+	 * returns %o0:	status
+	 */
+	.globl	sun4v_ldc_revoke
+	.type	sun4v_ldc_revoke,#function
+sun4v_ldc_revoke:
+	mov	HV_FAST_LDC_REVOKE, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_ldc_revoke, .-sun4v_ldc_revoke
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	pointer to unsigned long cookie
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_vintr_get_cookie
+	.type	sun4v_vintr_get_cookie,#function
+sun4v_vintr_get_cookie:
+	mov	%o2, %g1
+	mov	HV_FAST_VINTR_GET_COOKIE, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	retl
+	 nop
+	.size	sun4v_vintr_get_cookie, .-sun4v_vintr_get_cookie
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	cookie
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_vintr_set_cookie
+	.type	sun4v_vintr_set_cookie,#function
+sun4v_vintr_set_cookie:
+	mov	HV_FAST_VINTR_SET_COOKIE, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_vintr_set_cookie, .-sun4v_vintr_set_cookie
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	pointer to unsigned long valid_state
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_vintr_get_valid
+	.type	sun4v_vintr_get_valid,#function
+sun4v_vintr_get_valid:
+	mov	%o2, %g1
+	mov	HV_FAST_VINTR_GET_VALID, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	retl
+	 nop
+	.size	sun4v_vintr_get_valid, .-sun4v_vintr_get_valid
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	valid_state
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_vintr_set_valid
+	.type	sun4v_vintr_set_valid,#function
+sun4v_vintr_set_valid:
+	mov	HV_FAST_VINTR_SET_VALID, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_vintr_set_valid, .-sun4v_vintr_set_valid
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	pointer to unsigned long state
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_vintr_get_state
+	.type	sun4v_vintr_get_state,#function
+sun4v_vintr_get_state:
+	mov	%o2, %g1
+	mov	HV_FAST_VINTR_GET_STATE, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	retl
+	 nop
+	.size	sun4v_vintr_get_state, .-sun4v_vintr_get_state
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	state
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_vintr_set_state
+	.type	sun4v_vintr_set_state,#function
+sun4v_vintr_set_state:
+	mov	HV_FAST_VINTR_SET_STATE, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_vintr_set_state, .-sun4v_vintr_set_state
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	pointer to unsigned long cpuid
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_vintr_get_target
+	.type	sun4v_vintr_get_target,#function
+sun4v_vintr_get_target:
+	mov	%o2, %g1
+	mov	HV_FAST_VINTR_GET_TARGET, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	retl
+	 nop
+	.size	sun4v_vintr_get_target, .-sun4v_vintr_get_target
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	cpuid
+	 *
+	 * returns %o0: status
+	 */
+	.globl	sun4v_vintr_set_target
+	.type	sun4v_vintr_set_target,#function
+sun4v_vintr_set_target:
+	mov	HV_FAST_VINTR_SET_TARGET, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+	.size	sun4v_vintr_set_target, .-sun4v_vintr_set_target

+ 26 - 5
arch/sparc64/kernel/head.S

@@ -523,7 +523,7 @@ tlb_fixup_done:
 #else
 	mov	0, %o0
 #endif
-	stb	%o0, [%g6 + TI_CPU]
+	sth	%o0, [%g6 + TI_CPU]
 
 	/* Off we go.... */
 	call	start_kernel
@@ -653,33 +653,54 @@ setup_tba:
 	 restore
 sparc64_boot_end:
 
-#include "ktlb.S"
-#include "tsb.S"
 #include "etrap.S"
 #include "rtrap.S"
 #include "winfixup.S"
 #include "entry.S"
 #include "sun4v_tlb_miss.S"
 #include "sun4v_ivec.S"
+#include "ktlb.S"
+#include "tsb.S"
 
 /*
  * The following skip makes sure the trap table in ttable.S is aligned
  * on a 32K boundary as required by the v9 specs for TBA register.
  *
  * We align to a 32K boundary, then we have the 32K kernel TSB,
- * then the 32K aligned trap table.
+ * the 64K kernel 4MB TSB, and then the 32K aligned trap table.
  */
 1:
 	.skip	0x4000 + _start - 1b
 
+! 0x0000000000408000
+
 	.globl	swapper_tsb
 swapper_tsb:
 	.skip	(32 * 1024)
 
-! 0x0000000000408000
+	.globl	swapper_4m_tsb
+swapper_4m_tsb:
+	.skip	(64 * 1024)
+
+! 0x0000000000420000
 
+	/* Some care needs to be exercised if you try to move the
+	 * location of the trap table relative to other things.  For
+	 * one thing there are br* instructions in some of the
+	 * trap table entires which branch back to code in ktlb.S
+	 * Those instructions can only handle a signed 16-bit
+	 * displacement.
+	 *
+	 * There is a binutils bug (bugzilla #4558) which causes
+	 * the relocation overflow checks for such instructions to
+	 * not be done correctly.  So bintuils will not notice the
+	 * error and will instead write junk into the relocation and
+	 * you'll have an unbootable kernel.
+	 */
 #include "ttable.S"
 
+! 0x0000000000428000
+
 #include "systbls.S"
 
 	.data

+ 4 - 1
arch/sparc64/kernel/hvapi.c

@@ -9,6 +9,7 @@
 
 #include <asm/hypervisor.h>
 #include <asm/oplib.h>
+#include <asm/sstate.h>
 
 /* If the hypervisor indicates that the API setting
  * calls are unsupported, by returning HV_EBADTRAP or
@@ -107,7 +108,7 @@ int sun4v_hvapi_register(unsigned long group, unsigned long major,
 				p->minor = actual_minor;
 				ret = 0;
 			} else if (hv_ret == HV_EBADTRAP ||
-				   HV_ENOTSUPPORTED) {
+				   hv_ret == HV_ENOTSUPPORTED) {
 				if (p->flags & FLAG_PRE_API) {
 					if (major == 1) {
 						p->major = 1;
@@ -179,6 +180,8 @@ void __init sun4v_hvapi_init(void)
 	if (sun4v_hvapi_register(group, major, &minor))
 		goto bad;
 
+	sun4v_sstate_init();
+
 	return;
 
 bad:

+ 53 - 30
arch/sparc64/kernel/irq.c

@@ -171,8 +171,6 @@ skip:
 	return 0;
 }
 
-extern unsigned long real_hard_smp_processor_id(void);
-
 static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
 {
 	unsigned int tid;
@@ -694,9 +692,20 @@ void init_irqwork_curcpu(void)
 	trap_block[cpu].irq_worklist = 0;
 }
 
-static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type)
+/* Please be very careful with register_one_mondo() and
+ * sun4v_register_mondo_queues().
+ *
+ * On SMP this gets invoked from the CPU trampoline before
+ * the cpu has fully taken over the trap table from OBP,
+ * and it's kernel stack + %g6 thread register state is
+ * not fully cooked yet.
+ *
+ * Therefore you cannot make any OBP calls, not even prom_printf,
+ * from these two routines.
+ */
+static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type, unsigned long qmask)
 {
-	unsigned long num_entries = 128;
+	unsigned long num_entries = (qmask + 1) / 64;
 	unsigned long status;
 
 	status = sun4v_cpu_qconf(type, paddr, num_entries);
@@ -711,44 +720,58 @@ static void __cpuinit sun4v_register_mondo_queues(int this_cpu)
 {
 	struct trap_per_cpu *tb = &trap_block[this_cpu];
 
-	register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO);
-	register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO);
-	register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR);
-	register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR);
+	register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO,
+			   tb->cpu_mondo_qmask);
+	register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO,
+			   tb->dev_mondo_qmask);
+	register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR,
+			   tb->resum_qmask);
+	register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR,
+			   tb->nonresum_qmask);
 }
 
-static void __cpuinit alloc_one_mondo(unsigned long *pa_ptr, int use_bootmem)
+static void __cpuinit alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask, int use_bootmem)
 {
-	void *page;
+	unsigned long size = PAGE_ALIGN(qmask + 1);
+	unsigned long order = get_order(size);
+	void *p = NULL;
 
-	if (use_bootmem)
-		page = alloc_bootmem_low_pages(PAGE_SIZE);
-	else
-		page = (void *) get_zeroed_page(GFP_ATOMIC);
+	if (use_bootmem) {
+		p = __alloc_bootmem_low(size, size, 0);
+	} else {
+		struct page *page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, order);
+		if (page)
+			p = page_address(page);
+	}
 
-	if (!page) {
+	if (!p) {
 		prom_printf("SUN4V: Error, cannot allocate mondo queue.\n");
 		prom_halt();
 	}
 
-	*pa_ptr = __pa(page);
+	*pa_ptr = __pa(p);
 }
 
-static void __cpuinit alloc_one_kbuf(unsigned long *pa_ptr, int use_bootmem)
+static void __cpuinit alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask, int use_bootmem)
 {
-	void *page;
+	unsigned long size = PAGE_ALIGN(qmask + 1);
+	unsigned long order = get_order(size);
+	void *p = NULL;
 
-	if (use_bootmem)
-		page = alloc_bootmem_low_pages(PAGE_SIZE);
-	else
-		page = (void *) get_zeroed_page(GFP_ATOMIC);
+	if (use_bootmem) {
+		p = __alloc_bootmem_low(size, size, 0);
+	} else {
+		struct page *page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, order);
+		if (page)
+			p = page_address(page);
+	}
 
-	if (!page) {
+	if (!p) {
 		prom_printf("SUN4V: Error, cannot allocate kbuf page.\n");
 		prom_halt();
 	}
 
-	*pa_ptr = __pa(page);
+	*pa_ptr = __pa(p);
 }
 
 static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_bootmem)
@@ -779,12 +802,12 @@ void __cpuinit sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int
 	struct trap_per_cpu *tb = &trap_block[cpu];
 
 	if (alloc) {
-		alloc_one_mondo(&tb->cpu_mondo_pa, use_bootmem);
-		alloc_one_mondo(&tb->dev_mondo_pa, use_bootmem);
-		alloc_one_mondo(&tb->resum_mondo_pa, use_bootmem);
-		alloc_one_kbuf(&tb->resum_kernel_buf_pa, use_bootmem);
-		alloc_one_mondo(&tb->nonresum_mondo_pa, use_bootmem);
-		alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, use_bootmem);
+		alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask, use_bootmem);
+		alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask, use_bootmem);
+		alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask, use_bootmem);
+		alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask, use_bootmem);
+		alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask, use_bootmem);
+		alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, tb->nonresum_qmask, use_bootmem);
 
 		init_cpu_send_mondo_info(tb, use_bootmem);
 	}

+ 2 - 2
arch/sparc64/kernel/itlb_miss.S

@@ -11,12 +11,12 @@
 /* ITLB ** ICACHE line 2: TSB compare and TLB load	*/
 	bne,pn	%xcc, tsb_miss_itlb		! Miss
 	 mov	FAULT_CODE_ITLB, %g3
-	andcc	%g5, _PAGE_EXEC_4U, %g0		! Executable?
+	sethi	%hi(_PAGE_EXEC_4U), %g4
+	andcc	%g5, %g4, %g0			! Executable?
 	be,pn	%xcc, tsb_do_fault
 	 nop					! Delay slot, fill me
 	stxa	%g5, [%g0] ASI_ITLB_DATA_IN	! Load TLB
 	retry					! Trap done
-	nop
 
 /* ITLB ** ICACHE line 3: 				*/
 	nop

+ 619 - 0
arch/sparc64/kernel/mdesc.c

@@ -0,0 +1,619 @@
+/* mdesc.c: Sun4V machine description handling.
+ *
+ * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bootmem.h>
+#include <linux/log2.h>
+
+#include <asm/hypervisor.h>
+#include <asm/mdesc.h>
+#include <asm/prom.h>
+#include <asm/oplib.h>
+#include <asm/smp.h>
+
+/* Unlike the OBP device tree, the machine description is a full-on
+ * DAG.  An arbitrary number of ARCs are possible from one
+ * node to other nodes and thus we can't use the OBP device_node
+ * data structure to represent these nodes inside of the kernel.
+ *
+ * Actually, it isn't even a DAG, because there are back pointers
+ * which create cycles in the graph.
+ *
+ * mdesc_hdr and mdesc_elem describe the layout of the data structure
+ * we get from the Hypervisor.
+ */
+struct mdesc_hdr {
+	u32	version; /* Transport version */
+	u32	node_sz; /* node block size */
+	u32	name_sz; /* name block size */
+	u32	data_sz; /* data block size */
+};
+
+struct mdesc_elem {
+	u8	tag;
+#define MD_LIST_END	0x00
+#define MD_NODE		0x4e
+#define MD_NODE_END	0x45
+#define MD_NOOP		0x20
+#define MD_PROP_ARC	0x61
+#define MD_PROP_VAL	0x76
+#define MD_PROP_STR	0x73
+#define MD_PROP_DATA	0x64
+	u8	name_len;
+	u16	resv;
+	u32	name_offset;
+	union {
+		struct {
+			u32	data_len;
+			u32	data_offset;
+		} data;
+		u64	val;
+	} d;
+};
+
+static struct mdesc_hdr *main_mdesc;
+static struct mdesc_node *allnodes;
+
+static struct mdesc_node *allnodes_tail;
+static unsigned int unique_id;
+
+static struct mdesc_node **mdesc_hash;
+static unsigned int mdesc_hash_size;
+
+static inline unsigned int node_hashfn(u64 node)
+{
+	return ((unsigned int) (node ^ (node >> 8) ^ (node >> 16)))
+		& (mdesc_hash_size - 1);
+}
+
+static inline void hash_node(struct mdesc_node *mp)
+{
+	struct mdesc_node **head = &mdesc_hash[node_hashfn(mp->node)];
+
+	mp->hash_next = *head;
+	*head = mp;
+
+	if (allnodes_tail) {
+		allnodes_tail->allnodes_next = mp;
+		allnodes_tail = mp;
+	} else {
+		allnodes = allnodes_tail = mp;
+	}
+}
+
+static struct mdesc_node *find_node(u64 node)
+{
+	struct mdesc_node *mp = mdesc_hash[node_hashfn(node)];
+
+	while (mp) {
+		if (mp->node == node)
+			return mp;
+
+		mp = mp->hash_next;
+	}
+	return NULL;
+}
+
+struct property *md_find_property(const struct mdesc_node *mp,
+				  const char *name,
+				  int *lenp)
+{
+	struct property *pp;
+
+	for (pp = mp->properties; pp != 0; pp = pp->next) {
+		if (strcasecmp(pp->name, name) == 0) {
+			if (lenp)
+				*lenp = pp->length;
+			break;
+		}
+	}
+	return pp;
+}
+EXPORT_SYMBOL(md_find_property);
+
+/*
+ * Find a property with a given name for a given node
+ * and return the value.
+ */
+const void *md_get_property(const struct mdesc_node *mp, const char *name,
+			    int *lenp)
+{
+	struct property *pp = md_find_property(mp, name, lenp);
+	return pp ? pp->value : NULL;
+}
+EXPORT_SYMBOL(md_get_property);
+
+struct mdesc_node *md_find_node_by_name(struct mdesc_node *from,
+					const char *name)
+{
+	struct mdesc_node *mp;
+
+	mp = from ? from->allnodes_next : allnodes;
+	for (; mp != NULL; mp = mp->allnodes_next) {
+		if (strcmp(mp->name, name) == 0)
+			break;
+	}
+	return mp;
+}
+EXPORT_SYMBOL(md_find_node_by_name);
+
+static unsigned int mdesc_early_allocated;
+
+static void * __init mdesc_early_alloc(unsigned long size)
+{
+	void *ret;
+
+	ret = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL);
+	if (ret == NULL) {
+		prom_printf("MDESC: alloc of %lu bytes failed.\n", size);
+		prom_halt();
+	}
+
+	memset(ret, 0, size);
+
+	mdesc_early_allocated += size;
+
+	return ret;
+}
+
+static unsigned int __init count_arcs(struct mdesc_elem *ep)
+{
+	unsigned int ret = 0;
+
+	ep++;
+	while (ep->tag != MD_NODE_END) {
+		if (ep->tag == MD_PROP_ARC)
+			ret++;
+		ep++;
+	}
+	return ret;
+}
+
+static void __init mdesc_node_alloc(u64 node, struct mdesc_elem *ep, const char *names)
+{
+	unsigned int num_arcs = count_arcs(ep);
+	struct mdesc_node *mp;
+
+	mp = mdesc_early_alloc(sizeof(*mp) +
+			       (num_arcs * sizeof(struct mdesc_arc)));
+	mp->name = names + ep->name_offset;
+	mp->node = node;
+	mp->unique_id = unique_id++;
+	mp->num_arcs = num_arcs;
+
+	hash_node(mp);
+}
+
+static inline struct mdesc_elem *node_block(struct mdesc_hdr *mdesc)
+{
+	return (struct mdesc_elem *) (mdesc + 1);
+}
+
+static inline void *name_block(struct mdesc_hdr *mdesc)
+{
+	return ((void *) node_block(mdesc)) + mdesc->node_sz;
+}
+
+static inline void *data_block(struct mdesc_hdr *mdesc)
+{
+	return ((void *) name_block(mdesc)) + mdesc->name_sz;
+}
+
+/* In order to avoid recursion (the graph can be very deep) we use a
+ * two pass algorithm.  First we allocate all the nodes and hash them.
+ * Then we iterate over each node, filling in the arcs and properties.
+ */
+static void __init build_all_nodes(struct mdesc_hdr *mdesc)
+{
+	struct mdesc_elem *start, *ep;
+	struct mdesc_node *mp;
+	const char *names;
+	void *data;
+	u64 last_node;
+
+	start = ep = node_block(mdesc);
+	last_node = mdesc->node_sz / 16;
+
+	names = name_block(mdesc);
+
+	while (1) {
+		u64 node = ep - start;
+
+		if (ep->tag == MD_LIST_END)
+			break;
+
+		if (ep->tag != MD_NODE) {
+			prom_printf("MDESC: Inconsistent element list.\n");
+			prom_halt();
+		}
+
+		mdesc_node_alloc(node, ep, names);
+
+		if (ep->d.val >= last_node) {
+			printk("MDESC: Warning, early break out of node scan.\n");
+			printk("MDESC: Next node [%lu] last_node [%lu].\n",
+			       node, last_node);
+			break;
+		}
+
+		ep = start + ep->d.val;
+	}
+
+	data = data_block(mdesc);
+	for (mp = allnodes; mp; mp = mp->allnodes_next) {
+		struct mdesc_elem *ep = start + mp->node;
+		struct property **link = &mp->properties;
+		unsigned int this_arc = 0;
+
+		ep++;
+		while (ep->tag != MD_NODE_END) {
+			switch (ep->tag) {
+			case MD_PROP_ARC: {
+				struct mdesc_node *target;
+
+				if (this_arc >= mp->num_arcs) {
+					prom_printf("MDESC: ARC overrun [%u:%u]\n",
+						    this_arc, mp->num_arcs);
+					prom_halt();
+				}
+				target = find_node(ep->d.val);
+				if (!target) {
+					printk("MDESC: Warning, arc points to "
+					       "missing node, ignoring.\n");
+					break;
+				}
+				mp->arcs[this_arc].name =
+					(names + ep->name_offset);
+				mp->arcs[this_arc].arc = target;
+				this_arc++;
+				break;
+			}
+
+			case MD_PROP_VAL:
+			case MD_PROP_STR:
+			case MD_PROP_DATA: {
+				struct property *p = mdesc_early_alloc(sizeof(*p));
+
+				p->unique_id = unique_id++;
+				p->name = (char *) names + ep->name_offset;
+				if (ep->tag == MD_PROP_VAL) {
+					p->value = &ep->d.val;
+					p->length = 8;
+				} else {
+					p->value = data + ep->d.data.data_offset;
+					p->length = ep->d.data.data_len;
+				}
+				*link = p;
+				link = &p->next;
+				break;
+			}
+
+			case MD_NOOP:
+				break;
+
+			default:
+				printk("MDESC: Warning, ignoring unknown tag type %02x\n",
+				       ep->tag);
+			}
+			ep++;
+		}
+	}
+}
+
+static unsigned int __init count_nodes(struct mdesc_hdr *mdesc)
+{
+	struct mdesc_elem *ep = node_block(mdesc);
+	struct mdesc_elem *end;
+	unsigned int cnt = 0;
+
+	end = ((void *)ep) + mdesc->node_sz;
+	while (ep < end) {
+		if (ep->tag == MD_NODE)
+			cnt++;
+		ep++;
+	}
+	return cnt;
+}
+
+static void __init report_platform_properties(void)
+{
+	struct mdesc_node *pn = md_find_node_by_name(NULL, "platform");
+	const char *s;
+	const u64 *v;
+
+	if (!pn) {
+		prom_printf("No platform node in machine-description.\n");
+		prom_halt();
+	}
+
+	s = md_get_property(pn, "banner-name", NULL);
+	printk("PLATFORM: banner-name [%s]\n", s);
+	s = md_get_property(pn, "name", NULL);
+	printk("PLATFORM: name [%s]\n", s);
+
+	v = md_get_property(pn, "hostid", NULL);
+	if (v)
+		printk("PLATFORM: hostid [%08lx]\n", *v);
+	v = md_get_property(pn, "serial#", NULL);
+	if (v)
+		printk("PLATFORM: serial# [%08lx]\n", *v);
+	v = md_get_property(pn, "stick-frequency", NULL);
+	printk("PLATFORM: stick-frequency [%08lx]\n", *v);
+	v = md_get_property(pn, "mac-address", NULL);
+	if (v)
+		printk("PLATFORM: mac-address [%lx]\n", *v);
+	v = md_get_property(pn, "watchdog-resolution", NULL);
+	if (v)
+		printk("PLATFORM: watchdog-resolution [%lu ms]\n", *v);
+	v = md_get_property(pn, "watchdog-max-timeout", NULL);
+	if (v)
+		printk("PLATFORM: watchdog-max-timeout [%lu ms]\n", *v);
+	v = md_get_property(pn, "max-cpus", NULL);
+	if (v)
+		printk("PLATFORM: max-cpus [%lu]\n", *v);
+}
+
+static int inline find_in_proplist(const char *list, const char *match, int len)
+{
+	while (len > 0) {
+		int l;
+
+		if (!strcmp(list, match))
+			return 1;
+		l = strlen(list) + 1;
+		list += l;
+		len -= l;
+	}
+	return 0;
+}
+
+static void __init fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_node *mp)
+{
+	const u64 *level = md_get_property(mp, "level", NULL);
+	const u64 *size = md_get_property(mp, "size", NULL);
+	const u64 *line_size = md_get_property(mp, "line-size", NULL);
+	const char *type;
+	int type_len;
+
+	type = md_get_property(mp, "type", &type_len);
+
+	switch (*level) {
+	case 1:
+		if (find_in_proplist(type, "instn", type_len)) {
+			c->icache_size = *size;
+			c->icache_line_size = *line_size;
+		} else if (find_in_proplist(type, "data", type_len)) {
+			c->dcache_size = *size;
+			c->dcache_line_size = *line_size;
+		}
+		break;
+
+	case 2:
+		c->ecache_size = *size;
+		c->ecache_line_size = *line_size;
+		break;
+
+	default:
+		break;
+	}
+
+	if (*level == 1) {
+		unsigned int i;
+
+		for (i = 0; i < mp->num_arcs; i++) {
+			struct mdesc_node *t = mp->arcs[i].arc;
+
+			if (strcmp(mp->arcs[i].name, "fwd"))
+				continue;
+
+			if (!strcmp(t->name, "cache"))
+				fill_in_one_cache(c, t);
+		}
+	}
+}
+
+static void __init mark_core_ids(struct mdesc_node *mp, int core_id)
+{
+	unsigned int i;
+
+	for (i = 0; i < mp->num_arcs; i++) {
+		struct mdesc_node *t = mp->arcs[i].arc;
+		const u64 *id;
+
+		if (strcmp(mp->arcs[i].name, "back"))
+			continue;
+
+		if (!strcmp(t->name, "cpu")) {
+			id = md_get_property(t, "id", NULL);
+			if (*id < NR_CPUS)
+				cpu_data(*id).core_id = core_id;
+		} else {
+			unsigned int j;
+
+			for (j = 0; j < t->num_arcs; j++) {
+				struct mdesc_node *n = t->arcs[j].arc;
+
+				if (strcmp(t->arcs[j].name, "back"))
+					continue;
+
+				if (strcmp(n->name, "cpu"))
+					continue;
+
+				id = md_get_property(n, "id", NULL);
+				if (*id < NR_CPUS)
+					cpu_data(*id).core_id = core_id;
+			}
+		}
+	}
+}
+
+static void __init set_core_ids(void)
+{
+	struct mdesc_node *mp;
+	int idx;
+
+	idx = 1;
+	md_for_each_node_by_name(mp, "cache") {
+		const u64 *level = md_get_property(mp, "level", NULL);
+		const char *type;
+		int len;
+
+		if (*level != 1)
+			continue;
+
+		type = md_get_property(mp, "type", &len);
+		if (!find_in_proplist(type, "instn", len))
+			continue;
+
+		mark_core_ids(mp, idx);
+
+		idx++;
+	}
+}
+
+static void __init get_one_mondo_bits(const u64 *p, unsigned int *mask, unsigned char def)
+{
+	u64 val;
+
+	if (!p)
+		goto use_default;
+	val = *p;
+
+	if (!val || val >= 64)
+		goto use_default;
+
+	*mask = ((1U << val) * 64U) - 1U;
+	return;
+
+use_default:
+	*mask = ((1U << def) * 64U) - 1U;
+}
+
+static void __init get_mondo_data(struct mdesc_node *mp, struct trap_per_cpu *tb)
+{
+	const u64 *val;
+
+	val = md_get_property(mp, "q-cpu-mondo-#bits", NULL);
+	get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7);
+
+	val = md_get_property(mp, "q-dev-mondo-#bits", NULL);
+	get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7);
+
+	val = md_get_property(mp, "q-resumable-#bits", NULL);
+	get_one_mondo_bits(val, &tb->resum_qmask, 6);
+
+	val = md_get_property(mp, "q-nonresumable-#bits", NULL);
+	get_one_mondo_bits(val, &tb->nonresum_qmask, 2);
+}
+
+static void __init mdesc_fill_in_cpu_data(void)
+{
+	struct mdesc_node *mp;
+
+	ncpus_probed = 0;
+	md_for_each_node_by_name(mp, "cpu") {
+		const u64 *id = md_get_property(mp, "id", NULL);
+		const u64 *cfreq = md_get_property(mp, "clock-frequency", NULL);
+		struct trap_per_cpu *tb;
+		cpuinfo_sparc *c;
+		unsigned int i;
+		int cpuid;
+
+		ncpus_probed++;
+
+		cpuid = *id;
+
+#ifdef CONFIG_SMP
+		if (cpuid >= NR_CPUS)
+			continue;
+#else
+		/* On uniprocessor we only want the values for the
+		 * real physical cpu the kernel booted onto, however
+		 * cpu_data() only has one entry at index 0.
+		 */
+		if (cpuid != real_hard_smp_processor_id())
+			continue;
+		cpuid = 0;
+#endif
+
+		c = &cpu_data(cpuid);
+		c->clock_tick = *cfreq;
+
+		tb = &trap_block[cpuid];
+		get_mondo_data(mp, tb);
+
+		for (i = 0; i < mp->num_arcs; i++) {
+			struct mdesc_node *t = mp->arcs[i].arc;
+			unsigned int j;
+
+			if (strcmp(mp->arcs[i].name, "fwd"))
+				continue;
+
+			if (!strcmp(t->name, "cache")) {
+				fill_in_one_cache(c, t);
+				continue;
+			}
+
+			for (j = 0; j < t->num_arcs; j++) {
+				struct mdesc_node *n;
+
+				n = t->arcs[j].arc;
+				if (strcmp(t->arcs[j].name, "fwd"))
+					continue;
+
+				if (!strcmp(n->name, "cache"))
+					fill_in_one_cache(c, n);
+			}
+		}
+
+#ifdef CONFIG_SMP
+		cpu_set(cpuid, cpu_present_map);
+		cpu_set(cpuid, phys_cpu_present_map);
+#endif
+
+		c->core_id = 0;
+	}
+
+	set_core_ids();
+
+	smp_fill_in_sib_core_maps();
+}
+
+void __init sun4v_mdesc_init(void)
+{
+	unsigned long len, real_len, status;
+
+	(void) sun4v_mach_desc(0UL, 0UL, &len);
+
+	printk("MDESC: Size is %lu bytes.\n", len);
+
+	main_mdesc = mdesc_early_alloc(len);
+
+	status = sun4v_mach_desc(__pa(main_mdesc), len, &real_len);
+	if (status != HV_EOK || real_len > len) {
+		prom_printf("sun4v_mach_desc fails, err(%lu), "
+			    "len(%lu), real_len(%lu)\n",
+			    status, len, real_len);
+		prom_halt();
+	}
+
+	len = count_nodes(main_mdesc);
+	printk("MDESC: %lu nodes.\n", len);
+
+	len = roundup_pow_of_two(len);
+
+	mdesc_hash = mdesc_early_alloc(len * sizeof(struct mdesc_node *));
+	mdesc_hash_size = len;
+
+	printk("MDESC: Hash size %lu entries.\n", len);
+
+	build_all_nodes(main_mdesc);
+
+	printk("MDESC: Built graph with %u bytes of memory.\n",
+	       mdesc_early_allocated);
+
+	report_platform_properties();
+	mdesc_fill_in_cpu_data();
+}

+ 40 - 14
arch/sparc64/kernel/pci.c

@@ -306,6 +306,20 @@ static void __init pci_controller_probe(void)
 	pci_controller_scan(pci_controller_init);
 }
 
+static int ofpci_verbose;
+
+static int __init ofpci_debug(char *str)
+{
+	int val = 0;
+
+	get_option(&str, &val);
+	if (val)
+		ofpci_verbose = 1;
+	return 1;
+}
+
+__setup("ofpci_debug=", ofpci_debug);
+
 static unsigned long pci_parse_of_flags(u32 addr0)
 {
 	unsigned long flags = 0;
@@ -337,7 +351,9 @@ static void pci_parse_of_addrs(struct of_device *op,
 	addrs = of_get_property(node, "assigned-addresses", &proplen);
 	if (!addrs)
 		return;
-	printk("    parse addresses (%d bytes) @ %p\n", proplen, addrs);
+	if (ofpci_verbose)
+		printk("    parse addresses (%d bytes) @ %p\n",
+		       proplen, addrs);
 	op_res = &op->resource[0];
 	for (; proplen >= 20; proplen -= 20, addrs += 5, op_res++) {
 		struct resource *res;
@@ -348,8 +364,9 @@ static void pci_parse_of_addrs(struct of_device *op,
 		if (!flags)
 			continue;
 		i = addrs[0] & 0xff;
-		printk("  start: %lx, end: %lx, i: %x\n",
-		       op_res->start, op_res->end, i);
+		if (ofpci_verbose)
+			printk("  start: %lx, end: %lx, i: %x\n",
+			       op_res->start, op_res->end, i);
 
 		if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
 			res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
@@ -393,8 +410,9 @@ struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 	if (type == NULL)
 		type = "";
 
-	printk("    create device, devfn: %x, type: %s hostcontroller(%d)\n",
-	       devfn, type, host_controller);
+	if (ofpci_verbose)
+		printk("    create device, devfn: %x, type: %s\n",
+		       devfn, type);
 
 	dev->bus = bus;
 	dev->sysdata = node;
@@ -434,8 +452,9 @@ struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 		sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus),
 			dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
 	}
-	printk("    class: 0x%x device name: %s\n",
-	       dev->class, pci_name(dev));
+	if (ofpci_verbose)
+		printk("    class: 0x%x device name: %s\n",
+		       dev->class, pci_name(dev));
 
 	/* I have seen IDE devices which will not respond to
 	 * the bmdma simplex check reads if bus mastering is
@@ -469,7 +488,8 @@ struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 	}
 	pci_parse_of_addrs(sd->op, node, dev);
 
-	printk("    adding to system ...\n");
+	if (ofpci_verbose)
+		printk("    adding to system ...\n");
 
 	pci_device_add(dev, bus);
 
@@ -547,7 +567,8 @@ static void __devinit of_scan_pci_bridge(struct pci_pbm_info *pbm,
 	unsigned int flags;
 	u64 size;
 
-	printk("of_scan_pci_bridge(%s)\n", node->full_name);
+	if (ofpci_verbose)
+		printk("of_scan_pci_bridge(%s)\n", node->full_name);
 
 	/* parse bus-range property */
 	busrange = of_get_property(node, "bus-range", &len);
@@ -632,7 +653,8 @@ static void __devinit of_scan_pci_bridge(struct pci_pbm_info *pbm,
 simba_cont:
 	sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
 		bus->number);
-	printk("    bus name: %s\n", bus->name);
+	if (ofpci_verbose)
+		printk("    bus name: %s\n", bus->name);
 
 	pci_of_scan_bus(pbm, node, bus);
 }
@@ -646,12 +668,14 @@ static void __devinit pci_of_scan_bus(struct pci_pbm_info *pbm,
 	int reglen, devfn;
 	struct pci_dev *dev;
 
-	printk("PCI: scan_bus[%s] bus no %d\n",
-	       node->full_name, bus->number);
+	if (ofpci_verbose)
+		printk("PCI: scan_bus[%s] bus no %d\n",
+		       node->full_name, bus->number);
 
 	child = NULL;
 	while ((child = of_get_next_child(node, child)) != NULL) {
-		printk("  * %s\n", child->full_name);
+		if (ofpci_verbose)
+			printk("  * %s\n", child->full_name);
 		reg = of_get_property(child, "reg", &reglen);
 		if (reg == NULL || reglen < 20)
 			continue;
@@ -661,7 +685,9 @@ static void __devinit pci_of_scan_bus(struct pci_pbm_info *pbm,
 		dev = of_create_pci_dev(pbm, child, bus, devfn, 0);
 		if (!dev)
 			continue;
-		printk("PCI: dev header type: %x\n", dev->hdr_type);
+		if (ofpci_verbose)
+			printk("PCI: dev header type: %x\n",
+			       dev->hdr_type);
 
 		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
 		    dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)

+ 4 - 3
arch/sparc64/kernel/pci_sabre.c

@@ -762,9 +762,10 @@ void sabre_init(struct device_node *dp, char *model_name)
 			/* Of course, Sun has to encode things a thousand
 			 * different ways, inconsistently.
 			 */
-			cpu_find_by_instance(0, &dp, NULL);
-			if (!strcmp(dp->name, "SUNW,UltraSPARC-IIe"))
-				hummingbird_p = 1;
+			for_each_node_by_type(dp, "cpu") {
+				if (!strcmp(dp->name, "SUNW,UltraSPARC-IIe"))
+					hummingbird_p = 1;
+			}
 		}
 	}
 

+ 28 - 26
arch/sparc64/kernel/pci_sun4v.c

@@ -12,6 +12,7 @@
 #include <linux/percpu.h>
 #include <linux/irq.h>
 #include <linux/msi.h>
+#include <linux/log2.h>
 
 #include <asm/iommu.h>
 #include <asm/irq.h>
@@ -26,6 +27,9 @@
 
 #include "pci_sun4v.h"
 
+static unsigned long vpci_major = 1;
+static unsigned long vpci_minor = 1;
+
 #define PGLIST_NENTS	(PAGE_SIZE / sizeof(u64))
 
 struct iommu_batch {
@@ -638,9 +642,8 @@ static void pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
 {
 	struct iommu *iommu = pbm->iommu;
 	struct property *prop;
-	unsigned long num_tsb_entries, sz;
+	unsigned long num_tsb_entries, sz, tsbsize;
 	u32 vdma[2], dma_mask, dma_offset;
-	int tsbsize;
 
 	prop = of_find_property(pbm->prom_node, "virtual-dma", NULL);
 	if (prop) {
@@ -654,31 +657,15 @@ static void pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
 		vdma[1] = 0x80000000;
 	}
 
-	dma_mask = vdma[0];
-	switch (vdma[1]) {
-		case 0x20000000:
-			dma_mask |= 0x1fffffff;
-			tsbsize = 64;
-			break;
-
-		case 0x40000000:
-			dma_mask |= 0x3fffffff;
-			tsbsize = 128;
-			break;
-
-		case 0x80000000:
-			dma_mask |= 0x7fffffff;
-			tsbsize = 256;
-			break;
-
-		default:
-			prom_printf("PCI-SUN4V: strange virtual-dma size.\n");
-			prom_halt();
+	if ((vdma[0] | vdma[1]) & ~IO_PAGE_MASK) {
+		prom_printf("PCI-SUN4V: strange virtual-dma[%08x:%08x].\n",
+			    vdma[0], vdma[1]);
+		prom_halt();
 	};
 
-	tsbsize *= (8 * 1024);
-
-	num_tsb_entries = tsbsize / sizeof(iopte_t);
+	dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
+	num_tsb_entries = vdma[1] / IO_PAGE_SIZE;
+	tsbsize = num_tsb_entries * sizeof(iopte_t);
 
 	dma_offset = vdma[0];
 
@@ -689,7 +676,7 @@ static void pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
 	iommu->dma_addr_mask = dma_mask;
 
 	/* Allocate and initialize the free area map.  */
-	sz = num_tsb_entries / 8;
+	sz = (num_tsb_entries + 7) / 8;
 	sz = (sz + 7UL) & ~7UL;
 	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
 	if (!iommu->arena.map) {
@@ -1178,6 +1165,7 @@ static void pci_sun4v_pbm_init(struct pci_controller_info *p, struct device_node
 
 void sun4v_pci_init(struct device_node *dp, char *model_name)
 {
+	static int hvapi_negotiated = 0;
 	struct pci_controller_info *p;
 	struct pci_pbm_info *pbm;
 	struct iommu *iommu;
@@ -1186,6 +1174,20 @@ void sun4v_pci_init(struct device_node *dp, char *model_name)
 	u32 devhandle;
 	int i;
 
+	if (!hvapi_negotiated++) {
+		int err = sun4v_hvapi_register(HV_GRP_PCI,
+					       vpci_major,
+					       &vpci_minor);
+
+		if (err) {
+			prom_printf("SUN4V_PCI: Could not register hvapi, "
+				    "err=%d\n", err);
+			prom_halt();
+		}
+		printk("SUN4V_PCI: Registered hvapi major[%lu] minor[%lu]\n",
+		       vpci_major, vpci_minor);
+	}
+
 	prop = of_find_property(dp, "reg", NULL);
 	regs = prop->value;
 

+ 2 - 0
arch/sparc64/kernel/power.c

@@ -19,6 +19,7 @@
 #include <asm/prom.h>
 #include <asm/of_device.h>
 #include <asm/io.h>
+#include <asm/sstate.h>
 
 #include <linux/unistd.h>
 
@@ -53,6 +54,7 @@ static void (*poweroff_method)(void) = machine_alt_power_off;
 
 void machine_power_off(void)
 {
+	sstate_poweroff();
 	if (!serial_console || scons_pwroff) {
 #ifdef CONFIG_PCI
 		if (power_reg) {

+ 4 - 0
arch/sparc64/kernel/process.c

@@ -45,6 +45,7 @@
 #include <asm/mmu_context.h>
 #include <asm/unistd.h>
 #include <asm/hypervisor.h>
+#include <asm/sstate.h>
 
 /* #define VERBOSE_SHOWREGS */
 
@@ -106,6 +107,7 @@ extern void (*prom_keyboard)(void);
 
 void machine_halt(void)
 {
+	sstate_halt();
 	if (!serial_console && prom_palette)
 		prom_palette (1);
 	if (prom_keyboard)
@@ -116,6 +118,7 @@ void machine_halt(void)
 
 void machine_alt_power_off(void)
 {
+	sstate_poweroff();
 	if (!serial_console && prom_palette)
 		prom_palette(1);
 	if (prom_keyboard)
@@ -128,6 +131,7 @@ void machine_restart(char * cmd)
 {
 	char *p;
 	
+	sstate_reboot();
 	p = strchr (reboot_command, '\n');
 	if (p) *p = 0;
 	if (!serial_console && prom_palette)

+ 148 - 0
arch/sparc64/kernel/prom.c

@@ -28,6 +28,7 @@
 #include <asm/irq.h>
 #include <asm/asi.h>
 #include <asm/upa.h>
+#include <asm/smp.h>
 
 static struct device_node *allnodes;
 
@@ -1665,6 +1666,150 @@ static struct device_node * __init build_tree(struct device_node *parent, phandl
 	return ret;
 }
 
+static const char *get_mid_prop(void)
+{
+	return (tlb_type == spitfire ? "upa-portid" : "portid");
+}
+
+struct device_node *of_find_node_by_cpuid(int cpuid)
+{
+	struct device_node *dp;
+	const char *mid_prop = get_mid_prop();
+
+	for_each_node_by_type(dp, "cpu") {
+		int id = of_getintprop_default(dp, mid_prop, -1);
+		const char *this_mid_prop = mid_prop;
+
+		if (id < 0) {
+			this_mid_prop = "cpuid";
+			id = of_getintprop_default(dp, this_mid_prop, -1);
+		}
+
+		if (id < 0) {
+			prom_printf("OF: Serious problem, cpu lacks "
+				    "%s property", this_mid_prop);
+			prom_halt();
+		}
+		if (cpuid == id)
+			return dp;
+	}
+	return NULL;
+}
+
+static void __init of_fill_in_cpu_data(void)
+{
+	struct device_node *dp;
+	const char *mid_prop = get_mid_prop();
+
+	ncpus_probed = 0;
+	for_each_node_by_type(dp, "cpu") {
+		int cpuid = of_getintprop_default(dp, mid_prop, -1);
+		const char *this_mid_prop = mid_prop;
+		struct device_node *portid_parent;
+		int portid = -1;
+
+		portid_parent = NULL;
+		if (cpuid < 0) {
+			this_mid_prop = "cpuid";
+			cpuid = of_getintprop_default(dp, this_mid_prop, -1);
+			if (cpuid >= 0) {
+				int limit = 2;
+
+				portid_parent = dp;
+				while (limit--) {
+					portid_parent = portid_parent->parent;
+					if (!portid_parent)
+						break;
+					portid = of_getintprop_default(portid_parent,
+								       "portid", -1);
+					if (portid >= 0)
+						break;
+				}
+			}
+		}
+
+		if (cpuid < 0) {
+			prom_printf("OF: Serious problem, cpu lacks "
+				    "%s property", this_mid_prop);
+			prom_halt();
+		}
+
+		ncpus_probed++;
+
+#ifdef CONFIG_SMP
+		if (cpuid >= NR_CPUS)
+			continue;
+#else
+		/* On uniprocessor we only want the values for the
+		 * real physical cpu the kernel booted onto, however
+		 * cpu_data() only has one entry at index 0.
+		 */
+		if (cpuid != real_hard_smp_processor_id())
+			continue;
+		cpuid = 0;
+#endif
+
+		cpu_data(cpuid).clock_tick =
+			of_getintprop_default(dp, "clock-frequency", 0);
+
+		if (portid_parent) {
+			cpu_data(cpuid).dcache_size =
+				of_getintprop_default(dp, "l1-dcache-size",
+						      16 * 1024);
+			cpu_data(cpuid).dcache_line_size =
+				of_getintprop_default(dp, "l1-dcache-line-size",
+						      32);
+			cpu_data(cpuid).icache_size =
+				of_getintprop_default(dp, "l1-icache-size",
+						      8 * 1024);
+			cpu_data(cpuid).icache_line_size =
+				of_getintprop_default(dp, "l1-icache-line-size",
+						      32);
+			cpu_data(cpuid).ecache_size =
+				of_getintprop_default(dp, "l2-cache-size", 0);
+			cpu_data(cpuid).ecache_line_size =
+				of_getintprop_default(dp, "l2-cache-line-size", 0);
+			if (!cpu_data(cpuid).ecache_size ||
+			    !cpu_data(cpuid).ecache_line_size) {
+				cpu_data(cpuid).ecache_size =
+					of_getintprop_default(portid_parent,
+							      "l2-cache-size",
+							      (4 * 1024 * 1024));
+				cpu_data(cpuid).ecache_line_size =
+					of_getintprop_default(portid_parent,
+							      "l2-cache-line-size", 64);
+			}
+
+			cpu_data(cpuid).core_id = portid + 1;
+		} else {
+			cpu_data(cpuid).dcache_size =
+				of_getintprop_default(dp, "dcache-size", 16 * 1024);
+			cpu_data(cpuid).dcache_line_size =
+				of_getintprop_default(dp, "dcache-line-size", 32);
+
+			cpu_data(cpuid).icache_size =
+				of_getintprop_default(dp, "icache-size", 16 * 1024);
+			cpu_data(cpuid).icache_line_size =
+				of_getintprop_default(dp, "icache-line-size", 32);
+
+			cpu_data(cpuid).ecache_size =
+				of_getintprop_default(dp, "ecache-size",
+						      (4 * 1024 * 1024));
+			cpu_data(cpuid).ecache_line_size =
+				of_getintprop_default(dp, "ecache-line-size", 64);
+
+			cpu_data(cpuid).core_id = 0;
+		}
+
+#ifdef CONFIG_SMP
+		cpu_set(cpuid, cpu_present_map);
+		cpu_set(cpuid, phys_cpu_present_map);
+#endif
+	}
+
+	smp_fill_in_sib_core_maps();
+}
+
 void __init prom_build_devicetree(void)
 {
 	struct device_node **nextp;
@@ -1679,4 +1824,7 @@ void __init prom_build_devicetree(void)
 				     &nextp);
 	printk("PROM: Built device tree with %u bytes of memory.\n",
 	       prom_early_allocated);
+
+	if (tlb_type != hypervisor)
+		of_fill_in_cpu_data();
 }

+ 7 - 11
arch/sparc64/kernel/setup.c

@@ -46,11 +46,17 @@
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/mmu.h>
+#include <asm/ns87303.h>
 
 #ifdef CONFIG_IP_PNP
 #include <net/ipconfig.h>
 #endif
 
+/* Used to synchronize accesses to NatSemi SUPER I/O chip configure
+ * operations in asm/ns87303.h
+ */
+DEFINE_SPINLOCK(ns87303_lock);
+
 struct screen_info screen_info = {
 	0, 0,			/* orig-x, orig-y */
 	0,			/* unused */
@@ -370,8 +376,6 @@ void __init setup_arch(char **cmdline_p)
 	init_cur_cpu_trap(current_thread_info());
 
 	paging_init();
-
-	smp_setup_cpu_possible_map();
 }
 
 static int __init set_preferred_console(void)
@@ -424,7 +428,7 @@ extern void mmu_info(struct seq_file *);
 unsigned int dcache_parity_tl1_occurred;
 unsigned int icache_parity_tl1_occurred;
 
-static int ncpus_probed;
+int ncpus_probed;
 
 static int show_cpuinfo(struct seq_file *m, void *__unused)
 {
@@ -516,14 +520,6 @@ static int __init topology_init(void)
 
 	err = -ENOMEM;
 
-	/* Count the number of physically present processors in
-	 * the machine, even on uniprocessor, so that /proc/cpuinfo
-	 * output is consistent with 2.4.x
-	 */
-	ncpus_probed = 0;
-	while (!cpu_find_by_instance(ncpus_probed, NULL, NULL))
-		ncpus_probed++;
-
 	for_each_possible_cpu(i) {
 		struct cpu *p = kzalloc(sizeof(*p), GFP_KERNEL);
 		if (p) {

+ 50 - 105
arch/sparc64/kernel/smp.c

@@ -40,6 +40,7 @@
 #include <asm/tlb.h>
 #include <asm/sections.h>
 #include <asm/prom.h>
+#include <asm/mdesc.h>
 
 extern void calibrate_delay(void);
 
@@ -75,53 +76,6 @@ void smp_bogo(struct seq_file *m)
 			   i, cpu_data(i).clock_tick);
 }
 
-void __init smp_store_cpu_info(int id)
-{
-	struct device_node *dp;
-	int def;
-
-	cpu_data(id).udelay_val			= loops_per_jiffy;
-
-	cpu_find_by_mid(id, &dp);
-	cpu_data(id).clock_tick =
-		of_getintprop_default(dp, "clock-frequency", 0);
-
-	def = ((tlb_type == hypervisor) ? (8 * 1024) : (16 * 1024));
-	cpu_data(id).dcache_size =
-		of_getintprop_default(dp, "dcache-size", def);
-
-	def = 32;
-	cpu_data(id).dcache_line_size =
-		of_getintprop_default(dp, "dcache-line-size", def);
-
-	def = 16 * 1024;
-	cpu_data(id).icache_size =
-		of_getintprop_default(dp, "icache-size", def);
-
-	def = 32;
-	cpu_data(id).icache_line_size =
-		of_getintprop_default(dp, "icache-line-size", def);
-
-	def = ((tlb_type == hypervisor) ?
-	       (3 * 1024 * 1024) :
-	       (4 * 1024 * 1024));
-	cpu_data(id).ecache_size =
-		of_getintprop_default(dp, "ecache-size", def);
-
-	def = 64;
-	cpu_data(id).ecache_line_size =
-		of_getintprop_default(dp, "ecache-line-size", def);
-
-	printk("CPU[%d]: Caches "
-	       "D[sz(%d):line_sz(%d)] "
-	       "I[sz(%d):line_sz(%d)] "
-	       "E[sz(%d):line_sz(%d)]\n",
-	       id,
-	       cpu_data(id).dcache_size, cpu_data(id).dcache_line_size,
-	       cpu_data(id).icache_size, cpu_data(id).icache_line_size,
-	       cpu_data(id).ecache_size, cpu_data(id).ecache_line_size);
-}
-
 extern void setup_sparc64_timer(void);
 
 static volatile unsigned long callin_flag = 0;
@@ -145,7 +99,7 @@ void __init smp_callin(void)
 	local_irq_enable();
 
 	calibrate_delay();
-	smp_store_cpu_info(cpuid);
+	cpu_data(cpuid).udelay_val = loops_per_jiffy;
 	callin_flag = 1;
 	__asm__ __volatile__("membar #Sync\n\t"
 			     "flush  %%g6" : : : "memory");
@@ -340,9 +294,8 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu)
 
 		prom_startcpu_cpuid(cpu, entry, cookie);
 	} else {
-		struct device_node *dp;
+		struct device_node *dp = of_find_node_by_cpuid(cpu);
 
-		cpu_find_by_mid(cpu, &dp);
 		prom_startcpu(dp->node, entry, cookie);
 	}
 
@@ -447,7 +400,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c
 static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
 {
 	u64 pstate, ver;
-	int nack_busy_id, is_jbus;
+	int nack_busy_id, is_jbus, need_more;
 
 	if (cpus_empty(mask))
 		return;
@@ -463,6 +416,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
 	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
 
 retry:
+	need_more = 0;
 	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
 			     : : "r" (pstate), "i" (PSTATE_IE));
 
@@ -491,6 +445,10 @@ retry:
 				: /* no outputs */
 				: "r" (target), "i" (ASI_INTR_W));
 			nack_busy_id++;
+			if (nack_busy_id == 32) {
+				need_more = 1;
+				break;
+			}
 		}
 	}
 
@@ -507,6 +465,16 @@ retry:
 			if (dispatch_stat == 0UL) {
 				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 						     : : "r" (pstate));
+				if (unlikely(need_more)) {
+					int i, cnt = 0;
+					for_each_cpu_mask(i, mask) {
+						cpu_clear(i, mask);
+						cnt++;
+						if (cnt == 32)
+							break;
+					}
+					goto retry;
+				}
 				return;
 			}
 			if (!--stuck)
@@ -544,6 +512,8 @@ retry:
 				if ((dispatch_stat & check_mask) == 0)
 					cpu_clear(i, mask);
 				this_busy_nack += 2;
+				if (this_busy_nack == 64)
+					break;
 			}
 
 			goto retry;
@@ -1191,23 +1161,14 @@ int setup_profiling_timer(unsigned int multiplier)
 
 static void __init smp_tune_scheduling(void)
 {
-	struct device_node *dp;
-	int instance;
-	unsigned int def, smallest = ~0U;
-
-	def = ((tlb_type == hypervisor) ?
-	       (3 * 1024 * 1024) :
-	       (4 * 1024 * 1024));
+	unsigned int smallest = ~0U;
+	int i;
 
-	instance = 0;
-	while (!cpu_find_by_instance(instance, &dp, NULL)) {
-		unsigned int val;
+	for (i = 0; i < NR_CPUS; i++) {
+		unsigned int val = cpu_data(i).ecache_size;
 
-		val = of_getintprop_default(dp, "ecache-size", def);
-		if (val < smallest)
+		if (val && val < smallest)
 			smallest = val;
-
-		instance++;
 	}
 
 	/* Any value less than 256K is nonsense.  */
@@ -1230,58 +1191,42 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	int i;
 
 	if (num_possible_cpus() > max_cpus) {
-		int instance, mid;
-
-		instance = 0;
-		while (!cpu_find_by_instance(instance, NULL, &mid)) {
-			if (mid != boot_cpu_id) {
-				cpu_clear(mid, phys_cpu_present_map);
-				cpu_clear(mid, cpu_present_map);
+		for_each_possible_cpu(i) {
+			if (i != boot_cpu_id) {
+				cpu_clear(i, phys_cpu_present_map);
+				cpu_clear(i, cpu_present_map);
 				if (num_possible_cpus() <= max_cpus)
 					break;
 			}
-			instance++;
 		}
 	}
 
-	for_each_possible_cpu(i) {
-		if (tlb_type == hypervisor) {
-			int j;
-
-			/* XXX get this mapping from machine description */
-			for_each_possible_cpu(j) {
-				if ((j >> 2) == (i >> 2))
-					cpu_set(j, cpu_sibling_map[i]);
-			}
-		} else {
-			cpu_set(i, cpu_sibling_map[i]);
-		}
-	}
-
-	smp_store_cpu_info(boot_cpu_id);
+	cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;
 	smp_tune_scheduling();
 }
 
-/* Set this up early so that things like the scheduler can init
- * properly.  We use the same cpu mask for both the present and
- * possible cpu map.
- */
-void __init smp_setup_cpu_possible_map(void)
+void __devinit smp_prepare_boot_cpu(void)
 {
-	int instance, mid;
-
-	instance = 0;
-	while (!cpu_find_by_instance(instance, NULL, &mid)) {
-		if (mid < NR_CPUS) {
-			cpu_set(mid, phys_cpu_present_map);
-			cpu_set(mid, cpu_present_map);
-		}
-		instance++;
-	}
 }
 
-void __devinit smp_prepare_boot_cpu(void)
+void __devinit smp_fill_in_sib_core_maps(void)
 {
+	unsigned int i;
+
+	for_each_possible_cpu(i) {
+		unsigned int j;
+
+		if (cpu_data(i).core_id == 0) {
+			cpu_set(i, cpu_sibling_map[i]);
+			continue;
+		}
+
+		for_each_possible_cpu(j) {
+			if (cpu_data(i).core_id ==
+			    cpu_data(j).core_id)
+				cpu_set(j, cpu_sibling_map[i]);
+		}
+	}
 }
 
 int __cpuinit __cpu_up(unsigned int cpu)
@@ -1337,7 +1282,7 @@ unsigned long __per_cpu_shift __read_mostly;
 EXPORT_SYMBOL(__per_cpu_base);
 EXPORT_SYMBOL(__per_cpu_shift);
 
-void __init setup_per_cpu_areas(void)
+void __init real_setup_per_cpu_areas(void)
 {
 	unsigned long goal, size, i;
 	char *ptr;

+ 104 - 0
arch/sparc64/kernel/sstate.c

@@ -0,0 +1,104 @@
+/* sstate.c: System soft state support.
+ *
+ * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+
+#include <asm/hypervisor.h>
+#include <asm/sstate.h>
+#include <asm/oplib.h>
+#include <asm/head.h>
+#include <asm/io.h>
+
+static int hv_supports_soft_state;
+
+static unsigned long kimage_addr_to_ra(const char *p)
+{
+	unsigned long val = (unsigned long) p;
+
+	return kern_base + (val - KERNBASE);
+}
+
+static void do_set_sstate(unsigned long state, const char *msg)
+{
+	unsigned long err;
+
+	if (!hv_supports_soft_state)
+		return;
+
+	err = sun4v_mach_set_soft_state(state, kimage_addr_to_ra(msg));
+	if (err) {
+		printk(KERN_WARNING "SSTATE: Failed to set soft-state to "
+		       "state[%lx] msg[%s], err=%lu\n",
+		       state, msg, err);
+	}
+}
+
+static const char booting_msg[32] __attribute__((aligned(32))) =
+	"Linux booting";
+static const char running_msg[32] __attribute__((aligned(32))) =
+	"Linux running";
+static const char halting_msg[32] __attribute__((aligned(32))) =
+	"Linux halting";
+static const char poweroff_msg[32] __attribute__((aligned(32))) =
+	"Linux powering off";
+static const char rebooting_msg[32] __attribute__((aligned(32))) =
+	"Linux rebooting";
+static const char panicing_msg[32] __attribute__((aligned(32))) =
+	"Linux panicing";
+
+void sstate_booting(void)
+{
+	do_set_sstate(HV_SOFT_STATE_TRANSITION, booting_msg);
+}
+
+void sstate_running(void)
+{
+	do_set_sstate(HV_SOFT_STATE_NORMAL, running_msg);
+}
+
+void sstate_halt(void)
+{
+	do_set_sstate(HV_SOFT_STATE_TRANSITION, halting_msg);
+}
+
+void sstate_poweroff(void)
+{
+	do_set_sstate(HV_SOFT_STATE_TRANSITION, poweroff_msg);
+}
+
+void sstate_reboot(void)
+{
+	do_set_sstate(HV_SOFT_STATE_TRANSITION, rebooting_msg);
+}
+
+static int sstate_panic_event(struct notifier_block *n, unsigned long event, void *ptr)
+{
+	do_set_sstate(HV_SOFT_STATE_TRANSITION, panicing_msg);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block sstate_panic_block = {
+	.notifier_call	=	sstate_panic_event,
+	.priority	=	INT_MAX,
+};
+
+void __init sun4v_sstate_init(void)
+{
+	unsigned long major, minor;
+
+	major = 1;
+	minor = 0;
+	if (sun4v_hvapi_register(HV_GRP_SOFT_STATE, major, &minor))
+		return;
+
+	hv_supports_soft_state = 1;
+
+	prom_sun4v_guest_soft_state();
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &sstate_panic_block);
+}

+ 14 - 16
arch/sparc64/kernel/sun4v_ivec.S

@@ -22,12 +22,12 @@ sun4v_cpu_mondo:
 	be,pn	%xcc, sun4v_cpu_mondo_queue_empty
 	 nop
 
-	/* Get &trap_block[smp_processor_id()] into %g3.  */
-	ldxa	[%g0] ASI_SCRATCHPAD, %g3
-	sub	%g3, TRAP_PER_CPU_FAULT_INFO, %g3
+	/* Get &trap_block[smp_processor_id()] into %g4.  */
+	ldxa	[%g0] ASI_SCRATCHPAD, %g4
+	sub	%g4, TRAP_PER_CPU_FAULT_INFO, %g4
 
 	/* Get CPU mondo queue base phys address into %g7.  */
-	ldx	[%g3 + TRAP_PER_CPU_CPU_MONDO_PA], %g7
+	ldx	[%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7
 
 	/* Now get the cross-call arguments and handler PC, same
 	 * layout as sun4u:
@@ -47,8 +47,7 @@ sun4v_cpu_mondo:
 	add	%g2, 0x40 - 0x8 - 0x8, %g2
 
 	/* Update queue head pointer.  */
-	sethi	%hi(8192 - 1), %g4
-	or	%g4, %lo(8192 - 1), %g4
+	lduw	[%g4 + TRAP_PER_CPU_CPU_MONDO_QMASK], %g4
 	and	%g2, %g4, %g2
 
 	mov	INTRQ_CPU_MONDO_HEAD, %g4
@@ -71,12 +70,12 @@ sun4v_dev_mondo:
 	be,pn	%xcc, sun4v_dev_mondo_queue_empty
 	 nop
 
-	/* Get &trap_block[smp_processor_id()] into %g3.  */
-	ldxa	[%g0] ASI_SCRATCHPAD, %g3
-	sub	%g3, TRAP_PER_CPU_FAULT_INFO, %g3
+	/* Get &trap_block[smp_processor_id()] into %g4.  */
+	ldxa	[%g0] ASI_SCRATCHPAD, %g4
+	sub	%g4, TRAP_PER_CPU_FAULT_INFO, %g4
 
 	/* Get DEV mondo queue base phys address into %g5.  */
-	ldx	[%g3 + TRAP_PER_CPU_DEV_MONDO_PA], %g5
+	ldx	[%g4 + TRAP_PER_CPU_DEV_MONDO_PA], %g5
 
 	/* Load IVEC into %g3.  */
 	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
@@ -90,8 +89,7 @@ sun4v_dev_mondo:
 	 */
 
 	/* Update queue head pointer, this frees up some registers.  */
-	sethi	%hi(8192 - 1), %g4
-	or	%g4, %lo(8192 - 1), %g4
+	lduw	[%g4 + TRAP_PER_CPU_DEV_MONDO_QMASK], %g4
 	and	%g2, %g4, %g2
 
 	mov	INTRQ_DEVICE_MONDO_HEAD, %g4
@@ -143,6 +141,8 @@ sun4v_res_mondo:
 	brnz,pn	%g1, sun4v_res_mondo_queue_full
 	 nop
 
+	lduw	[%g3 + TRAP_PER_CPU_RESUM_QMASK], %g4
+
 	/* Remember this entry's offset in %g1.  */
 	mov	%g2, %g1
 
@@ -173,8 +173,6 @@ sun4v_res_mondo:
 	add	%g2, 0x08, %g2
 
 	/* Update queue head pointer.  */
-	sethi	%hi(8192 - 1), %g4
-	or	%g4, %lo(8192 - 1), %g4
 	and	%g2, %g4, %g2
 
 	mov	INTRQ_RESUM_MONDO_HEAD, %g4
@@ -254,6 +252,8 @@ sun4v_nonres_mondo:
 	brnz,pn	%g1, sun4v_nonres_mondo_queue_full
 	 nop
 
+	lduw	[%g3 + TRAP_PER_CPU_NONRESUM_QMASK], %g4
+
 	/* Remember this entry's offset in %g1.  */
 	mov	%g2, %g1
 
@@ -284,8 +284,6 @@ sun4v_nonres_mondo:
 	add	%g2, 0x08, %g2
 
 	/* Update queue head pointer.  */
-	sethi	%hi(8192 - 1), %g4
-	or	%g4, %lo(8192 - 1), %g4
 	and	%g2, %g4, %g2
 
 	mov	INTRQ_NONRESUM_MONDO_HEAD, %g4

+ 18 - 29
arch/sparc64/kernel/time.c

@@ -680,22 +680,14 @@ static int starfire_set_time(u32 val)
 
 static u32 hypervisor_get_time(void)
 {
-	register unsigned long func asm("%o5");
-	register unsigned long arg0 asm("%o0");
-	register unsigned long arg1 asm("%o1");
+	unsigned long ret, time;
 	int retries = 10000;
 
 retry:
-	func = HV_FAST_TOD_GET;
-	arg0 = 0;
-	arg1 = 0;
-	__asm__ __volatile__("ta	%6"
-			     : "=&r" (func), "=&r" (arg0), "=&r" (arg1)
-			     : "0" (func), "1" (arg0), "2" (arg1),
-			       "i" (HV_FAST_TRAP));
-	if (arg0 == HV_EOK)
-		return arg1;
-	if (arg0 == HV_EWOULDBLOCK) {
+	ret = sun4v_tod_get(&time);
+	if (ret == HV_EOK)
+		return time;
+	if (ret == HV_EWOULDBLOCK) {
 		if (--retries > 0) {
 			udelay(100);
 			goto retry;
@@ -709,20 +701,14 @@ retry:
 
 static int hypervisor_set_time(u32 secs)
 {
-	register unsigned long func asm("%o5");
-	register unsigned long arg0 asm("%o0");
+	unsigned long ret;
 	int retries = 10000;
 
 retry:
-	func = HV_FAST_TOD_SET;
-	arg0 = secs;
-	__asm__ __volatile__("ta	%4"
-			     : "=&r" (func), "=&r" (arg0)
-			     : "0" (func), "1" (arg0),
-			       "i" (HV_FAST_TRAP));
-	if (arg0 == HV_EOK)
+	ret = sun4v_tod_set(secs);
+	if (ret == HV_EOK)
 		return 0;
-	if (arg0 == HV_EWOULDBLOCK) {
+	if (ret == HV_EWOULDBLOCK) {
 		if (--retries > 0) {
 			udelay(100);
 			goto retry;
@@ -862,7 +848,6 @@ fs_initcall(clock_init);
 static unsigned long sparc64_init_timers(void)
 {
 	struct device_node *dp;
-	struct property *prop;
 	unsigned long clock;
 #ifdef CONFIG_SMP
 	extern void smp_tick_init(void);
@@ -879,17 +864,15 @@ static unsigned long sparc64_init_timers(void)
 		if (manuf == 0x17 && impl == 0x13) {
 			/* Hummingbird, aka Ultra-IIe */
 			tick_ops = &hbtick_operations;
-			prop = of_find_property(dp, "stick-frequency", NULL);
+			clock = of_getintprop_default(dp, "stick-frequency", 0);
 		} else {
 			tick_ops = &tick_operations;
-			cpu_find_by_instance(0, &dp, NULL);
-			prop = of_find_property(dp, "clock-frequency", NULL);
+			clock = local_cpu_data().clock_tick;
 		}
 	} else {
 		tick_ops = &stick_operations;
-		prop = of_find_property(dp, "stick-frequency", NULL);
+		clock = of_getintprop_default(dp, "stick-frequency", 0);
 	}
-	clock = *(unsigned int *) prop->value;
 
 #ifdef CONFIG_SMP
 	smp_tick_init();
@@ -1365,6 +1348,7 @@ static int hypervisor_set_rtc_time(struct rtc_time *time)
 	return hypervisor_set_time(seconds);
 }
 
+#ifdef CONFIG_PCI
 static void bq4802_get_rtc_time(struct rtc_time *time)
 {
 	unsigned char val = readb(bq4802_regs + 0x0e);
@@ -1436,6 +1420,7 @@ static int bq4802_set_rtc_time(struct rtc_time *time)
 
 	return 0;
 }
+#endif /* CONFIG_PCI */
 
 struct mini_rtc_ops {
 	void (*get_rtc_time)(struct rtc_time *);
@@ -1452,10 +1437,12 @@ static struct mini_rtc_ops hypervisor_rtc_ops = {
 	.set_rtc_time = hypervisor_set_rtc_time,
 };
 
+#ifdef CONFIG_PCI
 static struct mini_rtc_ops bq4802_rtc_ops = {
 	.get_rtc_time = bq4802_get_rtc_time,
 	.set_rtc_time = bq4802_set_rtc_time,
 };
+#endif /* CONFIG_PCI */
 
 static struct mini_rtc_ops *mini_rtc_ops;
 
@@ -1579,8 +1566,10 @@ static int __init rtc_mini_init(void)
 		mini_rtc_ops = &hypervisor_rtc_ops;
 	else if (this_is_starfire)
 		mini_rtc_ops = &starfire_rtc_ops;
+#ifdef CONFIG_PCI
 	else if (bq4802_regs)
 		mini_rtc_ops = &bq4802_rtc_ops;
+#endif /* CONFIG_PCI */
 	else
 		return -ENODEV;
 

+ 18 - 9
arch/sparc64/kernel/traps.c

@@ -795,8 +795,7 @@ extern unsigned int cheetah_deferred_trap_vector[], cheetah_deferred_trap_vector
 void __init cheetah_ecache_flush_init(void)
 {
 	unsigned long largest_size, smallest_linesize, order, ver;
-	struct device_node *dp;
-	int i, instance, sz;
+	int i, sz;
 
 	/* Scan all cpu device tree nodes, note two values:
 	 * 1) largest E-cache size
@@ -805,18 +804,20 @@ void __init cheetah_ecache_flush_init(void)
 	largest_size = 0UL;
 	smallest_linesize = ~0UL;
 
-	instance = 0;
-	while (!cpu_find_by_instance(instance, &dp, NULL)) {
+	for (i = 0; i < NR_CPUS; i++) {
 		unsigned long val;
 
-		val = of_getintprop_default(dp, "ecache-size",
-					    (2 * 1024 * 1024));
+		val = cpu_data(i).ecache_size;
+		if (!val)
+			continue;
+
 		if (val > largest_size)
 			largest_size = val;
-		val = of_getintprop_default(dp, "ecache-line-size", 64);
+
+		val = cpu_data(i).ecache_line_size;
 		if (val < smallest_linesize)
 			smallest_linesize = val;
-		instance++;
+
 	}
 
 	if (largest_size == 0UL || smallest_linesize == ~0UL) {
@@ -2564,7 +2565,15 @@ void __init trap_init(void)
 	    (TRAP_PER_CPU_TSB_HUGE_TEMP !=
 	     offsetof(struct trap_per_cpu, tsb_huge_temp)) ||
 	    (TRAP_PER_CPU_IRQ_WORKLIST !=
-	     offsetof(struct trap_per_cpu, irq_worklist)))
+	     offsetof(struct trap_per_cpu, irq_worklist)) ||
+	    (TRAP_PER_CPU_CPU_MONDO_QMASK !=
+	     offsetof(struct trap_per_cpu, cpu_mondo_qmask)) ||
+	    (TRAP_PER_CPU_DEV_MONDO_QMASK !=
+	     offsetof(struct trap_per_cpu, dev_mondo_qmask)) ||
+	    (TRAP_PER_CPU_RESUM_QMASK !=
+	     offsetof(struct trap_per_cpu, resum_qmask)) ||
+	    (TRAP_PER_CPU_NONRESUM_QMASK !=
+	     offsetof(struct trap_per_cpu, nonresum_qmask)))
 		trap_per_cpu_offsets_are_bolixed_dave();
 
 	if ((TSB_CONFIG_TSB !=

+ 49 - 41
arch/sparc64/mm/init.c

@@ -23,6 +23,7 @@
 #include <linux/kprobes.h>
 #include <linux/cache.h>
 #include <linux/sort.h>
+#include <linux/percpu.h>
 
 #include <asm/head.h>
 #include <asm/system.h>
@@ -43,8 +44,8 @@
 #include <asm/tsb.h>
 #include <asm/hypervisor.h>
 #include <asm/prom.h>
-
-extern void device_scan(void);
+#include <asm/sstate.h>
+#include <asm/mdesc.h>
 
 #define MAX_PHYS_ADDRESS	(1UL << 42UL)
 #define KPTE_BITMAP_CHUNK_SZ	(256UL * 1024UL * 1024UL)
@@ -60,8 +61,11 @@ unsigned long kern_linear_pte_xor[2] __read_mostly;
 unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
 
 #ifndef CONFIG_DEBUG_PAGEALLOC
-/* A special kernel TSB for 4MB and 256MB linear mappings.  */
-struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
+/* A special kernel TSB for 4MB and 256MB linear mappings.
+ * Space is allocated for this right after the trap table
+ * in arch/sparc64/kernel/head.S
+ */
+extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
 #endif
 
 #define MAX_BANKS	32
@@ -190,12 +194,9 @@ inline void flush_dcache_page_impl(struct page *page)
 }
 
 #define PG_dcache_dirty		PG_arch_1
-#define PG_dcache_cpu_shift	24UL
-#define PG_dcache_cpu_mask	(256UL - 1UL)
-
-#if NR_CPUS > 256
-#error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus
-#endif
+#define PG_dcache_cpu_shift	32UL
+#define PG_dcache_cpu_mask	\
+	((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL)
 
 #define dcache_dirty_cpu(page) \
 	(((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask)
@@ -557,26 +558,11 @@ static void __init hypervisor_tlb_lock(unsigned long vaddr,
 				       unsigned long pte,
 				       unsigned long mmu)
 {
-	register unsigned long func asm("%o5");
-	register unsigned long arg0 asm("%o0");
-	register unsigned long arg1 asm("%o1");
-	register unsigned long arg2 asm("%o2");
-	register unsigned long arg3 asm("%o3");
-
-	func = HV_FAST_MMU_MAP_PERM_ADDR;
-	arg0 = vaddr;
-	arg1 = 0;
-	arg2 = pte;
-	arg3 = mmu;
-	__asm__ __volatile__("ta	0x80"
-			     : "=&r" (func), "=&r" (arg0),
-			       "=&r" (arg1), "=&r" (arg2),
-			       "=&r" (arg3)
-			     : "0" (func), "1" (arg0), "2" (arg1),
-			       "3" (arg2), "4" (arg3));
-	if (arg0 != 0) {
+	unsigned long ret = sun4v_mmu_map_perm_addr(vaddr, 0, pte, mmu);
+
+	if (ret != 0) {
 		prom_printf("hypervisor_tlb_lock[%lx:%lx:%lx:%lx]: "
-			    "errors with %lx\n", vaddr, 0, pte, mmu, arg0);
+			    "errors with %lx\n", vaddr, 0, pte, mmu, ret);
 		prom_halt();
 	}
 }
@@ -1313,20 +1299,16 @@ static void __init sun4v_ktsb_init(void)
 
 void __cpuinit sun4v_ktsb_register(void)
 {
-	register unsigned long func asm("%o5");
-	register unsigned long arg0 asm("%o0");
-	register unsigned long arg1 asm("%o1");
-	unsigned long pa;
+	unsigned long pa, ret;
 
 	pa = kern_base + ((unsigned long)&ktsb_descr[0] - KERNBASE);
 
-	func = HV_FAST_MMU_TSB_CTX0;
-	arg0 = NUM_KTSB_DESCR;
-	arg1 = pa;
-	__asm__ __volatile__("ta	%6"
-			     : "=&r" (func), "=&r" (arg0), "=&r" (arg1)
-			     : "0" (func), "1" (arg0), "2" (arg1),
-			       "i" (HV_FAST_TRAP));
+	ret = sun4v_mmu_tsb_ctx0(NUM_KTSB_DESCR, pa);
+	if (ret != 0) {
+		prom_printf("hypervisor_mmu_tsb_ctx0[%lx]: "
+			    "errors with %lx\n", pa, ret);
+		prom_halt();
+	}
 }
 
 /* paging_init() sets up the page tables */
@@ -1334,6 +1316,9 @@ void __cpuinit sun4v_ktsb_register(void)
 extern void cheetah_ecache_flush_init(void);
 extern void sun4v_patch_tlb_handlers(void);
 
+extern void cpu_probe(void);
+extern void central_probe(void);
+
 static unsigned long last_valid_pfn;
 pgd_t swapper_pg_dir[2048];
 
@@ -1345,9 +1330,24 @@ void __init paging_init(void)
 	unsigned long end_pfn, pages_avail, shift, phys_base;
 	unsigned long real_end, i;
 
+	/* These build time checkes make sure that the dcache_dirty_cpu()
+	 * page->flags usage will work.
+	 *
+	 * When a page gets marked as dcache-dirty, we store the
+	 * cpu number starting at bit 32 in the page->flags.  Also,
+	 * functions like clear_dcache_dirty_cpu use the cpu mask
+	 * in 13-bit signed-immediate instruction fields.
+	 */
+	BUILD_BUG_ON(FLAGS_RESERVED != 32);
+	BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH +
+		     ilog2(roundup_pow_of_two(NR_CPUS)) > FLAGS_RESERVED);
+	BUILD_BUG_ON(NR_CPUS > 4096);
+
 	kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
 	kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
 
+	sstate_booting();
+
 	/* Invalidate both kernel TSBs.  */
 	memset(swapper_tsb, 0x40, sizeof(swapper_tsb));
 #ifndef CONFIG_DEBUG_PAGEALLOC
@@ -1416,8 +1416,13 @@ void __init paging_init(void)
 
 	kernel_physical_mapping_init();
 
+	real_setup_per_cpu_areas();
+
 	prom_build_devicetree();
 
+	if (tlb_type == hypervisor)
+		sun4v_mdesc_init();
+
 	{
 		unsigned long zones_size[MAX_NR_ZONES];
 		unsigned long zholes_size[MAX_NR_ZONES];
@@ -1434,7 +1439,10 @@ void __init paging_init(void)
 				    zholes_size);
 	}
 
-	device_scan();
+	prom_printf("Booting Linux...\n");
+
+	central_probe();
+	cpu_probe();
 }
 
 static void __init taint_real_pages(void)

+ 19 - 0
arch/sparc64/prom/misc.c

@@ -15,6 +15,25 @@
 #include <asm/oplib.h>
 #include <asm/system.h>
 
+int prom_service_exists(const char *service_name)
+{
+	int err = p1275_cmd("test", P1275_ARG(0, P1275_ARG_IN_STRING) |
+			    P1275_INOUT(1, 1), service_name);
+
+	if (err)
+		return 0;
+	return 1;
+}
+
+void prom_sun4v_guest_soft_state(void)
+{
+	const char *svc = "SUNW,soft-state-supported";
+
+	if (!prom_service_exists(svc))
+		return;
+	p1275_cmd(svc, P1275_INOUT(0, 0));
+}
+
 /* Reset and reboot the machine with the command 'bcommand'. */
 void prom_reboot(const char *bcommand)
 {

+ 1 - 1
drivers/char/drm/Kconfig

@@ -6,7 +6,7 @@
 #
 config DRM
 	tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)"
-	depends on (AGP || AGP=n) && PCI
+	depends on (AGP || AGP=n) && PCI && !EMULATED_CMPXCHG
 	help
 	  Kernel-level support for the Direct Rendering Infrastructure (DRI)
 	  introduced in XFree86 4.0. If you say Y here, you need to select

+ 0 - 14
drivers/scsi/Kconfig

@@ -1753,23 +1753,9 @@ config SUN3X_ESP
 	  The ESP was an on-board SCSI controller used on Sun 3/80
 	  machines.  Say Y here to compile in support for it.
 
-config SCSI_ESP_CORE
-	tristate "ESP Scsi Driver Core"
-	depends on SCSI
-	select SCSI_SPI_ATTRS
-	help
-	  This is a core driver for NCR53c9x based scsi chipsets,
-	  also known as "ESP" for Emulex Scsi Processor or
-	  Enhanced Scsi Processor.  This driver does not exist by
-	  itself, there are front-end drivers which, when enabled,
-	  select and enable this driver.  One example is SCSI_SUNESP.
-	  These front-end drivers provide probing, DMA, and register
-	  access support for the core driver.
-
 config SCSI_SUNESP
 	tristate "Sparc ESP Scsi Driver"
 	depends on SBUS && SCSI
-	select SCSI_ESP_CORE
 	help
 	  This is the driver for the Sun ESP SCSI host adapter. The ESP
 	  chipset is present in most SPARC SBUS-based computers.

+ 2 - 3
drivers/scsi/Makefile

@@ -106,8 +106,7 @@ obj-$(CONFIG_MEGARAID_LEGACY)	+= megaraid.o
 obj-$(CONFIG_MEGARAID_NEWGEN)	+= megaraid/
 obj-$(CONFIG_MEGARAID_SAS)	+= megaraid/
 obj-$(CONFIG_SCSI_ACARD)	+= atp870u.o
-obj-$(CONFIG_SCSI_ESP_CORE)	+= esp_scsi.o
-obj-$(CONFIG_SCSI_SUNESP)	+= sun_esp.o
+obj-$(CONFIG_SCSI_SUNESP)	+= esp_scsi.o	sun_esp.o
 obj-$(CONFIG_SCSI_GDTH)		+= gdth.o
 obj-$(CONFIG_SCSI_INITIO)	+= initio.o
 obj-$(CONFIG_SCSI_INIA100)	+= a100u2w.o
@@ -121,7 +120,7 @@ obj-$(CONFIG_BLK_DEV_3W_XXXX_RAID) += 3w-xxxx.o
 obj-$(CONFIG_SCSI_3W_9XXX)	+= 3w-9xxx.o
 obj-$(CONFIG_SCSI_PPA)		+= ppa.o
 obj-$(CONFIG_SCSI_IMM)		+= imm.o
-obj-$(CONFIG_JAZZ_ESP)		+= NCR53C9x.o	jazz_esp.o
+obj-$(CONFIG_JAZZ_ESP)		+= esp_scsi.o	jazz_esp.o
 obj-$(CONFIG_SUN3X_ESP)		+= NCR53C9x.o	sun3x_esp.o
 obj-$(CONFIG_SCSI_FCAL)		+= fcal.o
 obj-$(CONFIG_SCSI_LASI700)	+= 53c700.o lasi700.o

+ 183 - 246
drivers/scsi/jazz_esp.c

@@ -1,307 +1,244 @@
-/*
- * jazz_esp.c: Driver for SCSI chip on Mips Magnum Boards (JAZZ architecture)
+/* jazz_esp.c: ESP front-end for MIPS JAZZ systems.
  *
- * Copyright (C) 1997 Thomas Bogendoerfer (tsbogend@alpha.franken.de)
- *
- * jazz_esp is based on David S. Miller's ESP driver and cyber_esp
+ * Copyright (C) 2007 Thomas Bogendörfer (tsbogend@alpha.frankende)
  */
 
-#include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/delay.h>
 #include <linux/types.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-
-#include "scsi.h"
-#include <scsi/scsi_host.h>
-#include "NCR53C9x.h"
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/dma.h>
+
 #include <asm/jazz.h>
 #include <asm/jazzdma.h>
-#include <asm/dma.h>
 
-#include <asm/pgtable.h>
-
-static int  dma_bytes_sent(struct NCR_ESP *esp, int fifo_count);
-static int  dma_can_transfer(struct NCR_ESP *esp, struct scsi_cmnd *sp);
-static void dma_dump_state(struct NCR_ESP *esp);
-static void dma_init_read(struct NCR_ESP *esp, __u32 vaddress, int length);
-static void dma_init_write(struct NCR_ESP *esp, __u32 vaddress, int length);
-static void dma_ints_off(struct NCR_ESP *esp);
-static void dma_ints_on(struct NCR_ESP *esp);
-static int  dma_irq_p(struct NCR_ESP *esp);
-static int  dma_ports_p(struct NCR_ESP *esp);
-static void dma_setup(struct NCR_ESP *esp, __u32 addr, int count, int write);
-static void dma_mmu_get_scsi_one (struct NCR_ESP *esp, struct scsi_cmnd *sp);
-static void dma_mmu_get_scsi_sgl (struct NCR_ESP *esp, struct scsi_cmnd *sp);
-static void dma_mmu_release_scsi_one (struct NCR_ESP *esp, struct scsi_cmnd *sp);
-static void dma_mmu_release_scsi_sgl (struct NCR_ESP *esp, struct scsi_cmnd *sp);
-static void dma_advance_sg (struct scsi_cmnd *sp);
-static void dma_led_off(struct NCR_ESP *);
-static void dma_led_on(struct NCR_ESP *);
-
-
-static volatile unsigned char cmd_buffer[16];
-				/* This is where all commands are put
-				 * before they are trasfered to the ESP chip
-				 * via PIO.
-				 */
-
-static int jazz_esp_release(struct Scsi_Host *shost)
-{
-	if (shost->irq)
-		free_irq(shost->irq, NULL);
-	if (shost->dma_channel != 0xff)
-		free_dma(shost->dma_channel);
-	if (shost->io_port && shost->n_io_port)
-		release_region(shost->io_port, shost->n_io_port);
-	scsi_unregister(shost);
-	return 0;
-}
+#include <scsi/scsi_host.h>
 
-/***************************************************************** Detection */
-static int jazz_esp_detect(struct scsi_host_template *tpnt)
-{
-    struct NCR_ESP *esp;
-    struct ConfigDev *esp_dev;
-
-    /*
-     * first assumption it is there:-)
-     */
-    if (1) {
-	esp_dev = NULL;
-	esp = esp_allocate(tpnt, esp_dev, 0);
-	
-	/* Do command transfer with programmed I/O */
-	esp->do_pio_cmds = 1;
-	
-	/* Required functions */
-	esp->dma_bytes_sent = &dma_bytes_sent;
-	esp->dma_can_transfer = &dma_can_transfer;
-	esp->dma_dump_state = &dma_dump_state;
-	esp->dma_init_read = &dma_init_read;
-	esp->dma_init_write = &dma_init_write;
-	esp->dma_ints_off = &dma_ints_off;
-	esp->dma_ints_on = &dma_ints_on;
-	esp->dma_irq_p = &dma_irq_p;
-	esp->dma_ports_p = &dma_ports_p;
-	esp->dma_setup = &dma_setup;
-
-	/* Optional functions */
-	esp->dma_barrier = NULL;
-	esp->dma_drain = NULL;
-	esp->dma_invalidate = NULL;
-	esp->dma_irq_entry = NULL;
-	esp->dma_irq_exit = NULL;
-	esp->dma_poll = NULL;
-	esp->dma_reset = NULL;
-	esp->dma_led_off = &dma_led_off;
-	esp->dma_led_on = &dma_led_on;
-	
-	/* virtual DMA functions */
-	esp->dma_mmu_get_scsi_one = &dma_mmu_get_scsi_one;
-	esp->dma_mmu_get_scsi_sgl = &dma_mmu_get_scsi_sgl;
-	esp->dma_mmu_release_scsi_one = &dma_mmu_release_scsi_one;
-	esp->dma_mmu_release_scsi_sgl = &dma_mmu_release_scsi_sgl;
-	esp->dma_advance_sg = &dma_advance_sg;
-
-
-	/* SCSI chip speed */
-	esp->cfreq = 40000000;
+#include "esp_scsi.h"
 
-	/* 
-	 * we don't give the address of DMA channel, but the number
-	 * of DMA channel, so we can use the jazz DMA functions
-	 * 
-	 */
-	esp->dregs = (void *) JAZZ_SCSI_DMA;
-	
-	/* ESP register base */
-	esp->eregs = (struct ESP_regs *)(JAZZ_SCSI_BASE);
-	
-	/* Set the command buffer */
-	esp->esp_command = (volatile unsigned char *)cmd_buffer;
-	
-	/* get virtual dma address for command buffer */
-	esp->esp_command_dvma = vdma_alloc(CPHYSADDR(cmd_buffer), sizeof (cmd_buffer));
-	
-	esp->irq = JAZZ_SCSI_IRQ;
-	request_irq(JAZZ_SCSI_IRQ, esp_intr, IRQF_DISABLED, "JAZZ SCSI",
-	            esp->ehost);
-
-	/*
-	 * FIXME, look if the scsi id is available from NVRAM
-	 */
-	esp->scsi_id = 7;
-		
-	/* Check for differential SCSI-bus */
-	/* What is this stuff? */
-	esp->diff = 0;
-
-	esp_initialize(esp);
-	
-	printk("ESP: Total of %d ESP hosts found, %d actually in use.\n", nesps,esps_in_use);
-	esps_running = esps_in_use;
-	return esps_in_use;
-    }
-    return 0;
-}
+#define DRV_MODULE_NAME		"jazz_esp"
+#define PFX DRV_MODULE_NAME	": "
+#define DRV_VERSION		"1.000"
+#define DRV_MODULE_RELDATE	"May 19, 2007"
 
-/************************************************************* DMA Functions */
-static int dma_bytes_sent(struct NCR_ESP *esp, int fifo_count)
+static void jazz_esp_write8(struct esp *esp, u8 val, unsigned long reg)
 {
-    return fifo_count;
+	*(volatile u8 *)(esp->regs + reg) = val;
 }
 
-static int dma_can_transfer(struct NCR_ESP *esp, struct scsi_cmnd *sp)
+static u8 jazz_esp_read8(struct esp *esp, unsigned long reg)
 {
-    /*
-     * maximum DMA size is 1MB
-     */
-    unsigned long sz = sp->SCp.this_residual;
-    if(sz > 0x100000)
-	sz = 0x100000;
-    return sz;
+	return *(volatile u8 *)(esp->regs + reg);
 }
 
-static void dma_dump_state(struct NCR_ESP *esp)
+static dma_addr_t jazz_esp_map_single(struct esp *esp, void *buf,
+				      size_t sz, int dir)
 {
-    
-    ESPLOG(("esp%d: dma -- enable <%08x> residue <%08x\n",
-	    esp->esp_id, vdma_get_enable((int)esp->dregs), vdma_get_residue((int)esp->dregs)));
+	return dma_map_single(esp->dev, buf, sz, dir);
 }
 
-static void dma_init_read(struct NCR_ESP *esp, __u32 vaddress, int length)
+static int jazz_esp_map_sg(struct esp *esp, struct scatterlist *sg,
+				  int num_sg, int dir)
 {
-    dma_cache_wback_inv ((unsigned long)phys_to_virt(vdma_log2phys(vaddress)), length);
-    vdma_disable ((int)esp->dregs);
-    vdma_set_mode ((int)esp->dregs, DMA_MODE_READ);
-    vdma_set_addr ((int)esp->dregs, vaddress);
-    vdma_set_count ((int)esp->dregs, length);
-    vdma_enable ((int)esp->dregs);
+	return dma_map_sg(esp->dev, sg, num_sg, dir);
 }
 
-static void dma_init_write(struct NCR_ESP *esp, __u32 vaddress, int length)
+static void jazz_esp_unmap_single(struct esp *esp, dma_addr_t addr,
+				  size_t sz, int dir)
 {
-    dma_cache_wback_inv ((unsigned long)phys_to_virt(vdma_log2phys(vaddress)), length);    
-    vdma_disable ((int)esp->dregs);    
-    vdma_set_mode ((int)esp->dregs, DMA_MODE_WRITE);
-    vdma_set_addr ((int)esp->dregs, vaddress);
-    vdma_set_count ((int)esp->dregs, length);
-    vdma_enable ((int)esp->dregs);    
+	dma_unmap_single(esp->dev, addr, sz, dir);
 }
 
-static void dma_ints_off(struct NCR_ESP *esp)
+static void jazz_esp_unmap_sg(struct esp *esp, struct scatterlist *sg,
+			      int num_sg, int dir)
 {
-    disable_irq(esp->irq);
+	dma_unmap_sg(esp->dev, sg, num_sg, dir);
 }
 
-static void dma_ints_on(struct NCR_ESP *esp)
+static int jazz_esp_irq_pending(struct esp *esp)
 {
-    enable_irq(esp->irq);
+	if (jazz_esp_read8(esp, ESP_STATUS) & ESP_STAT_INTR)
+		return 1;
+	return 0;
 }
 
-static int dma_irq_p(struct NCR_ESP *esp)
+static void jazz_esp_reset_dma(struct esp *esp)
 {
-    return (esp_read(esp->eregs->esp_status) & ESP_STAT_INTR);
+	vdma_disable ((int)esp->dma_regs);
 }
 
-static int dma_ports_p(struct NCR_ESP *esp)
+static void jazz_esp_dma_drain(struct esp *esp)
 {
-    int enable = vdma_get_enable((int)esp->dregs);
-    
-    return (enable & R4030_CHNL_ENABLE);
+	/* nothing to do */
 }
 
-static void dma_setup(struct NCR_ESP *esp, __u32 addr, int count, int write)
+static void jazz_esp_dma_invalidate(struct esp *esp)
 {
-    /* 
-     * On the Sparc, DMA_ST_WRITE means "move data from device to memory"
-     * so when (write) is true, it actually means READ!
-     */
-    if(write){
-	dma_init_read(esp, addr, count);
-    } else {
-	dma_init_write(esp, addr, count);
-    }
+	vdma_disable ((int)esp->dma_regs);
 }
 
-static void dma_mmu_get_scsi_one (struct NCR_ESP *esp, struct scsi_cmnd *sp)
+static void jazz_esp_send_dma_cmd(struct esp *esp, u32 addr, u32 esp_count,
+				  u32 dma_count, int write, u8 cmd)
 {
-    sp->SCp.have_data_in = vdma_alloc(CPHYSADDR(sp->SCp.buffer), sp->SCp.this_residual);
-    sp->SCp.ptr = (char *)((unsigned long)sp->SCp.have_data_in);
+	BUG_ON(!(cmd & ESP_CMD_DMA));
+
+	jazz_esp_write8(esp, (esp_count >> 0) & 0xff, ESP_TCLOW);
+	jazz_esp_write8(esp, (esp_count >> 8) & 0xff, ESP_TCMED);
+	vdma_disable ((int)esp->dma_regs);
+	if (write)
+		vdma_set_mode ((int)esp->dma_regs, DMA_MODE_READ);
+	else
+		vdma_set_mode ((int)esp->dma_regs, DMA_MODE_WRITE);
+
+	vdma_set_addr ((int)esp->dma_regs, addr);
+	vdma_set_count ((int)esp->dma_regs, dma_count);
+	vdma_enable ((int)esp->dma_regs);
+
+	scsi_esp_cmd(esp, cmd);
 }
 
-static void dma_mmu_get_scsi_sgl (struct NCR_ESP *esp, struct scsi_cmnd *sp)
-{
-    int sz = sp->SCp.buffers_residual;
-    struct scatterlist *sg = (struct scatterlist *) sp->SCp.buffer;
-    
-    while (sz >= 0) {
-	sg[sz].dma_address = vdma_alloc(CPHYSADDR(page_address(sg[sz].page) + sg[sz].offset), sg[sz].length);
-	sz--;
-    }
-    sp->SCp.ptr=(char *)(sp->SCp.buffer->dma_address);
-}    
-
-static void dma_mmu_release_scsi_one (struct NCR_ESP *esp, struct scsi_cmnd *sp)
+static int jazz_esp_dma_error(struct esp *esp)
 {
-    vdma_free(sp->SCp.have_data_in);
+	u32 enable = vdma_get_enable((int)esp->dma_regs);
+
+	if (enable & (R4030_MEM_INTR|R4030_ADDR_INTR))
+		return 1;
+
+	return 0;
 }
 
-static void dma_mmu_release_scsi_sgl (struct NCR_ESP *esp, struct scsi_cmnd *sp)
+static const struct esp_driver_ops jazz_esp_ops = {
+	.esp_write8	=	jazz_esp_write8,
+	.esp_read8	=	jazz_esp_read8,
+	.map_single	=	jazz_esp_map_single,
+	.map_sg		=	jazz_esp_map_sg,
+	.unmap_single	=	jazz_esp_unmap_single,
+	.unmap_sg	=	jazz_esp_unmap_sg,
+	.irq_pending	=	jazz_esp_irq_pending,
+	.reset_dma	=	jazz_esp_reset_dma,
+	.dma_drain	=	jazz_esp_dma_drain,
+	.dma_invalidate	=	jazz_esp_dma_invalidate,
+	.send_dma_cmd	=	jazz_esp_send_dma_cmd,
+	.dma_error	=	jazz_esp_dma_error,
+};
+
+static int __devinit esp_jazz_probe(struct platform_device *dev)
 {
-    int sz = sp->use_sg - 1;
-    struct scatterlist *sg = (struct scatterlist *)sp->request_buffer;
-			
-    while(sz >= 0) {
-	vdma_free(sg[sz].dma_address);
-	sz--;
-    }
+	struct scsi_host_template *tpnt = &scsi_esp_template;
+	struct Scsi_Host *host;
+	struct esp *esp;
+	struct resource *res;
+	int err;
+
+	host = scsi_host_alloc(tpnt, sizeof(struct esp));
+
+	err = -ENOMEM;
+	if (!host)
+		goto fail;
+
+	host->max_id = 8;
+	esp = host_to_esp(host);
+
+	esp->host = host;
+	esp->dev = dev;
+	esp->ops = &jazz_esp_ops;
+
+	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (!res)
+		goto fail_unlink;
+
+	esp->regs = (void __iomem *)res->start;
+	if (!esp->regs)
+		goto fail_unlink;
+
+	res = platform_get_resource(dev, IORESOURCE_MEM, 1);
+	if (!res)
+		goto fail_unlink;
+
+	esp->dma_regs = (void __iomem *)res->start;
+
+	esp->command_block = dma_alloc_coherent(esp->dev, 16,
+						&esp->command_block_dma,
+						GFP_KERNEL);
+	if (!esp->command_block)
+		goto fail_unmap_regs;
+
+	host->irq = platform_get_irq(dev, 0);
+	err = request_irq(host->irq, scsi_esp_intr, IRQF_SHARED, "ESP", esp);
+	if (err < 0)
+		goto fail_unmap_command_block;
+
+	esp->scsi_id = 7;
+	esp->host->this_id = esp->scsi_id;
+	esp->scsi_id_mask = (1 << esp->scsi_id);
+	esp->cfreq = 40000000;
+
+	dev_set_drvdata(&dev->dev, esp);
+
+	err = scsi_esp_register(esp, &dev->dev);
+	if (err)
+		goto fail_free_irq;
+
+	return 0;
+
+fail_free_irq:
+	free_irq(host->irq, esp);
+fail_unmap_command_block:
+	dma_free_coherent(esp->dev, 16,
+			  esp->command_block,
+			  esp->command_block_dma);
+fail_unmap_regs:
+fail_unlink:
+	scsi_host_put(host);
+fail:
+	return err;
 }
 
-static void dma_advance_sg (struct scsi_cmnd *sp)
+static int __devexit esp_jazz_remove(struct platform_device *dev)
 {
-    sp->SCp.ptr = (char *)(sp->SCp.buffer->dma_address);
+	struct esp *esp = dev_get_drvdata(&dev->dev);
+	unsigned int irq = esp->host->irq;
+
+	scsi_esp_unregister(esp);
+
+	free_irq(irq, esp);
+	dma_free_coherent(esp->dev, 16,
+			  esp->command_block,
+			  esp->command_block_dma);
+
+	scsi_host_put(esp->host);
+
+	return 0;
 }
 
-#define JAZZ_HDC_LED   0xe000d100 /* FIXME, find correct address */
+static struct platform_driver esp_jazz_driver = {
+	.probe		= esp_jazz_probe,
+	.remove		= __devexit_p(esp_jazz_remove),
+	.driver	= {
+		.name	= "jazz_esp",
+	},
+};
 
-static void dma_led_off(struct NCR_ESP *esp)
+static int __init jazz_esp_init(void)
 {
-#if 0    
-    *(unsigned char *)JAZZ_HDC_LED = 0;
-#endif    
+	return platform_driver_register(&esp_jazz_driver);
 }
 
-static void dma_led_on(struct NCR_ESP *esp)
-{    
-#if 0    
-    *(unsigned char *)JAZZ_HDC_LED = 1;
-#endif    
+static void __exit jazz_esp_exit(void)
+{
+	platform_driver_unregister(&esp_jazz_driver);
 }
 
-static struct scsi_host_template driver_template = {
-	.proc_name		= "jazz_esp",
-	.proc_info		= esp_proc_info,
-	.name			= "ESP 100/100a/200",
-	.detect			= jazz_esp_detect,
-	.slave_alloc		= esp_slave_alloc,
-	.slave_destroy		= esp_slave_destroy,
-	.release		= jazz_esp_release,
-	.info			= esp_info,
-	.queuecommand		= esp_queue,
-	.eh_abort_handler	= esp_abort,
-	.eh_bus_reset_handler	= esp_reset,
-	.can_queue		= 7,
-	.this_id		= 7,
-	.sg_tablesize		= SG_ALL,
-	.cmd_per_lun		= 1,
-	.use_clustering		= DISABLE_CLUSTERING,
-};
-#include "scsi_module.c"
+MODULE_DESCRIPTION("JAZZ ESP SCSI driver");
+MODULE_AUTHOR("Thomas Bogendoerfer (tsbogend@alpha.franken.de)");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+module_init(jazz_esp_init);
+module_exit(jazz_esp_exit);

+ 4 - 14
drivers/scsi/pluto.c

@@ -4,6 +4,7 @@
  *
  */
 
+#include <linux/completion.h>
 #include <linux/kernel.h>
 #include <linux/delay.h>
 #include <linux/types.h>
@@ -50,16 +51,10 @@ static struct ctrl_inquiry {
 } *fcs __initdata;
 static int fcscount __initdata = 0;
 static atomic_t fcss __initdata = ATOMIC_INIT(0);
-DECLARE_MUTEX_LOCKED(fc_sem);
+static DECLARE_COMPLETION(fc_detect_complete);
 
 static int pluto_encode_addr(Scsi_Cmnd *SCpnt, u16 *addr, fc_channel *fc, fcp_cmnd *fcmd);
 
-static void __init pluto_detect_timeout(unsigned long data)
-{
-	PLND(("Timeout\n"))
-	up(&fc_sem);
-}
-
 static void __init pluto_detect_done(Scsi_Cmnd *SCpnt)
 {
 	/* Do nothing */
@@ -69,7 +64,7 @@ static void __init pluto_detect_scsi_done(Scsi_Cmnd *SCpnt)
 {
 	PLND(("Detect done %08lx\n", (long)SCpnt))
 	if (atomic_dec_and_test (&fcss))
-		up(&fc_sem);
+		complete(&fc_detect_complete);
 }
 
 int pluto_slave_configure(struct scsi_device *device)
@@ -96,7 +91,6 @@ int __init pluto_detect(struct scsi_host_template *tpnt)
 	int i, retry, nplutos;
 	fc_channel *fc;
 	struct scsi_device dev;
-	DEFINE_TIMER(fc_timer, pluto_detect_timeout, 0, 0);
 
 	tpnt->proc_name = "pluto";
 	fcscount = 0;
@@ -187,15 +181,11 @@ int __init pluto_detect(struct scsi_host_template *tpnt)
 			}
 		}
 	    
-		fc_timer.expires = jiffies + 10 * HZ;
-		add_timer(&fc_timer);
-		
-		down(&fc_sem);
+		wait_for_completion_timeout(&fc_detect_complete, 10 * HZ);
 		PLND(("Woken up\n"))
 		if (!atomic_read(&fcss))
 			break; /* All fc channels have answered us */
 	}
-	del_timer_sync(&fc_timer);
 
 	PLND(("Finished search\n"))
 	for (i = 0, nplutos = 0; i < fcscount; i++) {

+ 3 - 3
drivers/serial/suncore.c

@@ -30,9 +30,9 @@ void
 sunserial_console_termios(struct console *con)
 {
 	char mode[16], buf[16], *s;
-	char *mode_prop = "ttyX-mode";
-	char *cd_prop = "ttyX-ignore-cd";
-	char *dtr_prop = "ttyX-rts-dtr-off";
+	char mode_prop[] = "ttyX-mode";
+	char cd_prop[]   = "ttyX-ignore-cd";
+	char dtr_prop[]  = "ttyX-rts-dtr-off";
 	char *ssp_console_modes_prop = "ssp-console-modes";
 	int baud, bits, stop, cflag;
 	char parity;

+ 2 - 2
drivers/serial/sunzilog.c

@@ -1239,7 +1239,7 @@ static inline struct console *SUNZILOG_CONSOLE(void)
 #define SUNZILOG_CONSOLE()	(NULL)
 #endif
 
-static void __init sunzilog_init_kbdms(struct uart_sunzilog_port *up, int channel)
+static void __devinit sunzilog_init_kbdms(struct uart_sunzilog_port *up, int channel)
 {
 	int baud, brg;
 
@@ -1259,7 +1259,7 @@ static void __init sunzilog_init_kbdms(struct uart_sunzilog_port *up, int channe
 }
 
 #ifdef CONFIG_SERIO
-static void __init sunzilog_register_serio(struct uart_sunzilog_port *up)
+static void __devinit sunzilog_register_serio(struct uart_sunzilog_port *up)
 {
 	struct serio *serio = &up->serio;
 

+ 38 - 0
include/asm-sparc/atomic.h

@@ -2,6 +2,7 @@
  *
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  * Copyright (C) 2000 Anton Blanchard (anton@linuxcare.com.au)
+ * Copyright (C) 2007 Kyle McMartin (kyle@parisc-linux.org)
  *
  * Additions by Keith M Wesolowski (wesolows@foobazco.org) based
  * on asm-parisc/atomic.h Copyright (C) 2000 Philipp Rumpf <prumpf@tux.org>.
@@ -10,11 +11,48 @@
 #ifndef __ARCH_SPARC_ATOMIC__
 #define __ARCH_SPARC_ATOMIC__
 
+#include <linux/types.h>
 
 typedef struct { volatile int counter; } atomic_t;
 
 #ifdef __KERNEL__
 
+/* Emulate cmpxchg() the same way we emulate atomics,
+ * by hashing the object address and indexing into an array
+ * of spinlocks to get a bit of performance...
+ *
+ * See arch/sparc/lib/atomic32.c for implementation.
+ *
+ * Cribbed from <asm-parisc/atomic.h>
+ */
+#define __HAVE_ARCH_CMPXCHG	1
+
+/* bug catcher for when unsupported size is used - won't link */
+extern void __cmpxchg_called_with_bad_pointer(void);
+/* we only need to support cmpxchg of a u32 on sparc */
+extern unsigned long __cmpxchg_u32(volatile u32 *m, u32 old, u32 new_);
+
+/* don't worry...optimizer will get rid of most of this */
+static __inline__ unsigned long
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size)
+{
+	switch(size) {
+	case 4:
+		return __cmpxchg_u32((u32 *)ptr, (u32)old, (u32)new_);
+	default:
+		__cmpxchg_called_with_bad_pointer();
+		break;
+	}
+	return old;
+}
+
+#define cmpxchg(ptr,o,n) ({						\
+	__typeof__(*(ptr)) _o_ = (o);					\
+	__typeof__(*(ptr)) _n_ = (n);					\
+	(__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,	\
+			(unsigned long)_n_, sizeof(*(ptr)));		\
+})
+
 #define ATOMIC_INIT(i)  { (i) }
 
 extern int __atomic_add_return(int, atomic_t *);

+ 4 - 4
include/asm-sparc64/bugs.h

@@ -1,9 +1,8 @@
-/*  $Id: bugs.h,v 1.1 1996/12/26 13:25:20 davem Exp $
- *  include/asm-sparc64/bugs.h:  Sparc probes for various bugs.
+/* bugs.h: Sparc64 probes for various bugs.
  *
- *  Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net)
  */
-
+#include <asm/sstate.h>
 
 extern unsigned long loops_per_jiffy;
 
@@ -12,4 +11,5 @@ static void __init check_bugs(void)
 #ifndef CONFIG_SMP
 	cpu_data(0).udelay_val = loops_per_jiffy;
 #endif
+	sstate_running();
 }

+ 17 - 7
include/asm-sparc64/cpudata.h

@@ -17,11 +17,11 @@
 typedef struct {
 	/* Dcache line 1 */
 	unsigned int	__softirq_pending; /* must be 1st, see rtrap.S */
-	unsigned int	__pad0_1;
-	unsigned int	__pad0_2;
-	unsigned int	__pad1;
+	unsigned int	__pad0;
 	unsigned long	clock_tick;	/* %tick's per second */
 	unsigned long	udelay_val;
+	unsigned int	__pad1;
+	unsigned int	__pad2;
 
 	/* Dcache line 2, rarely used */
 	unsigned int	dcache_size;
@@ -30,8 +30,8 @@ typedef struct {
 	unsigned int	icache_line_size;
 	unsigned int	ecache_size;
 	unsigned int	ecache_line_size;
+	int		core_id;
 	unsigned int	__pad3;
-	unsigned int	__pad4;
 } cpuinfo_sparc;
 
 DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
@@ -76,12 +76,18 @@ struct trap_per_cpu {
 
 /* Dcache line 8: IRQ work list, and keep trap_block a power-of-2 in size.  */
 	unsigned int		irq_worklist;
-	unsigned int		__pad1;
-	unsigned long		__pad2[3];
+	unsigned int		cpu_mondo_qmask;
+	unsigned int		dev_mondo_qmask;
+	unsigned int		resum_qmask;
+	unsigned int		nonresum_qmask;
+	unsigned int		__pad2[3];
 } __attribute__((aligned(64)));
 extern struct trap_per_cpu trap_block[NR_CPUS];
 extern void init_cur_cpu_trap(struct thread_info *);
 extern void setup_tba(void);
+extern int ncpus_probed;
+
+extern unsigned long real_hard_smp_processor_id(void);
 
 struct cpuid_patch_entry {
 	unsigned int	addr;
@@ -122,6 +128,10 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
 #define TRAP_PER_CPU_TSB_HUGE		0xd0
 #define TRAP_PER_CPU_TSB_HUGE_TEMP	0xd8
 #define TRAP_PER_CPU_IRQ_WORKLIST	0xe0
+#define TRAP_PER_CPU_CPU_MONDO_QMASK	0xe4
+#define TRAP_PER_CPU_DEV_MONDO_QMASK	0xe8
+#define TRAP_PER_CPU_RESUM_QMASK	0xec
+#define TRAP_PER_CPU_NONRESUM_QMASK	0xf0
 
 #define TRAP_BLOCK_SZ_SHIFT		8
 
@@ -192,7 +202,7 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
  * the calculations done by the macro mid-stream.
  */
 #define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)	\
-	ldub	[THR + TI_CPU], REG1;			\
+	lduh	[THR + TI_CPU], REG1;			\
 	sethi	%hi(__per_cpu_shift), REG3;		\
 	sethi	%hi(__per_cpu_base), REG2;		\
 	ldx	[REG3 + %lo(__per_cpu_shift)], REG3;	\

+ 600 - 43
include/asm-sparc64/hypervisor.h

@@ -73,6 +73,8 @@
 #define HV_ENOTSUPPORTED		13 /* Function not supported       */
 #define HV_ENOMAP			14 /* No mapping found             */
 #define HV_ETOOMANY			15 /* Too many items specified     */
+#define HV_ECHANNEL			16 /* Invalid LDC channel          */
+#define HV_EBUSY			17 /* Resource busy                */
 
 /* mach_exit()
  * TRAP:	HV_FAST_TRAP
@@ -95,6 +97,10 @@
  */
 #define HV_FAST_MACH_EXIT		0x00
 
+#ifndef __ASSEMBLY__
+extern void sun4v_mach_exit(unsigned long exit_core);
+#endif
+
 /* Domain services.  */
 
 /* mach_desc()
@@ -120,7 +126,13 @@
  */
 #define HV_FAST_MACH_DESC		0x01
 
-/* mach_exit()
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_mach_desc(unsigned long buffer_pa,
+				     unsigned long buf_len,
+				     unsigned long *real_buf_len);
+#endif
+
+/* mach_sir()
  * TRAP:	HV_FAST_TRAP
  * FUNCTION:	HV_FAST_MACH_SIR
  * ERRORS:	This service does not return.
@@ -135,53 +147,66 @@
  */
 #define HV_FAST_MACH_SIR		0x02
 
-/* mach_set_soft_state()
+#ifndef __ASSEMBLY__
+extern void sun4v_mach_sir(void);
+#endif
+
+/* mach_set_watchdog()
  * TRAP:	HV_FAST_TRAP
- * FUNCTION:	HV_FAST_MACH_SET_SOFT_STATE
- * ARG0:	software state
- * ARG1:	software state description pointer
+ * FUNCTION:	HV_FAST_MACH_SET_WATCHDOG
+ * ARG0:	timeout in milliseconds
  * RET0:	status
- * ERRORS:	EINVAL		software state not valid or software state
- *				description is not NULL terminated
- *		ENORADDR	software state description pointer is not a
- *				valid real address
- *		EBADALIGNED	software state description is not correctly
- *				aligned
+ * RET1:	time remaining in milliseconds
  *
- * This allows the guest to report it's soft state to the hypervisor.  There
- * are two primary components to this state.  The first part states whether
- * the guest software is running or not.  The second containts optional
- * details specific to the software.
+ * A guest uses this API to set a watchdog timer.  Once the gues has set
+ * the timer, it must call the timer service again either to disable or
+ * postpone the expiration.  If the timer expires before being reset or
+ * disabled, then the hypervisor take a platform specific action leading
+ * to guest termination within a bounded time period.  The platform action
+ * may include recovery actions such as reporting the expiration to a
+ * Service Processor, and/or automatically restarting the gues.
  *
- * The software state argument is defined below in HV_SOFT_STATE_*, and
- * indicates whether the guest is operating normally or in a transitional
- * state.
+ * The 'timeout' parameter is specified in milliseconds, however the
+ * implementated granularity is given by the 'watchdog-resolution'
+ * property in the 'platform' node of the guest's machine description.
+ * The largest allowed timeout value is specified by the
+ * 'watchdog-max-timeout' property of the 'platform' node.
  *
- * The software state description argument is a real address of a data buffer
- * of size 32-bytes aligned on a 32-byte boundary.  It is treated as a NULL
- * terminated 7-bit ASCII string of up to 31 characters not including the
- * NULL termination.
- */
-#define HV_FAST_MACH_SET_SOFT_STATE	0x03
-#define  HV_SOFT_STATE_NORMAL		 0x01
-#define  HV_SOFT_STATE_TRANSITION	 0x02
-
-/* mach_get_soft_state()
- * TRAP:	HV_FAST_TRAP
- * FUNCTION:	HV_FAST_MACH_GET_SOFT_STATE
- * ARG0:	software state description pointer
- * RET0:	status
- * RET1:	software state
- * ERRORS:	ENORADDR	software state description pointer is not a
- *				valid real address
- *		EBADALIGNED	software state description is not correctly
- *				aligned
+ * If the 'timeout' argument is not zero, the watchdog timer is set to
+ * expire after a minimum of 'timeout' milliseconds.
  *
- * Retrieve the current value of the guest's software state.  The rules
- * for the software state pointer are the same as for mach_set_soft_state()
- * above.
+ * If the 'timeout' argument is zero, the watchdog timer is disabled.
+ *
+ * If the 'timeout' value exceeds the value of the 'max-watchdog-timeout'
+ * property, the hypervisor leaves the watchdog timer state unchanged,
+ * and returns a status of EINVAL.
+ *
+ * The 'time remaining' return value is valid regardless of whether the
+ * return status is EOK or EINVAL.  A non-zero return value indicates the
+ * number of milliseconds that were remaining until the timer was to expire.
+ * If less than one millisecond remains, the return value is '1'.  If the
+ * watchdog timer was disabled at the time of the call, the return value is
+ * zero.
+ *
+ * If the hypervisor cannot support the exact timeout value requested, but
+ * can support a larger timeout value, the hypervisor may round the actual
+ * timeout to a value larger than the requested timeout, consequently the
+ * 'time remaining' return value may be larger than the previously requested
+ * timeout value.
+ *
+ * Any guest OS debugger should be aware that the watchdog service may be in
+ * use.  Consequently, it is recommended that the watchdog service is
+ * disabled upon debugger entry (e.g. reaching a breakpoint), and then
+ * re-enabled upon returning to normal execution.  The API has been designed
+ * with this in mind, and the 'time remaining' result of the disable call may
+ * be used directly as the timeout argument of the re-enable call.
  */
-#define HV_FAST_MACH_GET_SOFT_STATE	0x04
+#define HV_FAST_MACH_SET_WATCHDOG	0x05
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_mach_set_watchdog(unsigned long timeout,
+					     unsigned long *orig_timeout);
+#endif
 
 /* CPU services.
  *
@@ -206,8 +231,8 @@
  * FUNCTION:	HV_FAST_CPU_START
  * ARG0:	CPU ID
  * ARG1:	PC
- * ARG1:	RTBA
- * ARG1:	target ARG0
+ * ARG2:	RTBA
+ * ARG3:	target ARG0
  * RET0:	status
  * ERRORS:	ENOCPU		Invalid CPU ID
  *		EINVAL		Target CPU ID is not in the stopped state
@@ -224,6 +249,13 @@
  */
 #define HV_FAST_CPU_START		0x10
 
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_cpu_start(unsigned long cpuid,
+				     unsigned long pc,
+				     unsigned long rtba,
+				     unsigned long arg0);
+#endif
+
 /* cpu_stop()
  * TRAP:	HV_FAST_TRAP
  * FUNCTION:	HV_FAST_CPU_STOP
@@ -245,6 +277,10 @@
  */
 #define HV_FAST_CPU_STOP		0x11
 
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_cpu_stop(unsigned long cpuid);
+#endif
+
 /* cpu_yield()
  * TRAP:	HV_FAST_TRAP
  * FUNCTION:	HV_FAST_CPU_YIELD
@@ -588,6 +624,11 @@ struct hv_fault_status {
  */
 #define HV_FAST_MMU_TSB_CTX0		0x20
 
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_mmu_tsb_ctx0(unsigned long num_descriptions,
+					unsigned long tsb_desc_ra);
+#endif
+
 /* mmu_tsb_ctxnon0()
  * TRAP:	HV_FAST_TRAP
  * FUNCTION:	HV_FAST_MMU_TSB_CTXNON0
@@ -694,6 +735,13 @@ struct hv_fault_status {
  */
 #define HV_FAST_MMU_MAP_PERM_ADDR	0x25
 
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_mmu_map_perm_addr(unsigned long vaddr,
+					     unsigned long set_to_zero,
+					     unsigned long tte,
+					     unsigned long flags);
+#endif
+
 /* mmu_fault_area_conf()
  * TRAP:	HV_FAST_TRAP
  * FUNCTION:	HV_FAST_MMU_FAULT_AREA_CONF
@@ -892,6 +940,10 @@ struct hv_fault_status {
  */
 #define HV_FAST_TOD_GET			0x50
 
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_tod_get(unsigned long *time);
+#endif
+
 /* tod_set()
  * TRAP:	HV_FAST_TRAP
  * FUNCTION:	HV_FAST_TOD_SET
@@ -905,6 +957,10 @@ struct hv_fault_status {
  */
 #define HV_FAST_TOD_SET			0x51
 
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_tod_set(unsigned long time);
+#endif
+
 /* Console services */
 
 /* con_getchar()
@@ -988,6 +1044,59 @@ extern unsigned long sun4v_con_write(unsigned long buffer,
 				     unsigned long *bytes_written);
 #endif
 
+/* mach_set_soft_state()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MACH_SET_SOFT_STATE
+ * ARG0:	software state
+ * ARG1:	software state description pointer
+ * RET0:	status
+ * ERRORS:	EINVAL		software state not valid or software state
+ *				description is not NULL terminated
+ *		ENORADDR	software state description pointer is not a
+ *				valid real address
+ *		EBADALIGNED	software state description is not correctly
+ *				aligned
+ *
+ * This allows the guest to report it's soft state to the hypervisor.  There
+ * are two primary components to this state.  The first part states whether
+ * the guest software is running or not.  The second containts optional
+ * details specific to the software.
+ *
+ * The software state argument is defined below in HV_SOFT_STATE_*, and
+ * indicates whether the guest is operating normally or in a transitional
+ * state.
+ *
+ * The software state description argument is a real address of a data buffer
+ * of size 32-bytes aligned on a 32-byte boundary.  It is treated as a NULL
+ * terminated 7-bit ASCII string of up to 31 characters not including the
+ * NULL termination.
+ */
+#define HV_FAST_MACH_SET_SOFT_STATE	0x70
+#define  HV_SOFT_STATE_NORMAL		 0x01
+#define  HV_SOFT_STATE_TRANSITION	 0x02
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_mach_set_soft_state(unsigned long soft_state,
+					       unsigned long msg_string_ra);
+#endif
+
+/* mach_get_soft_state()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MACH_GET_SOFT_STATE
+ * ARG0:	software state description pointer
+ * RET0:	status
+ * RET1:	software state
+ * ERRORS:	ENORADDR	software state description pointer is not a
+ *				valid real address
+ *		EBADALIGNED	software state description is not correctly
+ *				aligned
+ *
+ * Retrieve the current value of the guest's software state.  The rules
+ * for the software state pointer are the same as for mach_set_soft_state()
+ * above.
+ */
+#define HV_FAST_MACH_GET_SOFT_STATE	0x71
+
 /* Trap trace services.
  *
  * The hypervisor provides a trap tracing capability for privileged
@@ -1379,6 +1488,113 @@ extern unsigned long sun4v_intr_gettarget(unsigned long sysino);
 extern unsigned long sun4v_intr_settarget(unsigned long sysino, unsigned long cpuid);
 #endif
 
+/* vintr_get_cookie()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_GET_COOKIE
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * RET0:	status
+ * RET1:	cookie
+ */
+#define HV_FAST_VINTR_GET_COOKIE	0xa7
+
+/* vintr_set_cookie()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_SET_COOKIE
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * ARG2:	cookie
+ * RET0:	status
+ */
+#define HV_FAST_VINTR_SET_COOKIE	0xa8
+
+/* vintr_get_valid()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_GET_VALID
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * RET0:	status
+ * RET1:	valid state
+ */
+#define HV_FAST_VINTR_GET_VALID		0xa9
+
+/* vintr_set_valid()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_SET_VALID
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * ARG2:	valid state
+ * RET0:	status
+ */
+#define HV_FAST_VINTR_SET_VALID		0xaa
+
+/* vintr_get_state()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_GET_STATE
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * RET0:	status
+ * RET1:	state
+ */
+#define HV_FAST_VINTR_GET_STATE		0xab
+
+/* vintr_set_state()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_SET_STATE
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * ARG2:	state
+ * RET0:	status
+ */
+#define HV_FAST_VINTR_SET_STATE		0xac
+
+/* vintr_get_target()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_GET_TARGET
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * RET0:	status
+ * RET1:	cpuid
+ */
+#define HV_FAST_VINTR_GET_TARGET	0xad
+
+/* vintr_set_target()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_SET_TARGET
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * ARG2:	cpuid
+ * RET0:	status
+ */
+#define HV_FAST_VINTR_SET_TARGET	0xae
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_vintr_get_cookie(unsigned long dev_handle,
+					    unsigned long dev_ino,
+					    unsigned long *cookie);
+extern unsigned long sun4v_vintr_set_cookie(unsigned long dev_handle,
+					    unsigned long dev_ino,
+					    unsigned long cookie);
+extern unsigned long sun4v_vintr_get_valid(unsigned long dev_handle,
+					   unsigned long dev_ino,
+					   unsigned long *valid);
+extern unsigned long sun4v_vintr_set_valid(unsigned long dev_handle,
+					   unsigned long dev_ino,
+					   unsigned long valid);
+extern unsigned long sun4v_vintr_get_state(unsigned long dev_handle,
+					   unsigned long dev_ino,
+					   unsigned long *state);
+extern unsigned long sun4v_vintr_set_state(unsigned long dev_handle,
+					   unsigned long dev_ino,
+					   unsigned long state);
+extern unsigned long sun4v_vintr_get_target(unsigned long dev_handle,
+					    unsigned long dev_ino,
+					    unsigned long *cpuid);
+extern unsigned long sun4v_vintr_set_target(unsigned long dev_handle,
+					    unsigned long dev_ino,
+					    unsigned long cpuid);
+#endif
+
 /* PCI IO services.
  *
  * See the terminology descriptions in the device interrupt services
@@ -2037,6 +2253,346 @@ extern unsigned long sun4v_intr_settarget(unsigned long sysino, unsigned long cp
  */
 #define HV_FAST_PCI_MSG_SETVALID	0xd3
 
+/* Logical Domain Channel services.  */
+
+#define LDC_CHANNEL_DOWN		0
+#define LDC_CHANNEL_UP			1
+#define LDC_CHANNEL_RESETTING		2
+
+/* ldc_tx_qconf()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_TX_QCONF
+ * ARG0:	channel ID
+ * ARG1:	real address base of queue
+ * ARG2:	num entries in queue
+ * RET0:	status
+ *
+ * Configure transmit queue for the LDC endpoint specified by the
+ * given channel ID, to be placed at the given real address, and
+ * be of the given num entries.  Num entries must be a power of two.
+ * The real address base of the queue must be aligned on the queue
+ * size.  Each queue entry is 64-bytes, so for example, a 32 entry
+ * queue must be aligned on a 2048 byte real address boundary.
+ *
+ * Upon configuration of a valid transmit queue the head and tail
+ * pointers are set to a hypervisor specific identical value indicating
+ * that the queue initially is empty.
+ *
+ * The endpoint's transmit queue is un-configured if num entries is zero.
+ *
+ * The maximum number of entries for each queue for a specific cpu may be
+ * determined from the machine description.  A transmit queue may be
+ * specified even in the event that the LDC is down (peer endpoint has no
+ * receive queue specified).  Transmission will begin as soon as the peer
+ * endpoint defines a receive queue.
+ *
+ * It is recommended that a guest wait for a transmit queue to empty prior
+ * to reconfiguring it, or un-configuring it.  Re or un-configuring of a
+ * non-empty transmit queue behaves exactly as defined above, however it
+ * is undefined as to how many of the pending entries in the original queue
+ * will be delivered prior to the re-configuration taking effect.
+ * Furthermore, as the queue configuration causes a reset of the head and
+ * tail pointers there is no way for a guest to determine how many entries
+ * have been sent after the configuration operation.
+ */
+#define HV_FAST_LDC_TX_QCONF		0xe0
+
+/* ldc_tx_qinfo()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_TX_QINFO
+ * ARG0:	channel ID
+ * RET0:	status
+ * RET1:	real address base of queue
+ * RET2:	num entries in queue
+ *
+ * Return the configuration info for the transmit queue of LDC endpoint
+ * defined by the given channel ID.  The real address is the currently
+ * defined real address base of the defined queue, and num entries is the
+ * size of the queue in terms of number of entries.
+ *
+ * If the specified channel ID is a valid endpoint number, but no transmit
+ * queue has been defined this service will return success, but with num
+ * entries set to zero and the real address will have an undefined value.
+ */
+#define HV_FAST_LDC_TX_QINFO		0xe1
+
+/* ldc_tx_get_state()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_TX_GET_STATE
+ * ARG0:	channel ID
+ * RET0:	status
+ * RET1:	head offset
+ * RET2:	tail offset
+ * RET3:	channel state
+ *
+ * Return the transmit state, and the head and tail queue pointers, for
+ * the transmit queue of the LDC endpoint defined by the given channel ID.
+ * The head and tail values are the byte offset of the head and tail
+ * positions of the transmit queue for the specified endpoint.
+ */
+#define HV_FAST_LDC_TX_GET_STATE	0xe2
+
+/* ldc_tx_set_qtail()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_TX_SET_QTAIL
+ * ARG0:	channel ID
+ * ARG1:	tail offset
+ * RET0:	status
+ *
+ * Update the tail pointer for the transmit queue associated with the LDC
+ * endpoint defined by the given channel ID.  The tail offset specified
+ * must be aligned on a 64 byte boundary, and calculated so as to increase
+ * the number of pending entries on the transmit queue.  Any attempt to
+ * decrease the number of pending transmit queue entires is considered
+ * an invalid tail offset and will result in an EINVAL error.
+ *
+ * Since the tail of the transmit queue may not be moved backwards, the
+ * transmit queue may be flushed by configuring a new transmit queue,
+ * whereupon the hypervisor will configure the initial transmit head and
+ * tail pointers to be equal.
+ */
+#define HV_FAST_LDC_TX_SET_QTAIL	0xe3
+
+/* ldc_rx_qconf()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_RX_QCONF
+ * ARG0:	channel ID
+ * ARG1:	real address base of queue
+ * ARG2:	num entries in queue
+ * RET0:	status
+ *
+ * Configure receive queue for the LDC endpoint specified by the
+ * given channel ID, to be placed at the given real address, and
+ * be of the given num entries.  Num entries must be a power of two.
+ * The real address base of the queue must be aligned on the queue
+ * size.  Each queue entry is 64-bytes, so for example, a 32 entry
+ * queue must be aligned on a 2048 byte real address boundary.
+ *
+ * The endpoint's transmit queue is un-configured if num entries is zero.
+ *
+ * If a valid receive queue is specified for a local endpoint the LDC is
+ * in the up state for the purpose of transmission to this endpoint.
+ *
+ * The maximum number of entries for each queue for a specific cpu may be
+ * determined from the machine description.
+ *
+ * As receive queue configuration causes a reset of the queue's head and
+ * tail pointers there is no way for a gues to determine how many entries
+ * have been received between a preceeding ldc_get_rx_state() API call
+ * and the completion of the configuration operation.  It should be noted
+ * that datagram delivery is not guarenteed via domain channels anyway,
+ * and therefore any higher protocol should be resilient to datagram
+ * loss if necessary.  However, to overcome this specific race potential
+ * it is recommended, for example, that a higher level protocol be employed
+ * to ensure either retransmission, or ensure that no datagrams are pending
+ * on the peer endpoint's transmit queue prior to the configuration process.
+ */
+#define HV_FAST_LDC_RX_QCONF		0xe4
+
+/* ldc_rx_qinfo()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_RX_QINFO
+ * ARG0:	channel ID
+ * RET0:	status
+ * RET1:	real address base of queue
+ * RET2:	num entries in queue
+ *
+ * Return the configuration info for the receive queue of LDC endpoint
+ * defined by the given channel ID.  The real address is the currently
+ * defined real address base of the defined queue, and num entries is the
+ * size of the queue in terms of number of entries.
+ *
+ * If the specified channel ID is a valid endpoint number, but no receive
+ * queue has been defined this service will return success, but with num
+ * entries set to zero and the real address will have an undefined value.
+ */
+#define HV_FAST_LDC_RX_QINFO		0xe5
+
+/* ldc_rx_get_state()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_RX_GET_STATE
+ * ARG0:	channel ID
+ * RET0:	status
+ * RET1:	head offset
+ * RET2:	tail offset
+ * RET3:	channel state
+ *
+ * Return the receive state, and the head and tail queue pointers, for
+ * the receive queue of the LDC endpoint defined by the given channel ID.
+ * The head and tail values are the byte offset of the head and tail
+ * positions of the receive queue for the specified endpoint.
+ */
+#define HV_FAST_LDC_RX_GET_STATE	0xe6
+
+/* ldc_rx_set_qhead()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_RX_SET_QHEAD
+ * ARG0:	channel ID
+ * ARG1:	head offset
+ * RET0:	status
+ *
+ * Update the head pointer for the receive queue associated with the LDC
+ * endpoint defined by the given channel ID.  The head offset specified
+ * must be aligned on a 64 byte boundary, and calculated so as to decrease
+ * the number of pending entries on the receive queue.  Any attempt to
+ * increase the number of pending receive queue entires is considered
+ * an invalid head offset and will result in an EINVAL error.
+ *
+ * The receive queue may be flushed by setting the head offset equal
+ * to the current tail offset.
+ */
+#define HV_FAST_LDC_RX_SET_QHEAD	0xe7
+
+/* LDC Map Table Entry.  Each slot is defined by a translation table
+ * entry, as specified by the LDC_MTE_* bits below, and a 64-bit
+ * hypervisor invalidation cookie.
+ */
+#define LDC_MTE_PADDR	0x0fffffffffffe000 /* pa[55:13]          */
+#define LDC_MTE_COPY_W	0x0000000000000400 /* copy write access  */
+#define LDC_MTE_COPY_R	0x0000000000000200 /* copy read access   */
+#define LDC_MTE_IOMMU_W	0x0000000000000100 /* IOMMU write access */
+#define LDC_MTE_IOMMU_R	0x0000000000000080 /* IOMMU read access  */
+#define LDC_MTE_EXEC	0x0000000000000040 /* execute            */
+#define LDC_MTE_WRITE	0x0000000000000020 /* read               */
+#define LDC_MTE_READ	0x0000000000000010 /* write              */
+#define LDC_MTE_SZALL	0x000000000000000f /* page size bits     */
+#define LDC_MTE_SZ16GB	0x0000000000000007 /* 16GB page          */
+#define LDC_MTE_SZ2GB	0x0000000000000006 /* 2GB page           */
+#define LDC_MTE_SZ256MB	0x0000000000000005 /* 256MB page         */
+#define LDC_MTE_SZ32MB	0x0000000000000004 /* 32MB page          */
+#define LDC_MTE_SZ4MB	0x0000000000000003 /* 4MB page           */
+#define LDC_MTE_SZ512K	0x0000000000000002 /* 512K page          */
+#define LDC_MTE_SZ64K	0x0000000000000001 /* 64K page           */
+#define LDC_MTE_SZ8K	0x0000000000000000 /* 8K page            */
+
+#ifndef __ASSEMBLY__
+struct ldc_mtable_entry {
+	unsigned long	mte;
+	unsigned long	cookie;
+};
+#endif
+
+/* ldc_set_map_table()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_SET_MAP_TABLE
+ * ARG0:	channel ID
+ * ARG1:	table real address
+ * ARG2:	num entries
+ * RET0:	status
+ *
+ * Register the MTE table at the given table real address, with the
+ * specified num entries, for the LDC indicated by the given channel
+ * ID.
+ */
+#define HV_FAST_LDC_SET_MAP_TABLE	0xea
+
+/* ldc_get_map_table()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_GET_MAP_TABLE
+ * ARG0:	channel ID
+ * RET0:	status
+ * RET1:	table real address
+ * RET2:	num entries
+ *
+ * Return the configuration of the current mapping table registered
+ * for the given channel ID.
+ */
+#define HV_FAST_LDC_GET_MAP_TABLE	0xeb
+
+#define LDC_COPY_IN	0
+#define LDC_COPY_OUT	1
+
+/* ldc_copy()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_COPY
+ * ARG0:	channel ID
+ * ARG1:	LDC_COPY_* direction code
+ * ARG2:	target real address
+ * ARG3:	local real address
+ * ARG4:	length in bytes
+ * RET0:	status
+ * RET1:	actual length in bytes
+ */
+#define HV_FAST_LDC_COPY		0xec
+
+#define LDC_MEM_READ	1
+#define LDC_MEM_WRITE	2
+#define LDC_MEM_EXEC	4
+
+/* ldc_mapin()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_MAPIN
+ * ARG0:	channel ID
+ * ARG1:	cookie
+ * RET0:	status
+ * RET1:	real address
+ * RET2:	LDC_MEM_* permissions
+ */
+#define HV_FAST_LDC_MAPIN		0xed
+
+/* ldc_unmap()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_UNMAP
+ * ARG0:	real address
+ * RET0:	status
+ */
+#define HV_FAST_LDC_UNMAP		0xee
+
+/* ldc_revoke()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_REVOKE
+ * ARG0:	cookie
+ * ARG1:	ldc_mtable_entry cookie
+ * RET0:	status
+ */
+#define HV_FAST_LDC_REVOKE		0xef
+
+#ifndef __ASSEMBLY__
+extern unsigned long sun4v_ldc_tx_qconf(unsigned long channel,
+					unsigned long ra,
+					unsigned long num_entries);
+extern unsigned long sun4v_ldc_tx_qinfo(unsigned long channel,
+					unsigned long *ra,
+					unsigned long *num_entries);
+extern unsigned long sun4v_ldc_tx_get_state(unsigned long channel,
+					    unsigned long *head_off,
+					    unsigned long *tail_off,
+					    unsigned long *chan_state);
+extern unsigned long sun4v_ldc_tx_set_qtail(unsigned long channel,
+					    unsigned long tail_off);
+extern unsigned long sun4v_ldc_rx_qconf(unsigned long channel,
+					unsigned long ra,
+					unsigned long num_entries);
+extern unsigned long sun4v_ldc_rx_qinfo(unsigned long channel,
+					unsigned long *ra,
+					unsigned long *num_entries);
+extern unsigned long sun4v_ldc_rx_get_state(unsigned long channel,
+					    unsigned long *head_off,
+					    unsigned long *tail_off,
+					    unsigned long *chan_state);
+extern unsigned long sun4v_ldc_rx_set_qhead(unsigned long channel,
+					    unsigned long head_off);
+extern unsigned long sun4v_ldc_set_map_table(unsigned long channel,
+					     unsigned long ra,
+					     unsigned long num_entries);
+extern unsigned long sun4v_ldc_get_map_table(unsigned long channel,
+					     unsigned long *ra,
+					     unsigned long *num_entries);
+extern unsigned long sun4v_ldc_copy(unsigned long channel,
+				    unsigned long dir_code,
+				    unsigned long tgt_raddr,
+				    unsigned long lcl_raddr,
+				    unsigned long len,
+				    unsigned long *actual_len);
+extern unsigned long sun4v_ldc_mapin(unsigned long channel,
+				     unsigned long cookie,
+				     unsigned long *ra,
+				     unsigned long *perm);
+extern unsigned long sun4v_ldc_unmap(unsigned long ra);
+extern unsigned long sun4v_ldc_revoke(unsigned long cookie,
+				      unsigned long mte_cookie);
+#endif
+
 /* Performance counter services.  */
 
 #define HV_PERF_JBUS_PERF_CTRL_REG	0x00
@@ -2204,6 +2760,7 @@ extern void sun4v_hvapi_unregister(unsigned long group);
 extern int sun4v_hvapi_get(unsigned long group,
 			   unsigned long *major,
 			   unsigned long *minor);
+extern void sun4v_hvapi_init(void);
 #endif
 
 #endif /* !(_SPARC64_HYPERVISOR_H) */

+ 0 - 1
include/asm-sparc64/kdebug.h

@@ -32,7 +32,6 @@ enum die_val {
 	DIE_TRAP,
 	DIE_TRAP_TL1,
 	DIE_CALL,
-	DIE_PAGE_FAULT,
 };
 
 #endif

+ 39 - 0
include/asm-sparc64/mdesc.h

@@ -0,0 +1,39 @@
+#ifndef _SPARC64_MDESC_H
+#define _SPARC64_MDESC_H
+
+#include <linux/types.h>
+#include <asm/prom.h>
+
+struct mdesc_node;
+struct mdesc_arc {
+	const char		*name;
+	struct mdesc_node	*arc;
+};
+
+struct mdesc_node {
+	const char		*name;
+	u64			node;
+	unsigned int		unique_id;
+	unsigned int		num_arcs;
+	struct property		*properties;
+	struct mdesc_node	*hash_next;
+	struct mdesc_node	*allnodes_next;
+	struct mdesc_arc	arcs[0];
+};
+
+extern struct mdesc_node *md_find_node_by_name(struct mdesc_node *from,
+					       const char *name);
+#define md_for_each_node_by_name(__mn, __name) \
+	for (__mn = md_find_node_by_name(NULL, __name); __mn; \
+	     __mn = md_find_node_by_name(__mn, __name))
+
+extern struct property *md_find_property(const struct mdesc_node *mp,
+					 const char *name,
+					 int *lenp);
+extern const void *md_get_property(const struct mdesc_node *mp,
+				   const char *name,
+				   int *lenp);
+
+extern void sun4v_mdesc_init(void);
+
+#endif

+ 2 - 5
include/asm-sparc64/oplib.h

@@ -316,11 +316,8 @@ extern int prom_setprop(int node, const char *prop_name, char *prop_value,
 			
 extern int prom_pathtoinode(const char *path);
 extern int prom_inst2pkg(int);
-
-/* CPU probing helpers.  */
-struct device_node;
-int cpu_find_by_instance(int instance, struct device_node **dev_node, int *mid);
-int cpu_find_by_mid(int mid, struct device_node **prom_node);
+extern int prom_service_exists(const char *service_name);
+extern void prom_sun4v_guest_soft_state(void);
 
 /* Client interface level routines. */
 extern void prom_set_trap_table(unsigned long tba);

+ 3 - 1
include/asm-sparc64/percpu.h

@@ -5,7 +5,8 @@
 
 #ifdef CONFIG_SMP
 
-extern void setup_per_cpu_areas(void);
+#define setup_per_cpu_areas()			do { } while (0)
+extern void real_setup_per_cpu_areas(void);
 
 extern unsigned long __per_cpu_base;
 extern unsigned long __per_cpu_shift;
@@ -34,6 +35,7 @@ do {								\
 } while (0)
 #else /* ! SMP */
 
+#define real_setup_per_cpu_areas()		do { } while (0)
 #define DEFINE_PER_CPU(type, name) \
     __typeof__(type) per_cpu__##name
 

+ 1 - 0
include/asm-sparc64/prom.h

@@ -90,6 +90,7 @@ extern struct device_node *of_find_compatible_node(struct device_node *from,
 	const char *type, const char *compat);
 extern struct device_node *of_find_node_by_path(const char *path);
 extern struct device_node *of_find_node_by_phandle(phandle handle);
+extern struct device_node *of_find_node_by_cpuid(int cpuid);
 extern struct device_node *of_get_parent(const struct device_node *node);
 extern struct device_node *of_get_next_child(const struct device_node *node,
 					     struct device_node *prev);

+ 2 - 2
include/asm-sparc64/smp.h

@@ -41,7 +41,7 @@ extern cpumask_t cpu_sibling_map[NR_CPUS];
 extern int hard_smp_processor_id(void);
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
-extern void smp_setup_cpu_possible_map(void);
+extern void smp_fill_in_sib_core_maps(void);
 extern unsigned char boot_cpu_id;
 
 #endif /* !(__ASSEMBLY__) */
@@ -49,7 +49,7 @@ extern unsigned char boot_cpu_id;
 #else
 
 #define hard_smp_processor_id()		0
-#define smp_setup_cpu_possible_map() do { } while (0)
+#define smp_fill_in_sib_core_maps() do { } while (0)
 #define boot_cpu_id	(0)
 
 #endif /* !(CONFIG_SMP) */

+ 13 - 0
include/asm-sparc64/sstate.h

@@ -0,0 +1,13 @@
+#ifndef _SPARC64_SSTATE_H
+#define _SPARC64_SSTATE_H
+
+extern void sstate_booting(void);
+extern void sstate_running(void);
+extern void sstate_halt(void);
+extern void sstate_poweroff(void);
+extern void sstate_panic(void);
+extern void sstate_reboot(void);
+
+extern void sun4v_sstate_init(void);
+
+#endif /* _SPARC64_SSTATE_H */

+ 4 - 4
include/asm-sparc64/thread_info.h

@@ -38,8 +38,8 @@ struct thread_info {
 	/* D$ line 1 */
 	struct task_struct	*task;
 	unsigned long		flags;
-	__u8			cpu;
 	__u8			fpsaved[7];
+	__u8			pad;
 	unsigned long		ksp;
 
 	/* D$ line 2 */
@@ -49,7 +49,7 @@ struct thread_info {
 	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
 	__u8			new_child;
 	__u8			syscall_noerror;
-	__u16			__pad;
+	__u16			cpu;
 
 	unsigned long		*utraps;
 
@@ -83,8 +83,7 @@ struct thread_info {
 #define TI_CURRENT_DS	(TI_FLAGS + TI_FLAG_BYTE_CURRENT_DS)
 #define TI_FPDEPTH	(TI_FLAGS + TI_FLAG_BYTE_FPDEPTH)
 #define TI_WSAVED	(TI_FLAGS + TI_FLAG_BYTE_WSAVED)
-#define TI_CPU		0x00000010
-#define TI_FPSAVED	0x00000011
+#define TI_FPSAVED	0x00000010
 #define TI_KSP		0x00000018
 #define TI_FAULT_ADDR	0x00000020
 #define TI_KREGS	0x00000028
@@ -92,6 +91,7 @@ struct thread_info {
 #define TI_PRE_COUNT	0x00000038
 #define TI_NEW_CHILD	0x0000003c
 #define TI_SYS_NOERROR	0x0000003d
+#define TI_CPU		0x0000003e
 #define TI_UTRAPS	0x00000040
 #define TI_REG_WINDOW	0x00000048
 #define TI_RWIN_SPTRS	0x000003c8	

+ 3 - 0
include/asm-sparc64/topology.h

@@ -6,4 +6,7 @@
 
 #include <asm-generic/topology.h>
 
+#define topology_core_id(cpu)			(cpu_data(cpu).core_id)
+#define topology_thread_siblings(cpu)		(cpu_sibling_map[cpu])
+
 #endif /* _ASM_SPARC64_TOPOLOGY_H */

+ 1 - 1
include/asm-sparc64/tsb.h

@@ -271,7 +271,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 #define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
 	sethi		%hi(swapper_4m_tsb), REG1; \
 	or		REG1, %lo(swapper_4m_tsb), REG1; \
-	and		TAG, (KERNEL_TSB_NENTRIES - 1), REG2; \
+	and		TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \
 	sllx		REG2, 4, REG2; \
 	add		REG1, REG2, REG2; \
 	KTSB_LOAD_QUAD(REG2, REG3); \