@@ -1,444 +0,0 @@
-/*
- * arch/ia64/kernel/domain.c
- * Architecture specific sched-domains builder.
- *
- * Copyright (C) 2004 Jesse Barnes
- * Copyright (C) 2004 Silicon Graphics, Inc.
- */
-
-#include <linux/sched.h>
-#include <linux/percpu.h>
-#include <linux/slab.h>
-#include <linux/cpumask.h>
-#include <linux/init.h>
-#include <linux/topology.h>
-#include <linux/nodemask.h>
-
-#define SD_NODES_PER_DOMAIN 16
-
-#ifdef CONFIG_NUMA
-/**
- * find_next_best_node - find the next node to include in a sched_domain
- * @node: node whose sched_domain we're building
- * @used_nodes: nodes already in the sched_domain
- *
- * Find the next node to include in a given scheduling domain. Simply
- * finds the closest node not already in the @used_nodes map.
- *
- * Should use nodemask_t.
- */
-static int find_next_best_node(int node, unsigned long *used_nodes)
-{
-	int i, n, val, min_val, best_node = 0;
-
-	min_val = INT_MAX;
-
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		/* Start at @node */
-		n = (node + i) % MAX_NUMNODES;
-
-		if (!nr_cpus_node(n))
-			continue;
-
-		/* Skip already used nodes */
-		if (test_bit(n, used_nodes))
-			continue;
-
-		/* Simple min distance search */
-		val = node_distance(node, n);
-
-		if (val < min_val) {
-			min_val = val;
-			best_node = n;
-		}
-	}
-
-	set_bit(best_node, used_nodes);
-	return best_node;
-}
-
-/**
- * sched_domain_node_span - get a cpumask for a node's sched_domain
- * @node: node whose cpumask we're constructing
- * @size: number of nodes to include in this span
- *
- * Given a node, construct a good cpumask for its sched_domain to span. It
- * should be one that prevents unnecessary balancing, but also spreads tasks
- * out optimally.
- */
-static cpumask_t sched_domain_node_span(int node)
-{
-	int i;
-	cpumask_t span, nodemask;
-	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
-
-	cpus_clear(span);
-	bitmap_zero(used_nodes, MAX_NUMNODES);
-
-	nodemask = node_to_cpumask(node);
-	cpus_or(span, span, nodemask);
-	set_bit(node, used_nodes);
-
-	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
-		int next_node = find_next_best_node(node, used_nodes);
-		nodemask = node_to_cpumask(next_node);
-		cpus_or(span, span, nodemask);
-	}
-
-	return span;
-}
-#endif
-
-/*
- * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
- * can switch it on easily if needed.
- */
-#ifdef CONFIG_SCHED_SMT
-static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
-static struct sched_group sched_group_cpus[NR_CPUS];
-static int cpu_to_cpu_group(int cpu)
-{
-	return cpu;
-}
-#endif
-
-static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static struct sched_group sched_group_phys[NR_CPUS];
-static int cpu_to_phys_group(int cpu)
-{
-#ifdef CONFIG_SCHED_SMT
-	return first_cpu(cpu_sibling_map[cpu]);
-#else
-	return cpu;
-#endif
-}
-
-#ifdef CONFIG_NUMA
-/*
- * The init_sched_build_groups can't handle what we want to do with node
- * groups, so roll our own. Now each node has its own list of groups which
- * gets dynamically allocated.
- */
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
-static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];
-
-static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
-static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];
-
-static int cpu_to_allnodes_group(int cpu)
-{
-	return cpu_to_node(cpu);
-}
-#endif
-
-/*
- * Build sched domains for a given set of cpus and attach the sched domains
- * to the individual cpus
- */
-void build_sched_domains(const cpumask_t *cpu_map)
-{
-	int i;
-#ifdef CONFIG_NUMA
-	struct sched_group **sched_group_nodes = NULL;
-	struct sched_group *sched_group_allnodes = NULL;
-
-	/*
-	 * Allocate the per-node list of sched groups
-	 */
-	sched_group_nodes = kmalloc(sizeof(struct sched_group*)*MAX_NUMNODES,
-				    GFP_ATOMIC);
-	if (!sched_group_nodes) {
-		printk(KERN_WARNING "Can not alloc sched group node list\n");
-		return;
-	}
-	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
-#endif
-
-	/*
-	 * Set up domains for cpus specified by the cpu_map.
-	 */
-	for_each_cpu_mask(i, *cpu_map) {
-		int group;
-		struct sched_domain *sd = NULL, *p;
-		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
-
-		cpus_and(nodemask, nodemask, *cpu_map);
-
-#ifdef CONFIG_NUMA
-		if (cpus_weight(*cpu_map)
-				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
-			if (!sched_group_allnodes) {
-				sched_group_allnodes
-					= kmalloc(sizeof(struct sched_group)
-							* MAX_NUMNODES,
-						  GFP_KERNEL);
-				if (!sched_group_allnodes) {
-					printk(KERN_WARNING
-					"Can not alloc allnodes sched group\n");
-					break;
-				}
-				sched_group_allnodes_bycpu[i]
-						= sched_group_allnodes;
-			}
-			sd = &per_cpu(allnodes_domains, i);
-			*sd = SD_ALLNODES_INIT;
-			sd->span = *cpu_map;
-			group = cpu_to_allnodes_group(i);
-			sd->groups = &sched_group_allnodes[group];
-			p = sd;
-		} else
-			p = NULL;
-
-		sd = &per_cpu(node_domains, i);
-		*sd = SD_NODE_INIT;
-		sd->span = sched_domain_node_span(cpu_to_node(i));
-		sd->parent = p;
-		cpus_and(sd->span, sd->span, *cpu_map);
-#endif
-
-		p = sd;
-		sd = &per_cpu(phys_domains, i);
-		group = cpu_to_phys_group(i);
-		*sd = SD_CPU_INIT;
-		sd->span = nodemask;
-		sd->parent = p;
-		sd->groups = &sched_group_phys[group];
-
-#ifdef CONFIG_SCHED_SMT
-		p = sd;
-		sd = &per_cpu(cpu_domains, i);
-		group = cpu_to_cpu_group(i);
-		*sd = SD_SIBLING_INIT;
-		sd->span = cpu_sibling_map[i];
-		cpus_and(sd->span, sd->span, *cpu_map);
-		sd->parent = p;
-		sd->groups = &sched_group_cpus[group];
-#endif
-	}
-
-#ifdef CONFIG_SCHED_SMT
-	/* Set up CPU (sibling) groups */
-	for_each_cpu_mask(i, *cpu_map) {
-		cpumask_t this_sibling_map = cpu_sibling_map[i];
-		cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
-		if (i != first_cpu(this_sibling_map))
-			continue;
-
-		init_sched_build_groups(sched_group_cpus, this_sibling_map,
-						&cpu_to_cpu_group);
-	}
-#endif
-
-	/* Set up physical groups */
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		cpumask_t nodemask = node_to_cpumask(i);
-
-		cpus_and(nodemask, nodemask, *cpu_map);
-		if (cpus_empty(nodemask))
-			continue;
-
-		init_sched_build_groups(sched_group_phys, nodemask,
-						&cpu_to_phys_group);
-	}
-
-#ifdef CONFIG_NUMA
-	if (sched_group_allnodes)
-		init_sched_build_groups(sched_group_allnodes, *cpu_map,
-					&cpu_to_allnodes_group);
-
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		/* Set up node groups */
-		struct sched_group *sg, *prev;
-		cpumask_t nodemask = node_to_cpumask(i);
-		cpumask_t domainspan;
-		cpumask_t covered = CPU_MASK_NONE;
-		int j;
-
-		cpus_and(nodemask, nodemask, *cpu_map);
-		if (cpus_empty(nodemask)) {
-			sched_group_nodes[i] = NULL;
-			continue;
-		}
-
-		domainspan = sched_domain_node_span(i);
-		cpus_and(domainspan, domainspan, *cpu_map);
-
-		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
-		sched_group_nodes[i] = sg;
-		for_each_cpu_mask(j, nodemask) {
-			struct sched_domain *sd;
-			sd = &per_cpu(node_domains, j);
-			sd->groups = sg;
-			if (sd->groups == NULL) {
-				/* Turn off balancing if we have no groups */
-				sd->flags = 0;
-			}
-		}
-		if (!sg) {
-			printk(KERN_WARNING
-			"Can not alloc domain group for node %d\n", i);
-			continue;
-		}
-		sg->cpu_power = 0;
-		sg->cpumask = nodemask;
-		cpus_or(covered, covered, nodemask);
-		prev = sg;
-
-		for (j = 0; j < MAX_NUMNODES; j++) {
-			cpumask_t tmp, notcovered;
-			int n = (i + j) % MAX_NUMNODES;
-
-			cpus_complement(notcovered, covered);
-			cpus_and(tmp, notcovered, *cpu_map);
-			cpus_and(tmp, tmp, domainspan);
-			if (cpus_empty(tmp))
-				break;
-
-			nodemask = node_to_cpumask(n);
-			cpus_and(tmp, tmp, nodemask);
-			if (cpus_empty(tmp))
-				continue;
-
-			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
-			if (!sg) {
-				printk(KERN_WARNING
-				"Can not alloc domain group for node %d\n", j);
-				break;
-			}
-			sg->cpu_power = 0;
-			sg->cpumask = tmp;
-			cpus_or(covered, covered, tmp);
-			prev->next = sg;
-			prev = sg;
-		}
-		prev->next = sched_group_nodes[i];
-	}
-#endif
-
-	/* Calculate CPU power for physical packages and nodes */
-	for_each_cpu_mask(i, *cpu_map) {
-		int power;
-		struct sched_domain *sd;
-#ifdef CONFIG_SCHED_SMT
-		sd = &per_cpu(cpu_domains, i);
-		power = SCHED_LOAD_SCALE;
-		sd->groups->cpu_power = power;
-#endif
-
-		sd = &per_cpu(phys_domains, i);
-		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-		sd->groups->cpu_power = power;
-
-#ifdef CONFIG_NUMA
-		sd = &per_cpu(allnodes_domains, i);
-		if (sd->groups) {
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-			sd->groups->cpu_power = power;
-		}
-#endif
-	}
-
-#ifdef CONFIG_NUMA
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		struct sched_group *sg = sched_group_nodes[i];
-		int j;
-
-		if (sg == NULL)
-			continue;
-next_sg:
-		for_each_cpu_mask(j, sg->cpumask) {
-			struct sched_domain *sd;
-			int power;
-
-			sd = &per_cpu(phys_domains, j);
-			if (j != first_cpu(sd->groups->cpumask)) {
-				/*
-				 * Only add "power" once for each
-				 * physical package.
-				 */
-				continue;
-			}
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-
-			sg->cpu_power += power;
-		}
-		sg = sg->next;
-		if (sg != sched_group_nodes[i])
-			goto next_sg;
-	}
-#endif
-
-	/* Attach the domains */
-	for_each_cpu_mask(i, *cpu_map) {
-		struct sched_domain *sd;
-#ifdef CONFIG_SCHED_SMT
-		sd = &per_cpu(cpu_domains, i);
-#else
-		sd = &per_cpu(phys_domains, i);
-#endif
-		cpu_attach_domain(sd, i);
-	}
-}
-/*
- * Set up scheduler domains and groups. Callers must hold the hotplug lock.
- */
-void arch_init_sched_domains(const cpumask_t *cpu_map)
-{
-	cpumask_t cpu_default_map;
-
-	/*
-	 * Setup mask for cpus without special case scheduling requirements.
-	 * For now this just excludes isolated cpus, but could be used to
-	 * exclude other special cases in the future.
-	 */
-	cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);
-
-	build_sched_domains(&cpu_default_map);
-}
-
-void arch_destroy_sched_domains(const cpumask_t *cpu_map)
-{
-#ifdef CONFIG_NUMA
-	int i;
-	int cpu;
-
-	for_each_cpu_mask(cpu, *cpu_map) {
-		struct sched_group *sched_group_allnodes
-			= sched_group_allnodes_bycpu[cpu];
-		struct sched_group **sched_group_nodes
-			= sched_group_nodes_bycpu[cpu];
-
-		if (sched_group_allnodes) {
-			kfree(sched_group_allnodes);
-			sched_group_allnodes_bycpu[cpu] = NULL;
-		}
-
-		if (!sched_group_nodes)
-			continue;
-
-		for (i = 0; i < MAX_NUMNODES; i++) {
-			cpumask_t nodemask = node_to_cpumask(i);
-			struct sched_group *oldsg, *sg = sched_group_nodes[i];
-
-			cpus_and(nodemask, nodemask, *cpu_map);
-			if (cpus_empty(nodemask))
-				continue;
-
-			if (sg == NULL)
-				continue;
-			sg = sg->next;
-next_sg:
-			oldsg = sg;
-			sg = sg->next;
-			kfree(oldsg);
-			if (oldsg != sched_group_nodes[i])
-				goto next_sg;
-		}
-		kfree(sched_group_nodes);
-		sched_group_nodes_bycpu[cpu] = NULL;
-	}
-#endif
-}