@@ -26,19 +26,9 @@ EXPORT_SYMBOL(node_data);
 
 struct memnode memnode;
 
-s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
-	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-
 static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
 
-/*
- * Map cpu index to node index
- */
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-
 /*
  * Given a shift value, try to populate memnodemap[]
  * Returns :
@@ -234,28 +224,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 	node_set_online(nodeid);
 }
 
-/*
- * There are unfortunately some poorly designed mainboards around that
- * only connect memory to a single CPU. This breaks the 1:1 cpu->node
- * mapping. To avoid this fill in the mapping for all possible CPUs,
- * as the number of CPUs is not known yet. We round robin the existing
- * nodes.
- */
-void __init numa_init_array(void)
-{
-	int rr, i;
-
-	rr = first_node(node_online_map);
-	for (i = 0; i < nr_cpu_ids; i++) {
-		if (early_cpu_to_node(i) != NUMA_NO_NODE)
-			continue;
-		numa_set_node(i, rr);
-		rr = next_node(rr, node_online_map);
-		if (rr == MAX_NUMNODES)
-			rr = first_node(node_online_map);
-	}
-}
-
 #ifdef CONFIG_NUMA_EMU
 /* Numa emulation */
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
@@ -676,115 +644,33 @@ unsigned long __init numa_free_all_bootmem(void)
 	return pages;
 }
 
-#ifdef CONFIG_NUMA
-
-static __init int find_near_online_node(int node)
+int __cpuinit numa_cpu_node(int cpu)
 {
-	int n, val;
-	int min_val = INT_MAX;
-	int best_node = -1;
-
-	for_each_online_node(n) {
-		val = node_distance(node, n);
-
-		if (val < min_val) {
-			min_val = val;
-			best_node = n;
-		}
-	}
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
 
-	return best_node;
+	if (apicid != BAD_APICID)
+		return __apicid_to_node[apicid];
+	return NUMA_NO_NODE;
 }
 
 /*
- * Setup early cpu_to_node.
+ * UGLINESS AHEAD: Currently, CONFIG_NUMA_EMU is 64bit only and makes use
+ * of 64bit specific data structures. The distinction is artificial and
+ * should be removed. numa_{add|remove}_cpu() are implemented in numa.c
+ * for both 32 and 64bit when CONFIG_NUMA_EMU is disabled but here when
+ * enabled.
  *
- * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
- * and apicid_to_node[] tables have valid entries for a CPU.
- * This means we skip cpu_to_node[] initialisation for NUMA
- * emulation and faking node case (when running a kernel compiled
- * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
- * is already initialized in a round robin manner at numa_init_array,
- * prior to this call, and this initialization is good enough
- * for the fake NUMA cases.
- *
- * Called before the per_cpu areas are setup.
+ * NUMA emulation is planned to be made generic and the following and other
+ * related code should be moved to numa.c.
  */
-void __init init_cpu_to_node(void)
-{
-	int cpu;
-	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-
-	BUG_ON(cpu_to_apicid == NULL);
-
-	for_each_possible_cpu(cpu) {
-		int node;
-		u16 apicid = cpu_to_apicid[cpu];
-
-		if (apicid == BAD_APICID)
-			continue;
-		node = apicid_to_node[apicid];
-		if (node == NUMA_NO_NODE)
-			continue;
-		if (!node_online(node))
-			node = find_near_online_node(node);
-		numa_set_node(cpu, node);
-	}
-}
-#endif
-
-
-void __cpuinit numa_set_node(int cpu, int node)
-{
-	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
-
-	/* early setting, no percpu area yet */
-	if (cpu_to_node_map) {
-		cpu_to_node_map[cpu] = node;
-		return;
-	}
-
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-	if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
-		printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
-		dump_stack();
-		return;
-	}
-#endif
-	per_cpu(x86_cpu_to_node_map, cpu) = node;
-
-	if (node != NUMA_NO_NODE)
-		set_cpu_numa_node(cpu, node);
-}
-
-void __cpuinit numa_clear_node(int cpu)
-{
-	numa_set_node(cpu, NUMA_NO_NODE);
-}
-
-#ifndef CONFIG_DEBUG_PER_CPU_MAPS
-
-#ifndef CONFIG_NUMA_EMU
-void __cpuinit numa_add_cpu(int cpu)
-{
-	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-
-void __cpuinit numa_remove_cpu(int cpu)
-{
-	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-#else
+#ifdef CONFIG_NUMA_EMU
+# ifndef CONFIG_DEBUG_PER_CPU_MAPS
 void __cpuinit numa_add_cpu(int cpu)
 {
 	unsigned long addr;
-	u16 apicid;
-	int physnid;
-	int nid = NUMA_NO_NODE;
+	int physnid, nid;
 
-	apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-	if (apicid != BAD_APICID)
-		nid = apicid_to_node[apicid];
+	nid = numa_cpu_node(cpu);
 	if (nid == NUMA_NO_NODE)
 		nid = early_cpu_to_node(cpu);
 	BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
@@ -818,53 +704,17 @@ void __cpuinit numa_remove_cpu(int cpu)
 	for_each_online_node(i)
 		cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
 }
-#endif /* !CONFIG_NUMA_EMU */
-
-#else /* CONFIG_DEBUG_PER_CPU_MAPS */
-static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
-{
-	int node = early_cpu_to_node(cpu);
-	struct cpumask *mask;
-	char buf[64];
-
-	mask = node_to_cpumask_map[node];
-	if (!mask) {
-		pr_err("node_to_cpumask_map[%i] NULL\n", node);
-		dump_stack();
-		return NULL;
-	}
-
-	cpulist_scnprintf(buf, sizeof(buf), mask);
-	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
-		enable ? "numa_add_cpu" : "numa_remove_cpu",
-		cpu, node, buf);
-	return mask;
-}
-
-/*
- * --------- debug versions of the numa functions ---------
- */
-#ifndef CONFIG_NUMA_EMU
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
-{
-	struct cpumask *mask;
-
-	mask = debug_cpumask_set_cpu(cpu, enable);
-	if (!mask)
-		return;
-
-	if (enable)
-		cpumask_set_cpu(cpu, mask);
-	else
-		cpumask_clear_cpu(cpu, mask);
-}
-#else
+# else /* !CONFIG_DEBUG_PER_CPU_MAPS */
 static void __cpuinit numa_set_cpumask(int cpu, int enable)
 {
 	int node = early_cpu_to_node(cpu);
 	struct cpumask *mask;
 	int i;
 
+	if (node == NUMA_NO_NODE) {
+		/* early_cpu_to_node() already emits a warning and trace */
+		return;
+	}
 	for_each_online_node(i) {
 		unsigned long addr;
 
@@ -882,7 +732,6 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
 			cpumask_clear_cpu(cpu, mask);
 	}
 }
-#endif /* CONFIG_NUMA_EMU */
 
 void __cpuinit numa_add_cpu(int cpu)
 {
@@ -893,39 +742,5 @@ void __cpuinit numa_remove_cpu(int cpu)
 {
 	numa_set_cpumask(cpu, 0);
 }
-
-int __cpu_to_node(int cpu)
-{
-	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
-		printk(KERN_WARNING
-			"cpu_to_node(%d): usage too early!\n", cpu);
-		dump_stack();
-		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-	}
-	return per_cpu(x86_cpu_to_node_map, cpu);
-}
-EXPORT_SYMBOL(__cpu_to_node);
-
-/*
- * Same function as cpu_to_node() but used if called before the
- * per_cpu areas are setup.
- */
-int early_cpu_to_node(int cpu)
-{
-	if (early_per_cpu_ptr(x86_cpu_to_node_map))
-		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-
-	if (!cpu_possible(cpu)) {
-		printk(KERN_WARNING
-			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
-		dump_stack();
-		return NUMA_NO_NODE;
-	}
-	return per_cpu(x86_cpu_to_node_map, cpu);
-}
-
-/*
- * --------- end of debug versions of the numa functions ---------
- */
-
-#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+# endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
+#endif /* CONFIG_NUMA_EMU */
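
For reference (not part of the patch): a minimal userspace sketch of the lookup chain that the new numa_cpu_node() helper encodes, cpu -> apicid -> node. The fake_* arrays, the NR_CPUS value, and the sample apicid/node assignments below are made up purely for illustration; in the kernel the data lives in the early x86_cpu_to_apicid per-cpu map and in __apicid_to_node[], which SRAT/AMD topology parsing fills in.

/*
 * Standalone sketch of numa_cpu_node()'s lookup chain.
 * Build with: gcc -o numa_sketch numa_sketch.c
 */
#include <stdio.h>

#define MAX_LOCAL_APIC	256
#define NR_CPUS		4		/* made-up CPU count */
#define BAD_APICID	0xffffu
#define NUMA_NO_NODE	(-1)

/* stand-in for the kernel's __apicid_to_node[], default all-invalid */
static int fake_apicid_to_node[MAX_LOCAL_APIC] = {
	[0 ... MAX_LOCAL_APIC - 1] = NUMA_NO_NODE
};

/* stand-in for the early x86_cpu_to_apicid map; cpu 2 has no apicid */
static unsigned int fake_cpu_to_apicid[NR_CPUS] = { 0, 2, BAD_APICID, 6 };

/* mirrors the helper added by the patch: cpu -> apicid -> node */
static int fake_numa_cpu_node(int cpu)
{
	unsigned int apicid = fake_cpu_to_apicid[cpu];

	if (apicid != BAD_APICID)
		return fake_apicid_to_node[apicid];
	return NUMA_NO_NODE;
}

int main(void)
{
	int cpu;

	/* pretend firmware mapped apicids 0 and 2 to node 0, apicid 6 to node 1 */
	fake_apicid_to_node[0] = 0;
	fake_apicid_to_node[2] = 0;
	fake_apicid_to_node[6] = 1;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %d -> node %d\n", cpu, fake_numa_cpu_node(cpu));
	return 0;
}

A CPU whose apicid is unknown resolves to NUMA_NO_NODE (cpu 2 above prints -1); in the patched numa_add_cpu() that case falls back to early_cpu_to_node(cpu), matching the old apicid_to_node[] behaviour.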