numa.c

/* Common code for 32 and 64-bit NUMA */
#include <linux/topology.h>
#include <linux/module.h>
#include <linux/bootmem.h>
#include <asm/numa.h>
#include <asm/acpi.h>

int __initdata numa_off;

static __init int numa_setup(char *opt)
{
	if (!opt)
		return -EINVAL;
	if (!strncmp(opt, "off", 3))
		numa_off = 1;
#ifdef CONFIG_NUMA_EMU
	if (!strncmp(opt, "fake=", 5))
		numa_emu_cmdline(opt + 5);
#endif
#ifdef CONFIG_ACPI_NUMA
	if (!strncmp(opt, "noacpi", 6))
		acpi_numa = -1;
#endif
	return 0;
}
early_param("numa", numa_setup);
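
/*
 * Summary of the kernel command-line options handled above (numa=fake
 * and numa=noacpi exist only when the matching CONFIG options are set):
 *
 *   numa=off          disable NUMA handling entirely
 *   numa=fake=<spec>  emulate NUMA nodes; the <spec> string is handed
 *                     to numa_emu_cmdline(), which in this kernel
 *                     accepts forms such as a fake node count
 *                     (e.g. numa=fake=4) - see numa_emulation.c for
 *                     the exact grammar
 *   numa=noacpi       ignore ACPI SRAT NUMA information (acpi_numa = -1)
 */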

/*
 * apicid, cpu, node mappings
 */
s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};

cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
EXPORT_SYMBOL(node_to_cpumask_map);

/*
 * Map cpu index to node index
 */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);

void __cpuinit numa_set_node(int cpu, int node)
{
	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	/* early setting, no percpu area yet */
	if (cpu_to_node_map) {
		cpu_to_node_map[cpu] = node;
		return;
	}

#ifdef CONFIG_DEBUG_PER_CPU_MAPS
	if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
		printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
		dump_stack();
		return;
	}
#endif
	per_cpu(x86_cpu_to_node_map, cpu) = node;

	if (node != NUMA_NO_NODE)
		set_cpu_numa_node(cpu, node);
}
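
/*
 * Note the two-phase behaviour above: while the boot-time
 * x86_cpu_to_node_map array still exists (early_per_cpu_ptr() returns
 * non-NULL), updates land there; once the real per-cpu areas have been
 * set up, updates go to the per-cpu variable and are mirrored into the
 * generic mapping via set_cpu_numa_node().
 */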

void __cpuinit numa_clear_node(int cpu)
{
	numa_set_node(cpu, NUMA_NO_NODE);
}

/*
 * Allocate node_to_cpumask_map based on number of available nodes
 * Requires node_possible_map to be valid.
 *
 * Note: node_to_cpumask() is not valid until after this is done.
 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
 */
void __init setup_node_to_cpumask_map(void)
{
	unsigned int node, num = 0;

	/* setup nr_node_ids if not done yet */
	if (nr_node_ids == MAX_NUMNODES) {
		for_each_node_mask(node, node_possible_map)
			num = node;
		nr_node_ids = num + 1;
	}

	/* allocate the map */
	for (node = 0; node < nr_node_ids; node++)
		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);

	/* cpumask_of_node() will now work */
	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
}
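
/*
 * Worked example for the nr_node_ids computation above: the loop keeps
 * the highest bit set in node_possible_map, so with possible nodes
 * {0, 1, 3} it ends with num == 3 and nr_node_ids becomes 4. Cpumasks
 * are then allocated for node IDs 0..3, including the unused ID 2.
 */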

/*
 * There are unfortunately some poorly designed mainboards around that
 * only connect memory to a single CPU. This breaks the 1:1 cpu->node
 * mapping. To avoid this fill in the mapping for all possible CPUs,
 * as the number of CPUs is not known yet. We round robin the existing
 * nodes.
 */
void __init numa_init_array(void)
{
	int rr, i;

	rr = first_node(node_online_map);
	for (i = 0; i < nr_cpu_ids; i++) {
		if (early_cpu_to_node(i) != NUMA_NO_NODE)
			continue;
		numa_set_node(i, rr);
		rr = next_node(rr, node_online_map);
		if (rr == MAX_NUMNODES)
			rr = first_node(node_online_map);
	}
}
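
/*
 * Example of the round robin above: with online nodes {0, 2} and four
 * CPUs still mapped to NUMA_NO_NODE, the loop assigns nodes 0, 2, 0, 2.
 * next_node() returns MAX_NUMNODES past the last set bit, which is what
 * wraps rr back to first_node().
 */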

static __init int find_near_online_node(int node)
{
	int n, val;
	int min_val = INT_MAX;
	int best_node = -1;

	for_each_online_node(n) {
		val = node_distance(node, n);

		if (val < min_val) {
			min_val = val;
			best_node = n;
		}
	}

	return best_node;
}
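
/*
 * node_distance() reports the inter-node distance (on ACPI systems,
 * taken from the SLIT), and a node's distance to itself is the smallest
 * value in the table, so an already-online node resolves to itself here;
 * only offline nodes get redirected to their nearest online neighbour.
 */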

/*
 * Setup early cpu_to_node.
 *
 * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
 * and apicid_to_node[] tables have valid entries for a CPU.
 * This means we skip cpu_to_node[] initialisation for NUMA
 * emulation and faking node case (when running a kernel compiled
 * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
 * is already initialized in a round robin manner at numa_init_array,
 * prior to this call, and this initialization is good enough
 * for the fake NUMA cases.
 *
 * Called before the per_cpu areas are setup.
 */
void __init init_cpu_to_node(void)
{
	int cpu;
	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);

	BUG_ON(cpu_to_apicid == NULL);

	for_each_possible_cpu(cpu) {
		int node = numa_cpu_node(cpu);

		if (node == NUMA_NO_NODE)
			continue;
		if (!node_online(node))
			node = find_near_online_node(node);
		numa_set_node(cpu, node);
	}
}

#ifndef CONFIG_DEBUG_PER_CPU_MAPS

# ifndef CONFIG_NUMA_EMU
void __cpuinit numa_add_cpu(int cpu)
{
	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}

void __cpuinit numa_remove_cpu(int cpu)
{
	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
# endif	/* !CONFIG_NUMA_EMU */

#else	/* !CONFIG_DEBUG_PER_CPU_MAPS */

int __cpu_to_node(int cpu)
{
	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
		printk(KERN_WARNING
			"cpu_to_node(%d): usage too early!\n", cpu);
		dump_stack();
		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
	}
	return per_cpu(x86_cpu_to_node_map, cpu);
}
EXPORT_SYMBOL(__cpu_to_node);

/*
 * Same function as cpu_to_node() but used if called before the
 * per_cpu areas are setup.
 */
int early_cpu_to_node(int cpu)
{
	if (early_per_cpu_ptr(x86_cpu_to_node_map))
		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];

	if (!cpu_possible(cpu)) {
		printk(KERN_WARNING
			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
		dump_stack();
		return NUMA_NO_NODE;
	}
	return per_cpu(x86_cpu_to_node_map, cpu);
}
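
/*
 * The pair above mirrors the two-phase mapping: __cpu_to_node() is the
 * normal accessor, and under CONFIG_DEBUG_PER_CPU_MAPS it warns when
 * reached while only the early boot-time array exists, whereas
 * early_cpu_to_node() is the variant that is explicitly safe to call
 * before the per-cpu areas are set up.
 */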

struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
{
	int node = early_cpu_to_node(cpu);
	struct cpumask *mask;
	char buf[64];

	if (node == NUMA_NO_NODE) {
		/* early_cpu_to_node() already emits a warning and trace */
		return NULL;
	}
	mask = node_to_cpumask_map[node];
	if (!mask) {
		pr_err("node_to_cpumask_map[%i] NULL\n", node);
		dump_stack();
		return NULL;
	}

	cpulist_scnprintf(buf, sizeof(buf), mask);
	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
		enable ? "numa_add_cpu" : "numa_remove_cpu",
		cpu, node, buf);
	return mask;
}

# ifndef CONFIG_NUMA_EMU
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
	struct cpumask *mask;

	mask = debug_cpumask_set_cpu(cpu, enable);
	if (!mask)
		return;

	if (enable)
		cpumask_set_cpu(cpu, mask);
	else
		cpumask_clear_cpu(cpu, mask);
}

void __cpuinit numa_add_cpu(int cpu)
{
	numa_set_cpumask(cpu, 1);
}

void __cpuinit numa_remove_cpu(int cpu)
{
	numa_set_cpumask(cpu, 0);
}
# endif	/* !CONFIG_NUMA_EMU */

/*
 * Returns a pointer to the bitmask of CPUs on Node 'node'.
 */
const struct cpumask *cpumask_of_node(int node)
{
	if (node >= nr_node_ids) {
		printk(KERN_WARNING
			"cpumask_of_node(%d): node > nr_node_ids(%d)\n",
			node, nr_node_ids);
		dump_stack();
		return cpu_none_mask;
	}
	if (node_to_cpumask_map[node] == NULL) {
		printk(KERN_WARNING
			"cpumask_of_node(%d): no node_to_cpumask_map!\n",
			node);
		dump_stack();
		return cpu_online_mask;
	}
	return node_to_cpumask_map[node];
}
EXPORT_SYMBOL(cpumask_of_node);

#endif	/* !CONFIG_DEBUG_PER_CPU_MAPS */