srat.c

/*
 * ACPI 3.0 based NUMA setup
 * Copyright 2004 Andi Kleen, SuSE Labs.
 *
 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
 *
 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
 * Assumes all memory regions belonging to a single proximity domain
 * are in one chunk. Holes between them will be included in the node.
 */

#include <linux/kernel.h>
#include <linux/acpi.h>
#include <linux/mmzone.h>
#include <linux/bitmap.h>
#include <linux/module.h>
#include <linux/topology.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <asm/proto.h>
#include <asm/numa.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>

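/* Set to 1 once a usable SRAT entry has been parsed, to -1 if the SRAT is rejected. */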
int acpi_numa __initdata;

static __init int setup_node(int pxm)
{
        return acpi_map_pxm_to_node(pxm);
}

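/* Stop using the SRAT for NUMA setup after an inconsistent table entry. */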
static __init void bad_srat(void)
{
        printk(KERN_ERR "SRAT: SRAT not used.\n");
        acpi_numa = -1;
}

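/* True once bad_srat() has rejected the table. */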
static __init inline int srat_disabled(void)
{
        return acpi_numa < 0;
}

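/*
 * The SLIT is a locality_count x locality_count matrix of relative distances;
 * entry[i * locality_count + j] is the distance from proximity domain i to j.
 */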
/* Callback for SLIT parsing */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
        int i, j;

        for (i = 0; i < slit->locality_count; i++)
                for (j = 0; j < slit->locality_count; j++)
                        numa_set_distance(pxm_to_node(i), pxm_to_node(j),
                                slit->entry[slit->locality_count * i + j]);
}

/* Callback for Proximity Domain -> x2APIC mapping */
void __init
acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
{
        int pxm, node;
        int apic_id;

        if (srat_disabled())
                return;
        if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
                bad_srat();
                return;
        }
        if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
                return;
        pxm = pa->proximity_domain;
        apic_id = pa->apic_id;
        if (!apic->apic_id_valid(apic_id)) {
                printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n",
                       pxm, apic_id);
                return;
        }
        node = setup_node(pxm);
        if (node < 0) {
                printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
                bad_srat();
                return;
        }

        if (apic_id >= MAX_LOCAL_APIC) {
                printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
                return;
        }
        set_apicid_to_node(apic_id, node);
        node_set(node, numa_nodes_parsed);
        acpi_numa = 1;
        printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
               pxm, apic_id, node);
}

/* Callback for Proximity Domain -> LAPIC mapping */
void __init
acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
{
        int pxm, node;
        int apic_id;

        if (srat_disabled())
                return;
        if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
                bad_srat();
                return;
        }
        if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
                return;
        pxm = pa->proximity_domain_lo;
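        /*
         * SRAT revision 2 widened proximity domains to 32 bits; the upper
         * bits are carried in proximity_domain_hi[].
         */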
        if (acpi_srat_revision >= 2)
                pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
        node = setup_node(pxm);
        if (node < 0) {
                printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
                bad_srat();
                return;
        }

        if (get_uv_system_type() >= UV_X2APIC)
                apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
        else
                apic_id = pa->apic_id;

        if (apic_id >= MAX_LOCAL_APIC) {
                printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
                return;
        }
        set_apicid_to_node(apic_id, node);
        node_set(node, numa_nodes_parsed);
        acpi_numa = 1;
        printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
               pxm, apic_id, node);
}

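/* Hotpluggable ranges are only worth recording when memory hotplug is configured. */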
#ifdef CONFIG_MEMORY_HOTPLUG
static inline int save_add_info(void) {return 1;}
#else
static inline int save_add_info(void) {return 0;}
#endif

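/*
 * Record hotpluggable SRAT ranges in movablemem_map so that boot-time memblock
 * allocations can be kept off memory that is meant to end up in ZONE_MOVABLE.
 */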
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
static void __init
handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
{
        int overlap, i;
        unsigned long start_pfn, end_pfn;

        start_pfn = PFN_DOWN(start);
        end_pfn = PFN_UP(end);

        /*
         * For movablemem_map=acpi:
         *
         * SRAT:               |_____| |_____| |_________| |_________| ......
         * node id:               0       1         1           2
         * hotpluggable:          n       y         y           n
         * movablemem_map:             |_____| |_________|
         *
         * Using movablemem_map, we can prevent memblock from allocating memory
         * on ZONE_MOVABLE at boot time.
         *
         * Before the SRAT is parsed, memblock has already reserved some memory
         * ranges for other purposes, such as the kernel image. We cannot
         * prevent the kernel from using this memory, so we need to exclude it
         * even if it is hotpluggable.
         * Furthermore, to ensure the kernel has enough memory to boot, we make
         * all the memory on the node the kernel resides in un-hotpluggable.
         */
        if (hotpluggable && movablemem_map.acpi) {
                /* Exclude ranges reserved by memblock. */
                struct memblock_type *rgn = &memblock.reserved;

                for (i = 0; i < rgn->cnt; i++) {
                        if (end <= rgn->regions[i].base ||
                            start >= rgn->regions[i].base +
                                     rgn->regions[i].size)
                                continue;

                        /*
                         * If the memory range overlaps the memory reserved by
                         * memblock, then the kernel resides in this node.
                         */
                        node_set(node, movablemem_map.numa_nodes_kernel);

                        goto out;
                }

                /*
                 * If the kernel resides in this node, then the whole node
                 * should not be hotpluggable.
                 */
                if (node_isset(node, movablemem_map.numa_nodes_kernel))
                        goto out;

                insert_movablemem_map(start_pfn, end_pfn);

                /*
                 * numa_nodes_hotplug nodemask represents which nodes are put
                 * into movablemem_map.map[].
                 */
                node_set(node, movablemem_map.numa_nodes_hotplug);
                goto out;
        }

        /*
         * For movablemem_map=nn[KMG]@ss[KMG]:
         *
         * SRAT:               |_____| |_____| |_________| |_________| ......
         * node id:               0       1         1           2
         * user specified:             |__|                 |___|
         * movablemem_map:             |___| |_________|    |______| ......
         *
         * Using movablemem_map, we can prevent memblock from allocating memory
         * on ZONE_MOVABLE at boot time.
         *
         * NOTE: In this case, SRAT info will be ignored.
         */
        overlap = movablemem_map_overlap(start_pfn, end_pfn);
        if (overlap >= 0) {
                /*
                 * If part of this range is in movablemem_map, we need to
                 * add the range after it to extend the range to the end
                 * of the node, because everything from the minimum address
                 * specified to the end of the node will be ZONE_MOVABLE.
                 */
                start_pfn = max(start_pfn,
                                movablemem_map.map[overlap].start_pfn);
                insert_movablemem_map(start_pfn, end_pfn);

                /*
                 * Set the nodemask, so that if the address range on one node
                 * is not contiguous, we can add the subsequent ranges on the
                 * same node into movablemem_map.
                 */
                node_set(node, movablemem_map.numa_nodes_hotplug);
        } else {
                if (node_isset(node, movablemem_map.numa_nodes_hotplug))
                        /*
                         * Insert the range if we already have movable ranges
                         * on the same node.
                         */
                        insert_movablemem_map(start_pfn, end_pfn);
        }
out:
        return;
}
#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
static inline void
handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
{
}
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */

/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
int __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
        u64 start, end;
        u32 hotpluggable;
        int node, pxm;

        if (srat_disabled())
                goto out_err;
        if (ma->header.length != sizeof(struct acpi_srat_mem_affinity))
                goto out_err_bad_srat;
        if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
                goto out_err;
        hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
        if (hotpluggable && !save_add_info())
                goto out_err;

        start = ma->base_address;
        end = start + ma->length;
        pxm = ma->proximity_domain;
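        /* SRAT revision 1 only defined an 8-bit proximity domain. */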
        if (acpi_srat_revision <= 1)
                pxm &= 0xff;

        node = setup_node(pxm);
        if (node < 0) {
                printk(KERN_ERR "SRAT: Too many proximity domains.\n");
                goto out_err_bad_srat;
        }

        if (numa_add_memblk(node, start, end) < 0)
                goto out_err_bad_srat;

        node_set(node, numa_nodes_parsed);

        printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n",
               node, pxm,
               (unsigned long long) start, (unsigned long long) end - 1,
               hotpluggable ? "Hot Pluggable" : "");

        handle_movablemem(node, start, end, hotpluggable);

        return 0;
out_err_bad_srat:
        bad_srat();
out_err:
        return -1;
}

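/* Nothing to fix up after SRAT parsing on x86. */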
void __init acpi_numa_arch_fixup(void) {}

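/*
 * Parse the ACPI SRAT/SLIT via acpi_numa_init() and report whether usable
 * NUMA information was found.
 */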
int __init x86_acpi_numa_init(void)
{
        int ret;

        ret = acpi_numa_init();
        if (ret < 0)
                return ret;
        return srat_disabled() ? -EINVAL : 0;
}