node.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476
  1. /*
  2. * drivers/base/node.c - basic Node class support
  3. */
  4. #include <linux/sysdev.h>
  5. #include <linux/module.h>
  6. #include <linux/init.h>
  7. #include <linux/mm.h>
  8. #include <linux/memory.h>
  9. #include <linux/node.h>
  10. #include <linux/hugetlb.h>
  11. #include <linux/cpumask.h>
  12. #include <linux/topology.h>
  13. #include <linux/nodemask.h>
  14. #include <linux/cpu.h>
  15. #include <linux/device.h>
  16. #include <linux/swap.h>
  17. static struct sysdev_class node_class = {
  18. .name = "node",
  19. };
  20. static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf)
  21. {
  22. struct node *node_dev = to_node(dev);
  23. node_to_cpumask_ptr(mask, node_dev->sysdev.id);
  24. int len;
  25. /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
  26. BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));
  27. len = type?
  28. cpulist_scnprintf(buf, PAGE_SIZE-2, mask) :
  29. cpumask_scnprintf(buf, PAGE_SIZE-2, mask);
  30. buf[len++] = '\n';
  31. buf[len] = '\0';
  32. return len;
  33. }
  34. static inline ssize_t node_read_cpumask(struct sys_device *dev,
  35. struct sysdev_attribute *attr, char *buf)
  36. {
  37. return node_read_cpumap(dev, 0, buf);
  38. }
  39. static inline ssize_t node_read_cpulist(struct sys_device *dev,
  40. struct sysdev_attribute *attr, char *buf)
  41. {
  42. return node_read_cpumap(dev, 1, buf);
  43. }
  /* Read-only per-node attributes: "cpumap" (mask form) and "cpulist" (list form). */
  static SYSDEV_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL);
  static SYSDEV_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
  46. #define K(x) ((x) << (PAGE_SHIFT - 10))
  47. static ssize_t node_read_meminfo(struct sys_device * dev,
  48. struct sysdev_attribute *attr, char * buf)
  49. {
  50. int n;
  51. int nid = dev->id;
  52. struct sysinfo i;
  53. si_meminfo_node(&i, nid);
  54. n = sprintf(buf, "\n"
  55. "Node %d MemTotal: %8lu kB\n"
  56. "Node %d MemFree: %8lu kB\n"
  57. "Node %d MemUsed: %8lu kB\n"
  58. "Node %d Active: %8lu kB\n"
  59. "Node %d Inactive: %8lu kB\n"
  60. "Node %d Active(anon): %8lu kB\n"
  61. "Node %d Inactive(anon): %8lu kB\n"
  62. "Node %d Active(file): %8lu kB\n"
  63. "Node %d Inactive(file): %8lu kB\n"
  64. #ifdef CONFIG_UNEVICTABLE_LRU
  65. "Node %d Unevictable: %8lu kB\n"
  66. "Node %d Mlocked: %8lu kB\n"
  67. #endif
  68. #ifdef CONFIG_HIGHMEM
  69. "Node %d HighTotal: %8lu kB\n"
  70. "Node %d HighFree: %8lu kB\n"
  71. "Node %d LowTotal: %8lu kB\n"
  72. "Node %d LowFree: %8lu kB\n"
  73. #endif
  74. "Node %d Dirty: %8lu kB\n"
  75. "Node %d Writeback: %8lu kB\n"
  76. "Node %d FilePages: %8lu kB\n"
  77. "Node %d Mapped: %8lu kB\n"
  78. "Node %d AnonPages: %8lu kB\n"
  79. "Node %d PageTables: %8lu kB\n"
  80. "Node %d NFS_Unstable: %8lu kB\n"
  81. "Node %d Bounce: %8lu kB\n"
  82. "Node %d WritebackTmp: %8lu kB\n"
  83. "Node %d Slab: %8lu kB\n"
  84. "Node %d SReclaimable: %8lu kB\n"
  85. "Node %d SUnreclaim: %8lu kB\n",
  86. nid, K(i.totalram),
  87. nid, K(i.freeram),
  88. nid, K(i.totalram - i.freeram),
  89. nid, K(node_page_state(nid, NR_ACTIVE_ANON) +
  90. node_page_state(nid, NR_ACTIVE_FILE)),
  91. nid, K(node_page_state(nid, NR_INACTIVE_ANON) +
  92. node_page_state(nid, NR_INACTIVE_FILE)),
  93. nid, K(node_page_state(nid, NR_ACTIVE_ANON)),
  94. nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
  95. nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
  96. nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
  97. #ifdef CONFIG_UNEVICTABLE_LRU
  98. nid, K(node_page_state(nid, NR_UNEVICTABLE)),
  99. nid, K(node_page_state(nid, NR_MLOCK)),
  100. #endif
  101. #ifdef CONFIG_HIGHMEM
  102. nid, K(i.totalhigh),
  103. nid, K(i.freehigh),
  104. nid, K(i.totalram - i.totalhigh),
  105. nid, K(i.freeram - i.freehigh),
  106. #endif
  107. nid, K(node_page_state(nid, NR_FILE_DIRTY)),
  108. nid, K(node_page_state(nid, NR_WRITEBACK)),
  109. nid, K(node_page_state(nid, NR_FILE_PAGES)),
  110. nid, K(node_page_state(nid, NR_FILE_MAPPED)),
  111. nid, K(node_page_state(nid, NR_ANON_PAGES)),
  112. nid, K(node_page_state(nid, NR_PAGETABLE)),
  113. nid, K(node_page_state(nid, NR_UNSTABLE_NFS)),
  114. nid, K(node_page_state(nid, NR_BOUNCE)),
  115. nid, K(node_page_state(nid, NR_WRITEBACK_TEMP)),
  116. nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) +
  117. node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
  118. nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)),
  119. nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE)));
  120. n += hugetlb_report_node_meminfo(nid, buf + n);
  121. return n;
  122. }
  123. #undef K
  124. static SYSDEV_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL);
  125. static ssize_t node_read_numastat(struct sys_device * dev,
  126. struct sysdev_attribute *attr, char * buf)
  127. {
  128. return sprintf(buf,
  129. "numa_hit %lu\n"
  130. "numa_miss %lu\n"
  131. "numa_foreign %lu\n"
  132. "interleave_hit %lu\n"
  133. "local_node %lu\n"
  134. "other_node %lu\n",
  135. node_page_state(dev->id, NUMA_HIT),
  136. node_page_state(dev->id, NUMA_MISS),
  137. node_page_state(dev->id, NUMA_FOREIGN),
  138. node_page_state(dev->id, NUMA_INTERLEAVE_HIT),
  139. node_page_state(dev->id, NUMA_LOCAL),
  140. node_page_state(dev->id, NUMA_OTHER));
  141. }
  142. static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
  143. static ssize_t node_read_distance(struct sys_device * dev,
  144. struct sysdev_attribute *attr, char * buf)
  145. {
  146. int nid = dev->id;
  147. int len = 0;
  148. int i;
  149. /* buf currently PAGE_SIZE, need ~4 chars per node */
  150. BUILD_BUG_ON(MAX_NUMNODES*4 > PAGE_SIZE/2);
  151. for_each_online_node(i)
  152. len += sprintf(buf + len, "%s%d", i ? " " : "", node_distance(nid, i));
  153. len += sprintf(buf + len, "\n");
  154. return len;
  155. }
  156. static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL);
  157. /*
  158. * register_node - Setup a sysfs device for a node.
  159. * @num - Node number to use when creating the device.
  160. *
  161. * Initialize and register the node device.
  162. */
  163. int register_node(struct node *node, int num, struct node *parent)
  164. {
  165. int error;
  166. node->sysdev.id = num;
  167. node->sysdev.cls = &node_class;
  168. error = sysdev_register(&node->sysdev);
  169. if (!error){
  170. sysdev_create_file(&node->sysdev, &attr_cpumap);
  171. sysdev_create_file(&node->sysdev, &attr_cpulist);
  172. sysdev_create_file(&node->sysdev, &attr_meminfo);
  173. sysdev_create_file(&node->sysdev, &attr_numastat);
  174. sysdev_create_file(&node->sysdev, &attr_distance);
  175. scan_unevictable_register_node(node);
  176. }
  177. return error;
  178. }
  179. /**
  180. * unregister_node - unregister a node device
  181. * @node: node going away
  182. *
  183. * Unregisters a node device @node. All the devices on the node must be
  184. * unregistered before calling this function.
  185. */
  186. void unregister_node(struct node *node)
  187. {
  188. sysdev_remove_file(&node->sysdev, &attr_cpumap);
  189. sysdev_remove_file(&node->sysdev, &attr_cpulist);
  190. sysdev_remove_file(&node->sysdev, &attr_meminfo);
  191. sysdev_remove_file(&node->sysdev, &attr_numastat);
  192. sysdev_remove_file(&node->sysdev, &attr_distance);
  193. scan_unevictable_unregister_node(node);
  194. sysdev_unregister(&node->sysdev);
  195. }
  196. struct node node_devices[MAX_NUMNODES];
  197. /*
  198. * register cpu under node
  199. */
  200. int register_cpu_under_node(unsigned int cpu, unsigned int nid)
  201. {
  202. if (node_online(nid)) {
  203. struct sys_device *obj = get_cpu_sysdev(cpu);
  204. if (!obj)
  205. return 0;
  206. return sysfs_create_link(&node_devices[nid].sysdev.kobj,
  207. &obj->kobj,
  208. kobject_name(&obj->kobj));
  209. }
  210. return 0;
  211. }
  212. int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
  213. {
  214. if (node_online(nid)) {
  215. struct sys_device *obj = get_cpu_sysdev(cpu);
  216. if (obj)
  217. sysfs_remove_link(&node_devices[nid].sysdev.kobj,
  218. kobject_name(&obj->kobj));
  219. }
  220. return 0;
  221. }
  222. #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
  223. #define page_initialized(page) (page->lru.next)
  224. static int get_nid_for_pfn(unsigned long pfn)
  225. {
  226. struct page *page;
  227. if (!pfn_valid_within(pfn))
  228. return -1;
  229. page = pfn_to_page(pfn);
  230. if (!page_initialized(page))
  231. return -1;
  232. return pfn_to_nid(pfn);
  233. }
  234. /* register memory section under specified node if it spans that node */
  235. int register_mem_sect_under_node(struct memory_block *mem_blk, int nid)
  236. {
  237. unsigned long pfn, sect_start_pfn, sect_end_pfn;
  238. if (!mem_blk)
  239. return -EFAULT;
  240. if (!node_online(nid))
  241. return 0;
  242. sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
  243. sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
  244. for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
  245. int page_nid;
  246. page_nid = get_nid_for_pfn(pfn);
  247. if (page_nid < 0)
  248. continue;
  249. if (page_nid != nid)
  250. continue;
  251. return sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj,
  252. &mem_blk->sysdev.kobj,
  253. kobject_name(&mem_blk->sysdev.kobj));
  254. }
  255. /* mem section does not span the specified node */
  256. return 0;
  257. }
  258. /* unregister memory section under all nodes that it spans */
  259. int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
  260. {
  261. nodemask_t unlinked_nodes;
  262. unsigned long pfn, sect_start_pfn, sect_end_pfn;
  263. if (!mem_blk)
  264. return -EFAULT;
  265. nodes_clear(unlinked_nodes);
  266. sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
  267. sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
  268. for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
  269. int nid;
  270. nid = get_nid_for_pfn(pfn);
  271. if (nid < 0)
  272. continue;
  273. if (!node_online(nid))
  274. continue;
  275. if (node_test_and_set(nid, unlinked_nodes))
  276. continue;
  277. sysfs_remove_link(&node_devices[nid].sysdev.kobj,
  278. kobject_name(&mem_blk->sysdev.kobj));
  279. }
  280. return 0;
  281. }
  282. static int link_mem_sections(int nid)
  283. {
  284. unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn;
  285. unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages;
  286. unsigned long pfn;
  287. int err = 0;
  288. for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
  289. unsigned long section_nr = pfn_to_section_nr(pfn);
  290. struct mem_section *mem_sect;
  291. struct memory_block *mem_blk;
  292. int ret;
  293. if (!present_section_nr(section_nr))
  294. continue;
  295. mem_sect = __nr_to_section(section_nr);
  296. mem_blk = find_memory_block(mem_sect);
  297. ret = register_mem_sect_under_node(mem_blk, nid);
  298. if (!err)
  299. err = ret;
  300. /* discard ref obtained in find_memory_block() */
  301. kobject_put(&mem_blk->sysdev.kobj);
  302. }
  303. return err;
  304. }
  305. #else
  /* Stub used when CONFIG_MEMORY_HOTPLUG_SPARSE is not set: nothing to link. */
  static int link_mem_sections(int nid)
  {
      return 0;
  }
  307. #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
  308. int register_one_node(int nid)
  309. {
  310. int error = 0;
  311. int cpu;
  312. if (node_online(nid)) {
  313. int p_node = parent_node(nid);
  314. struct node *parent = NULL;
  315. if (p_node != nid)
  316. parent = &node_devices[p_node];
  317. error = register_node(&node_devices[nid], nid, parent);
  318. /* link cpu under this node */
  319. for_each_present_cpu(cpu) {
  320. if (cpu_to_node(cpu) == nid)
  321. register_cpu_under_node(cpu, nid);
  322. }
  323. /* link memory sections under this node */
  324. error = link_mem_sections(nid);
  325. }
  326. return error;
  327. }
  328. void unregister_one_node(int nid)
  329. {
  330. unregister_node(&node_devices[nid]);
  331. }
  332. /*
  333. * node states attributes
  334. */
  335. static ssize_t print_nodes_state(enum node_states state, char *buf)
  336. {
  337. int n;
  338. n = nodelist_scnprintf(buf, PAGE_SIZE, node_states[state]);
  339. if (n > 0 && PAGE_SIZE > n + 1) {
  340. *(buf + n++) = '\n';
  341. *(buf + n++) = '\0';
  342. }
  343. return n;
  344. }
  345. static ssize_t print_nodes_possible(struct sysdev_class *class, char *buf)
  346. {
  347. return print_nodes_state(N_POSSIBLE, buf);
  348. }
  349. static ssize_t print_nodes_online(struct sysdev_class *class, char *buf)
  350. {
  351. return print_nodes_state(N_ONLINE, buf);
  352. }
  353. static ssize_t print_nodes_has_normal_memory(struct sysdev_class *class,
  354. char *buf)
  355. {
  356. return print_nodes_state(N_NORMAL_MEMORY, buf);
  357. }
  358. static ssize_t print_nodes_has_cpu(struct sysdev_class *class, char *buf)
  359. {
  360. return print_nodes_state(N_CPU, buf);
  361. }
  362. static SYSDEV_CLASS_ATTR(possible, 0444, print_nodes_possible, NULL);
  363. static SYSDEV_CLASS_ATTR(online, 0444, print_nodes_online, NULL);
  364. static SYSDEV_CLASS_ATTR(has_normal_memory, 0444, print_nodes_has_normal_memory,
  365. NULL);
  366. static SYSDEV_CLASS_ATTR(has_cpu, 0444, print_nodes_has_cpu, NULL);
  #ifdef CONFIG_HIGHMEM
  /*
   * Class attribute show handler: prints the N_HIGH_MEMORY node mask.
   * Only built when CONFIG_HIGHMEM is set.
   */
  static ssize_t print_nodes_has_high_memory(struct sysdev_class *class,
                                             char *buf)
  {
      const enum node_states which = N_HIGH_MEMORY;

      return print_nodes_state(which, buf);
  }

  static SYSDEV_CLASS_ATTR(has_high_memory, 0444,
                           print_nodes_has_high_memory, NULL);
  #endif
  376. struct sysdev_class_attribute *node_state_attr[] = {
  377. &attr_possible,
  378. &attr_online,
  379. &attr_has_normal_memory,
  380. #ifdef CONFIG_HIGHMEM
  381. &attr_has_high_memory,
  382. #endif
  383. &attr_has_cpu,
  384. };
  385. static int node_states_init(void)
  386. {
  387. int i;
  388. int err = 0;
  389. for (i = 0; i < NR_NODE_STATES; i++) {
  390. int ret;
  391. ret = sysdev_class_create_file(&node_class, node_state_attr[i]);
  392. if (!err)
  393. err = ret;
  394. }
  395. return err;
  396. }
  397. static int __init register_node_type(void)
  398. {
  399. int ret;
  400. ret = sysdev_class_register(&node_class);
  401. if (!ret)
  402. ret = node_states_init();
  403. /*
  404. * Note: we're not going to unregister the node class if we fail
  405. * to register the node state class attribute files.
  406. */
  407. return ret;
  408. }
  409. postcore_initcall(register_node_type);