vmstat.c

/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *              Christoph Lameter <christoph@lameter.com>
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>

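/*
 * Sum up the active, inactive and free page counts of all zones in one node.
 */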
void __get_zone_counts(unsigned long *active, unsigned long *inactive,
                        unsigned long *free, struct pglist_data *pgdat)
{
        struct zone *zones = pgdat->node_zones;
        int i;

        *active = 0;
        *inactive = 0;
        *free = 0;
        for (i = 0; i < MAX_NR_ZONES; i++) {
                *active += zones[i].nr_active;
                *inactive += zones[i].nr_inactive;
                *free += zones[i].free_pages;
        }
}

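/*
 * Sum the active, inactive and free page counts over all online nodes.
 */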
void get_zone_counts(unsigned long *active,
                unsigned long *inactive, unsigned long *free)
{
        struct pglist_data *pgdat;

        *active = 0;
        *inactive = 0;
        *free = 0;
        for_each_online_pgdat(pgdat) {
                unsigned long l, m, n;

                __get_zone_counts(&l, &m, &n, pgdat);
                *active += l;
                *inactive += m;
                *free += n;
        }
}

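/*
 * Per-cpu VM event counters.  They are summed over all CPUs for reporting
 * and, with CPU hotplug, folded into another CPU's counters when a
 * processor goes away.
 */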
#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

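/*
 * Sum the per-cpu event counters of the CPUs in @cpumask into @ret,
 * prefetching the next CPU's state while the current one is accumulated.
 */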
static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
{
        int cpu = 0;
        int i;

        memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

        cpu = first_cpu(*cpumask);
        while (cpu < NR_CPUS) {
                struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

                cpu = next_cpu(cpu, *cpumask);
                if (cpu < NR_CPUS)
                        prefetch(&per_cpu(vm_event_states, cpu));

                for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
                        ret[i] += this->event[i];
        }
}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
        sum_vm_events(ret, &cpu_online_map);
}
EXPORT_SYMBOL_GPL(all_vm_events);

#ifdef CONFIG_HOTPLUG
/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * but keeps the global counts constant.
 */
void vm_events_fold_cpu(int cpu)
{
        struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
        int i;

        for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
                count_vm_events(i, fold_state->event[i]);
                fold_state->event[i] = 0;
        }
}
#endif /* CONFIG_HOTPLUG */

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP

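/*
 * A per-cpu differential is folded into the global vm_stat counter once
 * its absolute value crosses this threshold.
 */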
#define STAT_THRESHOLD 32

/*
 * Determine pointer to currently valid differential byte given a zone and
 * the item number.
 *
 * Preemption must be off.
 */
static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item)
{
        return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item];
}

/*
 * For use when we know that interrupts are disabled.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
                                int delta)
{
        s8 *p;
        long x;

        p = diff_pointer(zone, item);
        x = delta + *p;

        if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) {
                zone_page_state_add(x, zone, item);
                x = 0;
        }

        *p = x;
}
EXPORT_SYMBOL(__mod_zone_page_state);

/*
 * For an unknown interrupt state
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
                                int delta)
{
        unsigned long flags;

        local_irq_save(flags);
        __mod_zone_page_state(zone, item, delta);
        local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 *
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */
static void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
        s8 *p = diff_pointer(zone, item);

        (*p)++;

        if (unlikely(*p > STAT_THRESHOLD)) {
                zone_page_state_add(*p + STAT_THRESHOLD / 2, zone, item);
                *p = -STAT_THRESHOLD / 2;
        }
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
        __inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
        struct zone *zone = page_zone(page);
        s8 *p = diff_pointer(zone, item);

        (*p)--;

        if (unlikely(*p < -STAT_THRESHOLD)) {
                zone_page_state_add(*p - STAT_THRESHOLD / 2, zone, item);
                *p = STAT_THRESHOLD / 2;
        }
}
EXPORT_SYMBOL(__dec_zone_page_state);

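/*
 * Interrupt-safe wrappers: disable interrupts so the per-cpu differential
 * cannot be modified concurrently from interrupt context.
 */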
void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
        unsigned long flags;

        local_irq_save(flags);
        __inc_zone_state(zone, item);
        local_irq_restore(flags);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
        unsigned long flags;
        struct zone *zone;

        zone = page_zone(page);
        local_irq_save(flags);
        __inc_zone_state(zone, item);
        local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
        unsigned long flags;

        local_irq_save(flags);
        __dec_zone_page_state(page, item);
        local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);

/*
 * Update the zone counters for one cpu.
 */
void refresh_cpu_vm_stats(int cpu)
{
        struct zone *zone;
        int i;
        unsigned long flags;

        for_each_zone(zone) {
                struct per_cpu_pageset *pcp;

                pcp = zone_pcp(zone, cpu);

                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                        if (pcp->vm_stat_diff[i]) {
                                local_irq_save(flags);
                                zone_page_state_add(pcp->vm_stat_diff[i],
                                        zone, i);
                                pcp->vm_stat_diff[i] = 0;
                                local_irq_restore(flags);
                        }
        }
}

static void __refresh_cpu_vm_stats(void *dummy)
{
        refresh_cpu_vm_stats(smp_processor_id());
}

/*
 * Consolidate all counters.
 *
 * Note that the result is less inaccurate but still inaccurate
 * if concurrent processes are allowed to run.
 */
void refresh_vm_stats(void)
{
        on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1);
}
EXPORT_SYMBOL(refresh_vm_stats);

#endif /* CONFIG_SMP */

#ifdef CONFIG_NUMA
/*
 * zonelist = the list of zones passed to the allocator
 * z        = the zone from which the allocation occurred.
 *
 * Must be called with interrupts disabled.
 */
void zone_statistics(struct zonelist *zonelist, struct zone *z)
{
        if (z->zone_pgdat == zonelist->zones[0]->zone_pgdat) {
                __inc_zone_state(z, NUMA_HIT);
        } else {
                __inc_zone_state(z, NUMA_MISS);
                __inc_zone_state(zonelist->zones[0], NUMA_FOREIGN);
        }
        if (z->zone_pgdat == NODE_DATA(numa_node_id()))
                __inc_zone_state(z, NUMA_LOCAL);
        else
                __inc_zone_state(z, NUMA_OTHER);
}
#endif /* CONFIG_NUMA */

#ifdef CONFIG_PROC_FS

#include <linux/seq_file.h>

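/*
 * The seq_file iterators below treat each online node (pg_data_t) as one
 * record; *pos is simply the index of the next node to return.
 */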
static void *frag_start(struct seq_file *m, loff_t *pos)
{
        pg_data_t *pgdat;
        loff_t node = *pos;

        for (pgdat = first_online_pgdat();
             pgdat && node;
             pgdat = next_online_pgdat(pgdat))
                --node;

        return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
        pg_data_t *pgdat = (pg_data_t *)arg;

        (*pos)++;
        return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
        pg_data_t *pgdat = (pg_data_t *)arg;
        struct zone *zone;
        struct zone *node_zones = pgdat->node_zones;
        unsigned long flags;
        int order;

        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
                if (!populated_zone(zone))
                        continue;

                spin_lock_irqsave(&zone->lock, flags);
                seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
                for (order = 0; order < MAX_ORDER; ++order)
                        seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
                spin_unlock_irqrestore(&zone->lock, flags);
                seq_putc(m, '\n');
        }
        return 0;
}

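/*
 * seq_file operations for the per-zone free area report (/proc/buddyinfo).
 */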
struct seq_operations fragmentation_op = {
        .start = frag_start,
        .next  = frag_next,
        .stop  = frag_stop,
        .show  = frag_show,
};

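/*
 * Names for the /proc/vmstat fields.  The ordering must follow the order
 * of enum zone_stat_item and of the VM event counters, since the output
 * code indexes both with the same offsets.
 */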
static char *vmstat_text[] = {
        /* Zoned VM counters */
        "nr_anon_pages",
        "nr_mapped",
        "nr_file_pages",
        "nr_slab",
        "nr_page_table_pages",
        "nr_dirty",
        "nr_writeback",
        "nr_unstable",
        "nr_bounce",

#ifdef CONFIG_NUMA
        "numa_hit",
        "numa_miss",
        "numa_foreign",
        "numa_interleave",
        "numa_local",
        "numa_other",
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
        "pgpgin",
        "pgpgout",
        "pswpin",
        "pswpout",
        "pgalloc_dma",
        "pgalloc_dma32",
        "pgalloc_normal",
        "pgalloc_high",
        "pgfree",
        "pgactivate",
        "pgdeactivate",
        "pgfault",
        "pgmajfault",
        "pgrefill_dma",
        "pgrefill_dma32",
        "pgrefill_normal",
        "pgrefill_high",
        "pgsteal_dma",
        "pgsteal_dma32",
        "pgsteal_normal",
        "pgsteal_high",
        "pgscan_kswapd_dma",
        "pgscan_kswapd_dma32",
        "pgscan_kswapd_normal",
        "pgscan_kswapd_high",
        "pgscan_direct_dma",
        "pgscan_direct_dma32",
        "pgscan_direct_normal",
        "pgscan_direct_high",
        "pginodesteal",
        "slabs_scanned",
        "kswapd_steal",
        "kswapd_inodesteal",
        "pageoutrun",
        "allocstall",
        "pgrotated",
#endif
};

/*
 * Output information about zones in @pgdat.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
        pg_data_t *pgdat = arg;
        struct zone *zone;
        struct zone *node_zones = pgdat->node_zones;
        unsigned long flags;

        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
                int i;

                if (!populated_zone(zone))
                        continue;

                spin_lock_irqsave(&zone->lock, flags);
                seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
                seq_printf(m,
                        "\n pages free %lu"
                        "\n min %lu"
                        "\n low %lu"
                        "\n high %lu"
                        "\n active %lu"
                        "\n inactive %lu"
                        "\n scanned %lu (a: %lu i: %lu)"
                        "\n spanned %lu"
                        "\n present %lu",
                        zone->free_pages,
                        zone->pages_min,
                        zone->pages_low,
                        zone->pages_high,
                        zone->nr_active,
                        zone->nr_inactive,
                        zone->pages_scanned,
                        zone->nr_scan_active, zone->nr_scan_inactive,
                        zone->spanned_pages,
                        zone->present_pages);

                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                        seq_printf(m, "\n %-12s %lu", vmstat_text[i],
                                        zone_page_state(zone, i));

                seq_printf(m,
                        "\n protection: (%lu",
                        zone->lowmem_reserve[0]);
                for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
                        seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
                seq_printf(m,
                        ")"
                        "\n pagesets");
                for_each_online_cpu(i) {
                        struct per_cpu_pageset *pageset;
                        int j;

                        pageset = zone_pcp(zone, i);
                        for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
                                if (pageset->pcp[j].count)
                                        break;
                        }
                        if (j == ARRAY_SIZE(pageset->pcp))
                                continue;

                        for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
                                seq_printf(m,
                                        "\n cpu: %i pcp: %i"
                                        "\n count: %i"
                                        "\n high: %i"
                                        "\n batch: %i",
                                        i, j,
                                        pageset->pcp[j].count,
                                        pageset->pcp[j].high,
                                        pageset->pcp[j].batch);
                        }
                }
                seq_printf(m,
                        "\n all_unreclaimable: %u"
                        "\n prev_priority: %i"
                        "\n temp_priority: %i"
                        "\n start_pfn: %lu",
                        zone->all_unreclaimable,
                        zone->prev_priority,
                        zone->temp_priority,
                        zone->zone_start_pfn);
                spin_unlock_irqrestore(&zone->lock, flags);
                seq_putc(m, '\n');
        }
        return 0;
}

struct seq_operations zoneinfo_op = {
        .start = frag_start, /* iterate over all zones. The same as in
                              * fragmentation. */
        .next  = frag_next,
        .stop  = frag_stop,
        .show  = zoneinfo_show,
};

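/*
 * /proc/vmstat output: snapshot all zoned VM counters (and the VM event
 * counters, if configured) into a private buffer when the walk starts,
 * then emit one "name value" line per entry.
 */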
static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
        unsigned long *v;
#ifdef CONFIG_VM_EVENT_COUNTERS
        unsigned long *e;
#endif
        int i;

        if (*pos >= ARRAY_SIZE(vmstat_text))
                return NULL;

#ifdef CONFIG_VM_EVENT_COUNTERS
        v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
                        + sizeof(struct vm_event_state), GFP_KERNEL);
#else
        v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
                        GFP_KERNEL);
#endif
        m->private = v;
        if (!v)
                return ERR_PTR(-ENOMEM);
        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                v[i] = global_page_state(i);
#ifdef CONFIG_VM_EVENT_COUNTERS
        e = v + NR_VM_ZONE_STAT_ITEMS;
        all_vm_events(e);
        e[PGPGIN] /= 2;         /* sectors -> kbytes */
        e[PGPGOUT] /= 2;
#endif
        return v + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
        (*pos)++;
        if (*pos >= ARRAY_SIZE(vmstat_text))
                return NULL;
        return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
        unsigned long *l = arg;
        unsigned long off = l - (unsigned long *)m->private;

        seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
        return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
        kfree(m->private);
        m->private = NULL;
}

struct seq_operations vmstat_op = {
        .start = vmstat_start,
        .next  = vmstat_next,
        .stop  = vmstat_stop,
        .show  = vmstat_show,
};

#endif /* CONFIG_PROC_FS */