/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *              Christoph Lameter <christoph@lameter.com>
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>

/*
 * Accumulate the page_state information across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
DEFINE_PER_CPU(struct page_state, page_states) = {0};

static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
{
        unsigned cpu;

        memset(ret, 0, nr * sizeof(unsigned long));
        cpus_and(*cpumask, *cpumask, cpu_online_map);

        for_each_cpu_mask(cpu, *cpumask) {
                unsigned long *in;
                unsigned long *out;
                unsigned off;
                unsigned next_cpu;

                in = (unsigned long *)&per_cpu(page_states, cpu);

                next_cpu = next_cpu(cpu, *cpumask);
                if (likely(next_cpu < NR_CPUS))
                        prefetch(&per_cpu(page_states, next_cpu));

                out = (unsigned long *)ret;
                for (off = 0; off < nr; off++)
                        *out++ += *in++;
        }
}
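
/*
 * Snapshot of the accumulated page_state over all online CPUs: the
 * CPU_MASK_ALL mask passed in here is ANDed with cpu_online_map inside
 * __get_page_state().
 */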
void get_full_page_state(struct page_state *ret)
{
        cpumask_t mask = CPU_MASK_ALL;

        __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
}
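
/*
 * Add "delta" to the event counter found "offset" bytes into this CPU's
 * struct page_state.  The __ variant may only be used when the caller is
 * already safe against re-entry (e.g. interrupts disabled); the plain
 * variant disables local interrupts around the update itself.
 */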
void __mod_page_state_offset(unsigned long offset, unsigned long delta)
{
        void *ptr;

        ptr = &__get_cpu_var(page_states);
        *(unsigned long *)(ptr + offset) += delta;
}
EXPORT_SYMBOL(__mod_page_state_offset);

void mod_page_state_offset(unsigned long offset, unsigned long delta)
{
        unsigned long flags;
        void *ptr;

        local_irq_save(flags);
        ptr = &__get_cpu_var(page_states);
        *(unsigned long *)(ptr + offset) += delta;
        local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_page_state_offset);
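
/*
 * Sum the active, inactive and free page counts over the zones of a
 * single node.
 */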
void __get_zone_counts(unsigned long *active, unsigned long *inactive,
                        unsigned long *free, struct pglist_data *pgdat)
{
        struct zone *zones = pgdat->node_zones;
        int i;

        *active = 0;
        *inactive = 0;
        *free = 0;
        for (i = 0; i < MAX_NR_ZONES; i++) {
                *active += zones[i].nr_active;
                *inactive += zones[i].nr_inactive;
                *free += zones[i].free_pages;
        }
}
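
/*
 * System-wide totals: accumulate the per-node counts over all online nodes.
 */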
void get_zone_counts(unsigned long *active,
                unsigned long *inactive, unsigned long *free)
{
        struct pglist_data *pgdat;

        *active = 0;
        *inactive = 0;
        *free = 0;
        for_each_online_pgdat(pgdat) {
                unsigned long l, m, n;

                __get_zone_counts(&l, &m, &n, pgdat);
                *active += l;
                *inactive += m;
                *free += n;
        }
}

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP
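
/*
 * A per-cpu differential is folded into the global vm_stat[] array once its
 * absolute value exceeds this threshold.  This bounds both the error of a
 * global counter read and the rate of atomic updates.
 */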
#define STAT_THRESHOLD 32

/*
 * Determine pointer to currently valid differential byte given a zone and
 * the item number.
 *
 * Preemption must be off.
 */
static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item)
{
        return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item];
}

/*
 * For use when we know that interrupts are disabled.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
                                int delta)
{
        s8 *p;
        long x;

        p = diff_pointer(zone, item);
        x = delta + *p;

        if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) {
                zone_page_state_add(x, zone, item);
                x = 0;
        }

        *p = x;
}
EXPORT_SYMBOL(__mod_zone_page_state);

/*
 * For an unknown interrupt state.
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
                                int delta)
{
        unsigned long flags;

        local_irq_save(flags);
        __mod_zone_page_state(zone, item, delta);
        local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 *
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
        struct zone *zone = page_zone(page);
        s8 *p = diff_pointer(zone, item);

        (*p)++;

        if (unlikely(*p > STAT_THRESHOLD)) {
                zone_page_state_add(*p, zone, item);
                *p = 0;
        }
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
        struct zone *zone = page_zone(page);
        s8 *p = diff_pointer(zone, item);

        (*p)--;

        if (unlikely(*p < -STAT_THRESHOLD)) {
                zone_page_state_add(*p, zone, item);
                *p = 0;
        }
}
EXPORT_SYMBOL(__dec_zone_page_state);
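
/*
 * Interrupt-safe variants: the same as __inc/__dec_zone_page_state() but
 * with local interrupts disabled around the update.
 */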
void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
        unsigned long flags;
        struct zone *zone;
        s8 *p;

        zone = page_zone(page);
        local_irq_save(flags);
        p = diff_pointer(zone, item);

        (*p)++;

        if (unlikely(*p > STAT_THRESHOLD)) {
                zone_page_state_add(*p, zone, item);
                *p = 0;
        }
        local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
        unsigned long flags;
        struct zone *zone;
        s8 *p;

        zone = page_zone(page);
        local_irq_save(flags);
        p = diff_pointer(zone, item);

        (*p)--;

        if (unlikely(*p < -STAT_THRESHOLD)) {
                zone_page_state_add(*p, zone, item);
                *p = 0;
        }
        local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);

/*
 * Update the zone counters for one cpu.
 */
void refresh_cpu_vm_stats(int cpu)
{
        struct zone *zone;
        int i;
        unsigned long flags;

        for_each_zone(zone) {
                struct per_cpu_pageset *pcp;

                pcp = zone_pcp(zone, cpu);

                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                        if (pcp->vm_stat_diff[i]) {
                                local_irq_save(flags);
                                zone_page_state_add(pcp->vm_stat_diff[i],
                                        zone, i);
                                pcp->vm_stat_diff[i] = 0;
                                local_irq_restore(flags);
                        }
        }
}
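
/*
 * Callback for on_each_cpu(): fold the calling CPU's differentials into
 * the global counters.
 */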
static void __refresh_cpu_vm_stats(void *dummy)
{
        refresh_cpu_vm_stats(smp_processor_id());
}

/*
 * Consolidate all counters.
 *
 * Note that the result is less inaccurate but still inaccurate
 * if concurrent processes are allowed to run.
 */
void refresh_vm_stats(void)
{
        on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1);
}
EXPORT_SYMBOL(refresh_vm_stats);

#endif

#ifdef CONFIG_PROC_FS

#include <linux/seq_file.h>
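
/*
 * seq_file iterator over all online nodes, shared by the fragmentation and
 * zoneinfo files below: *pos selects which pgdat is shown.
 */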
static void *frag_start(struct seq_file *m, loff_t *pos)
{
        pg_data_t *pgdat;
        loff_t node = *pos;

        for (pgdat = first_online_pgdat();
             pgdat && node;
             pgdat = next_online_pgdat(pgdat))
                --node;

        return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
        pg_data_t *pgdat = (pg_data_t *)arg;

        (*pos)++;
        return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
        pg_data_t *pgdat = (pg_data_t *)arg;
        struct zone *zone;
        struct zone *node_zones = pgdat->node_zones;
        unsigned long flags;
        int order;

        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
                if (!populated_zone(zone))
                        continue;

                spin_lock_irqsave(&zone->lock, flags);
                seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
                for (order = 0; order < MAX_ORDER; ++order)
                        seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
                spin_unlock_irqrestore(&zone->lock, flags);
                seq_putc(m, '\n');
        }
        return 0;
}

struct seq_operations fragmentation_op = {
        .start  = frag_start,
        .next   = frag_next,
        .stop   = frag_stop,
        .show   = frag_show,
};
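
/*
 * Names reported through /proc/vmstat, in the order in which vmstat_start()
 * lays out the values: the zoned counters (enum zone_stat_item) first,
 * followed by the event counters taken from struct page_state.
 */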
static char *vmstat_text[] = {
        /* Zoned VM counters */
        "nr_anon_pages",
        "nr_mapped",
        "nr_file_pages",
        "nr_slab",
        "nr_page_table_pages",
        "nr_dirty",
        "nr_writeback",
        "nr_unstable",
        "nr_bounce",

        /* Event counters */
        "pgpgin",
        "pgpgout",
        "pswpin",
        "pswpout",

        "pgalloc_high",
        "pgalloc_normal",
        "pgalloc_dma32",
        "pgalloc_dma",

        "pgfree",
        "pgactivate",
        "pgdeactivate",

        "pgfault",
        "pgmajfault",

        "pgrefill_high",
        "pgrefill_normal",
        "pgrefill_dma32",
        "pgrefill_dma",

        "pgsteal_high",
        "pgsteal_normal",
        "pgsteal_dma32",
        "pgsteal_dma",

        "pgscan_kswapd_high",
        "pgscan_kswapd_normal",
        "pgscan_kswapd_dma32",
        "pgscan_kswapd_dma",

        "pgscan_direct_high",
        "pgscan_direct_normal",
        "pgscan_direct_dma32",
        "pgscan_direct_dma",

        "pginodesteal",
        "slabs_scanned",
        "kswapd_steal",
        "kswapd_inodesteal",
        "pageoutrun",
        "allocstall",

        "pgrotated",
};

/*
 * Output information about zones in @pgdat.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
        pg_data_t *pgdat = arg;
        struct zone *zone;
        struct zone *node_zones = pgdat->node_zones;
        unsigned long flags;

        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
                int i;

                if (!populated_zone(zone))
                        continue;

                spin_lock_irqsave(&zone->lock, flags);
                seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
                seq_printf(m,
                           "\n  pages free     %lu"
                           "\n        min      %lu"
                           "\n        low      %lu"
                           "\n        high     %lu"
                           "\n        active   %lu"
                           "\n        inactive %lu"
                           "\n        scanned  %lu (a: %lu i: %lu)"
                           "\n        spanned  %lu"
                           "\n        present  %lu",
                           zone->free_pages,
                           zone->pages_min,
                           zone->pages_low,
                           zone->pages_high,
                           zone->nr_active,
                           zone->nr_inactive,
                           zone->pages_scanned,
                           zone->nr_scan_active, zone->nr_scan_inactive,
                           zone->spanned_pages,
                           zone->present_pages);

                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                        seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
                                        zone_page_state(zone, i));

                seq_printf(m,
                           "\n        protection: (%lu",
                           zone->lowmem_reserve[0]);
                for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
                        seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
                seq_printf(m,
                           ")"
                           "\n  pagesets");
                for_each_online_cpu(i) {
                        struct per_cpu_pageset *pageset;
                        int j;

                        pageset = zone_pcp(zone, i);
                        for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
                                if (pageset->pcp[j].count)
                                        break;
                        }
                        if (j == ARRAY_SIZE(pageset->pcp))
                                continue;

                        for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
                                seq_printf(m,
                                           "\n    cpu: %i pcp: %i"
                                           "\n              count: %i"
                                           "\n              high:  %i"
                                           "\n              batch: %i",
                                           i, j,
                                           pageset->pcp[j].count,
                                           pageset->pcp[j].high,
                                           pageset->pcp[j].batch);
                        }
#ifdef CONFIG_NUMA
                        seq_printf(m,
                                   "\n            numa_hit:       %lu"
                                   "\n            numa_miss:      %lu"
                                   "\n            numa_foreign:   %lu"
                                   "\n            interleave_hit: %lu"
                                   "\n            local_node:     %lu"
                                   "\n            other_node:     %lu",
                                   pageset->numa_hit,
                                   pageset->numa_miss,
                                   pageset->numa_foreign,
                                   pageset->interleave_hit,
                                   pageset->local_node,
                                   pageset->other_node);
#endif
                }
                seq_printf(m,
                           "\n  all_unreclaimable: %u"
                           "\n  prev_priority:     %i"
                           "\n  temp_priority:     %i"
                           "\n  start_pfn:         %lu",
                           zone->all_unreclaimable,
                           zone->prev_priority,
                           zone->temp_priority,
                           zone->zone_start_pfn);
                spin_unlock_irqrestore(&zone->lock, flags);
                seq_putc(m, '\n');
        }
        return 0;
}

struct seq_operations zoneinfo_op = {
        .start  = frag_start, /* iterate over all zones. The same as in
                               * fragmentation. */
        .next   = frag_next,
        .stop   = frag_stop,
        .show   = zoneinfo_show,
};
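
/*
 * ->start for /proc/vmstat: take a snapshot of the zoned counters followed
 * by the accumulated struct page_state into a scratch buffer, then hand out
 * one value per ->show call.
 */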
static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
        unsigned long *v;
        struct page_state *ps;
        int i;

        if (*pos >= ARRAY_SIZE(vmstat_text))
                return NULL;

        v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
                        + sizeof(*ps), GFP_KERNEL);
        m->private = v;
        if (!v)
                return ERR_PTR(-ENOMEM);
        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                v[i] = global_page_state(i);
        ps = (struct page_state *)(v + NR_VM_ZONE_STAT_ITEMS);
        get_full_page_state(ps);
        ps->pgpgin /= 2;                /* sectors -> kbytes */
        ps->pgpgout /= 2;
        return v + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
        (*pos)++;
        if (*pos >= ARRAY_SIZE(vmstat_text))
                return NULL;
        return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
        unsigned long *l = arg;
        unsigned long off = l - (unsigned long *)m->private;

        seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
        return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
        kfree(m->private);
        m->private = NULL;
}

struct seq_operations vmstat_op = {
        .start  = vmstat_start,
        .next   = vmstat_next,
        .stop   = vmstat_stop,
        .show   = vmstat_show,
};

#endif /* CONFIG_PROC_FS */