vmstat.c

/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *              Christoph Lameter <christoph@lameter.com>
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>

void __get_zone_counts(unsigned long *active, unsigned long *inactive,
                        unsigned long *free, struct pglist_data *pgdat)
{
        struct zone *zones = pgdat->node_zones;
        int i;

        *active = 0;
        *inactive = 0;
        *free = 0;
        for (i = 0; i < MAX_NR_ZONES; i++) {
                *active += zones[i].nr_active;
                *inactive += zones[i].nr_inactive;
                *free += zones[i].free_pages;
        }
}

void get_zone_counts(unsigned long *active,
                unsigned long *inactive, unsigned long *free)
{
        struct pglist_data *pgdat;

        *active = 0;
        *inactive = 0;
        *free = 0;
        for_each_online_pgdat(pgdat) {
                unsigned long l, m, n;

                __get_zone_counts(&l, &m, &n, pgdat);
                *active += l;
                *inactive += m;
                *free += n;
        }
}

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
{
        int cpu = 0;
        int i;

        memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

        cpu = first_cpu(*cpumask);
        while (cpu < NR_CPUS) {
                struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

                cpu = next_cpu(cpu, *cpumask);

                if (cpu < NR_CPUS)
                        prefetch(&per_cpu(vm_event_states, cpu));

                for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
                        ret[i] += this->event[i];
        }
}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
        sum_vm_events(ret, &cpu_online_map);
}
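
/*
 * Illustrative sketch, not part of the original file: one way a caller
 * could snapshot the event counters.  dump_pgfault_count() is a
 * hypothetical name; NR_VM_EVENT_ITEMS and PGFAULT come from
 * <linux/vmstat.h>.  Per the comment above, the snapshot is only
 * approximate.
 */
#if 0
static void dump_pgfault_count(void)
{
        unsigned long events[NR_VM_EVENT_ITEMS];

        all_vm_events(events);  /* approximate, racy snapshot */
        printk(KERN_INFO "page faults so far: %lu\n", events[PGFAULT]);
}
#endif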

#ifdef CONFIG_HOTPLUG
/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * but keeps the global counts constant.
 */
void vm_events_fold_cpu(int cpu)
{
        struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
        int i;

        for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
                count_vm_events(i, fold_state->event[i]);
                fold_state->event[i] = 0;
        }
}
#endif /* CONFIG_HOTPLUG */

#endif /* CONFIG_VM_EVENT_COUNTERS */
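
/*
 * Illustrative sketch, not part of the original file: the real caller of
 * vm_events_fold_cpu() is the CPU hotplug notifier in mm/page_alloc.c;
 * this is a loose model of it under the hypothetical name my_cpu_notify().
 */
#if 0
static int my_cpu_notify(struct notifier_block *self, unsigned long action,
                        void *hcpu)
{
        int cpu = (unsigned long)hcpu;

        if (action == CPU_DEAD)
                vm_events_fold_cpu(cpu);  /* dead CPU's events move here */
        return NOTIFY_OK;
}
#endif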

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP

#define STAT_THRESHOLD 32

/*
 * Determine pointer to currently valid differential byte given a zone and
 * the item number.
 *
 * Preemption must be off
 */
static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item)
{
        return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item];
}

/*
 * For use when we know that interrupts are disabled.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
                                int delta)
{
        s8 *p;
        long x;

        p = diff_pointer(zone, item);
        x = delta + *p;

        if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) {
                zone_page_state_add(x, zone, item);
                x = 0;
        }

        *p = x;
}
EXPORT_SYMBOL(__mod_zone_page_state);

/*
 * For an unknown interrupt state
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
                                int delta)
{
        unsigned long flags;

        local_irq_save(flags);
        __mod_zone_page_state(zone, item, delta);
        local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);
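
/*
 * Illustrative sketch, not part of the original file: choosing between
 * the two variants by interrupt state.  my_set_dirty()/my_clear_dirty()
 * are hypothetical callers; NR_FILE_DIRTY is a real zone_stat_item.
 */
#if 0
static void my_set_dirty(struct page *page)
{
        /* Process context, interrupt state unknown: irq-safe variant. */
        mod_zone_page_state(page_zone(page), NR_FILE_DIRTY, 1);
}

static void my_clear_dirty(struct page *page)
{
        /* Interrupts already disabled: the __ variant avoids the save. */
        __mod_zone_page_state(page_zone(page), NR_FILE_DIRTY, -1);
}
#endif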

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 *
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */
static void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
        s8 *p = diff_pointer(zone, item);

        (*p)++;

        if (unlikely(*p > STAT_THRESHOLD)) {
                zone_page_state_add(*p, zone, item);
                *p = 0;
        }
}
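
/*
 * Worked example (illustrative): with STAT_THRESHOLD at 32, a CPU that
 * calls __inc_zone_state() 33 times in a row keeps the first 32 counts
 * in its per-cpu differential.  The 33rd call pushes the differential
 * to 33, which exceeds the threshold, so all 33 counts are folded into
 * the global counter and the differential resets to 0.  The global view
 * can thus lag by up to STAT_THRESHOLD per CPU per item.
 */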

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
        __inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
        struct zone *zone = page_zone(page);
        s8 *p = diff_pointer(zone, item);

        (*p)--;

        if (unlikely(*p < -STAT_THRESHOLD)) {
                zone_page_state_add(*p, zone, item);
                *p = 0;
        }
}
EXPORT_SYMBOL(__dec_zone_page_state);

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
        unsigned long flags;

        local_irq_save(flags);
        __inc_zone_state(zone, item);
        local_irq_restore(flags);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
        unsigned long flags;
        struct zone *zone;

        zone = page_zone(page);
        local_irq_save(flags);
        __inc_zone_state(zone, item);
        local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
        unsigned long flags;
        struct zone *zone;
        s8 *p;

        zone = page_zone(page);
        local_irq_save(flags);
        p = diff_pointer(zone, item);

        (*p)--;

        if (unlikely(*p < -STAT_THRESHOLD)) {
                zone_page_state_add(*p, zone, item);
                *p = 0;
        }
        local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);

/*
 * Update the zone counters for one cpu.
 */
void refresh_cpu_vm_stats(int cpu)
{
        struct zone *zone;
        int i;
        unsigned long flags;

        for_each_zone(zone) {
                struct per_cpu_pageset *pcp;

                pcp = zone_pcp(zone, cpu);

                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                        if (pcp->vm_stat_diff[i]) {
                                local_irq_save(flags);
                                zone_page_state_add(pcp->vm_stat_diff[i],
                                        zone, i);
                                pcp->vm_stat_diff[i] = 0;
                                local_irq_restore(flags);
                        }
        }
}

static void __refresh_cpu_vm_stats(void *dummy)
{
        refresh_cpu_vm_stats(smp_processor_id());
}

/*
 * Consolidate all counters.
 *
 * Note that the result is less inaccurate but still inaccurate
 * if concurrent processes are allowed to run.
 */
void refresh_vm_stats(void)
{
        on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1);
}
EXPORT_SYMBOL(refresh_vm_stats);

#endif

#ifdef CONFIG_NUMA
/*
 * zonelist = the list of zones passed to the allocator
 * z        = the zone from which the allocation occurred.
 *
 * Must be called with interrupts disabled.
 */
void zone_statistics(struct zonelist *zonelist, struct zone *z)
{
        if (z->zone_pgdat == zonelist->zones[0]->zone_pgdat) {
                __inc_zone_state(z, NUMA_HIT);
        } else {
                __inc_zone_state(z, NUMA_MISS);
                __inc_zone_state(zonelist->zones[0], NUMA_FOREIGN);
        }
        if (z->zone_pgdat == NODE_DATA(numa_node_id()))
                __inc_zone_state(z, NUMA_LOCAL);
        else
                __inc_zone_state(z, NUMA_OTHER);
}
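
/*
 * Worked example (illustrative): a task on node 1 allocates through a
 * zonelist whose first zone sits on node 0, but the page is actually
 * taken from a node 1 zone.  The node 1 zone then counts a NUMA_MISS,
 * the preferred node 0 zone counts a NUMA_FOREIGN, and since the page
 * ended up on the allocating task's own node, the node 1 zone also
 * counts a NUMA_LOCAL.
 */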

#endif

#ifdef CONFIG_PROC_FS

#include <linux/seq_file.h>

static void *frag_start(struct seq_file *m, loff_t *pos)
{
        pg_data_t *pgdat;
        loff_t node = *pos;

        for (pgdat = first_online_pgdat();
             pgdat && node;
             pgdat = next_online_pgdat(pgdat))
                --node;

        return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
        pg_data_t *pgdat = (pg_data_t *)arg;

        (*pos)++;
        return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
        pg_data_t *pgdat = (pg_data_t *)arg;
        struct zone *zone;
        struct zone *node_zones = pgdat->node_zones;
        unsigned long flags;
        int order;

        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
                if (!populated_zone(zone))
                        continue;

                spin_lock_irqsave(&zone->lock, flags);
                seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
                for (order = 0; order < MAX_ORDER; ++order)
                        seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
                spin_unlock_irqrestore(&zone->lock, flags);
                seq_putc(m, '\n');
        }
        return 0;
}
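
/*
 * Example of a /proc/buddyinfo line frag_show() emits, one free-block
 * count per order starting at order 0 (the values are illustrative):
 *
 *   Node 0, zone   Normal    216     55     18      9      4      1      0      1      2      0      1
 */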

struct seq_operations fragmentation_op = {
        .start  = frag_start,
        .next   = frag_next,
        .stop   = frag_stop,
        .show   = frag_show,
};

static char *vmstat_text[] = {
        /* Zoned VM counters */
        "nr_anon_pages",
        "nr_mapped",
        "nr_file_pages",
        "nr_slab",
        "nr_page_table_pages",
        "nr_dirty",
        "nr_writeback",
        "nr_unstable",
        "nr_bounce",

#ifdef CONFIG_NUMA
        "numa_hit",
        "numa_miss",
        "numa_foreign",
        "numa_interleave",
        "numa_local",
        "numa_other",
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
        "pgpgin",
        "pgpgout",
        "pswpin",
        "pswpout",

        "pgalloc_dma",
        "pgalloc_dma32",
        "pgalloc_normal",
        "pgalloc_high",

        "pgfree",
        "pgactivate",
        "pgdeactivate",

        "pgfault",
        "pgmajfault",

        "pgrefill_dma",
        "pgrefill_dma32",
        "pgrefill_normal",
        "pgrefill_high",

        "pgsteal_dma",
        "pgsteal_dma32",
        "pgsteal_normal",
        "pgsteal_high",

        "pgscan_kswapd_dma",
        "pgscan_kswapd_dma32",
        "pgscan_kswapd_normal",
        "pgscan_kswapd_high",

        "pgscan_direct_dma",
        "pgscan_direct_dma32",
        "pgscan_direct_normal",
        "pgscan_direct_high",

        "pginodesteal",
        "slabs_scanned",
        "kswapd_steal",
        "kswapd_inodesteal",
        "pageoutrun",
        "allocstall",

        "pgrotated",
#endif
};

/*
 * Output information about zones in @pgdat.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
        pg_data_t *pgdat = arg;
        struct zone *zone;
        struct zone *node_zones = pgdat->node_zones;
        unsigned long flags;

        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
                int i;

                if (!populated_zone(zone))
                        continue;

                spin_lock_irqsave(&zone->lock, flags);
                seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
                seq_printf(m,
                           "\n  pages free     %lu"
                           "\n        min      %lu"
                           "\n        low      %lu"
                           "\n        high     %lu"
                           "\n        active   %lu"
                           "\n        inactive %lu"
                           "\n        scanned  %lu (a: %lu i: %lu)"
                           "\n        spanned  %lu"
                           "\n        present  %lu",
                           zone->free_pages,
                           zone->pages_min,
                           zone->pages_low,
                           zone->pages_high,
                           zone->nr_active,
                           zone->nr_inactive,
                           zone->pages_scanned,
                           zone->nr_scan_active, zone->nr_scan_inactive,
                           zone->spanned_pages,
                           zone->present_pages);

                for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                        seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
                                        zone_page_state(zone, i));

                seq_printf(m,
                           "\n        protection: (%lu",
                           zone->lowmem_reserve[0]);
                for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
                        seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
                seq_printf(m,
                           ")"
                           "\n  pagesets");
                for_each_online_cpu(i) {
                        struct per_cpu_pageset *pageset;
                        int j;

                        pageset = zone_pcp(zone, i);
                        for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
                                if (pageset->pcp[j].count)
                                        break;
                        }
                        if (j == ARRAY_SIZE(pageset->pcp))
                                continue;
                        for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
                                seq_printf(m,
                                           "\n    cpu: %i pcp: %i"
                                           "\n              count: %i"
                                           "\n              high:  %i"
                                           "\n              batch: %i",
                                           i, j,
                                           pageset->pcp[j].count,
                                           pageset->pcp[j].high,
                                           pageset->pcp[j].batch);
                        }
                }
                seq_printf(m,
                           "\n  all_unreclaimable: %u"
                           "\n  prev_priority:     %i"
                           "\n  temp_priority:     %i"
                           "\n  start_pfn:         %lu",
                           zone->all_unreclaimable,
                           zone->prev_priority,
                           zone->temp_priority,
                           zone->zone_start_pfn);
                spin_unlock_irqrestore(&zone->lock, flags);
                seq_putc(m, '\n');
        }
        return 0;
}

struct seq_operations zoneinfo_op = {
        .start  = frag_start, /* iterate over all zones. The same as in
                               * fragmentation. */
        .next   = frag_next,
        .stop   = frag_stop,
        .show   = zoneinfo_show,
};

static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
        unsigned long *v;
#ifdef CONFIG_VM_EVENT_COUNTERS
        unsigned long *e;
#endif
        int i;

        if (*pos >= ARRAY_SIZE(vmstat_text))
                return NULL;

#ifdef CONFIG_VM_EVENT_COUNTERS
        v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
                        + sizeof(struct vm_event_state), GFP_KERNEL);
#else
        v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
                        GFP_KERNEL);
#endif
        m->private = v;
        if (!v)
                return ERR_PTR(-ENOMEM);
        for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                v[i] = global_page_state(i);
#ifdef CONFIG_VM_EVENT_COUNTERS
        e = v + NR_VM_ZONE_STAT_ITEMS;
        all_vm_events(e);
        e[PGPGIN] /= 2;         /* sectors -> kbytes */
        e[PGPGOUT] /= 2;
#endif
        return v + *pos;
}
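
/*
 * Illustrative layout of the buffer vmstat_start() assembles:
 *
 *   v[0] .. v[NR_VM_ZONE_STAT_ITEMS - 1]  zoned counters, filled from
 *                                         global_page_state()
 *   v[NR_VM_ZONE_STAT_ITEMS] .. end       event counters, filled from
 *                                         all_vm_events()
 *
 * vmstat_text[] names both halves in the same order, so vmstat_show()
 * can index one flat array.
 */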

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
        (*pos)++;
        if (*pos >= ARRAY_SIZE(vmstat_text))
                return NULL;
        return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
        unsigned long *l = arg;
        unsigned long off = l - (unsigned long *)m->private;

        seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
        return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
        kfree(m->private);
        m->private = NULL;
}

struct seq_operations vmstat_op = {
        .start  = vmstat_start,
        .next   = vmstat_next,
        .stop   = vmstat_stop,
        .show   = vmstat_show,
};

#endif /* CONFIG_PROC_FS */