vmstat.c

/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *		Christoph Lameter <christoph@lameter.com>
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>

void __get_zone_counts(unsigned long *active, unsigned long *inactive,
			unsigned long *free, struct pglist_data *pgdat)
{
	struct zone *zones = pgdat->node_zones;
	int i;

	*active = 0;
	*inactive = 0;
	*free = 0;
	for (i = 0; i < MAX_NR_ZONES; i++) {
		*active += zones[i].nr_active;
		*inactive += zones[i].nr_inactive;
		*free += zones[i].free_pages;
	}
}

void get_zone_counts(unsigned long *active,
		unsigned long *inactive, unsigned long *free)
{
	struct pglist_data *pgdat;

	*active = 0;
	*inactive = 0;
	*free = 0;
	for_each_online_pgdat(pgdat) {
		unsigned long l, m, n;
		__get_zone_counts(&l, &m, &n, pgdat);
		*active += l;
		*inactive += m;
		*free += n;
	}
}
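
/*
 * Usage sketch (illustrative, not part of this file): a reader such as
 * the /proc/meminfo code aggregates the per-node totals like
 *
 *	unsigned long active, inactive, free;
 *
 *	get_zone_counts(&active, &inactive, &free);
 *
 * and then reports the three sums.  No lock is taken across nodes, so
 * the result is a best-effort snapshot, not a consistent one.
 */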
#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
{
	int cpu = 0;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	cpu = first_cpu(*cpumask);
	while (cpu < NR_CPUS) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		cpu = next_cpu(cpu, *cpumask);

		if (cpu < NR_CPUS)
			prefetch(&per_cpu(vm_event_states, cpu));

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
	sum_vm_events(ret, &cpu_online_map);
}
EXPORT_SYMBOL_GPL(all_vm_events);
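
/*
 * Usage sketch (illustrative, not part of this file): a caller that
 * wants system-wide event totals could do
 *
 *	unsigned long ev[NR_VM_EVENT_ITEMS];
 *
 *	all_vm_events(ev);
 *	printk("pgfault %lu pgmajfault %lu\n", ev[PGFAULT], ev[PGMAJFAULT]);
 *
 * The snapshot is approximate: other CPUs keep counting while the
 * per-cpu arrays are being summed.
 */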
#ifdef CONFIG_HOTPLUG
/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * but keeps the global counts constant.
 */
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}
#endif /* CONFIG_HOTPLUG */

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP

#define STAT_THRESHOLD 32

/*
 * Determine pointer to currently valid differential byte given a zone and
 * the item number.
 *
 * Preemption must be off.
 */
static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item)
{
	return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item];
}

/*
 * For use when we know that interrupts are disabled.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
				int delta)
{
	s8 *p;
	long x;

	p = diff_pointer(zone, item);
	x = delta + *p;

	if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}

	*p = x;
}
EXPORT_SYMBOL(__mod_zone_page_state);
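
/*
 * Behavior sketch (illustrative, not part of this file): with
 * STAT_THRESHOLD at 32, thirty-two successive
 *
 *	__mod_zone_page_state(zone, NR_FILE_PAGES, 1);
 *
 * calls on one CPU only bump the per-cpu vm_stat_diff byte.  The 33rd
 * call pushes the buffered delta past the threshold, folds it into
 * the zone and global counters via zone_page_state_add(), and resets
 * the byte to zero.  A reader of the global counter therefore lags by
 * at most STAT_THRESHOLD per CPU per item.
 */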
/*
 * For an unknown interrupt state
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
					int delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compiler to
 * generate better code.
 *
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */
static void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	s8 *p = diff_pointer(zone, item);

	(*p)++;

	if (unlikely(*p > STAT_THRESHOLD)) {
		zone_page_state_add(*p, zone, item);
		*p = 0;
	}
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	struct zone *zone = page_zone(page);
	s8 *p = diff_pointer(zone, item);

	(*p)--;

	if (unlikely(*p < -STAT_THRESHOLD)) {
		zone_page_state_add(*p, zone, item);
		*p = 0;
	}
}
EXPORT_SYMBOL(__dec_zone_page_state);
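
/*
 * Usage sketch (illustrative, not part of this file): a caller already
 * in a context where these counters cannot be raced, e.g. with
 * interrupts disabled, accounts a newly mapped anonymous page with
 *
 *	__inc_zone_page_state(page, NR_ANON_PAGES);
 *
 * and undoes it on unmap with
 *
 *	__dec_zone_page_state(page, NR_ANON_PAGES);
 *
 * Passing the struct page * lets page_zone(page) be folded into the
 * inlined fast path, as the comment above explains.
 */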
void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;
	s8 *p;

	zone = page_zone(page);
	local_irq_save(flags);
	p = diff_pointer(zone, item);

	(*p)--;

	if (unlikely(*p < -STAT_THRESHOLD)) {
		zone_page_state_add(*p, zone, item);
		*p = 0;
	}
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);

/*
 * Update the zone counters for one cpu.
 */
void refresh_cpu_vm_stats(int cpu)
{
	struct zone *zone;
	int i;
	unsigned long flags;

	for_each_zone(zone) {
		struct per_cpu_pageset *pcp;

		pcp = zone_pcp(zone, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			if (pcp->vm_stat_diff[i]) {
				local_irq_save(flags);
				zone_page_state_add(pcp->vm_stat_diff[i],
					zone, i);
				pcp->vm_stat_diff[i] = 0;
				local_irq_restore(flags);
			}
	}
}

static void __refresh_cpu_vm_stats(void *dummy)
{
	refresh_cpu_vm_stats(smp_processor_id());
}

/*
 * Consolidate all counters.
 *
 * Note that the result is less inaccurate but still inaccurate
 * if concurrent processes are allowed to run.
 */
void refresh_vm_stats(void)
{
	on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1);
}
EXPORT_SYMBOL(refresh_vm_stats);
#endif

#ifdef CONFIG_NUMA
/*
 * zonelist = the list of zones passed to the allocator
 * z	    = the zone from which the allocation occurred.
 *
 * Must be called with interrupts disabled.
 */
void zone_statistics(struct zonelist *zonelist, struct zone *z)
{
	if (z->zone_pgdat == zonelist->zones[0]->zone_pgdat) {
		__inc_zone_state(z, NUMA_HIT);
	} else {
		__inc_zone_state(z, NUMA_MISS);
		__inc_zone_state(zonelist->zones[0], NUMA_FOREIGN);
	}
	if (z->zone_pgdat == NODE_DATA(numa_node_id()))
		__inc_zone_state(z, NUMA_LOCAL);
	else
		__inc_zone_state(z, NUMA_OTHER);
}
#endif
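
/*
 * Worked example (illustrative, not part of this file): on a two-node
 * machine, a task on node 0 whose allocation falls back to a zone on
 * node 1 records
 *
 *	NUMA_MISS    on node 1 (allocation missed the preferred node)
 *	NUMA_FOREIGN on node 0 (the preferred node was passed over)
 *	NUMA_OTHER   on node 1 (the page is remote to the running CPU)
 *
 * whereas an allocation satisfied from node 0 records NUMA_HIT and
 * NUMA_LOCAL there.
 */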
#ifdef CONFIG_PROC_FS

#include <linux/seq_file.h>

static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;
	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;
	int order;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;

		spin_lock_irqsave(&zone->lock, flags);
		seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
		for (order = 0; order < MAX_ORDER; ++order)
			seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
		spin_unlock_irqrestore(&zone->lock, flags);
		seq_putc(m, '\n');
	}
	return 0;
}

struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};
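
/*
 * Output sketch (illustrative numbers): frag_show() backs
 * /proc/buddyinfo, emitting one line per populated zone with the
 * free-block count for each order, e.g.
 *
 *	Node 0, zone      DMA      3      2      1      0 ...
 *	Node 0, zone   Normal    120     40     12      5 ...
 *
 * Low counts at high orders indicate fragmented free memory.
 */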
static char *vmstat_text[] = {
	/* Zoned VM counters */
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_slab",
	"nr_page_table_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_unstable",
	"nr_bounce",

#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	"pgalloc_dma",
	"pgalloc_dma32",
	"pgalloc_normal",
	"pgalloc_high",

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",

	"pgrefill_dma",
	"pgrefill_dma32",
	"pgrefill_normal",
	"pgrefill_high",

	"pgsteal_dma",
	"pgsteal_dma32",
	"pgsteal_normal",
	"pgsteal_high",

	"pgscan_kswapd_dma",
	"pgscan_kswapd_dma32",
	"pgscan_kswapd_normal",
	"pgscan_kswapd_high",

	"pgscan_direct_dma",
	"pgscan_direct_dma32",
	"pgscan_direct_normal",
	"pgscan_direct_high",

	"pginodesteal",
	"slabs_scanned",
	"kswapd_steal",
	"kswapd_inodesteal",
	"pageoutrun",
	"allocstall",

	"pgrotated",
#endif
};

/*
 * Output information about zones in @pgdat.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = arg;
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
		int i;

		if (!populated_zone(zone))
			continue;

		spin_lock_irqsave(&zone->lock, flags);
		seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
		seq_printf(m,
			   "\n  pages free     %lu"
			   "\n        min      %lu"
			   "\n        low      %lu"
			   "\n        high     %lu"
			   "\n        active   %lu"
			   "\n        inactive %lu"
			   "\n        scanned  %lu (a: %lu i: %lu)"
			   "\n        spanned  %lu"
			   "\n        present  %lu",
			   zone->free_pages,
			   zone->pages_min,
			   zone->pages_low,
			   zone->pages_high,
			   zone->nr_active,
			   zone->nr_inactive,
			   zone->pages_scanned,
			   zone->nr_scan_active, zone->nr_scan_inactive,
			   zone->spanned_pages,
			   zone->present_pages);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
					zone_page_state(zone, i));

		seq_printf(m,
			   "\n        protection: (%lu",
			   zone->lowmem_reserve[0]);
		for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
			seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
		seq_printf(m,
			   ")"
			   "\n  pagesets");
		for_each_online_cpu(i) {
			struct per_cpu_pageset *pageset;
			int j;

			pageset = zone_pcp(zone, i);
			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
				if (pageset->pcp[j].count)
					break;
			}
			if (j == ARRAY_SIZE(pageset->pcp))
				continue;
			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
				seq_printf(m,
					   "\n    cpu: %i pcp: %i"
					   "\n              count: %i"
					   "\n              high:  %i"
					   "\n              batch: %i",
					   i, j,
					   pageset->pcp[j].count,
					   pageset->pcp[j].high,
					   pageset->pcp[j].batch);
			}
		}
		seq_printf(m,
			   "\n  all_unreclaimable: %u"
			   "\n  prev_priority:     %i"
			   "\n  temp_priority:     %i"
			   "\n  start_pfn:         %lu",
			   zone->all_unreclaimable,
			   zone->prev_priority,
			   zone->temp_priority,
			   zone->zone_start_pfn);
		spin_unlock_irqrestore(&zone->lock, flags);
		seq_putc(m, '\n');
	}
	return 0;
}

struct seq_operations zoneinfo_op = {
	.start	= frag_start, /* iterate over all zones. The same as in
			       * fragmentation. */
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};
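
/*
 * Output sketch (illustrative numbers): zoneinfo_show() backs
 * /proc/zoneinfo, producing one block per populated zone, e.g.
 *
 *	Node 0, zone   Normal
 *	  pages free     12345
 *	        min      1018
 *	        low      1272
 *	        high     1527
 *	...
 *
 * followed by the per-zone counters named in vmstat_text, the lowmem
 * protection array, and the per-cpu pageset state.
 */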
static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
#ifdef CONFIG_VM_EVENT_COUNTERS
	unsigned long *e;
#endif
	int i;

	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;

#ifdef CONFIG_VM_EVENT_COUNTERS
	v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
			+ sizeof(struct vm_event_state), GFP_KERNEL);
#else
	v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
			GFP_KERNEL);
#endif
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_page_state(i);
#ifdef CONFIG_VM_EVENT_COUNTERS
	e = v + NR_VM_ZONE_STAT_ITEMS;
	all_vm_events(e);
	e[PGPGIN] /= 2;		/* sectors -> kbytes */
	e[PGPGOUT] /= 2;
#endif
	return v + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
	return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};
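
/*
 * Output sketch (illustrative numbers): the vmstat_op sequence backs
 * /proc/vmstat, one "name value" pair per line in vmstat_text order:
 *
 *	nr_anon_pages 10240
 *	nr_mapped 8192
 *	...
 *	pgpgin 123456
 *
 * vmstat_start() snapshots every counter into a single kmalloc'd
 * array up front, so one read reports values sampled at one point in
 * time rather than counters re-read for each line.
 */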
#endif /* CONFIG_PROC_FS */