task_mmu.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
  1. #include <linux/mm.h>
  2. #include <linux/hugetlb.h>
  3. #include <linux/mount.h>
  4. #include <linux/seq_file.h>
  5. #include <linux/highmem.h>
  6. #include <linux/ptrace.h>
  7. #include <linux/pagemap.h>
  8. #include <linux/mempolicy.h>
  9. #include <asm/elf.h>
  10. #include <asm/uaccess.h>
  11. #include <asm/tlbflush.h>
  12. #include "internal.h"
  13. char *task_mem(struct mm_struct *mm, char *buffer)
  14. {
  15. unsigned long data, text, lib;
  16. unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
  17. /*
  18. * Note: to minimize their overhead, mm maintains hiwater_vm and
  19. * hiwater_rss only when about to *lower* total_vm or rss. Any
  20. * collector of these hiwater stats must therefore get total_vm
  21. * and rss too, which will usually be the higher. Barriers? not
  22. * worth the effort, such snapshots can always be inconsistent.
  23. */
  24. hiwater_vm = total_vm = mm->total_vm;
  25. if (hiwater_vm < mm->hiwater_vm)
  26. hiwater_vm = mm->hiwater_vm;
  27. hiwater_rss = total_rss = get_mm_rss(mm);
  28. if (hiwater_rss < mm->hiwater_rss)
  29. hiwater_rss = mm->hiwater_rss;
  30. data = mm->total_vm - mm->shared_vm - mm->stack_vm;
  31. text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
  32. lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
  33. buffer += sprintf(buffer,
  34. "VmPeak:\t%8lu kB\n"
  35. "VmSize:\t%8lu kB\n"
  36. "VmLck:\t%8lu kB\n"
  37. "VmHWM:\t%8lu kB\n"
  38. "VmRSS:\t%8lu kB\n"
  39. "VmData:\t%8lu kB\n"
  40. "VmStk:\t%8lu kB\n"
  41. "VmExe:\t%8lu kB\n"
  42. "VmLib:\t%8lu kB\n"
  43. "VmPTE:\t%8lu kB\n",
  44. hiwater_vm << (PAGE_SHIFT-10),
  45. (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
  46. mm->locked_vm << (PAGE_SHIFT-10),
  47. hiwater_rss << (PAGE_SHIFT-10),
  48. total_rss << (PAGE_SHIFT-10),
  49. data << (PAGE_SHIFT-10),
  50. mm->stack_vm << (PAGE_SHIFT-10), text, lib,
  51. (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
  52. return buffer;
  53. }
  54. unsigned long task_vsize(struct mm_struct *mm)
  55. {
  56. return PAGE_SIZE * mm->total_vm;
  57. }
  58. int task_statm(struct mm_struct *mm, int *shared, int *text,
  59. int *data, int *resident)
  60. {
  61. *shared = get_mm_counter(mm, file_rss);
  62. *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
  63. >> PAGE_SHIFT;
  64. *data = mm->total_vm - mm->shared_vm;
  65. *resident = *shared + get_mm_counter(mm, anon_rss);
  66. return mm->total_vm;
  67. }
  68. int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
  69. {
  70. struct vm_area_struct * vma;
  71. int result = -ENOENT;
  72. struct task_struct *task = get_proc_task(inode);
  73. struct mm_struct * mm = NULL;
  74. if (task) {
  75. mm = get_task_mm(task);
  76. put_task_struct(task);
  77. }
  78. if (!mm)
  79. goto out;
  80. down_read(&mm->mmap_sem);
  81. vma = mm->mmap;
  82. while (vma) {
  83. if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
  84. break;
  85. vma = vma->vm_next;
  86. }
  87. if (vma) {
  88. *mnt = mntget(vma->vm_file->f_path.mnt);
  89. *dentry = dget(vma->vm_file->f_path.dentry);
  90. result = 0;
  91. }
  92. up_read(&mm->mmap_sem);
  93. mmput(mm);
  94. out:
  95. return result;
  96. }
  97. static void pad_len_spaces(struct seq_file *m, int len)
  98. {
  99. len = 25 + sizeof(void*) * 6 - len;
  100. if (len < 1)
  101. len = 1;
  102. seq_printf(m, "%*c", len, ' ');
  103. }
  104. struct mem_size_stats
  105. {
  106. unsigned long resident;
  107. unsigned long shared_clean;
  108. unsigned long shared_dirty;
  109. unsigned long private_clean;
  110. unsigned long private_dirty;
  111. unsigned long referenced;
  112. };
  113. struct pmd_walker {
  114. struct vm_area_struct *vma;
  115. void *private;
  116. void (*action)(struct vm_area_struct *, pmd_t *, unsigned long,
  117. unsigned long, void *);
  118. };
  119. static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
  120. {
  121. struct proc_maps_private *priv = m->private;
  122. struct task_struct *task = priv->task;
  123. struct vm_area_struct *vma = v;
  124. struct mm_struct *mm = vma->vm_mm;
  125. struct file *file = vma->vm_file;
  126. int flags = vma->vm_flags;
  127. unsigned long ino = 0;
  128. dev_t dev = 0;
  129. int len;
  130. if (maps_protect && !ptrace_may_attach(task))
  131. return -EACCES;
  132. if (file) {
  133. struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
  134. dev = inode->i_sb->s_dev;
  135. ino = inode->i_ino;
  136. }
  137. seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
  138. vma->vm_start,
  139. vma->vm_end,
  140. flags & VM_READ ? 'r' : '-',
  141. flags & VM_WRITE ? 'w' : '-',
  142. flags & VM_EXEC ? 'x' : '-',
  143. flags & VM_MAYSHARE ? 's' : 'p',
  144. vma->vm_pgoff << PAGE_SHIFT,
  145. MAJOR(dev), MINOR(dev), ino, &len);
  146. /*
  147. * Print the dentry name for named mappings, and a
  148. * special [heap] marker for the heap:
  149. */
  150. if (file) {
  151. pad_len_spaces(m, len);
  152. seq_path(m, file->f_path.mnt, file->f_path.dentry, "\n");
  153. } else {
  154. const char *name = arch_vma_name(vma);
  155. if (!name) {
  156. if (mm) {
  157. if (vma->vm_start <= mm->start_brk &&
  158. vma->vm_end >= mm->brk) {
  159. name = "[heap]";
  160. } else if (vma->vm_start <= mm->start_stack &&
  161. vma->vm_end >= mm->start_stack) {
  162. name = "[stack]";
  163. }
  164. } else {
  165. name = "[vdso]";
  166. }
  167. }
  168. if (name) {
  169. pad_len_spaces(m, len);
  170. seq_puts(m, name);
  171. }
  172. }
  173. seq_putc(m, '\n');
  174. if (mss)
  175. seq_printf(m,
  176. "Size: %8lu kB\n"
  177. "Rss: %8lu kB\n"
  178. "Shared_Clean: %8lu kB\n"
  179. "Shared_Dirty: %8lu kB\n"
  180. "Private_Clean: %8lu kB\n"
  181. "Private_Dirty: %8lu kB\n"
  182. "Referenced: %8lu kB\n",
  183. (vma->vm_end - vma->vm_start) >> 10,
  184. mss->resident >> 10,
  185. mss->shared_clean >> 10,
  186. mss->shared_dirty >> 10,
  187. mss->private_clean >> 10,
  188. mss->private_dirty >> 10,
  189. mss->referenced >> 10);
  190. if (m->count < m->size) /* vma is copied successfully */
  191. m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
  192. return 0;
  193. }
  194. static int show_map(struct seq_file *m, void *v)
  195. {
  196. return show_map_internal(m, v, NULL);
  197. }
  198. static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
  199. unsigned long addr, unsigned long end,
  200. void *private)
  201. {
  202. struct mem_size_stats *mss = private;
  203. pte_t *pte, ptent;
  204. spinlock_t *ptl;
  205. struct page *page;
  206. pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
  207. for (; addr != end; pte++, addr += PAGE_SIZE) {
  208. ptent = *pte;
  209. if (!pte_present(ptent))
  210. continue;
  211. mss->resident += PAGE_SIZE;
  212. page = vm_normal_page(vma, addr, ptent);
  213. if (!page)
  214. continue;
  215. /* Accumulate the size in pages that have been accessed. */
  216. if (pte_young(ptent) || PageReferenced(page))
  217. mss->referenced += PAGE_SIZE;
  218. if (page_mapcount(page) >= 2) {
  219. if (pte_dirty(ptent))
  220. mss->shared_dirty += PAGE_SIZE;
  221. else
  222. mss->shared_clean += PAGE_SIZE;
  223. } else {
  224. if (pte_dirty(ptent))
  225. mss->private_dirty += PAGE_SIZE;
  226. else
  227. mss->private_clean += PAGE_SIZE;
  228. }
  229. }
  230. pte_unmap_unlock(pte - 1, ptl);
  231. cond_resched();
  232. }
  233. static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
  234. unsigned long addr, unsigned long end,
  235. void *private)
  236. {
  237. pte_t *pte, ptent;
  238. spinlock_t *ptl;
  239. struct page *page;
  240. pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
  241. for (; addr != end; pte++, addr += PAGE_SIZE) {
  242. ptent = *pte;
  243. if (!pte_present(ptent))
  244. continue;
  245. page = vm_normal_page(vma, addr, ptent);
  246. if (!page)
  247. continue;
  248. /* Clear accessed and referenced bits. */
  249. ptep_test_and_clear_young(vma, addr, pte);
  250. ClearPageReferenced(page);
  251. }
  252. pte_unmap_unlock(pte - 1, ptl);
  253. cond_resched();
  254. }
  255. static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud,
  256. unsigned long addr, unsigned long end)
  257. {
  258. pmd_t *pmd;
  259. unsigned long next;
  260. for (pmd = pmd_offset(pud, addr); addr != end;
  261. pmd++, addr = next) {
  262. next = pmd_addr_end(addr, end);
  263. if (pmd_none_or_clear_bad(pmd))
  264. continue;
  265. walker->action(walker->vma, pmd, addr, next, walker->private);
  266. }
  267. }
  268. static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd,
  269. unsigned long addr, unsigned long end)
  270. {
  271. pud_t *pud;
  272. unsigned long next;
  273. for (pud = pud_offset(pgd, addr); addr != end;
  274. pud++, addr = next) {
  275. next = pud_addr_end(addr, end);
  276. if (pud_none_or_clear_bad(pud))
  277. continue;
  278. walk_pmd_range(walker, pud, addr, next);
  279. }
  280. }
  281. /*
  282. * walk_page_range - walk the page tables of a VMA with a callback
  283. * @vma - VMA to walk
  284. * @action - callback invoked for every bottom-level (PTE) page table
  285. * @private - private data passed to the callback function
  286. *
  287. * Recursively walk the page table for the memory area in a VMA, calling
  288. * a callback for every bottom-level (PTE) page table.
  289. */
  290. static inline void walk_page_range(struct vm_area_struct *vma,
  291. void (*action)(struct vm_area_struct *,
  292. pmd_t *, unsigned long,
  293. unsigned long, void *),
  294. void *private)
  295. {
  296. unsigned long addr = vma->vm_start;
  297. unsigned long end = vma->vm_end;
  298. struct pmd_walker walker = {
  299. .vma = vma,
  300. .private = private,
  301. .action = action,
  302. };
  303. pgd_t *pgd;
  304. unsigned long next;
  305. for (pgd = pgd_offset(vma->vm_mm, addr); addr != end;
  306. pgd++, addr = next) {
  307. next = pgd_addr_end(addr, end);
  308. if (pgd_none_or_clear_bad(pgd))
  309. continue;
  310. walk_pud_range(&walker, pgd, addr, next);
  311. }
  312. }
  313. static int show_smap(struct seq_file *m, void *v)
  314. {
  315. struct vm_area_struct *vma = v;
  316. struct mem_size_stats mss;
  317. memset(&mss, 0, sizeof mss);
  318. if (vma->vm_mm && !is_vm_hugetlb_page(vma))
  319. walk_page_range(vma, smaps_pte_range, &mss);
  320. return show_map_internal(m, v, &mss);
  321. }
  322. void clear_refs_smap(struct mm_struct *mm)
  323. {
  324. struct vm_area_struct *vma;
  325. down_read(&mm->mmap_sem);
  326. for (vma = mm->mmap; vma; vma = vma->vm_next)
  327. if (vma->vm_mm && !is_vm_hugetlb_page(vma))
  328. walk_page_range(vma, clear_refs_pte_range, NULL);
  329. flush_tlb_mm(mm);
  330. up_read(&mm->mmap_sem);
  331. }
  332. static void *m_start(struct seq_file *m, loff_t *pos)
  333. {
  334. struct proc_maps_private *priv = m->private;
  335. unsigned long last_addr = m->version;
  336. struct mm_struct *mm;
  337. struct vm_area_struct *vma, *tail_vma = NULL;
  338. loff_t l = *pos;
  339. /* Clear the per syscall fields in priv */
  340. priv->task = NULL;
  341. priv->tail_vma = NULL;
  342. /*
  343. * We remember last_addr rather than next_addr to hit with
  344. * mmap_cache most of the time. We have zero last_addr at
  345. * the beginning and also after lseek. We will have -1 last_addr
  346. * after the end of the vmas.
  347. */
  348. if (last_addr == -1UL)
  349. return NULL;
  350. priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
  351. if (!priv->task)
  352. return NULL;
  353. mm = get_task_mm(priv->task);
  354. if (!mm)
  355. return NULL;
  356. priv->tail_vma = tail_vma = get_gate_vma(priv->task);
  357. down_read(&mm->mmap_sem);
  358. /* Start with last addr hint */
  359. if (last_addr && (vma = find_vma(mm, last_addr))) {
  360. vma = vma->vm_next;
  361. goto out;
  362. }
  363. /*
  364. * Check the vma index is within the range and do
  365. * sequential scan until m_index.
  366. */
  367. vma = NULL;
  368. if ((unsigned long)l < mm->map_count) {
  369. vma = mm->mmap;
  370. while (l-- && vma)
  371. vma = vma->vm_next;
  372. goto out;
  373. }
  374. if (l != mm->map_count)
  375. tail_vma = NULL; /* After gate vma */
  376. out:
  377. if (vma)
  378. return vma;
  379. /* End of vmas has been reached */
  380. m->version = (tail_vma != NULL)? 0: -1UL;
  381. up_read(&mm->mmap_sem);
  382. mmput(mm);
  383. return tail_vma;
  384. }
  385. static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
  386. {
  387. if (vma && vma != priv->tail_vma) {
  388. struct mm_struct *mm = vma->vm_mm;
  389. up_read(&mm->mmap_sem);
  390. mmput(mm);
  391. }
  392. }
  393. static void *m_next(struct seq_file *m, void *v, loff_t *pos)
  394. {
  395. struct proc_maps_private *priv = m->private;
  396. struct vm_area_struct *vma = v;
  397. struct vm_area_struct *tail_vma = priv->tail_vma;
  398. (*pos)++;
  399. if (vma && (vma != tail_vma) && vma->vm_next)
  400. return vma->vm_next;
  401. vma_stop(priv, vma);
  402. return (vma != tail_vma)? tail_vma: NULL;
  403. }
  404. static void m_stop(struct seq_file *m, void *v)
  405. {
  406. struct proc_maps_private *priv = m->private;
  407. struct vm_area_struct *vma = v;
  408. vma_stop(priv, vma);
  409. if (priv->task)
  410. put_task_struct(priv->task);
  411. }
  412. static struct seq_operations proc_pid_maps_op = {
  413. .start = m_start,
  414. .next = m_next,
  415. .stop = m_stop,
  416. .show = show_map
  417. };
  418. static struct seq_operations proc_pid_smaps_op = {
  419. .start = m_start,
  420. .next = m_next,
  421. .stop = m_stop,
  422. .show = show_smap
  423. };
  424. static int do_maps_open(struct inode *inode, struct file *file,
  425. struct seq_operations *ops)
  426. {
  427. struct proc_maps_private *priv;
  428. int ret = -ENOMEM;
  429. priv = kzalloc(sizeof(*priv), GFP_KERNEL);
  430. if (priv) {
  431. priv->pid = proc_pid(inode);
  432. ret = seq_open(file, ops);
  433. if (!ret) {
  434. struct seq_file *m = file->private_data;
  435. m->private = priv;
  436. } else {
  437. kfree(priv);
  438. }
  439. }
  440. return ret;
  441. }
  442. static int maps_open(struct inode *inode, struct file *file)
  443. {
  444. return do_maps_open(inode, file, &proc_pid_maps_op);
  445. }
  446. const struct file_operations proc_maps_operations = {
  447. .open = maps_open,
  448. .read = seq_read,
  449. .llseek = seq_lseek,
  450. .release = seq_release_private,
  451. };
  452. #ifdef CONFIG_NUMA
  453. extern int show_numa_map(struct seq_file *m, void *v);
  454. static int show_numa_map_checked(struct seq_file *m, void *v)
  455. {
  456. struct proc_maps_private *priv = m->private;
  457. struct task_struct *task = priv->task;
  458. if (maps_protect && !ptrace_may_attach(task))
  459. return -EACCES;
  460. return show_numa_map(m, v);
  461. }
  462. static struct seq_operations proc_pid_numa_maps_op = {
  463. .start = m_start,
  464. .next = m_next,
  465. .stop = m_stop,
  466. .show = show_numa_map_checked
  467. };
  468. static int numa_maps_open(struct inode *inode, struct file *file)
  469. {
  470. return do_maps_open(inode, file, &proc_pid_numa_maps_op);
  471. }
  472. const struct file_operations proc_numa_maps_operations = {
  473. .open = numa_maps_open,
  474. .read = seq_read,
  475. .llseek = seq_lseek,
  476. .release = seq_release_private,
  477. };
  478. #endif
  479. static int smaps_open(struct inode *inode, struct file *file)
  480. {
  481. return do_maps_open(inode, file, &proc_pid_smaps_op);
  482. }
  483. const struct file_operations proc_smaps_operations = {
  484. .open = smaps_open,
  485. .read = seq_read,
  486. .llseek = seq_lseek,
  487. .release = seq_release_private,
  488. };