task_mmu.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. #include <linux/mm.h>
  2. #include <linux/hugetlb.h>
  3. #include <linux/mount.h>
  4. #include <linux/seq_file.h>
  5. #include <linux/highmem.h>
  6. #include <linux/ptrace.h>
  7. #include <linux/pagemap.h>
  8. #include <linux/mempolicy.h>
  9. #include <asm/elf.h>
  10. #include <asm/uaccess.h>
  11. #include <asm/tlbflush.h>
  12. #include "internal.h"
  13. char *task_mem(struct mm_struct *mm, char *buffer)
  14. {
  15. unsigned long data, text, lib;
  16. unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
  17. /*
  18. * Note: to minimize their overhead, mm maintains hiwater_vm and
  19. * hiwater_rss only when about to *lower* total_vm or rss. Any
  20. * collector of these hiwater stats must therefore get total_vm
  21. * and rss too, which will usually be the higher. Barriers? not
  22. * worth the effort, such snapshots can always be inconsistent.
  23. */
  24. hiwater_vm = total_vm = mm->total_vm;
  25. if (hiwater_vm < mm->hiwater_vm)
  26. hiwater_vm = mm->hiwater_vm;
  27. hiwater_rss = total_rss = get_mm_rss(mm);
  28. if (hiwater_rss < mm->hiwater_rss)
  29. hiwater_rss = mm->hiwater_rss;
  30. data = mm->total_vm - mm->shared_vm - mm->stack_vm;
  31. text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
  32. lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
  33. buffer += sprintf(buffer,
  34. "VmPeak:\t%8lu kB\n"
  35. "VmSize:\t%8lu kB\n"
  36. "VmLck:\t%8lu kB\n"
  37. "VmHWM:\t%8lu kB\n"
  38. "VmRSS:\t%8lu kB\n"
  39. "VmData:\t%8lu kB\n"
  40. "VmStk:\t%8lu kB\n"
  41. "VmExe:\t%8lu kB\n"
  42. "VmLib:\t%8lu kB\n"
  43. "VmPTE:\t%8lu kB\n",
  44. hiwater_vm << (PAGE_SHIFT-10),
  45. (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
  46. mm->locked_vm << (PAGE_SHIFT-10),
  47. hiwater_rss << (PAGE_SHIFT-10),
  48. total_rss << (PAGE_SHIFT-10),
  49. data << (PAGE_SHIFT-10),
  50. mm->stack_vm << (PAGE_SHIFT-10), text, lib,
  51. (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
  52. return buffer;
  53. }
  54. unsigned long task_vsize(struct mm_struct *mm)
  55. {
  56. return PAGE_SIZE * mm->total_vm;
  57. }
  58. int task_statm(struct mm_struct *mm, int *shared, int *text,
  59. int *data, int *resident)
  60. {
  61. *shared = get_mm_counter(mm, file_rss);
  62. *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
  63. >> PAGE_SHIFT;
  64. *data = mm->total_vm - mm->shared_vm;
  65. *resident = *shared + get_mm_counter(mm, anon_rss);
  66. return mm->total_vm;
  67. }
  68. int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
  69. {
  70. struct vm_area_struct * vma;
  71. int result = -ENOENT;
  72. struct task_struct *task = get_proc_task(inode);
  73. struct mm_struct * mm = NULL;
  74. if (task) {
  75. mm = get_task_mm(task);
  76. put_task_struct(task);
  77. }
  78. if (!mm)
  79. goto out;
  80. down_read(&mm->mmap_sem);
  81. vma = mm->mmap;
  82. while (vma) {
  83. if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
  84. break;
  85. vma = vma->vm_next;
  86. }
  87. if (vma) {
  88. *mnt = mntget(vma->vm_file->f_path.mnt);
  89. *dentry = dget(vma->vm_file->f_path.dentry);
  90. result = 0;
  91. }
  92. up_read(&mm->mmap_sem);
  93. mmput(mm);
  94. out:
  95. return result;
  96. }
  97. static void pad_len_spaces(struct seq_file *m, int len)
  98. {
  99. len = 25 + sizeof(void*) * 6 - len;
  100. if (len < 1)
  101. len = 1;
  102. seq_printf(m, "%*c", len, ' ');
  103. }
  104. static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
  105. {
  106. if (vma && vma != priv->tail_vma) {
  107. struct mm_struct *mm = vma->vm_mm;
  108. up_read(&mm->mmap_sem);
  109. mmput(mm);
  110. }
  111. }
  112. static void *m_start(struct seq_file *m, loff_t *pos)
  113. {
  114. struct proc_maps_private *priv = m->private;
  115. unsigned long last_addr = m->version;
  116. struct mm_struct *mm;
  117. struct vm_area_struct *vma, *tail_vma = NULL;
  118. loff_t l = *pos;
  119. /* Clear the per syscall fields in priv */
  120. priv->task = NULL;
  121. priv->tail_vma = NULL;
  122. /*
  123. * We remember last_addr rather than next_addr to hit with
  124. * mmap_cache most of the time. We have zero last_addr at
  125. * the beginning and also after lseek. We will have -1 last_addr
  126. * after the end of the vmas.
  127. */
  128. if (last_addr == -1UL)
  129. return NULL;
  130. priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
  131. if (!priv->task)
  132. return NULL;
  133. mm = mm_for_maps(priv->task);
  134. if (!mm)
  135. return NULL;
  136. tail_vma = get_gate_vma(priv->task);
  137. priv->tail_vma = tail_vma;
  138. /* Start with last addr hint */
  139. vma = find_vma(mm, last_addr);
  140. if (last_addr && vma) {
  141. vma = vma->vm_next;
  142. goto out;
  143. }
  144. /*
  145. * Check the vma index is within the range and do
  146. * sequential scan until m_index.
  147. */
  148. vma = NULL;
  149. if ((unsigned long)l < mm->map_count) {
  150. vma = mm->mmap;
  151. while (l-- && vma)
  152. vma = vma->vm_next;
  153. goto out;
  154. }
  155. if (l != mm->map_count)
  156. tail_vma = NULL; /* After gate vma */
  157. out:
  158. if (vma)
  159. return vma;
  160. /* End of vmas has been reached */
  161. m->version = (tail_vma != NULL)? 0: -1UL;
  162. up_read(&mm->mmap_sem);
  163. mmput(mm);
  164. return tail_vma;
  165. }
  166. static void *m_next(struct seq_file *m, void *v, loff_t *pos)
  167. {
  168. struct proc_maps_private *priv = m->private;
  169. struct vm_area_struct *vma = v;
  170. struct vm_area_struct *tail_vma = priv->tail_vma;
  171. (*pos)++;
  172. if (vma && (vma != tail_vma) && vma->vm_next)
  173. return vma->vm_next;
  174. vma_stop(priv, vma);
  175. return (vma != tail_vma)? tail_vma: NULL;
  176. }
  177. static void m_stop(struct seq_file *m, void *v)
  178. {
  179. struct proc_maps_private *priv = m->private;
  180. struct vm_area_struct *vma = v;
  181. vma_stop(priv, vma);
  182. if (priv->task)
  183. put_task_struct(priv->task);
  184. }
  185. static int do_maps_open(struct inode *inode, struct file *file,
  186. struct seq_operations *ops)
  187. {
  188. struct proc_maps_private *priv;
  189. int ret = -ENOMEM;
  190. priv = kzalloc(sizeof(*priv), GFP_KERNEL);
  191. if (priv) {
  192. priv->pid = proc_pid(inode);
  193. ret = seq_open(file, ops);
  194. if (!ret) {
  195. struct seq_file *m = file->private_data;
  196. m->private = priv;
  197. } else {
  198. kfree(priv);
  199. }
  200. }
  201. return ret;
  202. }
  203. static int show_map(struct seq_file *m, void *v)
  204. {
  205. struct proc_maps_private *priv = m->private;
  206. struct task_struct *task = priv->task;
  207. struct vm_area_struct *vma = v;
  208. struct mm_struct *mm = vma->vm_mm;
  209. struct file *file = vma->vm_file;
  210. int flags = vma->vm_flags;
  211. unsigned long ino = 0;
  212. dev_t dev = 0;
  213. int len;
  214. if (maps_protect && !ptrace_may_attach(task))
  215. return -EACCES;
  216. if (file) {
  217. struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
  218. dev = inode->i_sb->s_dev;
  219. ino = inode->i_ino;
  220. }
  221. seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
  222. vma->vm_start,
  223. vma->vm_end,
  224. flags & VM_READ ? 'r' : '-',
  225. flags & VM_WRITE ? 'w' : '-',
  226. flags & VM_EXEC ? 'x' : '-',
  227. flags & VM_MAYSHARE ? 's' : 'p',
  228. vma->vm_pgoff << PAGE_SHIFT,
  229. MAJOR(dev), MINOR(dev), ino, &len);
  230. /*
  231. * Print the dentry name for named mappings, and a
  232. * special [heap] marker for the heap:
  233. */
  234. if (file) {
  235. pad_len_spaces(m, len);
  236. seq_path(m, file->f_path.mnt, file->f_path.dentry, "\n");
  237. } else {
  238. const char *name = arch_vma_name(vma);
  239. if (!name) {
  240. if (mm) {
  241. if (vma->vm_start <= mm->start_brk &&
  242. vma->vm_end >= mm->brk) {
  243. name = "[heap]";
  244. } else if (vma->vm_start <= mm->start_stack &&
  245. vma->vm_end >= mm->start_stack) {
  246. name = "[stack]";
  247. }
  248. } else {
  249. name = "[vdso]";
  250. }
  251. }
  252. if (name) {
  253. pad_len_spaces(m, len);
  254. seq_puts(m, name);
  255. }
  256. }
  257. seq_putc(m, '\n');
  258. if (m->count < m->size) /* vma is copied successfully */
  259. m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
  260. return 0;
  261. }
  262. static struct seq_operations proc_pid_maps_op = {
  263. .start = m_start,
  264. .next = m_next,
  265. .stop = m_stop,
  266. .show = show_map
  267. };
  268. static int maps_open(struct inode *inode, struct file *file)
  269. {
  270. return do_maps_open(inode, file, &proc_pid_maps_op);
  271. }
  272. const struct file_operations proc_maps_operations = {
  273. .open = maps_open,
  274. .read = seq_read,
  275. .llseek = seq_lseek,
  276. .release = seq_release_private,
  277. };
/*
 * Proportional Set Size (PSS): my share of RSS.
 *
 * The PSS of a process is the count of pages it has in memory, where
 * each page is divided by the number of processes sharing it.  So if a
 * process has 1000 pages all to itself, and 1000 shared with one other
 * process, its PSS will be 1500.
 *
 * To keep accumulated division errors low, pss is kept as a 64-bit
 * fixed-point counter: every page contributes
 * (PAGE_SIZE << PSS_SHIFT) / mapcount, so (pss >> PSS_SHIFT) is the
 * real byte count.
 *
 * A shift of 12 before division means (assuming 4K page size):
 * 	- 1M 3-user-pages add up to 8KB errors;
 * 	- supports mapcount up to 2^24, or 16M;
 * 	- supports PSS up to 2^52 bytes, or 4PB.
 */
#define PSS_SHIFT 12
  296. struct mem_size_stats
  297. {
  298. struct vm_area_struct *vma;
  299. unsigned long resident;
  300. unsigned long shared_clean;
  301. unsigned long shared_dirty;
  302. unsigned long private_clean;
  303. unsigned long private_dirty;
  304. unsigned long referenced;
  305. u64 pss;
  306. };
  307. static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
  308. void *private)
  309. {
  310. struct mem_size_stats *mss = private;
  311. struct vm_area_struct *vma = mss->vma;
  312. pte_t *pte, ptent;
  313. spinlock_t *ptl;
  314. struct page *page;
  315. int mapcount;
  316. pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
  317. for (; addr != end; pte++, addr += PAGE_SIZE) {
  318. ptent = *pte;
  319. if (!pte_present(ptent))
  320. continue;
  321. mss->resident += PAGE_SIZE;
  322. page = vm_normal_page(vma, addr, ptent);
  323. if (!page)
  324. continue;
  325. /* Accumulate the size in pages that have been accessed. */
  326. if (pte_young(ptent) || PageReferenced(page))
  327. mss->referenced += PAGE_SIZE;
  328. mapcount = page_mapcount(page);
  329. if (mapcount >= 2) {
  330. if (pte_dirty(ptent))
  331. mss->shared_dirty += PAGE_SIZE;
  332. else
  333. mss->shared_clean += PAGE_SIZE;
  334. mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
  335. } else {
  336. if (pte_dirty(ptent))
  337. mss->private_dirty += PAGE_SIZE;
  338. else
  339. mss->private_clean += PAGE_SIZE;
  340. mss->pss += (PAGE_SIZE << PSS_SHIFT);
  341. }
  342. }
  343. pte_unmap_unlock(pte - 1, ptl);
  344. cond_resched();
  345. return 0;
  346. }
  347. static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
  348. static int show_smap(struct seq_file *m, void *v)
  349. {
  350. struct vm_area_struct *vma = v;
  351. struct mem_size_stats mss;
  352. int ret;
  353. memset(&mss, 0, sizeof mss);
  354. mss.vma = vma;
  355. if (vma->vm_mm && !is_vm_hugetlb_page(vma))
  356. walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
  357. &smaps_walk, &mss);
  358. ret = show_map(m, v);
  359. if (ret)
  360. return ret;
  361. seq_printf(m,
  362. "Size: %8lu kB\n"
  363. "Rss: %8lu kB\n"
  364. "Pss: %8lu kB\n"
  365. "Shared_Clean: %8lu kB\n"
  366. "Shared_Dirty: %8lu kB\n"
  367. "Private_Clean: %8lu kB\n"
  368. "Private_Dirty: %8lu kB\n"
  369. "Referenced: %8lu kB\n",
  370. (vma->vm_end - vma->vm_start) >> 10,
  371. mss.resident >> 10,
  372. (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
  373. mss.shared_clean >> 10,
  374. mss.shared_dirty >> 10,
  375. mss.private_clean >> 10,
  376. mss.private_dirty >> 10,
  377. mss.referenced >> 10);
  378. return ret;
  379. }
  380. static struct seq_operations proc_pid_smaps_op = {
  381. .start = m_start,
  382. .next = m_next,
  383. .stop = m_stop,
  384. .show = show_smap
  385. };
  386. static int smaps_open(struct inode *inode, struct file *file)
  387. {
  388. return do_maps_open(inode, file, &proc_pid_smaps_op);
  389. }
  390. const struct file_operations proc_smaps_operations = {
  391. .open = smaps_open,
  392. .read = seq_read,
  393. .llseek = seq_lseek,
  394. .release = seq_release_private,
  395. };
  396. static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
  397. unsigned long end, void *private)
  398. {
  399. struct vm_area_struct *vma = private;
  400. pte_t *pte, ptent;
  401. spinlock_t *ptl;
  402. struct page *page;
  403. pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
  404. for (; addr != end; pte++, addr += PAGE_SIZE) {
  405. ptent = *pte;
  406. if (!pte_present(ptent))
  407. continue;
  408. page = vm_normal_page(vma, addr, ptent);
  409. if (!page)
  410. continue;
  411. /* Clear accessed and referenced bits. */
  412. ptep_test_and_clear_young(vma, addr, pte);
  413. ClearPageReferenced(page);
  414. }
  415. pte_unmap_unlock(pte - 1, ptl);
  416. cond_resched();
  417. return 0;
  418. }
  419. static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
  420. static ssize_t clear_refs_write(struct file *file, const char __user *buf,
  421. size_t count, loff_t *ppos)
  422. {
  423. struct task_struct *task;
  424. char buffer[PROC_NUMBUF], *end;
  425. struct mm_struct *mm;
  426. struct vm_area_struct *vma;
  427. memset(buffer, 0, sizeof(buffer));
  428. if (count > sizeof(buffer) - 1)
  429. count = sizeof(buffer) - 1;
  430. if (copy_from_user(buffer, buf, count))
  431. return -EFAULT;
  432. if (!simple_strtol(buffer, &end, 0))
  433. return -EINVAL;
  434. if (*end == '\n')
  435. end++;
  436. task = get_proc_task(file->f_path.dentry->d_inode);
  437. if (!task)
  438. return -ESRCH;
  439. mm = get_task_mm(task);
  440. if (mm) {
  441. down_read(&mm->mmap_sem);
  442. for (vma = mm->mmap; vma; vma = vma->vm_next)
  443. if (!is_vm_hugetlb_page(vma))
  444. walk_page_range(mm, vma->vm_start, vma->vm_end,
  445. &clear_refs_walk, vma);
  446. flush_tlb_mm(mm);
  447. up_read(&mm->mmap_sem);
  448. mmput(mm);
  449. }
  450. put_task_struct(task);
  451. if (end - buffer == 0)
  452. return -EIO;
  453. return end - buffer;
  454. }
  455. const struct file_operations proc_clear_refs_operations = {
  456. .write = clear_refs_write,
  457. };
  458. #ifdef CONFIG_NUMA
  459. extern int show_numa_map(struct seq_file *m, void *v);
  460. static int show_numa_map_checked(struct seq_file *m, void *v)
  461. {
  462. struct proc_maps_private *priv = m->private;
  463. struct task_struct *task = priv->task;
  464. if (maps_protect && !ptrace_may_attach(task))
  465. return -EACCES;
  466. return show_numa_map(m, v);
  467. }
  468. static struct seq_operations proc_pid_numa_maps_op = {
  469. .start = m_start,
  470. .next = m_next,
  471. .stop = m_stop,
  472. .show = show_numa_map_checked
  473. };
  474. static int numa_maps_open(struct inode *inode, struct file *file)
  475. {
  476. return do_maps_open(inode, file, &proc_pid_numa_maps_op);
  477. }
  478. const struct file_operations proc_numa_maps_operations = {
  479. .open = numa_maps_open,
  480. .read = seq_read,
  481. .llseek = seq_lseek,
  482. .release = seq_release_private,
  483. };
  484. #endif