@@ -5,7 +5,10 @@
 #include <linux/highmem.h>
 #include <linux/ptrace.h>
 #include <linux/pagemap.h>
+#include <linux/ptrace.h>
 #include <linux/mempolicy.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
 
 #include <asm/elf.h>
 #include <asm/uaccess.h>
@@ -519,6 +522,202 @@ const struct file_operations proc_clear_refs_operations = {
 	.write = clear_refs_write,
 };
 
+struct pagemapread {
+	char __user *out, *end;
+};
+
+#define PM_ENTRY_BYTES sizeof(u64)
+#define PM_RESERVED_BITS 3
+#define PM_RESERVED_OFFSET (64 - PM_RESERVED_BITS)
+#define PM_RESERVED_MASK (((1LL<<PM_RESERVED_BITS)-1) << PM_RESERVED_OFFSET)
+#define PM_SPECIAL(nr) (((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK)
+#define PM_NOT_PRESENT PM_SPECIAL(1LL)
+#define PM_SWAP PM_SPECIAL(2LL)
+#define PM_END_OF_BUFFER 1
+
+static int add_to_pagemap(unsigned long addr, u64 pfn,
+			  struct pagemapread *pm)
+{
+	/*
+	 * Make sure there's room in the buffer for an
+	 * entire entry. Otherwise, only copy part of
+	 * the pfn.
+	 */
+	if (pm->out + PM_ENTRY_BYTES >= pm->end) {
+		if (copy_to_user(pm->out, &pfn, pm->end - pm->out))
+			return -EFAULT;
+		pm->out = pm->end;
+		return PM_END_OF_BUFFER;
+	}
+
+	if (put_user(pfn, pm->out))
+		return -EFAULT;
+	pm->out += PM_ENTRY_BYTES;
+	return 0;
+}
+
+static int pagemap_pte_hole(unsigned long start, unsigned long end,
+			    void *private)
+{
+	struct pagemapread *pm = private;
+	unsigned long addr;
+	int err = 0;
+	for (addr = start; addr < end; addr += PAGE_SIZE) {
+		err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+static u64 swap_pte_to_pagemap_entry(pte_t pte)
+{
+	swp_entry_t e = pte_to_swp_entry(pte);
+	return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
+}
+
+static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+			     void *private)
+{
+	struct pagemapread *pm = private;
+	pte_t *pte;
+	int err = 0;
+
+	for (; addr != end; addr += PAGE_SIZE) {
+		u64 pfn = PM_NOT_PRESENT;
+		pte = pte_offset_map(pmd, addr);
+		if (is_swap_pte(*pte))
+			pfn = swap_pte_to_pagemap_entry(*pte);
+		else if (pte_present(*pte))
+			pfn = pte_pfn(*pte);
+		/* unmap so we're not in atomic when we copy to userspace */
+		pte_unmap(pte);
+		err = add_to_pagemap(addr, pfn, pm);
+		if (err)
+			return err;
+	}
+
+	cond_resched();
+
+	return err;
+}
+
+static struct mm_walk pagemap_walk = {
+	.pmd_entry = pagemap_pte_range,
+	.pte_hole = pagemap_pte_hole
+};
+
+/*
+ * /proc/pid/pagemap - an array mapping virtual pages to pfns
+ *
+ * For each page in the address space, this file contains one 64-bit
+ * entry representing the corresponding physical page frame number
+ * (PFN) if the page is present. If the page is not present but has
+ * been swapped out, an encoding of the swap file number and the
+ * page's offset into the swap file is returned. If no page is
+ * present at all, PM_NOT_PRESENT is returned. This allows determining
+ * precisely which pages are mapped (or in swap) and comparing mapped
+ * pages between processes.
+ *
+ * Efficient users of this interface will use /proc/pid/maps to
+ * determine which areas of memory are actually mapped and llseek to
+ * skip over unmapped regions.
+ */
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+	struct page **pages, *page;
+	unsigned long uaddr, uend;
+	struct mm_struct *mm;
+	struct pagemapread pm;
+	int pagecount;
+	int ret = -ESRCH;
+
+	if (!task)
+		goto out;
+
+	ret = -EACCES;
+	if (!ptrace_may_attach(task))
+		goto out_task;
+
+	ret = -EINVAL;
+	/* file position must be aligned */
+	if (*ppos % PM_ENTRY_BYTES)
+		goto out_task;
+
+	ret = 0;
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out_task;
+
+	ret = -ENOMEM;
+	uaddr = (unsigned long)buf & PAGE_MASK;
+	uend = (unsigned long)(buf + count);
+	pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
+	pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out_task;
+
+	down_read(&current->mm->mmap_sem);
+	ret = get_user_pages(current, current->mm, uaddr, pagecount,
+			     1, 0, pages, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (ret < 0)
+		goto out_free;
+
+	pm.out = buf;
+	pm.end = buf + count;
+
+	if (!ptrace_may_attach(task)) {
+		ret = -EIO;
+	} else {
+		unsigned long src = *ppos;
+		unsigned long svpfn = src / PM_ENTRY_BYTES;
+		unsigned long start_vaddr = svpfn << PAGE_SHIFT;
+		unsigned long end_vaddr = TASK_SIZE_OF(task);
+
+		/* watch out for wraparound */
+		if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
+			start_vaddr = end_vaddr;
+
+		/*
+		 * The odds are that this will stop walking way
+		 * before end_vaddr, because the length of the
+		 * user buffer is tracked in "pm", and the walk
+		 * will stop when we hit the end of the buffer.
+		 */
+		ret = walk_page_range(mm, start_vaddr, end_vaddr,
+				      &pagemap_walk, &pm);
+		if (ret == PM_END_OF_BUFFER)
+			ret = 0;
+		/* don't need mmap_sem for these, but this looks cleaner */
+		*ppos += pm.out - buf;
+		if (!ret)
+			ret = pm.out - buf;
+	}
+
+	for (; pagecount; pagecount--) {
+		page = pages[pagecount-1];
+		if (!PageReserved(page))
+			SetPageDirty(page);
+		page_cache_release(page);
+	}
+	mmput(mm);
+out_free:
+	kfree(pages);
+out_task:
+	put_task_struct(task);
+out:
+	return ret;
+}
+
+const struct file_operations proc_pagemap_operations = {
+	.llseek = mem_lseek, /* borrow this */
+	.read = pagemap_read,
+};
+
 #ifdef CONFIG_NUMA
 extern int show_numa_map(struct seq_file *m, void *v);
 
@@ -552,4 +751,3 @@ const struct file_operations proc_numa_maps_operations = {
 	.release = seq_release_private,
 };
 #endif
-
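
A minimal userspace sketch (illustrative only, not part of the patch) of how the interface documented above is consumed: seek to (vaddr / page size) * PM_ENTRY_BYTES and read one 64-bit entry per virtual page. The constants mirror the PM_* macros added above; MAX_SWAPFILES_SHIFT is assumed to be 5, matching <linux/swap.h> of this era, and later kernels changed the pagemap encoding, so the decoding below matches only this patch. Note that PM_NOT_PRESENT and PM_SWAP evaluate to the same value here (all reserved bits set), so a swapped page is recognised by its non-zero type/offset bits rather than by the reserved bits alone.

/* pagemap-peek.c: illustrative only, not part of the patch */
#include <sys/types.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define PM_ENTRY_BYTES		8
#define PM_RESERVED_BITS	3
#define PM_RESERVED_OFFSET	(64 - PM_RESERVED_BITS)
#define PM_RESERVED_MASK	(((1ULL << PM_RESERVED_BITS) - 1) << PM_RESERVED_OFFSET)
#define PM_SPECIAL(nr)		(((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK)
#define PM_NOT_PRESENT		PM_SPECIAL(1ULL)
#define MAX_SWAPFILES_SHIFT	5	/* assumed; must match the kernel's <linux/swap.h> */

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	char *buf = malloc(page_size);
	uint64_t entry;
	off_t offset;
	int fd;

	memset(buf, 1, page_size);	/* touch the page so it is faulted in */

	fd = open("/proc/self/pagemap", O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* one u64 per virtual page: the entry for buf lives at (vaddr / page_size) * 8 */
	offset = (off_t)((uintptr_t)buf / page_size) * PM_ENTRY_BYTES;
	if (pread(fd, &entry, sizeof(entry), offset) != sizeof(entry)) {
		perror("pread");
		return 1;
	}
	close(fd);

	if (entry == PM_NOT_PRESENT)
		printf("not present\n");
	else if ((entry & PM_RESERVED_MASK) == PM_RESERVED_MASK)
		printf("swapped: type %llu, offset %llu\n",
		       (unsigned long long)(entry & ((1ULL << MAX_SWAPFILES_SHIFT) - 1)),
		       (unsigned long long)((entry & ~PM_RESERVED_MASK) >> MAX_SWAPFILES_SHIFT));
	else
		printf("present: pfn 0x%llx\n", (unsigned long long)entry);

	free(buf);
	return 0;
}

Build with something like "cc -o pagemap-peek pagemap-peek.c" and run it against a kernel carrying this patch; reading another process's pagemap additionally requires passing the ptrace_may_attach() check above.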