@@ -2931,15 +2931,6 @@ out_mutex:
 	return ret;
 }
 
-/* Can be overriden by architectures */
-__attribute__((weak)) struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
-	       pud_t *pud, int write)
-{
-	BUG();
-	return NULL;
-}
-
 long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			 struct page **pages, struct vm_area_struct **vmas,
 			 unsigned long *position, unsigned long *nr_pages,
@@ -3169,6 +3160,216 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 	hugetlb_acct_memory(h, -(chg - freed));
 }
 
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+static unsigned long page_table_shareable(struct vm_area_struct *svma,
+				struct vm_area_struct *vma,
+				unsigned long addr, pgoff_t idx)
+{
+	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
+				svma->vm_start;
+	unsigned long sbase = saddr & PUD_MASK;
+	unsigned long s_end = sbase + PUD_SIZE;
+
+	/* Allow segments to share if only one is marked locked */
+	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
+	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
+
+	/*
+	 * match the virtual addresses, permission and the alignment of the
+	 * page table page.
+	 */
+	if (pmd_index(addr) != pmd_index(saddr) ||
+	    vm_flags != svm_flags ||
+	    sbase < svma->vm_start || svma->vm_end < s_end)
+		return 0;
+
+	return saddr;
+}
+
+static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+{
+	unsigned long base = addr & PUD_MASK;
+	unsigned long end = base + PUD_SIZE;
+
+	/*
+	 * check on proper vm_flags and page table alignment
+	 */
+	if (vma->vm_flags & VM_MAYSHARE &&
+	    vma->vm_start <= base && end <= vma->vm_end)
+		return 1;
+	return 0;
+}
+
+/*
+ * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
+ * and returns the corresponding pte. While this is not necessary for the
+ * !shared pmd case because we can allocate the pmd later as well, it makes the
+ * code much cleaner. pmd allocation is essential for the shared case because
+ * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * racing tasks could either miss the sharing (see huge_pte_offset) or select a
+ * bad pmd for sharing.
+ */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+	struct vm_area_struct *vma = find_vma(mm, addr);
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
+			vma->vm_pgoff;
+	struct vm_area_struct *svma;
+	unsigned long saddr;
+	pte_t *spte = NULL;
+	pte_t *pte;
+
+	if (!vma_shareable(vma, addr))
+		return (pte_t *)pmd_alloc(mm, pud, addr);
+
+	mutex_lock(&mapping->i_mmap_mutex);
+	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
+		if (svma == vma)
+			continue;
+
+		saddr = page_table_shareable(svma, vma, addr, idx);
+		if (saddr) {
+			spte = huge_pte_offset(svma->vm_mm, saddr);
+			if (spte) {
+				get_page(virt_to_page(spte));
+				break;
+			}
+		}
+	}
+
+	if (!spte)
+		goto out;
+
+	spin_lock(&mm->page_table_lock);
+	if (pud_none(*pud))
+		pud_populate(mm, pud,
+				(pmd_t *)((unsigned long)spte & PAGE_MASK));
+	else
+		put_page(virt_to_page(spte));
+	spin_unlock(&mm->page_table_lock);
+out:
+	pte = (pte_t *)pmd_alloc(mm, pud, addr);
+	mutex_unlock(&mapping->i_mmap_mutex);
+	return pte;
+}
+
+/*
+ * unmap huge page backed by shared pte.
+ *
+ * Hugetlb pte page is ref counted at the time of mapping. If pte is shared
+ * indicated by page_count > 1, unmap is achieved by clearing pud and
+ * decrementing the ref count. If count == 1, the pte page is not shared.
+ *
+ * called with vma->vm_mm->page_table_lock held.
+ *
+ * returns: 1 successfully unmapped a shared pte page
+ *	    0 the underlying pte page is not shared, or it is the last user
+ */
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	pgd_t *pgd = pgd_offset(mm, *addr);
+	pud_t *pud = pud_offset(pgd, *addr);
+
+	BUG_ON(page_count(virt_to_page(ptep)) == 0);
+	if (page_count(virt_to_page(ptep)) == 1)
+		return 0;
+
+	pud_clear(pud);
+	put_page(virt_to_page(ptep));
+	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
+	return 1;
+}
+#define want_pmd_share()	(1)
+#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+{
+	return NULL;
+}
+#define want_pmd_share()	(0)
+#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+
+#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (pud) {
+		if (sz == PUD_SIZE) {
+			pte = (pte_t *)pud;
+		} else {
+			BUG_ON(sz != PMD_SIZE);
+			if (want_pmd_share() && pud_none(*pud))
+				pte = huge_pmd_share(mm, addr, pud);
+			else
+				pte = (pte_t *)pmd_alloc(mm, pud, addr);
+		}
+	}
+	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
+
+	return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_present(*pgd)) {
+		pud = pud_offset(pgd, addr);
+		if (pud_present(*pud)) {
+			if (pud_huge(*pud))
+				return (pte_t *)pud;
+			pmd = pmd_offset(pud, addr);
+		}
+	}
+	return (pte_t *) pmd;
+}
+
+struct page *
+follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+		pmd_t *pmd, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pmd);
+	if (page)
+		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
+struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+		pud_t *pud, int write)
+{
+	struct page *page;
+
+	page = pte_page(*(pte_t *)pud);
+	if (page)
+		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
+	return page;
+}
+
+#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */
+
+/* Can be overriden by architectures */
+__attribute__((weak)) struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+	       pud_t *pud, int write)
+{
+	BUG();
+	return NULL;
+}
+
+#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
+
 #ifdef CONFIG_MEMORY_FAILURE
 
 /* Should be called in hugetlb_lock */