@@ -66,12 +66,6 @@ static inline void pgd_list_del(pgd_t *pgd)
 static void pgd_ctor(void *p)
 {
 	pgd_t *pgd = p;
-	unsigned long flags;
-
-	/* Clear usermode parts of PGD */
-	memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
-
-	spin_lock_irqsave(&pgd_lock, flags);
 
 	/* If the pgd points to a shared pagetable level (either the
 	   ptes in non-PAE, or shared PMD in PAE), then just copy the
@@ -91,8 +85,6 @@ static void pgd_ctor(void *p)
 	/* list required to sync kernel mapping updates */
 	if (!SHARED_KERNEL_PMD)
 		pgd_list_add(pgd);
-
-	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
 static void pgd_dtor(void *pgd)
@@ -119,6 +111,72 @@ static void pgd_dtor(void *pgd)
  */
 
 #ifdef CONFIG_X86_PAE
+/*
+ * In PAE mode, we need to do a cr3 reload (=tlb flush) when
+ * updating the top-level pagetable entries to guarantee the
+ * processor notices the update.  Since this is expensive, and
+ * all 4 top-level entries are used almost immediately in a
+ * new process's life, we just pre-populate them here.
+ *
+ * Also, if we're in a paravirt environment where the kernel pmd is
+ * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
+ * and initialize the kernel pmds here.
+ */
+#define PREALLOCATED_PMDS	UNSHARED_PTRS_PER_PGD
+
+void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
+{
+	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
+
+	/* Note: almost everything apart from _PAGE_PRESENT is
+	   reserved at the pmd (PDPT) level. */
+	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
+
+	/*
+	 * According to Intel App note "TLBs, Paging-Structure Caches,
+	 * and Their Invalidation", April 2007, document 317080-001,
+	 * section 8.1: in PAE mode we explicitly have to flush the
+	 * TLB via cr3 if the top-level pgd is changed...
+	 */
+	if (mm == current->active_mm)
+		write_cr3(read_cr3());
+}
+#else  /* !CONFIG_X86_PAE */
+
+/* No need to prepopulate any pagetable entries in non-PAE modes. */
+#define PREALLOCATED_PMDS	0
+
+#endif	/* CONFIG_X86_PAE */
+
+static void free_pmds(pmd_t *pmds[])
+{
+	int i;
+
+	for(i = 0; i < PREALLOCATED_PMDS; i++)
+		if (pmds[i])
+			free_page((unsigned long)pmds[i]);
+}
+
+static int preallocate_pmds(pmd_t *pmds[])
+{
+	int i;
+	bool failed = false;
+
+	for(i = 0; i < PREALLOCATED_PMDS; i++) {
+		pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+		if (pmd == NULL)
+			failed = true;
+		pmds[i] = pmd;
+	}
+
+	if (failed) {
+		free_pmds(pmds);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /*
  * Mop up any pmd pages which may still be attached to the pgd.
  * Normally they will be freed by munmap/exit_mmap, but any pmd we
@@ -129,7 +187,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
 {
 	int i;
 
-	for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
+	for(i = 0; i < PREALLOCATED_PMDS; i++) {
 		pgd_t pgd = pgdp[i];
 
 		if (pgd_val(pgd) != 0) {
@@ -143,32 +201,17 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
 	}
 }
 
-/*
- * In PAE mode, we need to do a cr3 reload (=tlb flush) when
- * updating the top-level pagetable entries to guarantee the
- * processor notices the update.  Since this is expensive, and
- * all 4 top-level entries are used almost immediately in a
- * new process's life, we just pre-populate them here.
- *
- * Also, if we're in a paravirt environment where the kernel pmd is
- * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
- * and initialize the kernel pmds here.
- */
-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
 {
 	pud_t *pud;
 	unsigned long addr;
 	int i;
 
 	pud = pud_offset(pgd, 0);
-	for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
-	     i++, pud++, addr += PUD_SIZE) {
-		pmd_t *pmd = pmd_alloc_one(mm, addr);
-
-		if (!pmd) {
-			pgd_mop_up_pmds(mm, pgd);
-			return 0;
-		}
 
+	for (addr = i = 0; i < PREALLOCATED_PMDS;
+	     i++, pud++, addr += PUD_SIZE) {
+		pmd_t *pmd = pmds[i];
+
 		if (i >= KERNEL_PGD_BOUNDARY)
 			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
@@ -176,57 +219,47 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
 
 		pud_populate(mm, pud, pmd);
 	}
-
-	return 1;
 }
 
-void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
+pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
+	pgd_t *pgd;
+	pmd_t *pmds[PREALLOCATED_PMDS];
+	unsigned long flags;
 
-	/* Note: almost everything apart from _PAGE_PRESENT is
-	   reserved at the pmd (PDPT) level. */
-	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
+	pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+
+	if (pgd == NULL)
+		goto out;
+
+	mm->pgd = pgd;
+
+	if (preallocate_pmds(pmds) != 0)
+		goto out_free_pgd;
+
+	if (paravirt_pgd_alloc(mm) != 0)
+		goto out_free_pmds;
 
 	/*
-	 * According to Intel App note "TLBs, Paging-Structure Caches,
-	 * and Their Invalidation", April 2007, document 317080-001,
-	 * section 8.1: in PAE mode we explicitly have to flush the
-	 * TLB via cr3 if the top-level pgd is changed...
+	 * Make sure that pre-populating the pmds is atomic with
+	 * respect to anything walking the pgd_list, so that they
+	 * never see a partially populated pgd.
 	 */
-	if (mm == current->active_mm)
-		write_cr3(read_cr3());
-}
-#else  /* !CONFIG_X86_PAE */
-/* No need to prepopulate any pagetable entries in non-PAE modes. */
-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
-{
-	return 1;
-}
-
-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
-{
-}
-#endif	/* CONFIG_X86_PAE */
+	spin_lock_irqsave(&pgd_lock, flags);
 
-pgd_t *pgd_alloc(struct mm_struct *mm)
-{
-	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	pgd_ctor(pgd);
+	pgd_prepopulate_pmd(mm, pgd, pmds);
 
-	/* so that alloc_pmd can use it */
-	mm->pgd = pgd;
-	if (pgd) {
-		pgd_ctor(pgd);
-
-		if (paravirt_pgd_alloc(mm) != 0 ||
-		    !pgd_prepopulate_pmd(mm, pgd)) {
-			pgd_dtor(pgd);
-			free_page((unsigned long)pgd);
-			pgd = NULL;
-		}
-	}
+	spin_unlock_irqrestore(&pgd_lock, flags);
 
 	return pgd;
+
+out_free_pmds:
+	free_pmds(pmds);
+out_free_pgd:
+	free_page((unsigned long)pgd);
+out:
+	return NULL;
 }
 
 void pgd_free(struct mm_struct *mm, pgd_t *pgd)