|
@@ -50,11 +50,69 @@ static unsigned nr_gpages;
|
|
|
|
|
|
#define hugepd_none(hpd) ((hpd).pd == 0)
|
|
|
|
|
|
+#ifdef CONFIG_PPC_BOOK3S_64
|
|
|
+/*
|
|
|
+ * At this point we do the placement change only for BOOK3S 64. This would
|
|
|
+ * possibly work on other subarchs.
|
|
|
+ */
|
|
|
+
|
|
|
+/*
|
|
|
+ * We have PGD_INDEX_SIZE = 12 and PTE_INDEX_SIZE = 8, so that we can have
|
|
|
+ * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD.
|
|
|
+ */
|
|
|
+int pmd_huge(pmd_t pmd)
|
|
|
+{
|
|
|
+ /*
|
|
|
+ * leaf pte for huge page, bottom two bits != 00
|
|
|
+ */
|
|
|
+ return ((pmd_val(pmd) & 0x3) != 0x0);
|
|
|
+}
|
|
|
+
|
|
|
+int pud_huge(pud_t pud)
|
|
|
+{
|
|
|
+ /*
|
|
|
+ * leaf pte for huge page, bottom two bits != 00
|
|
|
+ */
|
|
|
+ return ((pud_val(pud) & 0x3) != 0x0);
|
|
|
+}
|
|
|
+
|
|
|
+int pgd_huge(pgd_t pgd)
|
|
|
+{
|
|
|
+ /*
|
|
|
+ * leaf pte for huge page, bottom two bits != 00
|
|
|
+ */
|
|
|
+ return ((pgd_val(pgd) & 0x3) != 0x0);
|
|
|
+}
|
|
|
+#else
|
|
|
+int pmd_huge(pmd_t pmd)
|
|
|
+{
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+int pud_huge(pud_t pud)
|
|
|
+{
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+int pgd_huge(pgd_t pgd)
|
|
|
+{
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+#endif
|
|
|
+
|
|
|
+/*
|
|
|
+ * We have 4 cases for pgds, puds and pmds:
|
|
|
+ * (1) invalid (all zeroes)
|
|
|
+ * (2) pointer to next table, as normal; bottom 6 bits == 0
|
|
|
+ * (3) leaf pte for huge page, bottom two bits != 00
|
|
|
+ * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table
|
|
|
+ */
|
|
|
pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
|
|
|
{
|
|
|
pgd_t *pg;
|
|
|
pud_t *pu;
|
|
|
pmd_t *pm;
|
|
|
+ pte_t *ret_pte;
|
|
|
hugepd_t *hpdp = NULL;
|
|
|
unsigned pdshift = PGDIR_SHIFT;
|
|
|
|
|
@@ -62,30 +120,43 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
|
|
|
*shift = 0;
|
|
|
|
|
|
pg = pgdir + pgd_index(ea);
|
|
|
- if (is_hugepd(pg)) {
|
|
|
+
|
|
|
+ if (pgd_huge(*pg)) {
|
|
|
+ ret_pte = (pte_t *) pg;
|
|
|
+ goto out;
|
|
|
+ } else if (is_hugepd(pg))
|
|
|
hpdp = (hugepd_t *)pg;
|
|
|
- } else if (!pgd_none(*pg)) {
|
|
|
+ else if (!pgd_none(*pg)) {
|
|
|
pdshift = PUD_SHIFT;
|
|
|
pu = pud_offset(pg, ea);
|
|
|
- if (is_hugepd(pu))
|
|
|
+
|
|
|
+ if (pud_huge(*pu)) {
|
|
|
+ ret_pte = (pte_t *) pu;
|
|
|
+ goto out;
|
|
|
+ } else if (is_hugepd(pu))
|
|
|
hpdp = (hugepd_t *)pu;
|
|
|
else if (!pud_none(*pu)) {
|
|
|
pdshift = PMD_SHIFT;
|
|
|
pm = pmd_offset(pu, ea);
|
|
|
- if (is_hugepd(pm))
|
|
|
+
|
|
|
+ if (pmd_huge(*pm)) {
|
|
|
+ ret_pte = (pte_t *) pm;
|
|
|
+ goto out;
|
|
|
+ } else if (is_hugepd(pm))
|
|
|
hpdp = (hugepd_t *)pm;
|
|
|
- else if (!pmd_none(*pm)) {
|
|
|
+ else if (!pmd_none(*pm))
|
|
|
return pte_offset_kernel(pm, ea);
|
|
|
- }
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
if (!hpdp)
|
|
|
return NULL;
|
|
|
|
|
|
+ ret_pte = hugepte_offset(hpdp, ea, pdshift);
|
|
|
+ pdshift = hugepd_shift(*hpdp);
|
|
|
+out:
|
|
|
if (shift)
|
|
|
- *shift = hugepd_shift(*hpdp);
|
|
|
- return hugepte_offset(hpdp, ea, pdshift);
|
|
|
+ *shift = pdshift;
|
|
|
+ return ret_pte;
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
|
|
|
|
|
@@ -165,6 +236,61 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
|
|
|
#define HUGEPD_PUD_SHIFT PMD_SHIFT
|
|
|
#endif
|
|
|
|
|
|
+#ifdef CONFIG_PPC_BOOK3S_64
|
|
|
+/*
|
|
|
+ * At this point we do the placement change only for BOOK3S 64. This would
|
|
|
+ * possibly work on other subarchs.
|
|
|
+ */
|
|
|
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
|
|
|
+{
|
|
|
+ pgd_t *pg;
|
|
|
+ pud_t *pu;
|
|
|
+ pmd_t *pm;
|
|
|
+ hugepd_t *hpdp = NULL;
|
|
|
+ unsigned pshift = __ffs(sz);
|
|
|
+ unsigned pdshift = PGDIR_SHIFT;
|
|
|
+
|
|
|
+ addr &= ~(sz-1);
|
|
|
+ pg = pgd_offset(mm, addr);
|
|
|
+
|
|
|
+ if (pshift == PGDIR_SHIFT)
|
|
|
+ /* 16GB huge page */
|
|
|
+ return (pte_t *) pg;
|
|
|
+ else if (pshift > PUD_SHIFT)
|
|
|
+ /*
|
|
|
+ * We need to use hugepd table
|
|
|
+ */
|
|
|
+ hpdp = (hugepd_t *)pg;
|
|
|
+ else {
|
|
|
+ pdshift = PUD_SHIFT;
|
|
|
+ pu = pud_alloc(mm, pg, addr);
|
|
|
+ if (pshift == PUD_SHIFT)
|
|
|
+ return (pte_t *)pu;
|
|
|
+ else if (pshift > PMD_SHIFT)
|
|
|
+ hpdp = (hugepd_t *)pu;
|
|
|
+ else {
|
|
|
+ pdshift = PMD_SHIFT;
|
|
|
+ pm = pmd_alloc(mm, pu, addr);
|
|
|
+ if (pshift == PMD_SHIFT)
|
|
|
+ /* 16MB hugepage */
|
|
|
+ return (pte_t *)pm;
|
|
|
+ else
|
|
|
+ hpdp = (hugepd_t *)pm;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (!hpdp)
|
|
|
+ return NULL;
|
|
|
+
|
|
|
+ BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
|
|
|
+
|
|
|
+ if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
|
|
|
+ return NULL;
|
|
|
+
|
|
|
+ return hugepte_offset(hpdp, addr, pdshift);
|
|
|
+}
|
|
|
+
|
|
|
+#else
|
|
|
+
|
|
|
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
|
|
|
{
|
|
|
pgd_t *pg;
|
|
@@ -202,6 +328,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
|
|
|
|
|
|
return hugepte_offset(hpdp, addr, pdshift);
|
|
|
}
|
|
|
+#endif
|
|
|
|
|
|
#ifdef CONFIG_PPC_FSL_BOOK3E
|
|
|
/* Build list of addresses of gigantic pages. This function is used in early
|
|
@@ -465,7 +592,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
|
|
|
do {
|
|
|
pmd = pmd_offset(pud, addr);
|
|
|
next = pmd_addr_end(addr, end);
|
|
|
- if (pmd_none(*pmd))
|
|
|
+ if (pmd_none_or_clear_bad(pmd))
|
|
|
continue;
|
|
|
#ifdef CONFIG_PPC_FSL_BOOK3E
|
|
|
/*
|
|
@@ -618,16 +745,6 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
|
|
|
return page;
|
|
|
}
|
|
|
|
|
|
-int pmd_huge(pmd_t pmd)
|
|
|
-{
|
|
|
- return 0;
|
|
|
-}
|
|
|
-
|
|
|
-int pud_huge(pud_t pud)
|
|
|
-{
|
|
|
- return 0;
|
|
|
-}
|
|
|
-
|
|
|
struct page *
|
|
|
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
|
|
|
pmd_t *pmd, int write)
|
|
@@ -636,8 +753,8 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
|
|
|
return NULL;
|
|
|
}
|
|
|
|
|
|
-static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
|
|
|
- unsigned long end, int write, struct page **pages, int *nr)
|
|
|
+int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
|
|
|
+ unsigned long end, int write, struct page **pages, int *nr)
|
|
|
{
|
|
|
unsigned long mask;
|
|
|
unsigned long pte_end;
|
|
@@ -873,11 +990,16 @@ static int __init hugetlbpage_init(void)
|
|
|
pdshift = PUD_SHIFT;
|
|
|
else
|
|
|
pdshift = PGDIR_SHIFT;
|
|
|
-
|
|
|
- pgtable_cache_add(pdshift - shift, NULL);
|
|
|
- if (!PGT_CACHE(pdshift - shift))
|
|
|
- panic("hugetlbpage_init(): could not create "
|
|
|
- "pgtable cache for %d bit pagesize\n", shift);
|
|
|
+ /*
|
|
|
+ * If pdshift and shift have the same value, we don't
|
|
|
+ * use pgt cache for hugepd.
|
|
|
+ */
|
|
|
+ if (pdshift != shift) {
|
|
|
+ pgtable_cache_add(pdshift - shift, NULL);
|
|
|
+ if (!PGT_CACHE(pdshift - shift))
|
|
|
+ panic("hugetlbpage_init(): could not create "
|
|
|
+ "pgtable cache for %d bit pagesize\n", shift);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/* Set default large page size. Currently, we pick 16M or 1M
|