/*
 * PPC Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/mm.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/export.h>
#include <linux/of_fdt.h>
#include <linux/memblock.h>
#include <linux/bootmem.h>
#include <linux/moduleparam.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/setup.h>

#define PAGE_SHIFT_64K	16
#define PAGE_SHIFT_16M	24
#define PAGE_SHIFT_16G	34

unsigned int HPAGE_SHIFT;

/*
 * Tracks gpages after the device tree is scanned and before the
 * huge_boot_pages list is ready.  On non-Freescale implementations, this is
 * just used to track 16G pages and so is a single array.  FSL-based
 * implementations may have more than one gpage size, so we need multiple
 * arrays
 */
#ifdef CONFIG_PPC_FSL_BOOK3E
#define MAX_NUMBER_GPAGES	128
struct psize_gpages {
        u64 gpage_list[MAX_NUMBER_GPAGES];
        unsigned int nr_gpages;
};
static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
#else
#define MAX_NUMBER_GPAGES	1024
static u64 gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;
#endif

static inline int shift_to_mmu_psize(unsigned int shift)
{
        int psize;

        for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
                if (mmu_psize_defs[psize].shift == shift)
                        return psize;
        return -1;
}

static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
{
        if (mmu_psize_defs[mmu_psize].shift)
                return mmu_psize_defs[mmu_psize].shift;
        BUG();
}

#define hugepd_none(hpd)	((hpd).pd == 0)
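
/*
 * Walk the page tables for @ea and return a pointer to the (huge)pte that
 * maps it, or NULL if nothing is present.  If @shift is non-NULL it is set
 * to the page-size shift of a huge mapping, or left at 0 for a normal pte.
 */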
pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
{
        pgd_t *pg;
        pud_t *pu;
        pmd_t *pm;
        hugepd_t *hpdp = NULL;
        unsigned pdshift = PGDIR_SHIFT;

        if (shift)
                *shift = 0;

        pg = pgdir + pgd_index(ea);
        if (is_hugepd(pg)) {
                hpdp = (hugepd_t *)pg;
        } else if (!pgd_none(*pg)) {
                pdshift = PUD_SHIFT;
                pu = pud_offset(pg, ea);
                if (is_hugepd(pu))
                        hpdp = (hugepd_t *)pu;
                else if (!pud_none(*pu)) {
                        pdshift = PMD_SHIFT;
                        pm = pmd_offset(pu, ea);
                        if (is_hugepd(pm))
                                hpdp = (hugepd_t *)pm;
                        else if (!pmd_none(*pm)) {
                                return pte_offset_kernel(pm, ea);
                        }
                }
        }

        if (!hpdp)
                return NULL;

        if (shift)
                *shift = hugepd_shift(*hpdp);
        return hugepte_offset(hpdp, ea, pdshift);
}
EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
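
/* hugetlb core lookup hook: the generic walker above, ignoring the shift. */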
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
}
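
/*
 * Allocate a fresh hugepte page and install it at *hpdp.  On FSL Book3E a
 * single hugepage is covered by several consecutive higher-level entries,
 * so every one of them is pointed at the new page (and unwound again if an
 * already-populated entry is found part way through).
 */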
static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
                           unsigned long address, unsigned pdshift, unsigned pshift)
{
        struct kmem_cache *cachep;
        pte_t *new;

#ifdef CONFIG_PPC_FSL_BOOK3E
        int i;
        int num_hugepd = 1 << (pshift - pdshift);
        cachep = hugepte_cache;
#else
        cachep = PGT_CACHE(pdshift - pshift);
#endif

        new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);

        BUG_ON(pshift > HUGEPD_SHIFT_MASK);
        BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);

        if (! new)
                return -ENOMEM;

        spin_lock(&mm->page_table_lock);
#ifdef CONFIG_PPC_FSL_BOOK3E
        /*
         * We have multiple higher-level entries that point to the same
         * actual pte location.  Fill in each as we go and backtrack on error.
         * We need all of these so the DTLB pgtable walk code can find the
         * right higher-level entry without knowing if it's a hugepage or not.
         */
        for (i = 0; i < num_hugepd; i++, hpdp++) {
                if (unlikely(!hugepd_none(*hpdp)))
                        break;
                else
                        hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
        }
        /* If we bailed from the for loop early, an error occurred, clean up */
        if (i < num_hugepd) {
                for (i = i - 1 ; i >= 0; i--, hpdp--)
                        hpdp->pd = 0;
                kmem_cache_free(cachep, new);
        }
#else
        if (!hugepd_none(*hpdp))
                kmem_cache_free(cachep, new);
        else
                hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#endif
        spin_unlock(&mm->page_table_lock);
        return 0;
}

/*
 * These macros define how to determine which level of the page table holds
 * the hpdp.
 */
#ifdef CONFIG_PPC_FSL_BOOK3E
#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
#define HUGEPD_PUD_SHIFT PUD_SHIFT
#else
#define HUGEPD_PGD_SHIFT PUD_SHIFT
#define HUGEPD_PUD_SHIFT PMD_SHIFT
#endif
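
/*
 * Find the hugepd slot for @addr at the level implied by the page size
 * (pgd, pud or pmd, per the macros above), allocating intermediate tables
 * as needed, and return a pointer to the hugepte within it.  The hugepte
 * page itself is allocated via __hugepte_alloc() if the slot is empty.
 */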
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
        pgd_t *pg;
        pud_t *pu;
        pmd_t *pm;
        hugepd_t *hpdp = NULL;
        unsigned pshift = __ffs(sz);
        unsigned pdshift = PGDIR_SHIFT;

        addr &= ~(sz-1);

        pg = pgd_offset(mm, addr);

        if (pshift >= HUGEPD_PGD_SHIFT) {
                hpdp = (hugepd_t *)pg;
        } else {
                pdshift = PUD_SHIFT;
                pu = pud_alloc(mm, pg, addr);
                if (pshift >= HUGEPD_PUD_SHIFT) {
                        hpdp = (hugepd_t *)pu;
                } else {
                        pdshift = PMD_SHIFT;
                        pm = pmd_alloc(mm, pu, addr);
                        hpdp = (hugepd_t *)pm;
                }
        }

        if (!hpdp)
                return NULL;

        BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));

        if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
                return NULL;

        return hugepte_offset(hpdp, addr, pdshift);
}

#ifdef CONFIG_PPC_FSL_BOOK3E
/* Build list of addresses of gigantic pages.  This function is used in early
 * boot before the buddy or bootmem allocator is setup.
 */
void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
        unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
        int i;

        if (addr == 0)
                return;

        gpage_freearray[idx].nr_gpages = number_of_pages;

        for (i = 0; i < number_of_pages; i++) {
                gpage_freearray[idx].gpage_list[i] = addr;
                addr += page_size;
        }
}

/*
 * Moves the gigantic page addresses from the temporary list to the
 * huge_boot_pages list.
 */
int alloc_bootmem_huge_page(struct hstate *hstate)
{
        struct huge_bootmem_page *m;
        int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT);
        int nr_gpages = gpage_freearray[idx].nr_gpages;

        if (nr_gpages == 0)
                return 0;

#ifdef CONFIG_HIGHMEM
        /*
         * If gpages can be in highmem we can't use the trick of storing the
         * data structure in the page; allocate space for this
         */
        m = alloc_bootmem(sizeof(struct huge_bootmem_page));
        m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
#else
        m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
#endif

        list_add(&m->list, &huge_boot_pages);
        gpage_freearray[idx].nr_gpages = nr_gpages;
        gpage_freearray[idx].gpage_list[nr_gpages] = 0;
        m->hstate = hstate;

        return 1;
}

/*
 * Scan the command line hugepagesz= options for gigantic pages; store those in
 * a list that we use to allocate the memory once all options are parsed.
 */
unsigned long gpage_npages[MMU_PAGE_COUNT];

static int __init do_gpage_early_setup(char *param, char *val,
                                       const char *unused)
{
        static phys_addr_t size;
        unsigned long npages;

        /*
         * The hugepagesz and hugepages cmdline options are interleaved.  We
         * use the size variable to keep track of whether or not this was done
         * properly and skip over instances where it is incorrect.  Other
         * command-line parsing code will issue warnings, so we don't need to.
         */
        if ((strcmp(param, "default_hugepagesz") == 0) ||
            (strcmp(param, "hugepagesz") == 0)) {
                size = memparse(val, NULL);
        } else if (strcmp(param, "hugepages") == 0) {
                if (size != 0) {
                        if (sscanf(val, "%lu", &npages) <= 0)
                                npages = 0;
                        gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
                        size = 0;
                }
        }
        return 0;
}

/*
 * This function allocates physical space for pages that are larger than the
 * buddy allocator can handle.  We want to allocate these in highmem because
 * the amount of lowmem is limited.  This means that this function MUST be
 * called before lowmem_end_addr is set up in MMU_init() in order for the lmb
 * allocator to grab highmem.
 */
void __init reserve_hugetlb_gpages(void)
{
        static __initdata char cmdline[COMMAND_LINE_SIZE];
        phys_addr_t size, base;
        int i;

        strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
        parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
                   &do_gpage_early_setup);

        /*
         * Walk gpage list in reverse, allocating larger page sizes first.
         * Skip over unsupported sizes, or sizes that have 0 gpages allocated.
         * When we reach the point in the list where pages are no longer
         * considered gpages, we're done.
         */
        for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
                if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
                        continue;
                else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
                        break;

                size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
                base = memblock_alloc_base(size * gpage_npages[i], size,
                                           MEMBLOCK_ALLOC_ANYWHERE);
                add_gpage(base, size, gpage_npages[i]);
        }
}
#else /* !PPC_FSL_BOOK3E */

/* Build list of addresses of gigantic pages.  This function is used in early
 * boot before the buddy or bootmem allocator is setup.
 */
void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
        if (!addr)
                return;
        while (number_of_pages > 0) {
                gpage_freearray[nr_gpages] = addr;
                nr_gpages++;
                number_of_pages--;
                addr += page_size;
        }
}

/* Moves the gigantic page addresses from the temporary list to the
 * huge_boot_pages list.
 */
int alloc_bootmem_huge_page(struct hstate *hstate)
{
        struct huge_bootmem_page *m;

        if (nr_gpages == 0)
                return 0;
        m = phys_to_virt(gpage_freearray[--nr_gpages]);
        gpage_freearray[nr_gpages] = 0;
        list_add(&m->list, &huge_boot_pages);
        m->hstate = hstate;
        return 1;
}
#endif

int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
        return 0;
}

#ifdef CONFIG_PPC_FSL_BOOK3E
#define HUGEPD_FREELIST_SIZE \
        ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))

struct hugepd_freelist {
        struct rcu_head rcu;
        unsigned int index;
        void *ptes[0];
};

static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);

static void hugepd_free_rcu_callback(struct rcu_head *head)
{
        struct hugepd_freelist *batch =
                container_of(head, struct hugepd_freelist, rcu);
        unsigned int i;

        for (i = 0; i < batch->index; i++)
                kmem_cache_free(hugepte_cache, batch->ptes[i]);

        free_page((unsigned long)batch);
}
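
/*
 * Free a hugepte page from the TLB gather path.  If the mm has no other
 * users or is only in use on this CPU, the page is freed immediately;
 * otherwise it is added to a per-CPU batch that is released through
 * call_rcu_sched() once the batch page fills up.
 */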
static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
{
        struct hugepd_freelist **batchp;

        batchp = &__get_cpu_var(hugepd_freelist_cur);

        if (atomic_read(&tlb->mm->mm_users) < 2 ||
            cpumask_equal(mm_cpumask(tlb->mm),
                          cpumask_of(smp_processor_id()))) {
                kmem_cache_free(hugepte_cache, hugepte);
                return;
        }

        if (*batchp == NULL) {
                *batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
                (*batchp)->index = 0;
        }

        (*batchp)->ptes[(*batchp)->index++] = hugepte;
        if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
                call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
                *batchp = NULL;
        }
}
#endif
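
/*
 * If the floor/ceiling checks show the hugepd is fully covered by this
 * unmap, clear the entry (or, on FSL, the whole run of entries covering one
 * hugepage) and free the hugepte page it pointed to.
 */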
static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
                              unsigned long start, unsigned long end,
                              unsigned long floor, unsigned long ceiling)
{
        pte_t *hugepte = hugepd_page(*hpdp);
        int i;

        unsigned long pdmask = ~((1UL << pdshift) - 1);
        unsigned int num_hugepd = 1;

#ifdef CONFIG_PPC_FSL_BOOK3E
        /* Note: On fsl the hpdp may be the first of several */
        num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
#else
        unsigned int shift = hugepd_shift(*hpdp);
#endif

        start &= pdmask;
        if (start < floor)
                return;
        if (ceiling) {
                ceiling &= pdmask;
                if (! ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                return;

        for (i = 0; i < num_hugepd; i++, hpdp++)
                hpdp->pd = 0;

        tlb->need_flush = 1;

#ifdef CONFIG_PPC_FSL_BOOK3E
        hugepd_free(tlb, hugepte);
#else
        pgtable_free_tlb(tlb, hugepte, pdshift - shift);
#endif
}
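
/*
 * Walk the pmd entries covering [addr, end), freeing any hugepds found, and
 * then free the pmd page itself if the whole pud span lies within
 * floor/ceiling.
 */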
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
                                   unsigned long addr, unsigned long end,
                                   unsigned long floor, unsigned long ceiling)
{
        pmd_t *pmd;
        unsigned long next;
        unsigned long start;

        start = addr;
        do {
                pmd = pmd_offset(pud, addr);
                next = pmd_addr_end(addr, end);
                if (pmd_none(*pmd))
                        continue;
#ifdef CONFIG_PPC_FSL_BOOK3E
                /*
                 * Increment next by the size of the huge mapping since
                 * there may be more than one entry at this level for a
                 * single hugepage, but all of them point to
                 * the same kmem cache that holds the hugepte.
                 */
                next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
#endif
                free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
                                  addr, next, floor, ceiling);
        } while (addr = next, addr != end);

        start &= PUD_MASK;
        if (start < floor)
                return;
        if (ceiling) {
                ceiling &= PUD_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                return;

        pmd = pmd_offset(pud, start);
        pud_clear(pud);
        pmd_free_tlb(tlb, pmd, start);
}
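
/*
 * As above, one level up: walk the pud entries, descending into pmd tables
 * for normal entries and freeing hugepds directly, then free the pud page
 * if fully covered by floor/ceiling.
 */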
static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
                                   unsigned long addr, unsigned long end,
                                   unsigned long floor, unsigned long ceiling)
{
        pud_t *pud;
        unsigned long next;
        unsigned long start;

        start = addr;
        do {
                pud = pud_offset(pgd, addr);
                next = pud_addr_end(addr, end);
                if (!is_hugepd(pud)) {
                        if (pud_none_or_clear_bad(pud))
                                continue;
                        hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
                                               ceiling);
                } else {
#ifdef CONFIG_PPC_FSL_BOOK3E
                        /*
                         * Increment next by the size of the huge mapping since
                         * there may be more than one entry at this level for a
                         * single hugepage, but all of them point to
                         * the same kmem cache that holds the hugepte.
                         */
                        next = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
#endif
                        free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
                                          addr, next, floor, ceiling);
                }
        } while (addr = next, addr != end);

        start &= PGDIR_MASK;
        if (start < floor)
                return;
        if (ceiling) {
                ceiling &= PGDIR_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                return;

        pud = pud_offset(pgd, start);
        pgd_clear(pgd);
        pud_free_tlb(tlb, pud, start);
}

/*
 * This function frees user-level page tables of a process.
 *
 * Must be called with pagetable lock held.
 */
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
                            unsigned long addr, unsigned long end,
                            unsigned long floor, unsigned long ceiling)
{
        pgd_t *pgd;
        unsigned long next;

        /*
         * Because there are a number of different possible pagetable
         * layouts for hugepage ranges, we limit knowledge of how
         * things should be laid out to the allocation path
         * (huge_pte_alloc(), above).  Everything else works out the
         * structure as it goes from information in the hugepd
         * pointers.  That means that we can't here use the
         * optimization used in the normal page free_pgd_range(), of
         * checking whether we're actually covering a large enough
         * range to have to do anything at the top level of the walk
         * instead of at the bottom.
         *
         * To make sense of this, you should probably go read the big
         * block comment at the top of the normal free_pgd_range(),
         * too.
         */
        do {
                next = pgd_addr_end(addr, end);
                pgd = pgd_offset(tlb->mm, addr);
                if (!is_hugepd(pgd)) {
                        if (pgd_none_or_clear_bad(pgd))
                                continue;
                        hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
                } else {
#ifdef CONFIG_PPC_FSL_BOOK3E
                        /*
                         * Increment next by the size of the huge mapping since
                         * there may be more than one entry at the pgd level
                         * for a single hugepage, but all of them point to the
                         * same kmem cache that holds the hugepte.
                         */
                        next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
#endif
                        free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
                                          addr, next, floor, ceiling);
                }
        } while (addr = next, addr != end);
}
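
/*
 * Translate a user address inside a huge page to its struct page.  Returns
 * ERR_PTR(-EINVAL) if the address is not mapped by a huge pte.
 */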
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
        pte_t *ptep;
        struct page *page;
        unsigned shift;
        unsigned long mask;

        ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);

        /* Verify it is a huge page else bail. */
        if (!ptep || !shift)
                return ERR_PTR(-EINVAL);

        mask = (1UL << shift) - 1;
        page = pte_page(*ptep);
        if (page)
                page += (address & mask) / PAGE_SIZE;

        return page;
}

int pmd_huge(pmd_t pmd)
{
        return 0;
}

int pud_huge(pud_t pud)
{
        return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                pmd_t *pmd, int write)
{
        BUG();
        return NULL;
}
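
/*
 * Lockless get_user_pages helper for one hugepte: check the access bits,
 * grab a speculative reference on the head page, recheck that the pte did
 * not change underneath us, then take the tail-page references.
 */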
static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
                                unsigned long end, int write, struct page **pages, int *nr)
{
        unsigned long mask;
        unsigned long pte_end;
        struct page *head, *page, *tail;
        pte_t pte;
        int refs;

        pte_end = (addr + sz) & ~(sz-1);
        if (pte_end < end)
                end = pte_end;

        pte = *ptep;
        mask = _PAGE_PRESENT | _PAGE_USER;
        if (write)
                mask |= _PAGE_RW;

        if ((pte_val(pte) & mask) != mask)
                return 0;

        /* hugepages are never "special" */
        VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

        refs = 0;
        head = pte_page(pte);

        page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
        tail = page;
        do {
                VM_BUG_ON(compound_head(page) != head);
                pages[*nr] = page;
                (*nr)++;
                page++;
                refs++;
        } while (addr += PAGE_SIZE, addr != end);

        if (!page_cache_add_speculative(head, refs)) {
                *nr -= refs;
                return 0;
        }

        if (unlikely(pte_val(pte) != pte_val(*ptep))) {
                /* Could be optimized better */
                *nr -= refs;
                while (refs--)
                        put_page(head);
                return 0;
        }

        /*
         * Any tail pages need their mapcount reference taken before we
         * return.
         */
        while (refs--) {
                if (PageTail(tail))
                        get_huge_page_tail(tail);
                tail++;
        }

        return 1;
}
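
/* Clamp the end of one hugepte's range to the end of the region being walked. */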
static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
                                      unsigned long sz)
{
        unsigned long __boundary = (addr + sz) & ~(sz-1);
        return (__boundary - 1 < end - 1) ? __boundary : end;
}
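
/*
 * For the lockless GUP walk: iterate over every hugepte mapped under this
 * hugepd and hand each one to gup_hugepte() above.
 */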
int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
               unsigned long addr, unsigned long end,
               int write, struct page **pages, int *nr)
{
        pte_t *ptep;
        unsigned long sz = 1UL << hugepd_shift(*hugepd);
        unsigned long next;

        ptep = hugepte_offset(hugepd, addr, pdshift);
        do {
                next = hugepte_addr_end(addr, end, sz);
                if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
                        return 0;
        } while (ptep++, addr = next, addr != end);

        return 1;
}

#ifdef CONFIG_PPC_MM_SLICES
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                                        unsigned long len, unsigned long pgoff,
                                        unsigned long flags)
{
        struct hstate *hstate = hstate_file(file);
        int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));

        return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
}
#endif

unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
#ifdef CONFIG_PPC_MM_SLICES
        unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);

        return 1UL << mmu_psize_to_shift(psize);
#else
        if (!is_vm_hugetlb_page(vma))
                return PAGE_SIZE;

        return huge_page_size(hstate_vma(vma));
#endif
}

static inline bool is_power_of_4(unsigned long x)
{
        if (is_power_of_2(x))
                return (__ilog2(x) % 2) ? false : true;
        return false;
}
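
/*
 * Validate a requested huge page size against hardware and pagetable/slice
 * limits, then register an hstate for it if one does not already exist.
 */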
static int __init add_huge_page_size(unsigned long long size)
{
        int shift = __ffs(size);
        int mmu_psize;

        /* Check that it is a page size supported by the hardware and
         * that it fits within pagetable and slice limits. */
#ifdef CONFIG_PPC_FSL_BOOK3E
        if ((size < PAGE_SIZE) || !is_power_of_4(size))
                return -EINVAL;
#else
        if (!is_power_of_2(size)
            || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
                return -EINVAL;
#endif

        if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
                return -EINVAL;

#ifdef CONFIG_SPU_FS_64K_LS
        /* Disable support for 64K huge pages when 64K SPU local store
         * support is enabled as the current implementation conflicts.
         */
        if (shift == PAGE_SHIFT_64K)
                return -EINVAL;
#endif /* CONFIG_SPU_FS_64K_LS */

        BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);

        /* Return if huge page size has already been setup */
        if (size_to_hstate(size))
                return 0;

        hugetlb_add_hstate(shift - PAGE_SHIFT);

        return 0;
}

static int __init hugepage_setup_sz(char *str)
{
        unsigned long long size;

        size = memparse(str, &str);

        if (add_huge_page_size(size) != 0)
                printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);

        return 1;
}
__setup("hugepagesz=", hugepage_setup_sz);

#ifdef CONFIG_PPC_FSL_BOOK3E
struct kmem_cache *hugepte_cache;
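
/*
 * Boot-time init for FSL Book3E: register every hardware-supported page
 * size larger than the base page, create the hugepte kmem cache and pick 4M
 * as the default huge page size.
 */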
static int __init hugetlbpage_init(void)
{
        int psize;

        for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
                unsigned shift;

                if (!mmu_psize_defs[psize].shift)
                        continue;

                shift = mmu_psize_to_shift(psize);

                /* Don't treat normal page sizes as huge... */
                if (shift != PAGE_SHIFT)
                        if (add_huge_page_size(1ULL << shift) < 0)
                                continue;
        }

        /*
         * Create a kmem cache for hugeptes.  The bottom bits in the pte have
         * size information encoded in them, so align them to allow this
         */
        hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
                                          HUGEPD_SHIFT_MASK + 1, 0, NULL);
        if (hugepte_cache == NULL)
                panic("%s: Unable to create kmem cache for hugeptes\n",
                      __func__);

        /* Default hpage size = 4M */
        if (mmu_psize_defs[MMU_PAGE_4M].shift)
                HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
        else
                panic("%s: Unable to set default huge page size\n", __func__);

        return 0;
}
#else
static int __init hugetlbpage_init(void)
{
        int psize;

        if (!mmu_has_feature(MMU_FTR_16M_PAGE))
                return -ENODEV;

        for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
                unsigned shift;
                unsigned pdshift;

                if (!mmu_psize_defs[psize].shift)
                        continue;

                shift = mmu_psize_to_shift(psize);

                if (add_huge_page_size(1ULL << shift) < 0)
                        continue;

                if (shift < PMD_SHIFT)
                        pdshift = PMD_SHIFT;
                else if (shift < PUD_SHIFT)
                        pdshift = PUD_SHIFT;
                else
                        pdshift = PGDIR_SHIFT;

                pgtable_cache_add(pdshift - shift, NULL);
                if (!PGT_CACHE(pdshift - shift))
                        panic("hugetlbpage_init(): could not create "
                              "pgtable cache for %d bit pagesize\n", shift);
        }

        /* Set default large page size. Currently, we pick 16M or 1M
         * depending on what is available
         */
        if (mmu_psize_defs[MMU_PAGE_16M].shift)
                HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
        else if (mmu_psize_defs[MMU_PAGE_1M].shift)
                HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;

        return 0;
}
#endif
module_init(hugetlbpage_init);
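
/*
 * Flush the data and instruction caches for every sub-page of a compound
 * hugepage, mapping highmem sub-pages with kmap_atomic() as needed.
 */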
void flush_dcache_icache_hugepage(struct page *page)
{
        int i;
        void *start;

        BUG_ON(!PageCompound(page));

        for (i = 0; i < (1UL << compound_order(page)); i++) {
                if (!PageHighMem(page)) {
                        __flush_dcache_icache(page_address(page+i));
                } else {
                        start = kmap_atomic(page+i);
                        __flush_dcache_icache(start);
                        kunmap_atomic(start);
                }
        }
}