hugetlbpage.c

/*
 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/spu.h>

#define PAGE_SHIFT_64K	16
#define PAGE_SHIFT_16M	24
#define PAGE_SHIFT_16G	34

#define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
#define MAX_NUMBER_GPAGES	1024

/* Tracks the 16G pages after the device tree is scanned and before the
 * huge_boot_pages list is ready. */
static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;

/* Array of valid huge page sizes - non-zero value(hugepte_shift) is
 * stored for the huge page sizes that are valid.
 */
static unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */

/* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
 * will choke on pointers to hugepte tables, which is handy for
 * catching screwups early. */
static inline int shift_to_mmu_psize(unsigned int shift)
{
	switch (shift) {
#ifndef CONFIG_PPC_64K_PAGES
	case PAGE_SHIFT_64K:
		return MMU_PAGE_64K;
#endif
	case PAGE_SHIFT_16M:
		return MMU_PAGE_16M;
	case PAGE_SHIFT_16G:
		return MMU_PAGE_16G;
	}
	return -1;
}

static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
{
	if (mmu_psize_defs[mmu_psize].shift)
		return mmu_psize_defs[mmu_psize].shift;
	BUG();
}

#define hugepd_none(hpd)	((hpd).pd == 0)

static inline pte_t *hugepd_page(hugepd_t hpd)
{
	BUG_ON(!hugepd_ok(hpd));
	return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000);
}

static inline unsigned int hugepd_shift(hugepd_t hpd)
{
	return hpd.pd & HUGEPD_SHIFT_MASK;
}

static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift)
{
	unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
	pte_t *dir = hugepd_page(*hpdp);

	return dir + idx;
}
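/*
 * Note on the encoding the helpers above rely on: the low bits of
 * hugepd_t.pd (HUGEPD_SHIFT_MASK) hold the huge page shift, while the
 * remaining bits hold the address of the hugepte table; hugepd_page()
 * rebuilds a kernel virtual pointer from it by ORing in the
 * 0xc000000000000000 linear-mapping offset.  hugepte_offset() then
 * indexes that table using the address bits between the huge page shift
 * and the shift of the directory level (pdshift) where the hugepd
 * pointer lives.
 */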
pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	hugepd_t *hpdp = NULL;
	unsigned pdshift = PGDIR_SHIFT;

	if (shift)
		*shift = 0;

	pg = pgdir + pgd_index(ea);
	if (is_hugepd(pg)) {
		hpdp = (hugepd_t *)pg;
	} else if (!pgd_none(*pg)) {
		pdshift = PUD_SHIFT;
		pu = pud_offset(pg, ea);
		if (is_hugepd(pu))
			hpdp = (hugepd_t *)pu;
		else if (!pud_none(*pu)) {
			pdshift = PMD_SHIFT;
			pm = pmd_offset(pu, ea);
			if (is_hugepd(pm))
				hpdp = (hugepd_t *)pm;
			else if (!pmd_none(*pm)) {
				return pte_offset_map(pm, ea);
			}
		}
	}

	if (!hpdp)
		return NULL;

	if (shift)
		*shift = hugepd_shift(*hpdp);
	return hugepte_offset(hpdp, ea, pdshift);
}
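/*
 * The walk above descends the usual pgd/pud/pmd levels and stops at the
 * first entry that is_hugepd() identifies as a hugepd pointer, returning
 * the corresponding hugepte; if no hugepd is found it falls through to
 * the normal PTE via pte_offset_map().  On return, *shift (when the
 * caller supplies it) is the huge page shift, or 0 for an ordinary
 * base-page mapping.
 */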
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
}

static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
			   unsigned long address, unsigned pdshift, unsigned pshift)
{
	pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift),
				       GFP_KERNEL|__GFP_REPEAT);

	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);

	if (!new)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
	if (!hugepd_none(*hpdp))
		kmem_cache_free(PGT_CACHE(pdshift - pshift), new);
	else
		hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift;
	spin_unlock(&mm->page_table_lock);
	return 0;
}
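/*
 * Note the allocate-then-publish pattern above: the hugepte table is
 * allocated without any lock held, and mm->page_table_lock is only taken
 * to install it.  If another thread won the race and the hugepd is no
 * longer empty, the freshly allocated table is simply freed.  The value
 * written to hpdp->pd is the table's address with its top bit cleared
 * (apparently the "flag to mark huge PD pointers" mentioned near the top
 * of this file) combined with the huge page shift in the low bits,
 * matching the hugepd_page()/hugepd_shift() decoding above.
 */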
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	hugepd_t *hpdp = NULL;
	unsigned pshift = __ffs(sz);
	unsigned pdshift = PGDIR_SHIFT;

	addr &= ~(sz-1);

	pg = pgd_offset(mm, addr);
	if (pshift >= PUD_SHIFT) {
		hpdp = (hugepd_t *)pg;
	} else {
		pdshift = PUD_SHIFT;
		pu = pud_alloc(mm, pg, addr);
		if (pshift >= PMD_SHIFT) {
			hpdp = (hugepd_t *)pu;
		} else {
			pdshift = PMD_SHIFT;
			pm = pmd_alloc(mm, pu, addr);
			hpdp = (hugepd_t *)pm;
		}
	}

	if (!hpdp)
		return NULL;

	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));

	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
		return NULL;

	return hugepte_offset(hpdp, addr, pdshift);
}
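/*
 * The directory level that receives the hugepd pointer follows directly
 * from the page size: a huge page spanning at least a PUD entry's range
 * puts its hugepd in the pgd, one spanning at least a PMD entry's range
 * puts it in the pud, and smaller huge pages put it in the pmd.  The
 * difference pdshift - pshift then selects which PGT_CACHE() slab
 * __hugepte_alloc() takes the hugepte table from.
 */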
/* Build list of addresses of gigantic pages.  This function is used in early
 * boot before the buddy or bootmem allocator is setup.
 */
void add_gpage(unsigned long addr, unsigned long page_size,
	       unsigned long number_of_pages)
{
	if (!addr)
		return;
	while (number_of_pages > 0) {
		gpage_freearray[nr_gpages] = addr;
		nr_gpages++;
		number_of_pages--;
		addr += page_size;
	}
}

/* Moves the gigantic page addresses from the temporary list to the
 * huge_boot_pages list.
 */
int alloc_bootmem_huge_page(struct hstate *hstate)
{
	struct huge_bootmem_page *m;

	if (nr_gpages == 0)
		return 0;
	m = phys_to_virt(gpage_freearray[--nr_gpages]);
	gpage_freearray[nr_gpages] = 0;
	list_add(&m->list, &huge_boot_pages);
	m->hstate = hstate;
	return 1;
}
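/*
 * Together, the two functions above hand gigantic (16G) pages found
 * during the device tree scan (see the gpage_freearray comment near the
 * top of this file) over to the generic hugetlb boot allocator:
 * add_gpage() records their physical addresses early in boot, and
 * alloc_bootmem_huge_page() later pops one entry at a time onto
 * huge_boot_pages for the requested hstate.
 */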
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	return 0;
}

static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
			      unsigned long start, unsigned long end,
			      unsigned long floor, unsigned long ceiling)
{
	pte_t *hugepte = hugepd_page(*hpdp);
	unsigned shift = hugepd_shift(*hpdp);
	unsigned long pdmask = ~((1UL << pdshift) - 1);

	start &= pdmask;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= pdmask;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	hpdp->pd = 0;
	tlb->need_flush = 1;
	pgtable_free_tlb(tlb, hugepte, pdshift - shift);
}
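/*
 * The floor/ceiling logic above mirrors the generic free_pgd_range()
 * convention: the hugepte table is only freed when the range being torn
 * down, clamped to [floor, ceiling), covers the whole span mapped by
 * this directory entry (1UL << pdshift bytes).  Otherwise neighbouring
 * mappings may still need the table and it is left in place.
 */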
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd))
			continue;
		free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
				  addr, next, floor, ceiling);
	} while (pmd++, addr = next, addr != end);

	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd, start);
}

static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (!is_hugepd(pud)) {
			if (pud_none_or_clear_bad(pud))
				continue;
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
					       ceiling);
		} else {
			free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
					  addr, next, floor, ceiling);
		}
	} while (pud++, addr = next, addr != end);

	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud, start);
}
/*
 * This function frees user-level page tables of a process.
 *
 * Must be called with pagetable lock held.
 */
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	unsigned long next;

	/*
	 * Because there are a number of different possible pagetable
	 * layouts for hugepage ranges, we limit knowledge of how
	 * things should be laid out to the allocation path
	 * (huge_pte_alloc(), above).  Everything else works out the
	 * structure as it goes from information in the hugepd
	 * pointers.  That means that we can't here use the
	 * optimization used in the normal page free_pgd_range(), of
	 * checking whether we're actually covering a large enough
	 * range to have to do anything at the top level of the walk
	 * instead of at the bottom.
	 *
	 * To make sense of this, you should probably go read the big
	 * block comment at the top of the normal free_pgd_range(),
	 * too.
	 */
	pgd = pgd_offset(tlb->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (!is_hugepd(pgd)) {
			if (pgd_none_or_clear_bad(pgd))
				continue;
			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
		} else {
			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
					  addr, next, floor, ceiling);
		}
	} while (pgd++, addr = next, addr != end);
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	if (pte_present(*ptep)) {
		/* We open-code pte_clear because we need to pass the right
		 * argument to hpte_need_flush (huge / !huge). Might not be
		 * necessary anymore if we make hpte_need_flush() get the
		 * page size from the slices
		 */
		pte_update(mm, addr, ptep, ~0UL, 1);
	}
	*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1);
	return __pte(old);
}
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	pte_t *ptep;
	struct page *page;
	unsigned shift;
	unsigned long mask;

	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);

	/* Verify it is a huge page else bail. */
	if (!ptep || !shift)
		return ERR_PTR(-EINVAL);

	mask = (1UL << shift) - 1;
	page = pte_page(*ptep);
	if (page)
		page += (address & mask) / PAGE_SIZE;

	return page;
}

int pmd_huge(pmd_t pmd)
{
	return 0;
}

int pud_huge(pud_t pud)
{
	return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	BUG();
	return NULL;
}
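/*
 * With this hugepd-based layout, huge pages are never leaf entries
 * directly in a pmd or pud, which is presumably why pmd_huge() and
 * pud_huge() simply return 0 and follow_huge_pmd() is a BUG(): the
 * generic code reaches huge mappings through follow_huge_addr() above
 * instead.
 */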
static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
				unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long mask;
	unsigned long pte_end;
	struct page *head, *page;
	pte_t pte;
	int refs;

	pte_end = (addr + sz) & ~(sz-1);
	if (pte_end < end)
		end = pte_end;

	pte = *ptep;
	mask = _PAGE_PRESENT | _PAGE_USER;
	if (write)
		mask |= _PAGE_RW;

	if ((pte_val(pte) & mask) != mask)
		return 0;

	/* hugepages are never "special" */
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);

	page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
		/* Could be optimized better */
		while (*nr) {
			put_page(page);
			(*nr)--;
		}
	}

	return 1;
}
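/*
 * gup_hugepte() is the huge page leg of the lockless
 * get_user_pages_fast() walk: it reads the hugepte once, records every
 * base-page-sized subpage of the requested range in pages[], takes a
 * speculative reference on the compound head with
 * page_cache_add_speculative(), and then re-reads the pte to detect a
 * concurrent change while the references were being taken.
 */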
int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
	       unsigned long addr, unsigned long end,
	       int write, struct page **pages, int *nr)
{
	pte_t *ptep;
	unsigned long sz = 1UL << hugepd_shift(*hugepd);

	ptep = hugepte_offset(hugepd, addr, pdshift);
	do {
		if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
			return 0;
	} while (ptep++, addr += sz, addr != end);

	return 1;
}

unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags)
{
	struct hstate *hstate = hstate_file(file);
	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));

	if (!mmu_huge_psizes[mmu_psize])
		return -EINVAL;
	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
}

unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
	unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
	return 1UL << mmu_psize_to_shift(psize);
}
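/*
 * Placement of huge page mappings is delegated to the slice code above:
 * hugetlb_get_unmapped_area() asks slice_get_unmapped_area() for an area
 * in a slice configured for the requested MMU page size, and
 * vma_mmu_pagesize() recovers the page size of an existing mapping by
 * looking up the slice psize at the VMA's start address.
 */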
/*
 * Called by asm hashtable.S for doing lazy icache flush
 */
static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
						  pte_t pte, int trap, unsigned long sz)
{
	struct page *page;
	int i;

	if (!pfn_valid(pte_pfn(pte)))
		return rflags;

	page = pte_page(pte);

	/* page is dirty */
	if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
		if (trap == 0x400) {
			for (i = 0; i < (sz / PAGE_SIZE); i++)
				__flush_dcache_icache(page_address(page+i));
			set_bit(PG_arch_1, &page->flags);
		} else {
			rflags |= HPTE_R_N;
		}
	}
	return rflags;
}
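/*
 * Lazy icache flushing, as implemented above: on an instruction storage
 * fault (trap 0x400) every subpage of the huge page is flushed from the
 * data cache into the instruction cache and PG_arch_1 marks the page as
 * clean; for any other access the flush is deferred by leaving the HPTE
 * marked no-execute (HPTE_R_N).
 */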
int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
		     pte_t *ptep, unsigned long trap, int local, int ssize,
		     unsigned int shift, unsigned int mmu_psize)
{
	unsigned long old_pte, new_pte;
	unsigned long va, rflags, pa, sz;
	long slot;
	int err = 1;

	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);

	/* Search the Linux page table for a match with va */
	va = hpt_va(ea, vsid, ssize);

	/*
	 * Check the user's access rights to the page.  If access should be
	 * prevented then send the problem up to do_page_fault.
	 */
	if (unlikely(access & ~pte_val(*ptep)))
		goto out;
	/*
	 * At this point, we have a pte (old_pte) which can be used to build
	 * or update an HPTE. There are 2 cases:
	 *
	 * 1. There is a valid (present) pte with no associated HPTE (this is
	 *	the most common case)
	 * 2. There is a valid (present) pte with an associated HPTE. The
	 *	current values of the pp bits in the HPTE prevent access
	 *	because we are doing software DIRTY bit management and the
	 *	page is currently not DIRTY.
	 */

	do {
		old_pte = pte_val(*ptep);
		if (old_pte & _PAGE_BUSY)
			goto out;
		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
					  old_pte, new_pte));

	rflags = 0x2 | (!(new_pte & _PAGE_RW));
	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
	sz = ((1UL) << shift);
	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
		/* No CPU has hugepages but lacks no execute, so we
		 * don't need to worry about that case */
		rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
						       trap, sz);

	/* Check if pte already has an hpte (case 2) */
	if (unlikely(old_pte & _PAGE_HASHPTE)) {
		/* There MIGHT be an HPTE for this pte */
		unsigned long hash, slot;

		hash = hpt_hash(va, shift, ssize);
		if (old_pte & _PAGE_F_SECOND)
			hash = ~hash;
		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += (old_pte & _PAGE_F_GIX) >> 12;

		if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
					 ssize, local) == -1)
			old_pte &= ~_PAGE_HPTEFLAGS;
	}

	if (likely(!(old_pte & _PAGE_HASHPTE))) {
		unsigned long hash = hpt_hash(va, shift, ssize);
		unsigned long hpte_group;

		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;

repeat:
		hpte_group = ((hash & htab_hash_mask) *
			      HPTES_PER_GROUP) & ~0x7UL;

		/* clear HPTE slot information in new PTE */
#ifdef CONFIG_PPC_64K_PAGES
		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
#else
		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
#endif
		/* Add in WIMG bits */
		rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
				      _PAGE_COHERENT | _PAGE_GUARDED));

		/* Insert into the hash table, primary slot */
		slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
					  mmu_psize, ssize);

		/* Primary is full, try the secondary */
		if (unlikely(slot == -1)) {
			hpte_group = ((~hash & htab_hash_mask) *
				      HPTES_PER_GROUP) & ~0x7UL;
			slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
						  HPTE_V_SECONDARY,
						  mmu_psize, ssize);
			if (slot == -1) {
				if (mftb() & 0x1)
					hpte_group = ((hash & htab_hash_mask) *
						      HPTES_PER_GROUP) & ~0x7UL;

				ppc_md.hpte_remove(hpte_group);
				goto repeat;
			}
		}

		if (unlikely(slot == -2))
			panic("hash_huge_page: pte_insert failed\n");

		new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
	}

	/*
	 * No need to use ldarx/stdcx here
	 */
	*ptep = __pte(new_pte & ~_PAGE_BUSY);

	err = 0;

out:
	return err;
}
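/*
 * Summary of __hash_page_huge() above: the Linux PTE is first locked by
 * setting _PAGE_BUSY with a cmpxchg loop, the protection bits are
 * translated into HPTE rflags, and then either an existing HPTE is
 * updated in place (case 2) or a new one is inserted, trying the primary
 * hash group first and the secondary group next.  If both groups are
 * full, a victim group (picked pseudo-randomly from the timebase) has an
 * entry removed and the insertion is retried.  The slot number is folded
 * back into the PTE's _PAGE_F_SECOND/_PAGE_F_GIX bits before _PAGE_BUSY
 * is cleared.
 */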
static void __init set_huge_psize(int psize)
{
	unsigned pdshift;

	/* Check that it is a page size supported by the hardware and
	 * that it fits within pagetable limits. */
	if (mmu_psize_defs[psize].shift &&
	    mmu_psize_defs[psize].shift < SID_SHIFT_1T &&
	    (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT ||
	     mmu_psize_defs[psize].shift == PAGE_SHIFT_64K ||
	     mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) {
		/* Return if huge page size has already been setup or is the
		 * same as the base page size. */
		if (mmu_huge_psizes[psize] ||
		    mmu_psize_defs[psize].shift == PAGE_SHIFT)
			return;
		hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);

		if (mmu_psize_defs[psize].shift < PMD_SHIFT)
			pdshift = PMD_SHIFT;
		else if (mmu_psize_defs[psize].shift < PUD_SHIFT)
			pdshift = PUD_SHIFT;
		else
			pdshift = PGDIR_SHIFT;
		mmu_huge_psizes[psize] = pdshift - mmu_psize_defs[psize].shift;
	}
}
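/*
 * Note that set_huge_psize() above does not store the page shift itself
 * in mmu_huge_psizes[]; it stores pdshift - shift, i.e. the index size
 * of the hugepte table needed for that page size.  The same value is
 * later passed to pgtable_cache_add()/PGT_CACHE() in hugetlbpage_init(),
 * and a non-zero entry doubles as the "this size is enabled" flag tested
 * by hugetlb_get_unmapped_area().
 */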
static int __init hugepage_setup_sz(char *str)
{
	unsigned long long size;
	int mmu_psize;
	int shift;

	size = memparse(str, &str);

	shift = __ffs(size);
	mmu_psize = shift_to_mmu_psize(shift);
	if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift)
		set_huge_psize(mmu_psize);
	else
		printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);

	return 1;
}
__setup("hugepagesz=", hugepage_setup_sz);
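/*
 * Usage sketch for the handler above: booting with
 * "hugepagesz=16M hugepages=8" on the kernel command line makes
 * memparse() return 16M here, which shift_to_mmu_psize() maps to
 * MMU_PAGE_16M before set_huge_psize() enables that hstate; the
 * hugepages= count itself is consumed by the generic hugetlb code, not
 * by this file.
 */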
static int __init hugetlbpage_init(void)
{
	int psize;

	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
		return -ENODEV;

	/* Add supported huge page sizes.  Need to change
	 * HUGE_MAX_HSTATE if the number of supported huge page sizes
	 * changes.
	 */
	set_huge_psize(MMU_PAGE_16M);
	set_huge_psize(MMU_PAGE_16G);

	/* Temporarily disable support for 64K huge pages when 64K SPU local
	 * store support is enabled as the current implementation conflicts.
	 */
#ifndef CONFIG_SPU_FS_64K_LS
	set_huge_psize(MMU_PAGE_64K);
#endif

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
		if (mmu_huge_psizes[psize]) {
			pgtable_cache_add(mmu_huge_psizes[psize], NULL);
			if (!PGT_CACHE(mmu_huge_psizes[psize]))
				panic("hugetlbpage_init(): could not create "
				      "pgtable cache for %d bit pagesize\n",
				      mmu_psize_to_shift(psize));
		}
	}

	return 0;
}

module_init(hugetlbpage_init);