hugetlbpage.c

/*
 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/spu.h>

#define PAGE_SHIFT_64K	16
#define PAGE_SHIFT_16M	24
#define PAGE_SHIFT_16G	34

#define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
#define MAX_NUMBER_GPAGES	1024

/* Tracks the 16G pages after the device tree is scanned and before the
 * huge_boot_pages list is ready. */
static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;

/* Array of valid huge page sizes - non-zero value (hugepte_shift) is
 * stored for the huge page sizes that are valid.
 */
unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */

#define hugepte_shift			mmu_huge_psizes
#define PTRS_PER_HUGEPTE(psize)		(1 << hugepte_shift[psize])
#define HUGEPTE_TABLE_SIZE(psize)	(sizeof(pte_t) << hugepte_shift[psize])

#define HUGEPD_SHIFT(psize)		(mmu_psize_to_shift(psize) \
					 + hugepte_shift[psize])
#define HUGEPD_SIZE(psize)		(1UL << HUGEPD_SHIFT(psize))
#define HUGEPD_MASK(psize)		(~(HUGEPD_SIZE(psize)-1))

/* Subtract one from array size because we don't need a cache for 4K since
 * it is not a huge page size */
#define huge_pgtable_cache(psize)	(pgtable_cache[HUGEPTE_CACHE_NUM \
						       + psize-1])
#define HUGEPTE_CACHE_NAME(psize)	(huge_pgtable_cache_name[psize])

static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = {
	"unused_4K", "hugepte_cache_64K", "unused_64K_AP",
	"hugepte_cache_1M", "hugepte_cache_16M", "hugepte_cache_16G"
};

/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad()
 * will choke on pointers to hugepte tables, which is handy for
 * catching screwups early. */
#define HUGEPD_OK	0x1

typedef struct { unsigned long pd; } hugepd_t;

#define hugepd_none(hpd)	((hpd).pd == 0)
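
/* Map a page shift to its MMU page size index (MMU_PAGE_*), returning -1
 * if the shift is not a supported huge page shift on this configuration. */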
static inline int shift_to_mmu_psize(unsigned int shift)
{
	switch (shift) {
#ifndef CONFIG_PPC_64K_PAGES
	case PAGE_SHIFT_64K:
		return MMU_PAGE_64K;
#endif
	case PAGE_SHIFT_16M:
		return MMU_PAGE_16M;
	case PAGE_SHIFT_16G:
		return MMU_PAGE_16G;
	}
	return -1;
}

static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
{
	if (mmu_psize_defs[mmu_psize].shift)
		return mmu_psize_defs[mmu_psize].shift;
	BUG();
}
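
/* A hugepd entry holds the kernel virtual address of a hugepte table with
 * the HUGEPD_OK bit set; hugepd_page() strips the flag, and
 * hugepte_offset() indexes into that table for a given address. */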
static inline pte_t *hugepd_page(hugepd_t hpd)
{
	BUG_ON(!(hpd.pd & HUGEPD_OK));
	return (pte_t *)(hpd.pd & ~HUGEPD_OK);
}

static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
				    struct hstate *hstate)
{
	unsigned int shift = huge_page_shift(hstate);
	int psize = shift_to_mmu_psize(shift);
	unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1));
	pte_t *dir = hugepd_page(*hpdp);

	return dir + idx;
}
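
/* Allocate a hugepte table for the given hugepd slot. The check and
 * install are done under mm->page_table_lock; if another CPU installed a
 * table first, free ours and keep theirs. */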
static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
			   unsigned long address, unsigned int psize)
{
	pte_t *new = kmem_cache_zalloc(huge_pgtable_cache(psize),
				       GFP_KERNEL|__GFP_REPEAT);

	if (!new)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
	if (!hugepd_none(*hpdp))
		kmem_cache_free(huge_pgtable_cache(psize), new);
	else
		hpdp->pd = (unsigned long)new | HUGEPD_OK;
	spin_unlock(&mm->page_table_lock);
	return 0;
}

/* Base page size affects how we walk hugetlb page tables */
#ifdef CONFIG_PPC_64K_PAGES
#define hpmd_offset(pud, addr, h)	pmd_offset(pud, addr)
#define hpmd_alloc(mm, pud, addr, h)	pmd_alloc(mm, pud, addr)
#else
static inline
pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate)
{
	if (huge_page_shift(hstate) == PAGE_SHIFT_64K)
		return pmd_offset(pud, addr);
	else
		return (pmd_t *) pud;
}
static inline
pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr,
		  struct hstate *hstate)
{
	if (huge_page_shift(hstate) == PAGE_SHIFT_64K)
		return pmd_alloc(mm, pud, addr);
	else
		return (pmd_t *) pud;
}
#endif

/* Build list of addresses of gigantic pages. This function is used in early
 * boot before the buddy or bootmem allocator is set up.
 */
void add_gpage(unsigned long addr, unsigned long page_size,
	       unsigned long number_of_pages)
{
	if (!addr)
		return;
	while (number_of_pages > 0) {
		gpage_freearray[nr_gpages] = addr;
		nr_gpages++;
		number_of_pages--;
		addr += page_size;
	}
}

/* Moves the gigantic page addresses from the temporary list to the
 * huge_boot_pages list.
 */
int alloc_bootmem_huge_page(struct hstate *hstate)
{
	struct huge_bootmem_page *m;

	if (nr_gpages == 0)
		return 0;
	m = phys_to_virt(gpage_freearray[--nr_gpages]);
	gpage_freearray[nr_gpages] = 0;
	list_add(&m->list, &huge_boot_pages);
	m->hstate = hstate;
	return 1;
}

/* Modelled after find_linux_pte() */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;

	unsigned int psize;
	unsigned int shift;
	unsigned long sz;
	struct hstate *hstate;

	psize = get_slice_psize(mm, addr);
	shift = mmu_psize_to_shift(psize);
	sz = ((1UL) << shift);
	hstate = size_to_hstate(sz);

	addr &= hstate->mask;

	pg = pgd_offset(mm, addr);
	if (!pgd_none(*pg)) {
		pu = pud_offset(pg, addr);
		if (!pud_none(*pu)) {
			pm = hpmd_offset(pu, addr, hstate);
			if (!pmd_none(*pm))
				return hugepte_offset((hugepd_t *)pm, addr,
						      hstate);
		}
	}

	return NULL;
}
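
/* Find, or allocate on demand, the hugepte slot for a huge page of size
 * sz at addr, populating the PGD/PUD/PMD levels as dictated by the page
 * size. Returns NULL on allocation failure. */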
pte_t *huge_pte_alloc(struct mm_struct *mm,
			unsigned long addr, unsigned long sz)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	hugepd_t *hpdp = NULL;
	struct hstate *hstate;
	unsigned int psize;

	hstate = size_to_hstate(sz);

	psize = get_slice_psize(mm, addr);
	BUG_ON(!mmu_huge_psizes[psize]);

	addr &= hstate->mask;

	pg = pgd_offset(mm, addr);
	pu = pud_alloc(mm, pg, addr);

	if (pu) {
		pm = hpmd_alloc(mm, pu, addr, hstate);
		if (pm)
			hpdp = (hugepd_t *)pm;
	}

	if (!hpdp)
		return NULL;

	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize))
		return NULL;

	return hugepte_offset(hpdp, addr, hstate);
}
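
/* PMD sharing of huge page mappings is not implemented on powerpc, so
 * there is never anything to unshare. */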
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	return 0;
}
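
/* Detach a hugepte table from its hugepd slot and queue it for freeing
 * via the mmu_gather, so the TLB is flushed before the table goes away. */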
static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,
			       unsigned int psize)
{
	pte_t *hugepte = hugepd_page(*hpdp);

	hpdp->pd = 0;
	tlb->need_flush = 1;
	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte,
						 HUGEPTE_CACHE_NUM+psize-1,
						 PGF_CACHENUM_MASK));
}
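
/* The two helpers below mirror free_pmd_range()/free_pud_range() in
 * mm/memory.c, but free hugepte tables instead of normal PTE pages and
 * only collapse a level once the floor/ceiling checks show no other part
 * of it is still in use. */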
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling,
				   unsigned int psize)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd))
			continue;
		free_hugepte_range(tlb, (hugepd_t *)pmd, psize);
	} while (pmd++, addr = next, addr != end);

	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd);
}

static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;
	unsigned int shift;
	unsigned int psize = get_slice_psize(tlb->mm, addr);

	shift = mmu_psize_to_shift(psize);

	start = addr;
	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
#ifdef CONFIG_PPC_64K_PAGES
		if (pud_none_or_clear_bad(pud))
			continue;
		hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling,
				       psize);
#else
		if (shift == PAGE_SHIFT_64K) {
			if (pud_none_or_clear_bad(pud))
				continue;
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
					       ceiling, psize);
		} else {
			if (pud_none(*pud))
				continue;
			free_hugepte_range(tlb, (hugepd_t *)pud, psize);
		}
#endif
	} while (pud++, addr = next, addr != end);

	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud);
}

/*
 * This function frees user-level page tables of a process.
 *
 * Must be called with pagetable lock held.
 */
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long start;

	/*
	 * Comments below taken from the normal free_pgd_range(). They
	 * apply here too. The tests against HUGEPD_MASK below are
	 * essential, because we *don't* test for this at the bottom
	 * level. Without them we'll attempt to free a hugepte table
	 * when we unmap just part of it, even if there are other
	 * active mappings using it.
	 *
	 * The next few lines have given us lots of grief...
	 *
	 * Why are we testing HUGEPD* at this top level? Because
	 * often there will be no work to do at all, and we'd prefer
	 * not to go all the way down to the bottom just to discover
	 * that.
	 *
	 * Why all these "- 1"s? Because 0 represents both the bottom
	 * of the address space and the top of it (using -1 for the
	 * top wouldn't help much: the masks would do the wrong thing).
	 * The rule is that addr 0 and floor 0 refer to the bottom of
	 * the address space, but end 0 and ceiling 0 refer to the top.
	 * Comparisons need to use "end - 1" and "ceiling - 1" (though
	 * that end 0 case should be mythical).
	 *
	 * Wherever addr is brought up or ceiling brought down, we
	 * must be careful to reject "the opposite 0" before it
	 * confuses the subsequent tests. But what about where end is
	 * brought down by HUGEPD_SIZE below? no, end can't go down to
	 * 0 there.
	 *
	 * Whereas we round start (addr) and ceiling down, by different
	 * masks at different levels, in order to test whether a table
	 * now has no other vmas using it, so can be freed, we don't
	 * bother to round floor or end up - the tests don't need that.
	 */
	unsigned int psize = get_slice_psize(tlb->mm, addr);

	addr &= HUGEPD_MASK(psize);
	if (addr < floor) {
		addr += HUGEPD_SIZE(psize);
		if (!addr)
			return;
	}
	if (ceiling) {
		ceiling &= HUGEPD_MASK(psize);
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		end -= HUGEPD_SIZE(psize);
	if (addr > end - 1)
		return;

	start = addr;
	pgd = pgd_offset(tlb->mm, addr);
	do {
		psize = get_slice_psize(tlb->mm, addr);
		BUG_ON(!mmu_huge_psizes[psize]);
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
	} while (pgd++, addr = next, addr != end);
}

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	if (pte_present(*ptep)) {
		/* We open-code pte_clear because we need to pass the right
		 * argument to hpte_need_flush (huge / !huge). Might not be
		 * necessary anymore if we make hpte_need_flush() get the
		 * page size from the slices
		 */
		unsigned int psize = get_slice_psize(mm, addr);
		unsigned int shift = mmu_psize_to_shift(psize);
		unsigned long sz = ((1UL) << shift);
		struct hstate *hstate = size_to_hstate(sz);

		pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1);
	}
	*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1);
	return __pte(old);
}
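
/* Return the struct page backing a huge mapping at address, adjusted to
 * the base page the address falls in, or an error pointer if the slice
 * is not mapped with a huge page size. */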
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	pte_t *ptep;
	struct page *page;
	unsigned int mmu_psize = get_slice_psize(mm, address);

	/* Verify it is a huge page else bail. */
	if (!mmu_huge_psizes[mmu_psize])
		return ERR_PTR(-EINVAL);

	ptep = huge_pte_offset(mm, address);
	page = pte_page(*ptep);
	if (page) {
		unsigned int shift = mmu_psize_to_shift(mmu_psize);
		unsigned long sz = ((1UL) << shift);
		page += (address % sz) / PAGE_SIZE;
	}

	return page;
}
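
/* Huge pages are never encoded directly in PMD or PUD entries here (they
 * live behind hugepd pointers), so pmd_huge()/pud_huge() report false and
 * follow_huge_pmd() should never be reached. */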
int pmd_huge(pmd_t pmd)
{
	return 0;
}

int pud_huge(pud_t pud)
{
	return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	BUG();
	return NULL;
}
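
/* Huge page mappings are placed by the slice allocator: hand the request
 * to slice_get_unmapped_area() with the MMU page size for this hstate. */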
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags)
{
	struct hstate *hstate = hstate_file(file);
	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));

	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
}

/*
 * Called by asm hashtable.S for doing lazy icache flush
 */
static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
						  pte_t pte, int trap,
						  unsigned long sz)
{
	struct page *page;
	int i;

	if (!pfn_valid(pte_pfn(pte)))
		return rflags;

	page = pte_page(pte);

	/* page is dirty */
	if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
		if (trap == 0x400) {
			for (i = 0; i < (sz / PAGE_SIZE); i++)
				__flush_dcache_icache(page_address(page+i));
			set_bit(PG_arch_1, &page->flags);
		} else {
			rflags |= HPTE_R_N;
		}
	}
	return rflags;
}
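
/* Hash-table fault handler for huge pages: validate access against the
 * Linux PTE, then update the existing HPTE or insert a new one (trying
 * the primary group, then the secondary, evicting an entry if both are
 * full). Returns 0 on success, 1 to punt the fault to do_page_fault(). */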
int hash_huge_page(struct mm_struct *mm, unsigned long access,
		   unsigned long ea, unsigned long vsid, int local,
		   unsigned long trap)
{
	pte_t *ptep;
	unsigned long old_pte, new_pte;
	unsigned long va, rflags, pa, sz;
	long slot;
	int err = 1;
	int ssize = user_segment_size(ea);
	unsigned int mmu_psize;
	int shift;

	mmu_psize = get_slice_psize(mm, ea);

	if (!mmu_huge_psizes[mmu_psize])
		goto out;
	ptep = huge_pte_offset(mm, ea);

	/* Search the Linux page table for a match with va */
	va = hpt_va(ea, vsid, ssize);

	/*
	 * If no pte found or not present, send the problem up to
	 * do_page_fault
	 */
	if (unlikely(!ptep || pte_none(*ptep)))
		goto out;

	/*
	 * Check the user's access rights to the page. If access should be
	 * prevented then send the problem up to do_page_fault.
	 */
	if (unlikely(access & ~pte_val(*ptep)))
		goto out;
	/*
	 * At this point, we have a pte (old_pte) which can be used to build
	 * or update an HPTE. There are 2 cases:
	 *
	 * 1. There is a valid (present) pte with no associated HPTE (this is
	 *	the most common case)
	 * 2. There is a valid (present) pte with an associated HPTE. The
	 *	current values of the pp bits in the HPTE prevent access
	 *	because we are doing software DIRTY bit management and the
	 *	page is currently not DIRTY.
	 */

	do {
		old_pte = pte_val(*ptep);
		if (old_pte & _PAGE_BUSY)
			goto out;
		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
	} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
					 old_pte, new_pte));

	rflags = 0x2 | (!(new_pte & _PAGE_RW));
	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
	shift = mmu_psize_to_shift(mmu_psize);
	sz = ((1UL) << shift);
	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
		/* No CPU has hugepages but lacks no execute, so we
		 * don't need to worry about that case */
		rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
						       trap, sz);

	/* Check if pte already has an hpte (case 2) */
	if (unlikely(old_pte & _PAGE_HASHPTE)) {
		/* There MIGHT be an HPTE for this pte */
		unsigned long hash, slot;

		hash = hpt_hash(va, shift, ssize);
		if (old_pte & _PAGE_F_SECOND)
			hash = ~hash;
		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += (old_pte & _PAGE_F_GIX) >> 12;

		if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
					 ssize, local) == -1)
			old_pte &= ~_PAGE_HPTEFLAGS;
	}

	if (likely(!(old_pte & _PAGE_HASHPTE))) {
		unsigned long hash = hpt_hash(va, shift, ssize);
		unsigned long hpte_group;

		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;

repeat:
		hpte_group = ((hash & htab_hash_mask) *
			      HPTES_PER_GROUP) & ~0x7UL;

		/* clear HPTE slot information in new PTE */
#ifdef CONFIG_PPC_64K_PAGES
		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
#else
		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
#endif
		/* Add in WIMG bits */
		rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
				      _PAGE_COHERENT | _PAGE_GUARDED));

		/* Insert into the hash table, primary slot */
		slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
					  mmu_psize, ssize);

		/* Primary is full, try the secondary */
		if (unlikely(slot == -1)) {
			hpte_group = ((~hash & htab_hash_mask) *
				      HPTES_PER_GROUP) & ~0x7UL;
			slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
						  HPTE_V_SECONDARY,
						  mmu_psize, ssize);
			if (slot == -1) {
				if (mftb() & 0x1)
					hpte_group = ((hash & htab_hash_mask) *
						      HPTES_PER_GROUP)&~0x7UL;

				ppc_md.hpte_remove(hpte_group);
				goto repeat;
			}
		}

		if (unlikely(slot == -2))
			panic("hash_huge_page: pte_insert failed\n");

		new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
	}

	/*
	 * No need to use ldarx/stdcx here
	 */
	*ptep = __pte(new_pte & ~_PAGE_BUSY);

	err = 0;

out:
	return err;
}

void set_huge_psize(int psize)
{
	/* Check that it is a page size supported by the hardware and
	 * that it fits within pagetable limits. */
	if (mmu_psize_defs[psize].shift &&
		mmu_psize_defs[psize].shift < SID_SHIFT_1T &&
		(mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT ||
		 mmu_psize_defs[psize].shift == PAGE_SHIFT_64K ||
		 mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) {
		/* Return if huge page size has already been setup or is the
		 * same as the base page size. */
		if (mmu_huge_psizes[psize] ||
		    mmu_psize_defs[psize].shift == PAGE_SHIFT)
			return;
		hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);

		switch (mmu_psize_defs[psize].shift) {
		case PAGE_SHIFT_64K:
			/* We only allow 64k hpages with 4k base page,
			 * which was checked above, and always put them
			 * at the PMD */
			hugepte_shift[psize] = PMD_SHIFT;
			break;
		case PAGE_SHIFT_16M:
			/* 16M pages can be at two different levels
			 * of page tables based on base page size */
			if (PAGE_SHIFT == PAGE_SHIFT_64K)
				hugepte_shift[psize] = PMD_SHIFT;
			else /* 4k base page */
				hugepte_shift[psize] = PUD_SHIFT;
			break;
		case PAGE_SHIFT_16G:
			/* 16G pages are always at PGD level */
			hugepte_shift[psize] = PGDIR_SHIFT;
			break;
		}
		hugepte_shift[psize] -= mmu_psize_defs[psize].shift;
	} else
		hugepte_shift[psize] = 0;
}
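
/* Parse the "hugepagesz=" command line option: convert the requested size
 * to an MMU page size index and enable it if the hardware supports it. */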
static int __init hugepage_setup_sz(char *str)
{
	unsigned long long size;
	int mmu_psize;
	int shift;

	size = memparse(str, &str);

	shift = __ffs(size);
	mmu_psize = shift_to_mmu_psize(shift);
	if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift)
		set_huge_psize(mmu_psize);
	else
		printk(KERN_WARNING "Invalid huge page size specified (%llu)\n", size);

	return 1;
}
__setup("hugepagesz=", hugepage_setup_sz);
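
/* Late init: register the supported huge page sizes with the hugetlb core
 * and create a kmem cache for each size's hugepte tables. */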
static int __init hugetlbpage_init(void)
{
	unsigned int psize;

	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
		return -ENODEV;

	/* Add supported huge page sizes. Need to change HUGE_MAX_HSTATE
	 * and adjust PTE_NONCACHE_NUM if the number of supported huge page
	 * sizes changes.
	 */
	set_huge_psize(MMU_PAGE_16M);
	set_huge_psize(MMU_PAGE_16G);

	/* Temporarily disable support for 64K huge pages when 64K SPU local
	 * store support is enabled as the current implementation conflicts.
	 */
#ifndef CONFIG_SPU_FS_64K_LS
	set_huge_psize(MMU_PAGE_64K);
#endif

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
		if (mmu_huge_psizes[psize]) {
			huge_pgtable_cache(psize) = kmem_cache_create(
						HUGEPTE_CACHE_NAME(psize),
						HUGEPTE_TABLE_SIZE(psize),
						HUGEPTE_TABLE_SIZE(psize),
						0,
						NULL);
			if (!huge_pgtable_cache(psize))
				panic("hugetlbpage_init(): could not create %s"\
				      "\n", HUGEPTE_CACHE_NAME(psize));
		}
	}

	return 0;
}

module_init(hugetlbpage_init);