@@ -707,6 +707,310 @@ int arch_add_memory(int nid, u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
+#define PAGE_INUSE 0xFD
+
+static void __meminit free_pagetable(struct page *page, int order)
+{
+	struct zone *zone;
+	bool bootmem = false;
+	unsigned long magic;
+	unsigned int nr_pages = 1 << order;
+
+	/* bootmem page has reserved flag */
+	if (PageReserved(page)) {
+		__ClearPageReserved(page);
+		bootmem = true;
+
+		magic = (unsigned long)page->lru.next;
+		if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+			while (nr_pages--)
+				put_page_bootmem(page++);
+		} else
+			__free_pages_bootmem(page, order);
+	} else
+		free_pages((unsigned long)page_address(page), order);
+
+	/*
+	 * SECTION_INFO pages and MIX_SECTION_INFO pages
+	 * are all allocated by bootmem.
+	 */
+	if (bootmem) {
+		zone = page_zone(page);
+		zone_span_writelock(zone);
+		zone->present_pages += nr_pages;
+		zone_span_writeunlock(zone);
+		totalram_pages += nr_pages;
+	}
+}
+
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
+	pte_t *pte;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		pte = pte_start + i;
+		if (pte_val(*pte))
+			return;
+	}
+
+	/* free a pte table */
+	free_pagetable(pmd_page(*pmd), 0);
+	spin_lock(&init_mm.page_table_lock);
+	pmd_clear(pmd);
+	spin_unlock(&init_mm.page_table_lock);
+}
+
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+{
+	pmd_t *pmd;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		pmd = pmd_start + i;
+		if (pmd_val(*pmd))
+			return;
+	}
+
+	/* free a pmd table */
+	free_pagetable(pud_page(*pud), 0);
+	spin_lock(&init_mm.page_table_lock);
+	pud_clear(pud);
+	spin_unlock(&init_mm.page_table_lock);
+}
+
+/* Return true if pgd is changed, otherwise return false. */
+static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
+{
+	pud_t *pud;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		pud = pud_start + i;
+		if (pud_val(*pud))
+			return false;
+	}
+
+	/* free a pud table */
+	free_pagetable(pgd_page(*pgd), 0);
+	spin_lock(&init_mm.page_table_lock);
+	pgd_clear(pgd);
+	spin_unlock(&init_mm.page_table_lock);
+
+	return true;
+}
+
+static void __meminit
+remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
+		 bool direct)
+{
+	unsigned long next, pages = 0;
+	pte_t *pte;
+	void *page_addr;
+	phys_addr_t phys_addr;
+
+	pte = pte_start + pte_index(addr);
+	for (; addr < end; addr = next, pte++) {
+		next = (addr + PAGE_SIZE) & PAGE_MASK;
+		if (next > end)
+			next = end;
+
+		if (!pte_present(*pte))
+			continue;
+
+		/*
+		 * We mapped [0,1G) memory as identity mapping when
+		 * initializing, in arch/x86/kernel/head_64.S. These
+		 * pagetables cannot be removed.
+		 */
+		phys_addr = pte_val(*pte) + (addr & PAGE_MASK);
+		if (phys_addr < (phys_addr_t)0x40000000)
+			return;
+
+		if (IS_ALIGNED(addr, PAGE_SIZE) &&
+		    IS_ALIGNED(next, PAGE_SIZE)) {
+			/*
+			 * Do not free direct mapping pages since they were
+			 * freed when offlining, or simply not in use.
+			 */
+			if (!direct)
+				free_pagetable(pte_page(*pte), 0);
+
+			spin_lock(&init_mm.page_table_lock);
+			pte_clear(&init_mm, addr, pte);
+			spin_unlock(&init_mm.page_table_lock);
+
+			/* For non-direct mapping, pages means nothing. */
+			pages++;
+		} else {
+			/*
+			 * If we are here, we are freeing vmemmap pages since
+			 * direct mapped memory ranges to be freed are aligned.
+			 *
+			 * If we are not removing the whole page, it means
+			 * other page structs in this page are being used and
+			 * we cannot remove them. So fill the unused page_structs
+			 * with 0xFD, and remove the page when it is wholly
+			 * filled with 0xFD.
+			 */
+			memset((void *)addr, PAGE_INUSE, next - addr);
+
+			page_addr = page_address(pte_page(*pte));
+			if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
+				free_pagetable(pte_page(*pte), 0);
+
+				spin_lock(&init_mm.page_table_lock);
+				pte_clear(&init_mm, addr, pte);
+				spin_unlock(&init_mm.page_table_lock);
+			}
+		}
+	}
+
+	/* Call free_pte_table() in remove_pmd_table(). */
+	flush_tlb_all();
+	if (direct)
+		update_page_count(PG_LEVEL_4K, -pages);
+}
+
+static void __meminit
+remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
+		 bool direct)
+{
+	unsigned long next, pages = 0;
+	pte_t *pte_base;
+	pmd_t *pmd;
+	void *page_addr;
+
+	pmd = pmd_start + pmd_index(addr);
+	for (; addr < end; addr = next, pmd++) {
+		next = pmd_addr_end(addr, end);
+
+		if (!pmd_present(*pmd))
+			continue;
+
+		if (pmd_large(*pmd)) {
+			if (IS_ALIGNED(addr, PMD_SIZE) &&
+			    IS_ALIGNED(next, PMD_SIZE)) {
+				if (!direct)
+					free_pagetable(pmd_page(*pmd),
+						       get_order(PMD_SIZE));
+
+				spin_lock(&init_mm.page_table_lock);
+				pmd_clear(pmd);
+				spin_unlock(&init_mm.page_table_lock);
+				pages++;
+			} else {
+				/* If here, we are freeing vmemmap pages. */
+				memset((void *)addr, PAGE_INUSE, next - addr);
+
+				page_addr = page_address(pmd_page(*pmd));
+				if (!memchr_inv(page_addr, PAGE_INUSE,
+						PMD_SIZE)) {
+					free_pagetable(pmd_page(*pmd),
+						       get_order(PMD_SIZE));
+
+					spin_lock(&init_mm.page_table_lock);
+					pmd_clear(pmd);
+					spin_unlock(&init_mm.page_table_lock);
+				}
+			}
+
+			continue;
+		}
+
+		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
+		remove_pte_table(pte_base, addr, next, direct);
+		free_pte_table(pte_base, pmd);
+	}
+
+	/* Call free_pmd_table() in remove_pud_table(). */
+	if (direct)
+		update_page_count(PG_LEVEL_2M, -pages);
+}
+
+static void __meminit
+remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
+		 bool direct)
+{
+	unsigned long next, pages = 0;
+	pmd_t *pmd_base;
+	pud_t *pud;
+	void *page_addr;
+
+	pud = pud_start + pud_index(addr);
+	for (; addr < end; addr = next, pud++) {
+		next = pud_addr_end(addr, end);
+
+		if (!pud_present(*pud))
+			continue;
+
+		if (pud_large(*pud)) {
+			if (IS_ALIGNED(addr, PUD_SIZE) &&
+			    IS_ALIGNED(next, PUD_SIZE)) {
+				if (!direct)
+					free_pagetable(pud_page(*pud),
+						       get_order(PUD_SIZE));
+
+				spin_lock(&init_mm.page_table_lock);
+				pud_clear(pud);
+				spin_unlock(&init_mm.page_table_lock);
+				pages++;
+			} else {
+				/* If here, we are freeing vmemmap pages. */
+				memset((void *)addr, PAGE_INUSE, next - addr);
+
+				page_addr = page_address(pud_page(*pud));
+				if (!memchr_inv(page_addr, PAGE_INUSE,
+						PUD_SIZE)) {
+					free_pagetable(pud_page(*pud),
+						       get_order(PUD_SIZE));
+
+					spin_lock(&init_mm.page_table_lock);
+					pud_clear(pud);
+					spin_unlock(&init_mm.page_table_lock);
+				}
+			}
+
+			continue;
+		}
+
+		pmd_base = (pmd_t *)pud_page_vaddr(*pud);
+		remove_pmd_table(pmd_base, addr, next, direct);
+		free_pmd_table(pmd_base, pud);
+	}
+
+	if (direct)
+		update_page_count(PG_LEVEL_1G, -pages);
+}
+
+/* start and end are both virtual addresses. */
+static void __meminit
+remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+	unsigned long next;
+	pgd_t *pgd;
+	pud_t *pud;
+	bool pgd_changed = false;
+
+	for (; start < end; start = next) {
+		next = pgd_addr_end(start, end);
+
+		pgd = pgd_offset_k(start);
+		if (!pgd_present(*pgd))
+			continue;
+
+		pud = (pud_t *)pgd_page_vaddr(*pgd);
+		remove_pud_table(pud, start, next, direct);
+		if (free_pud_table(pud, pgd))
+			pgd_changed = true;
+	}
+
+	if (pgd_changed)
+		sync_global_pgds(start, end - 1);
+
+	flush_tlb_all();
+}
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 int __ref arch_remove_memory(u64 start, u64 size)
 {