
Merge branch 'for-upstream' of git://github.com/agraf/linux-2.6 into next

Alex says:

"Changes this time include:

  - Generalize KVM_GUEST support to overall ePAPR code
  - Fix reset for Book3S HV
  - Fix machine check deferral when CONFIG_KVM_GUEST=y
  - Add support for BookE register DECAR"

* 'for-upstream' of git://github.com/agraf/linux-2.6:
  KVM: PPC: Not optimizing MSR_CE and MSR_ME with paravirt.
  KVM: PPC: booke: Added DECAR support
  KVM: PPC: Book3S HV: Make the guest hash table size configurable
  KVM: PPC: Factor out guest epapr initialization

Signed-off-by: Avi Kivity <avi@redhat.com>
Avi Kivity, 13 years ago
Parent commit: 25e531a988

+ 36 - 0
Documentation/virtual/kvm/api.txt

@@ -1930,6 +1930,42 @@ The "pte_enc" field provides a value that can be OR'ed into the hash
 PTE's RPN field (ie, it needs to be shifted left by 12 to OR it
 into the hash PTE second double word).
 
+
+4.75 KVM_PPC_ALLOCATE_HTAB
+
+Capability: KVM_CAP_PPC_ALLOC_HTAB
+Architectures: powerpc
+Type: vm ioctl
+Parameters: Pointer to u32 containing hash table order (in/out)
+Returns: 0 on success, -1 on error
+
+This requests the host kernel to allocate an MMU hash table for a
+guest using the PAPR paravirtualization interface.  This only does
+anything if the kernel is configured to use the Book 3S HV style of
+virtualization.  Otherwise the capability doesn't exist and the ioctl
+returns an ENOTTY error.  The rest of this description assumes Book 3S
+HV.
+
+There must be no vcpus running when this ioctl is called; if there
+are, it will do nothing and return an EBUSY error.
+
+The parameter is a pointer to a 32-bit unsigned integer variable
+containing the order (log base 2) of the desired size of the hash
+table, which must be between 18 and 46.  On successful return from the
+ioctl, it will have been updated with the order of the hash table that
+was allocated.
+
+If no hash table has been allocated when any vcpu is asked to run
+(with the KVM_RUN ioctl), the host kernel will allocate a
+default-sized hash table (16 MB).
+
+If this ioctl is called when a hash table has already been allocated,
+the kernel will clear out the existing hash table (zero all HPTEs) and
+return the hash table order in the parameter.  (If the guest is using
+the virtualized real-mode area (VRMA) facility, the kernel will
+re-create the VRMA HPTEs on the next KVM_RUN of any vcpu.)
+
+
 5. The kvm_run structure
 ------------------------
 

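For illustration (not part of the patch), a minimal userspace sketch of driving the new ioctl; vm_fd is assumed to be a VM file descriptor returned by KVM_CREATE_VM, the installed linux/kvm.h is assumed to define KVM_PPC_ALLOCATE_HTAB, and error handling is kept minimal:

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int request_htab(int vm_fd)
    {
            uint32_t order = 20;    /* request a 1 MB hash table (2^20 bytes) */

            /* The kernel writes back the order it actually allocated,
             * which may differ from the one requested. */
            if (ioctl(vm_fd, KVM_PPC_ALLOCATE_HTAB, &order) < 0) {
                    perror("KVM_PPC_ALLOCATE_HTAB");
                    return -1;
            }
            printf("allocated HPT of order %u (%lu bytes)\n",
                   order, 1UL << order);
            return 0;
    }
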
+ 0 - 2
Documentation/virtual/kvm/ppc-pv.txt

@@ -109,8 +109,6 @@ The following bits are safe to be set inside the guest:
 
   MSR_EE
   MSR_RI
-  MSR_CR
-  MSR_ME
 
 If any other bit changes in the MSR, please still use mtmsr(d).
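For illustration (not from the patch), the distinction in guest code; mfmsr()/mtmsr() are the usual register accessors. Toggling MSR_EE touches only a safe bit, so the patched-in emulation can complete it without a trap, while toggling MSR_ME now involves a critical bit and takes the real mtmsr path:

    unsigned long msr = mfmsr();

    mtmsr(msr | MSR_EE);    /* safe bit: paravirt fast path may apply */
    mtmsr(msr | MSR_ME);    /* critical bit after this patch: falls back
                               to the real mtmsr */
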
 

+ 2 - 0
arch/powerpc/include/asm/epapr_hcalls.h

@@ -153,6 +153,8 @@
 #define EV_HCALL_CLOBBERS2 EV_HCALL_CLOBBERS3, "r5"
 #define EV_HCALL_CLOBBERS1 EV_HCALL_CLOBBERS2, "r4"
 
+extern bool epapr_paravirt_enabled;
+extern u32 epapr_hypercall_start[];
 
 /*
  * We use "uintptr_t" to define a register because it's guaranteed to be a

+ 2 - 5
arch/powerpc/include/asm/kvm_book3s_64.h

@@ -36,11 +36,8 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 #define SPAPR_TCE_SHIFT		12
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
-/* For now use fixed-size 16MB page table */
-#define HPT_ORDER	24
-#define HPT_NPTEG	(1ul << (HPT_ORDER - 7))	/* 128B per pteg */
-#define HPT_NPTE	(HPT_NPTEG << 3)		/* 8 PTEs per PTEG */
-#define HPT_HASH_MASK	(HPT_NPTEG - 1)
+#define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
+extern int kvm_hpt_order;		/* order of preallocated HPTs */
 #endif
 
 #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */

+ 6 - 0
arch/powerpc/include/asm/kvm_host.h

@@ -237,6 +237,10 @@ struct kvm_arch {
 	unsigned long vrma_slb_v;
 	int rma_setup_done;
 	int using_mmu_notifiers;
+	u32 hpt_order;
+	atomic_t vcpus_running;
+	unsigned long hpt_npte;
+	unsigned long hpt_mask;
 	spinlock_t slot_phys_lock;
 	unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
 	int slot_npages[KVM_MEM_SLOTS_NUM];
@@ -414,7 +418,9 @@ struct kvm_vcpu_arch {
 	ulong mcsrr1;
 	ulong mcsr;
 	u32 dec;
+#ifdef CONFIG_BOOKE
 	u32 decar;
+#endif
 	u32 tbl;
 	u32 tbu;
 	u32 tcr;

+ 2 - 1
arch/powerpc/include/asm/kvm_ppc.h

@@ -119,7 +119,8 @@ extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
 extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu);
 extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
 
-extern long kvmppc_alloc_hpt(struct kvm *kvm);
+extern long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp);
+extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp);
 extern void kvmppc_free_hpt(struct kvm *kvm);
 extern long kvmppc_prepare_vrma(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem);

+ 1 - 0
arch/powerpc/kernel/Makefile

@@ -128,6 +128,7 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
 obj-y				+= ppc_save_regs.o
 endif
 
+obj-$(CONFIG_EPAPR_PARAVIRT)	+= epapr_paravirt.o epapr_hcalls.o
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o kvm_emul.o
 
 # Disable GCOV in odd or sensitive code

+ 25 - 0
arch/powerpc/kernel/epapr_hcalls.S

@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+/* Hypercall entry point. Will be patched with device tree instructions. */
+.global epapr_hypercall_start
+epapr_hypercall_start:
+	li	r3, -1
+	nop
+	nop
+	nop
+	blr

+ 52 - 0
arch/powerpc/kernel/epapr_paravirt.c

@@ -0,0 +1,52 @@
+/*
+ * ePAPR para-virtualization support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/of.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+
+bool epapr_paravirt_enabled;
+
+static int __init epapr_paravirt_init(void)
+{
+	struct device_node *hyper_node;
+	const u32 *insts;
+	int len, i;
+
+	hyper_node = of_find_node_by_path("/hypervisor");
+	if (!hyper_node)
+		return -ENODEV;
+
+	insts = of_get_property(hyper_node, "hcall-instructions", &len);
+	if (!insts)
+		return -ENODEV;
+
+	if (len % 4 || len > (4 * 4))
+		return -ENODEV;
+
+	for (i = 0; i < (len / 4); i++)
+		patch_instruction(epapr_hypercall_start + i, insts[i]);
+
+	epapr_paravirt_enabled = true;
+
+	return 0;
+}
+
+early_initcall(epapr_paravirt_init);
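For context (not part of the patch), the /hypervisor node this initcall consumes is defined by ePAPR; a hypothetical device tree fragment with placeholder values (0x44000022 encodes "sc 1") might look like:

    hypervisor {
            compatible = "linux,kvm";
            /* at most four instruction words, patched verbatim over the
             * start of epapr_hypercall_start */
            hcall-instructions = <0x44000022>;
    };
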

+ 3 - 25
arch/powerpc/kernel/kvm.c

@@ -31,6 +31,7 @@
 #include <asm/cacheflush.h>
 #include <asm/disassemble.h>
 #include <asm/ppc-opcode.h>
+#include <asm/epapr_hcalls.h>
 
 #define KVM_MAGIC_PAGE		(-4096L)
 #define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x)
@@ -726,7 +727,7 @@ unsigned long kvm_hypercall(unsigned long *in,
 	unsigned long register r11 asm("r11") = nr;
 	unsigned long register r12 asm("r12");
 
-	asm volatile("bl	kvm_hypercall_start"
+	asm volatile("bl	epapr_hypercall_start"
 		     : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
 		       "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11),
 		       "=r"(r12)
@@ -747,29 +748,6 @@ unsigned long kvm_hypercall(unsigned long *in,
 }
 EXPORT_SYMBOL_GPL(kvm_hypercall);
 
-static int kvm_para_setup(void)
-{
-	extern u32 kvm_hypercall_start;
-	struct device_node *hyper_node;
-	u32 *insts;
-	int len, i;
-
-	hyper_node = of_find_node_by_path("/hypervisor");
-	if (!hyper_node)
-		return -1;
-
-	insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len);
-	if (len % 4)
-		return -1;
-	if (len > (4 * 4))
-		return -1;
-
-	for (i = 0; i < (len / 4); i++)
-		kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]);
-
-	return 0;
-}
-
 static __init void kvm_free_tmp(void)
 {
 	unsigned long start, end;
@@ -791,7 +769,7 @@ static int __init kvm_guest_init(void)
 	if (!kvm_para_available())
 		goto free_tmp;
 
-	if (kvm_para_setup())
+	if (!epapr_paravirt_enabled)
 		goto free_tmp;
 
 	if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))

+ 1 - 11
arch/powerpc/kernel/kvm_emul.S

@@ -24,16 +24,6 @@
 #include <asm/page.h>
 #include <asm/asm-offsets.h>
 
-/* Hypercall entry point. Will be patched with device tree instructions. */
-
-.global kvm_hypercall_start
-kvm_hypercall_start:
-	li	r3, -1
-	nop
-	nop
-	nop
-	blr
-
 #define KVM_MAGIC_PAGE		(-4096)
 
 #ifdef CONFIG_64BIT
@@ -132,7 +122,7 @@ kvm_emulate_mtmsrd_len:
 	.long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4
 
 
-#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI)
+#define MSR_SAFE_BITS (MSR_EE | MSR_RI)
 #define MSR_CRITICAL_BITS ~MSR_SAFE_BITS
 
 .global kvm_emulate_mtmsr

+ 95 - 28
arch/powerpc/kvm/book3s_64_mmu_hv.c

@@ -37,56 +37,121 @@
 /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
 #define MAX_LPID_970	63
 
-long kvmppc_alloc_hpt(struct kvm *kvm)
+/* The Power architecture requires the HPT to be at least 256 kB */
+#define PPC_MIN_HPT_ORDER	18
+
+long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 {
 	unsigned long hpt;
-	long lpid;
 	struct revmap_entry *rev;
 	struct kvmppc_linear_info *li;
+	long order = kvm_hpt_order;
 
-	/* Allocate guest's hashed page table */
-	li = kvm_alloc_hpt();
-	if (li) {
-		/* using preallocated memory */
-		hpt = (ulong)li->base_virt;
-		kvm->arch.hpt_li = li;
-	} else {
-		/* using dynamic memory */
+	if (htab_orderp) {
+		order = *htab_orderp;
+		if (order < PPC_MIN_HPT_ORDER)
+			order = PPC_MIN_HPT_ORDER;
+	}
+
+	/*
+	 * If the user wants a different size from default,
+	 * try first to allocate it from the kernel page allocator.
+	 */
+	hpt = 0;
+	if (order != kvm_hpt_order) {
 		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
-				       __GFP_NOWARN, HPT_ORDER - PAGE_SHIFT);
+				       __GFP_NOWARN, order - PAGE_SHIFT);
+		if (!hpt)
+			--order;
 	}
 
+	/* Next try to allocate from the preallocated pool */
 	if (!hpt) {
-		pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n");
-		return -ENOMEM;
+		li = kvm_alloc_hpt();
+		if (li) {
+			hpt = (ulong)li->base_virt;
+			kvm->arch.hpt_li = li;
+			order = kvm_hpt_order;
+		}
 	}
+
+	/* Lastly try successively smaller sizes from the page allocator */
+	while (!hpt && order > PPC_MIN_HPT_ORDER) {
+		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
+				       __GFP_NOWARN, order - PAGE_SHIFT);
+		if (!hpt)
+			--order;
+	}
+
+	if (!hpt)
+		return -ENOMEM;
+
 	kvm->arch.hpt_virt = hpt;
+	kvm->arch.hpt_order = order;
+	/* HPTEs are 2**4 bytes long */
+	kvm->arch.hpt_npte = 1ul << (order - 4);
+	/* 128 (2**7) bytes in each HPTEG */
+	kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
 
 	/* Allocate reverse map array */
-	rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE);
+	rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
 	if (!rev) {
 		pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
 		goto out_freehpt;
 	}
 	kvm->arch.revmap = rev;
+	kvm->arch.sdr1 = __pa(hpt) | (order - 18);
 
-	lpid = kvmppc_alloc_lpid();
-	if (lpid < 0)
-		goto out_freeboth;
+	pr_info("KVM guest htab at %lx (order %ld), LPID %x\n",
+		hpt, order, kvm->arch.lpid);
 
-	kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
-	kvm->arch.lpid = lpid;
-
-	pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
+	if (htab_orderp)
+		*htab_orderp = order;
 	return 0;
 
- out_freeboth:
-	vfree(rev);
  out_freehpt:
-	free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
+	if (kvm->arch.hpt_li)
+		kvm_release_hpt(kvm->arch.hpt_li);
+	else
+		free_pages(hpt, order - PAGE_SHIFT);
 	return -ENOMEM;
 }
 
+long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
+{
+	long err = -EBUSY;
+	long order;
+
+	mutex_lock(&kvm->lock);
+	if (kvm->arch.rma_setup_done) {
+		kvm->arch.rma_setup_done = 0;
+		/* order rma_setup_done vs. vcpus_running */
+		smp_mb();
+		if (atomic_read(&kvm->arch.vcpus_running)) {
+			kvm->arch.rma_setup_done = 1;
+			goto out;
+		}
+	}
+	if (kvm->arch.hpt_virt) {
+		order = kvm->arch.hpt_order;
+		/* Set the entire HPT to 0, i.e. invalid HPTEs */
+		memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
+		/*
+		 * Set the whole last_vcpu array to an invalid vcpu number.
+		 * This ensures that each vcpu will flush its TLB on next entry.
+		 */
+		memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu));
+		*htab_orderp = order;
+		err = 0;
+	} else {
+		err = kvmppc_alloc_hpt(kvm, htab_orderp);
+		order = *htab_orderp;
+	}
+ out:
+	mutex_unlock(&kvm->lock);
+	return err;
+}
+
 void kvmppc_free_hpt(struct kvm *kvm)
 {
 	kvmppc_free_lpid(kvm->arch.lpid);
@@ -94,7 +159,8 @@ void kvmppc_free_hpt(struct kvm *kvm)
 	if (kvm->arch.hpt_li)
 		kvm_release_hpt(kvm->arch.hpt_li);
 	else
-		free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
+		free_pages(kvm->arch.hpt_virt,
+			   kvm->arch.hpt_order - PAGE_SHIFT);
 }
 
 /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
@@ -119,6 +185,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 	unsigned long psize;
 	unsigned long hp0, hp1;
 	long ret;
+	struct kvm *kvm = vcpu->kvm;
 
 	psize = 1ul << porder;
 	npages = memslot->npages >> (porder - PAGE_SHIFT);
@@ -127,8 +194,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 	if (npages > 1ul << (40 - porder))
 		npages = 1ul << (40 - porder);
 	/* Can't use more than 1 HPTE per HPTEG */
-	if (npages > HPT_NPTEG)
-		npages = HPT_NPTEG;
+	if (npages > kvm->arch.hpt_mask + 1)
+		npages = kvm->arch.hpt_mask + 1;
 
 	hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
 		HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
@@ -138,7 +205,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 	for (i = 0; i < npages; ++i) {
 		addr = i << porder;
 		/* can't use hpt_hash since va > 64 bits */
-		hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
+		hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask;
 		/*
 		 * We assume that the hash table is empty and no
 		 * vcpus are using it at this stage.  Since we create
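As a worked example of the sizing fields set above (not from the patch): with the default order of 24 the table is 16 MB and holds 2^20 16-byte HPTEs grouped into 2^17 128-byte HPTEGs, so hpt_mask is 0x1ffff. A standalone sketch of the arithmetic:

    #include <stdio.h>

    int main(void)
    {
            unsigned long order = 24;                       /* KVM_DEFAULT_HPT_ORDER */
            unsigned long bytes = 1ul << order;             /* 16777216 = 16 MB */
            unsigned long npte  = 1ul << (order - 4);       /* HPTEs, 16 bytes each */
            unsigned long mask  = (1ul << (order - 7)) - 1; /* hash mask over HPTEGs */

            printf("HPT: %lu bytes, %lu HPTEs, mask 0x%lx\n",
                   bytes, npte, mask);
            return 0;
    }
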

+ 28 - 12
arch/powerpc/kvm/book3s_hv.c

@@ -56,7 +56,7 @@
 /* #define EXIT_DEBUG_INT */
 
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
-static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu);
+static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
@@ -1068,11 +1068,15 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		return -EINTR;
 	}
 
-	/* On the first time here, set up VRMA or RMA */
+	atomic_inc(&vcpu->kvm->arch.vcpus_running);
+	/* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
+	smp_mb();
+
+	/* On the first time here, set up HTAB and VRMA or RMA */
 	if (!vcpu->kvm->arch.rma_setup_done) {
-		r = kvmppc_hv_setup_rma(vcpu);
+		r = kvmppc_hv_setup_htab_rma(vcpu);
 		if (r)
-			return r;
+			goto out;
 	}
 
 	flush_fp_to_thread(current);
@@ -1090,6 +1094,9 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			kvmppc_core_prepare_to_enter(vcpu);
 		}
 	} while (r == RESUME_GUEST);
+
+ out:
+	atomic_dec(&vcpu->kvm->arch.vcpus_running);
 	return r;
 }
 
@@ -1305,7 +1312,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
 {
 }
 
-static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
+static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 {
 	int err = 0;
 	struct kvm *kvm = vcpu->kvm;
@@ -1324,6 +1331,15 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
 	if (kvm->arch.rma_setup_done)
 		goto out;	/* another vcpu beat us to it */
 
+	/* Allocate hashed page table (if not done already) and reset it */
+	if (!kvm->arch.hpt_virt) {
+		err = kvmppc_alloc_hpt(kvm, NULL);
+		if (err) {
+			pr_err("KVM: Couldn't alloc HPT\n");
+			goto out;
+		}
+	}
+
 	/* Look up the memslot for guest physical address 0 */
 	memslot = gfn_to_memslot(kvm, 0);
 
@@ -1435,13 +1451,14 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
 
 int kvmppc_core_init_vm(struct kvm *kvm)
 {
-	long r;
-	unsigned long lpcr;
+	unsigned long lpcr;
+	long lpid;
 
-	/* Allocate hashed page table */
-	r = kvmppc_alloc_hpt(kvm);
-	if (r)
-		return r;
+	/* Allocate the guest's logical partition ID */
+
+	lpid = kvmppc_alloc_lpid();
+	if (lpid < 0)
+		return -ENOMEM;
+	kvm->arch.lpid = lpid;
 
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
 
@@ -1451,7 +1468,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 
 	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
 		/* PPC970; HID4 is effectively the LPCR */
-		unsigned long lpid = kvm->arch.lpid;
 		kvm->arch.host_lpid = 0;
 		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
 		lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
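The atomic_inc()/smp_mb() added in kvmppc_vcpu_run pairs with the smp_mb() in kvmppc_alloc_reset_hpt above in a Dekker-style handshake; a sketch of the two sides (illustration, not patch code):

    vcpu entry (kvmppc_vcpu_run)        HPT reset (kvmppc_alloc_reset_hpt)
    --------------------------------    ----------------------------------
    atomic_inc(&vcpus_running);         rma_setup_done = 0;
    smp_mb();                           smp_mb();
    if (!rma_setup_done)                if (atomic_read(&vcpus_running))
            set up HTAB and RMA;                restore flag, return -EBUSY;

At least one side is guaranteed to observe the other's store, so the hash table is never cleared while a vcpu is, or is about to be, running on it.
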

+ 4 - 1
arch/powerpc/kvm/book3s_hv_builtin.c

@@ -25,6 +25,9 @@ static void __init kvm_linear_init_one(ulong size, int count, int type);
 static struct kvmppc_linear_info *kvm_alloc_linear(int type);
 static void kvm_release_linear(struct kvmppc_linear_info *ri);
 
+int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER;
+EXPORT_SYMBOL_GPL(kvm_hpt_order);
+
 /*************** RMA *************/
 
 /*
@@ -209,7 +212,7 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri)
 void __init kvm_linear_init(void)
 {
 	/* HPT */
-	kvm_linear_init_one(1 << HPT_ORDER, kvm_hpt_count, KVM_LINEAR_HPT);
+	kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
 
 	/* RMA */
 	/* Only do this on PPC970 in HV mode */

+ 8 - 7
arch/powerpc/kvm/book3s_hv_rm_mmu.c

@@ -237,7 +237,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 
 	/* Find and lock the HPTEG slot to use */
  do_insert:
-	if (pte_index >= HPT_NPTE)
+	if (pte_index >= kvm->arch.hpt_npte)
 		return H_PARAMETER;
 	if (likely((flags & H_EXACT) == 0)) {
 		pte_index &= ~7UL;
@@ -352,7 +352,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 	unsigned long v, r, rb;
 	struct revmap_entry *rev;
 
-	if (pte_index >= HPT_NPTE)
+	if (pte_index >= kvm->arch.hpt_npte)
 		return H_PARAMETER;
 	hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
 	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
@@ -419,7 +419,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 				i = 4;
 				break;
 			}
-			if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) {
+			if (req != 1 || flags == 3 ||
+			    pte_index >= kvm->arch.hpt_npte) {
 				/* parameter error */
 				args[j] = ((0xa0 | flags) << 56) + pte_index;
 				ret = H_PARAMETER;
@@ -521,7 +522,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 	struct revmap_entry *rev;
 	unsigned long v, r, rb, mask, bits;
 
-	if (pte_index >= HPT_NPTE)
+	if (pte_index >= kvm->arch.hpt_npte)
 		return H_PARAMETER;
 
 	hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
@@ -583,7 +584,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 	int i, n = 1;
 	struct revmap_entry *rev = NULL;
 
-	if (pte_index >= HPT_NPTE)
+	if (pte_index >= kvm->arch.hpt_npte)
 		return H_PARAMETER;
 	if (flags & H_READ_4) {
 		pte_index &= ~3;
@@ -678,7 +679,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
 		somask = (1UL << 28) - 1;
 		vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
 	}
-	hash = (vsid ^ ((eaddr & somask) >> pshift)) & HPT_HASH_MASK;
+	hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
 	avpn = slb_v & ~(somask >> 16);	/* also includes B */
 	avpn |= (eaddr & somask) >> 16;
 
@@ -723,7 +724,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
 		if (val & HPTE_V_SECONDARY)
 			break;
 		val |= HPTE_V_SECONDARY;
-		hash = hash ^ HPT_HASH_MASK;
+		hash = hash ^ kvm->arch.hpt_mask;
 	}
 	return -1;
 }

+ 5 - 0
arch/powerpc/kvm/booke.c

@@ -1267,6 +1267,11 @@ void kvmppc_decrementer_func(unsigned long data)
 {
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
 
+	if (vcpu->arch.tcr & TCR_ARE) {
+		vcpu->arch.dec = vcpu->arch.decar;
+		kvmppc_emulate_dec(vcpu);
+	}
+
 	kvmppc_set_tsr_bits(vcpu, TSR_DIS);
 }
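For context (not part of the patch), a BookE guest would arm the auto-reload behavior emulated above roughly as follows; SPRN_DECAR, TCR_ARE and TCR_DIE are assumed to come from asm/reg_booke.h:

    /* Sketch: program the reload value, then enable auto-reload and
     * decrementer interrupts in TCR. */
    static void enable_dec_autoreload(u32 reload)
    {
            mtspr(SPRN_DECAR, reload);      /* copied into DEC at each expiry */
            mtspr(SPRN_TCR, mfspr(SPRN_TCR) | TCR_ARE | TCR_DIE);
    }
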
 

+ 3 - 0
arch/powerpc/kvm/booke_emulate.c

@@ -129,6 +129,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 		kvmppc_set_tcr(vcpu, spr_val);
 		break;
 
+	case SPRN_DECAR:
+		vcpu->arch.decar = spr_val;
+		break;
 	/*
 	 * Note: SPRG4-7 are user-readable.
 	 * These values are loaded into the real SPRGs when resuming the

+ 3 - 0
arch/powerpc/kvm/e500_emulate.c

@@ -269,6 +269,9 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 		*spr_val = vcpu->arch.shared->mas7_3 >> 32;
 		break;
 #endif
+	case SPRN_DECAR:
+		*spr_val = vcpu->arch.decar;
+		break;
 	case SPRN_TLB0CFG:
 		*spr_val = vcpu->arch.tlbcfg[0];
 		break;

+ 18 - 0
arch/powerpc/kvm/powerpc.c

@@ -246,6 +246,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CAP_SPAPR_TCE:
+	case KVM_CAP_PPC_ALLOC_HTAB:
 		r = 1;
 		break;
 #endif /* CONFIG_PPC_BOOK3S_64 */
@@ -802,6 +803,23 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			r = -EFAULT;
 		break;
 	}
+
+	case KVM_PPC_ALLOCATE_HTAB: {
+		struct kvm *kvm = filp->private_data;
+		u32 htab_order;
+
+		r = -EFAULT;
+		if (get_user(htab_order, (u32 __user *)argp))
+			break;
+		r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
+		if (r)
+			break;
+		r = -EFAULT;
+		if (put_user(htab_order, (u32 __user *)argp))
+			break;
+		r = 0;
+		break;
+	}
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 
 #ifdef CONFIG_PPC_BOOK3S_64

+ 9 - 0
arch/powerpc/platforms/Kconfig

@@ -25,6 +25,7 @@ source "arch/powerpc/platforms/wsp/Kconfig"
 config KVM_GUEST
 	bool "KVM Guest support"
 	default n
+	select EPAPR_PARAVIRT
 	---help---
 	  This option enables various optimizations for running under the KVM
 	  hypervisor. Overhead for the kernel when not running inside KVM should
@@ -32,6 +33,14 @@ config KVM_GUEST
 
 	  In case of doubt, say Y
 
+config EPAPR_PARAVIRT
+	bool "ePAPR para-virtualization support"
+	default n
+	help
+	  Enables ePAPR para-virtualization support for guests.
+
+	  In case of doubt, say Y
+
 config PPC_NATIVE
 	bool
 	depends on 6xx || PPC64

+ 3 - 0
include/linux/kvm.h

@@ -617,6 +617,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_SIGNAL_MSI 77
 #define KVM_CAP_PPC_GET_SMMU_INFO 78
 #define KVM_CAP_S390_COW 79
+#define KVM_CAP_PPC_ALLOC_HTAB 80
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -828,6 +829,8 @@ struct kvm_s390_ucas_mapping {
 #define KVM_SIGNAL_MSI            _IOW(KVMIO,  0xa5, struct kvm_msi)
 /* Available with KVM_CAP_PPC_GET_SMMU_INFO */
 #define KVM_PPC_GET_SMMU_INFO	  _IOR(KVMIO,  0xa6, struct kvm_ppc_smmu_info)
+/* Available with KVM_CAP_PPC_ALLOC_HTAB */
+#define KVM_PPC_ALLOCATE_HTAB	  _IOWR(KVMIO, 0xa7, __u32)
 
 /*
  * ioctls for vcpu fds