
Merge branch 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: Fix leak of free lapic data in kvm_arch_vcpu_init()
  KVM: x86: Fix probable memory leak of vcpu->arch.mce_banks
  KVM: S390: fix potential array overrun in intercept handling
  KVM: fix spurious interrupt with irqfd
  eventfd - allow atomic read and waitqueue remove
  KVM: MMU: bail out pagewalk on kvm_read_guest error
  KVM: properly check max PIC pin in irq route setup
  KVM: only allow one gsi per fd
  KVM: x86: Fix host_mapping_level()
  KVM: powerpc: Show timing option only on embedded
  KVM: Fix race between APIC TMR and IRR
Linus Torvalds, 15 years ago
commit 486d35e222

+ 1 - 1
arch/powerpc/kvm/Kconfig

@@ -53,7 +53,7 @@ config KVM_440
 
 config KVM_EXIT_TIMING
 	bool "Detailed exit timing"
-	depends on KVM
+	depends on KVM_440 || KVM_E500
 	---help---
 	  Calculate elapsed time for every exit/enter cycle. A per-vcpu
 	  report is available in debugfs kvm/vm#_vcpu#_timing.

+ 2 - 2
arch/s390/kvm/intercept.c

@@ -213,7 +213,7 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
 	return rc2;
 }
 
-static const intercept_handler_t intercept_funcs[0x48 >> 2] = {
+static const intercept_handler_t intercept_funcs[] = {
 	[0x00 >> 2] = handle_noop,
 	[0x04 >> 2] = handle_instruction,
 	[0x08 >> 2] = handle_prog,
@@ -230,7 +230,7 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 	intercept_handler_t func;
 	u8 code = vcpu->arch.sie_block->icptcode;
 
-	if (code & 3 || code > 0x48)
+	if (code & 3 || (code >> 2) >= ARRAY_SIZE(intercept_funcs))
 		return -ENOTSUPP;
 	func = intercept_funcs[code >> 2];
 	if (func)

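The s390 fix above replaces a hand-maintained bound (0x48) with one derived from the table itself via ARRAY_SIZE, so the check can never drift out of sync with the initializer. A minimal standalone sketch of the pattern in userspace C (handler names and return values are hypothetical, not the kernel's):

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

typedef int (*intercept_handler_t)(unsigned int code);

static int handle_noop(unsigned int code) { return 0; }
static int handle_instruction(unsigned int code) { return (int)code; }

/* Sized by its initializer, so the bounds check below can never
 * fall out of sync with the table contents. */
static const intercept_handler_t handlers[] = {
	[0x00 >> 2] = handle_noop,
	[0x04 >> 2] = handle_instruction,
};

static int dispatch(unsigned int code)
{
	intercept_handler_t func;

	/* Reject misaligned codes and anything past the table end. */
	if (code & 3 || code >> 2 >= ARRAY_SIZE(handlers))
		return -1;
	func = handlers[code >> 2];
	return func ? func(code) : -1;
}

int main(void)
{
	printf("%d\n", dispatch(0x04)); /* in range: 4 */
	printf("%d\n", dispatch(0x48)); /* past the table: -1 */
	return 0;
}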
+ 6 - 5
arch/x86/kvm/lapic.c

@@ -373,6 +373,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		if (unlikely(!apic_enabled(apic)))
 			break;
 
+		if (trig_mode) {
+			apic_debug("level trig mode for vector %d", vector);
+			apic_set_vector(vector, apic->regs + APIC_TMR);
+		} else
+			apic_clear_vector(vector, apic->regs + APIC_TMR);
+
 		result = !apic_test_and_set_irr(vector, apic);
 		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
 					  trig_mode, vector, !result);
@@ -383,11 +389,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			break;
 		}
 
-		if (trig_mode) {
-			apic_debug("level trig mode for vector %d", vector);
-			apic_set_vector(vector, apic->regs + APIC_TMR);
-		} else
-			apic_clear_vector(vector, apic->regs + APIC_TMR);
 		kvm_vcpu_kick(vcpu);
 		break;
 

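The lapic change moves the TMR update ahead of the IRR test-and-set: once the IRR bit is visible, another CPU may acknowledge the interrupt and consult TMR, so the trigger mode must be published first. A loose userspace analogue of that publish-before-flag ordering, using C11 atomics (all names are illustrative stand-ins, not the kernel's actual mechanism):

#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical stand-ins: trig_mode plays the TMR bit,
 * irr_pending plays the IRR bit. */
static atomic_int trig_mode;
static atomic_int irr_pending;

static void deliver(int level)
{
	/* Publish the trigger mode first ... */
	atomic_store_explicit(&trig_mode, level, memory_order_relaxed);
	/* ... then raise the pending flag with release semantics, so a
	 * consumer that observes the flag also observes the mode.
	 * Raising the flag first is the race fixed above: the interrupt
	 * could be acked against a stale TMR. */
	atomic_store_explicit(&irr_pending, 1, memory_order_release);
}

static void service(void)
{
	if (atomic_load_explicit(&irr_pending, memory_order_acquire))
		printf("%s-triggered interrupt\n",
		       atomic_load_explicit(&trig_mode, memory_order_relaxed)
			   ? "level" : "edge");
}

int main(void)
{
	deliver(1);
	service();
	return 0;
}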
+ 2 - 4
arch/x86/kvm/mmu.c

@@ -477,7 +477,7 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 
 	addr = gfn_to_hva(kvm, gfn);
 	if (kvm_is_error_hva(addr))
-		return page_size;
+		return PT_PAGE_TABLE_LEVEL;
 
 	down_read(&current->mm->mmap_sem);
 	vma = find_vma(current->mm, addr);
@@ -515,11 +515,9 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 	if (host_level == PT_PAGE_TABLE_LEVEL)
 		return host_level;
 
-	for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) {
-
+	for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level)
 		if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
 			break;
-	}
 
 	return level - 1;
 }

+ 3 - 1
arch/x86/kvm/paging_tmpl.h

@@ -150,7 +150,9 @@ walk:
 		walker->table_gfn[walker->level - 1] = table_gfn;
 		walker->pte_gpa[walker->level - 1] = pte_gpa;
 
-		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
+		if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)))
+			goto not_present;
+
 		trace_kvm_mmu_paging_element(pte, walker->level);
 
 		if (!is_present_gpte(pte))

+ 4 - 2
arch/x86/kvm/x86.c

@@ -5072,12 +5072,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 				       GFP_KERNEL);
 	if (!vcpu->arch.mce_banks) {
 		r = -ENOMEM;
-		goto fail_mmu_destroy;
+		goto fail_free_lapic;
 	}
 	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
 
 	return 0;
-
+fail_free_lapic:
+	kvm_free_lapic(vcpu);
 fail_mmu_destroy:
 	kvm_mmu_destroy(vcpu);
 fail_free_pio_data:
@@ -5088,6 +5089,7 @@ fail:
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
+	kfree(vcpu->arch.mce_banks);
 	kvm_free_lapic(vcpu);
 	down_read(&vcpu->kvm->slots_lock);
 	kvm_mmu_destroy(vcpu);

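The x86.c fix adds a rung to the error-unwind ladder so a failed mce_banks allocation frees the lapic allocated just before it; the leak was a goto that jumped one rung too far, skipping kvm_free_lapic(). A minimal sketch of the goto-ladder idiom, with malloc standing in for the kernel allocations and loosely named fields:

#include <stdlib.h>

struct vcpu { void *pio_data; void *lapic; void *mce_banks; };

/* Each allocation adds one unwind rung; the rungs run in reverse
 * order of acquisition, so jumping to the matching label frees
 * everything allocated so far and nothing more. */
static int vcpu_init(struct vcpu *v)
{
	v->pio_data = malloc(16);
	if (!v->pio_data)
		goto fail;
	v->lapic = malloc(16);
	if (!v->lapic)
		goto fail_free_pio_data;
	v->mce_banks = malloc(16);
	if (!v->mce_banks)
		goto fail_free_lapic; /* jumping further down leaks lapic */
	return 0;

fail_free_lapic:
	free(v->lapic);
fail_free_pio_data:
	free(v->pio_data);
fail:
	return -1;
}

int main(void)
{
	struct vcpu v;

	if (vcpu_init(&v))
		return 1;
	free(v.mce_banks);
	free(v.lapic);
	free(v.pio_data);
	return 0;
}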
+ 74 - 15
fs/eventfd.c

@@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
-			    loff_t *ppos)
+static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+{
+	*cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+	ctx->count -= *cnt;
+}
+
+/**
+ * eventfd_ctx_remove_wait_queue - Reads the current counter and removes the wait queue.

+ * @ctx: [in] Pointer to eventfd context.
+ * @wait: [in] Wait queue to be removed.
+ * @cnt: [out] Pointer to the 64bit counter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked.
+ *
+ * This is used to atomically remove a wait queue entry from the eventfd wait
+ * queue head, and read/reset the counter value.
+ */
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+				  __u64 *cnt)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->wqh.lock, flags);
+	eventfd_ctx_do_read(ctx, cnt);
+	__remove_wait_queue(&ctx->wqh, wait);
+	if (*cnt != 0 && waitqueue_active(&ctx->wqh))
+		wake_up_locked_poll(&ctx->wqh, POLLOUT);
+	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+
+	return *cnt != 0 ? 0 : -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
+
+/**
+ * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
+ * @ctx: [in] Pointer to eventfd context.
+ * @no_wait: [in] Non-zero if the operation should not block.
+ * @cnt: [out] Pointer to the 64bit counter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked but @no_wait was nonzero.
+ * -ERESTARTSYS : A signal interrupted the wait operation.
+ *
+ * If @no_wait is zero, the function might sleep until the eventfd internal
+ * counter becomes greater than zero.
+ */
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
 {
-	struct eventfd_ctx *ctx = file->private_data;
 	ssize_t res;
-	__u64 ucnt = 0;
 	DECLARE_WAITQUEUE(wait, current);
 
-	if (count < sizeof(ucnt))
-		return -EINVAL;
 	spin_lock_irq(&ctx->wqh.lock);
+	*cnt = 0;
 	res = -EAGAIN;
 	if (ctx->count > 0)
-		res = sizeof(ucnt);
-	else if (!(file->f_flags & O_NONBLOCK)) {
+		res = 0;
+	else if (!no_wait) {
 		__add_wait_queue(&ctx->wqh, &wait);
-		for (res = 0;;) {
+		for (;;) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (ctx->count > 0) {
-				res = sizeof(ucnt);
+				res = 0;
 				break;
 			}
 			if (signal_pending(current)) {
@@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
 		__remove_wait_queue(&ctx->wqh, &wait);
 		__set_current_state(TASK_RUNNING);
 	}
-	if (likely(res > 0)) {
-		ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
-		ctx->count -= ucnt;
+	if (likely(res == 0)) {
+		eventfd_ctx_do_read(ctx, cnt);
 		if (waitqueue_active(&ctx->wqh))
 			wake_up_locked_poll(&ctx->wqh, POLLOUT);
 	}
 	spin_unlock_irq(&ctx->wqh.lock);
-	if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
-		return -EFAULT;
 
 	return res;
 }
+EXPORT_SYMBOL_GPL(eventfd_ctx_read);
+
+static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
+			    loff_t *ppos)
+{
+	struct eventfd_ctx *ctx = file->private_data;
+	ssize_t res;
+	__u64 cnt;
+
+	if (count < sizeof(cnt))
+		return -EINVAL;
+	res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt);
+	if (res < 0)
+		return res;
+
+	return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt);
+}
 
 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
 			     loff_t *ppos)

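The new eventfd_ctx_do_read helper captures both read semantics in one place: in EFD_SEMAPHORE mode each read returns 1 and decrements the counter, while the default mode returns the whole counter and resets it to zero. The same behaviour is observable from userspace through eventfd(2); a small Linux-only demo:

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/eventfd.h>

int main(void)
{
	uint64_t val;
	int efd;

	/* Semaphore mode: each read returns 1 and decrements the counter. */
	efd = eventfd(3, EFD_SEMAPHORE);
	if (efd < 0 || read(efd, &val, sizeof(val)) != sizeof(val))
		return 1;
	printf("EFD_SEMAPHORE read: %llu\n", (unsigned long long)val); /* 1 */
	close(efd);

	/* Default mode: a read returns the whole counter and resets it. */
	efd = eventfd(3, 0);
	if (efd < 0 || read(efd, &val, sizeof(val)) != sizeof(val))
		return 1;
	printf("default read: %llu\n", (unsigned long long)val);       /* 3 */
	close(efd);
	return 0;
}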
+ 16 - 0
include/linux/eventfd.h

@@ -10,6 +10,7 @@
 
 #include <linux/fcntl.h>
 #include <linux/file.h>
+#include <linux/wait.h>
 
 /*
  * CAREFUL: Check include/asm-generic/fcntl.h when defining
@@ -34,6 +35,9 @@ struct file *eventfd_fget(int fd);
 struct eventfd_ctx *eventfd_ctx_fdget(int fd);
 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
 int eventfd_signal(struct eventfd_ctx *ctx, int n);
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt);
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+				  __u64 *cnt);
 
 #else /* CONFIG_EVENTFD */
 
@@ -61,6 +65,18 @@ static inline void eventfd_ctx_put(struct eventfd_ctx *ctx)
 
 }
 
+static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait,
+				       __u64 *cnt)
+{
+	return -ENOSYS;
+}
+
+static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
+						wait_queue_t *wait, __u64 *cnt)
+{
+	return -ENOSYS;
+}
+
 #endif
 
 #endif /* _LINUX_EVENTFD_H */

+ 15 - 3
virt/kvm/eventfd.c

@@ -72,12 +72,13 @@ static void
 irqfd_shutdown(struct work_struct *work)
 {
 	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
+	u64 cnt;
 
 	/*
 	 * Synchronize with the wait-queue and unhook ourselves to prevent
 	 * further events.
 	 */
-	remove_wait_queue(irqfd->wqh, &irqfd->wait);
+	eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
 
 	/*
 	 * We know no new events will be scheduled at this point, so block
@@ -166,7 +167,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 static int
 kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 {
-	struct _irqfd *irqfd;
+	struct _irqfd *irqfd, *tmp;
 	struct file *file = NULL;
 	struct eventfd_ctx *eventfd = NULL;
 	int ret;
@@ -203,9 +204,20 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
 	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
 
+	spin_lock_irq(&kvm->irqfds.lock);
+
+	ret = 0;
+	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
+		if (irqfd->eventfd != tmp->eventfd)
+			continue;
+		/* This fd is used for another irq already. */
+		ret = -EBUSY;
+		spin_unlock_irq(&kvm->irqfds.lock);
+		goto fail;
+	}
+
 	events = file->f_op->poll(file, &irqfd->pt);
 
-	spin_lock_irq(&kvm->irqfds.lock);
 	list_add_tail(&irqfd->list, &kvm->irqfds.items);
 	spin_unlock_irq(&kvm->irqfds.lock);
 

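kvm_irqfd_assign now takes irqfds.lock before polling the file, so the duplicate-eventfd scan and the list insertion happen inside one critical section and a racing assign cannot register the same fd for a second gsi. A minimal userspace sketch of check-then-insert under a single lock (all names hypothetical; a pthread mutex stands in for the kernel spinlock):

#include <pthread.h>
#include <stdio.h>

struct irqfd {
	int eventfd;              /* key: one irq route per eventfd */
	int gsi;
	struct irqfd *next;
};

static pthread_mutex_t items_lock = PTHREAD_MUTEX_INITIALIZER;
static struct irqfd *items;

/* The duplicate scan and the insertion share one critical section,
 * so a concurrent assign cannot slip a second route for the same fd
 * in between the check and the list add. */
static int irqfd_assign(struct irqfd *new)
{
	struct irqfd *tmp;

	pthread_mutex_lock(&items_lock);
	for (tmp = items; tmp; tmp = tmp->next) {
		if (tmp->eventfd == new->eventfd) {
			pthread_mutex_unlock(&items_lock);
			return -1; /* -EBUSY in the kernel version */
		}
	}
	new->next = items;
	items = new;
	pthread_mutex_unlock(&items_lock);
	return 0;
}

int main(void)
{
	struct irqfd a = { .eventfd = 5, .gsi = 1 };
	struct irqfd b = { .eventfd = 5, .gsi = 2 };

	printf("%d\n", irqfd_assign(&a)); /* 0 */
	printf("%d\n", irqfd_assign(&b)); /* -1: fd already routed */
	return 0;
}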
+ 5 - 1
virt/kvm/irq_comm.c

@@ -302,6 +302,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 {
 	int r = -EINVAL;
 	int delta;
+	unsigned max_pin;
 	struct kvm_kernel_irq_routing_entry *ei;
 	struct hlist_node *n;
 
@@ -322,12 +323,15 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 		switch (ue->u.irqchip.irqchip) {
 		case KVM_IRQCHIP_PIC_MASTER:
 			e->set = kvm_set_pic_irq;
+			max_pin = 16;
 			break;
 		case KVM_IRQCHIP_PIC_SLAVE:
 			e->set = kvm_set_pic_irq;
+			max_pin = 16;
 			delta = 8;
 			break;
 		case KVM_IRQCHIP_IOAPIC:
+			max_pin = KVM_IOAPIC_NUM_PINS;
 			e->set = kvm_set_ioapic_irq;
 			break;
 		default:
@@ -335,7 +339,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 		}
 		e->irqchip.irqchip = ue->u.irqchip.irqchip;
 		e->irqchip.pin = ue->u.irqchip.pin + delta;
-		if (e->irqchip.pin >= KVM_IOAPIC_NUM_PINS)
+		if (e->irqchip.pin >= max_pin)
 			goto out;
 		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
 		break;