From e6adf28365b2fca0b5235cabff00c9f3d1e7bdf4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 30 Apr 2007 16:07:54 +0300 Subject: KVM: Avoid saving and restoring some host CPU state on lightweight vmexit Many msrs and the like will only be used by the host if we schedule() or return to userspace. Therefore, we avoid saving them if we handle the exit within the kernel, and if a reschedule is not requested. Based on a patch from Eddie Dong with a couple of fixes by me. Signed-off-by: Yaozu(Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 8f1f07adb04e..7d682586423b 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -72,6 +72,7 @@ static struct kvm_stats_debugfs_item { { "halt_exits", STAT_OFFSET(halt_exits) }, { "request_irq", STAT_OFFSET(request_irq_exits) }, { "irq_exits", STAT_OFFSET(irq_exits) }, + { "light_exits", STAT_OFFSET(light_exits) }, { NULL } }; -- cgit v1.2.3 From 09072daf37abbfe8e2d5018dd913f229c76190f7 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 1 May 2007 14:16:52 +0300 Subject: KVM: Unify kvm_mmu_pre_write() and kvm_mmu_post_write() Instead of calling two functions and repeating expensive checks, call one function and provide it with before/after information. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 7d682586423b..b6ad9c6f2efe 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1071,18 +1071,18 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, { struct page *page; void *virt; + unsigned offset = offset_in_page(gpa); if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) return 0; page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); if (!page) return 0; - kvm_mmu_pre_write(vcpu, gpa, bytes); mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); virt = kmap_atomic(page, KM_USER0); + kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes); memcpy(virt + offset_in_page(gpa), val, bytes); kunmap_atomic(virt, KM_USER0); - kvm_mmu_post_write(vcpu, gpa, bytes); return 1; } -- cgit v1.2.3 From 2dc7094b5662c4446aa647b257d47a9412fbacc9 Mon Sep 17 00:00:00 2001 From: Matthew Gregan Date: Sun, 6 May 2007 10:59:46 +0300 Subject: KVM: Implement IA32_EBL_CR_POWERON msr Attempting to boot the default 'bsd' kernel of OpenBSD 4.1 i386 in a guest fails early in the kernel init inside p3_get_bus_clock while trying to read the IA32_EBL_CR_POWERON MSR. KVM logs an 'unhandled MSR' message and the guest kernel faults. This patch is sufficient to allow OpenBSD to boot, after which it seems to run fine. I'm not sure if this is the correct solution for dealing with this particular MSR, but it works for me. Signed-off-by: Matthew Gregan Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index b6ad9c6f2efe..095d673b9efb 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1470,6 +1470,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_MC0_MISC+16: case MSR_IA32_UCODE_REV: case MSR_IA32_PERF_STATUS: + case MSR_IA32_EBL_CR_POWERON: /* MTRR registers */ case 0xfe: case 0x200 ... 0x2ff: -- cgit v1.2.3 From 2cc51560aed0edb291341089d3475e1fbe8bfd04 Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Mon, 21 May 2007 07:28:09 +0300 Subject: KVM: VMX: Avoid saving and restoring msr_efer on lightweight vmexit MSR_EFER.LME/LMA bits are automatically save/restored by VMX hardware, KVM only needs to save NX/SCE bits at time of heavy weight VM Exit. But clearing NX bits in host envirnment may cause system hang if the host page table is using EXB bits, thus we leave NX bits as it is. If Host NX=1 and guest NX=0, we can do guest page table EXB bits check before inserting a shadow pte (though no guest is expecting to see this kind of gp fault). If host NX=0, we present guest no Execute-Disable feature to guest, thus no host NX=0, guest NX=1 combination. This patch reduces raw vmexit time by ~27%. Me: fix compile warnings on i386. Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 095d673b9efb..af07cd539bba 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -73,6 +73,7 @@ static struct kvm_stats_debugfs_item { { "request_irq", STAT_OFFSET(request_irq_exits) }, { "irq_exits", STAT_OFFSET(irq_exits) }, { "light_exits", STAT_OFFSET(light_exits) }, + { "efer_reload", STAT_OFFSET(efer_reload) }, { NULL } }; @@ -2378,6 +2379,27 @@ out: return r; } +static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) +{ + u64 efer; + int i; + struct kvm_cpuid_entry *e, *entry; + + rdmsrl(MSR_EFER, efer); + entry = NULL; + for (i = 0; i < vcpu->cpuid_nent; ++i) { + e = &vcpu->cpuid_entries[i]; + if (e->function == 0x80000001) { + entry = e; + break; + } + } + if (entry && (entry->edx & EFER_NX) && !(efer & EFER_NX)) { + entry->edx &= ~(1 << 20); + printk(KERN_INFO ": guest NX capability removed\n"); + } +} + static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries) @@ -2392,6 +2414,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, cpuid->nent * sizeof(struct kvm_cpuid_entry))) goto out; vcpu->cpuid_nent = cpuid->nent; + cpuid_fix_nx_cap(vcpu); return 0; out: -- cgit v1.2.3 From d3d25b048b9c7e5c1c20918157a71df734f71766 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 30 May 2007 12:34:53 +0300 Subject: KVM: MMU: Use slab caches for shadow pages and their headers Use slab caches instead of a simple custom list. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index af07cd539bba..bf35457ce377 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -326,7 +326,6 @@ static struct kvm *kvm_create_vm(void) vcpu->cpu = -1; vcpu->kvm = kvm; vcpu->mmu.root_hpa = INVALID_PAGE; - INIT_LIST_HEAD(&vcpu->free_pages); spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); -- cgit v1.2.3 From 313899477f7578d37e82ead1af10f794a6da3c90 Mon Sep 17 00:00:00 2001 From: Nguyen Anh Quynh Date: Tue, 5 Jun 2007 10:35:19 +0300 Subject: KVM: Remove unnecessary initialization and checks in mark_page_dirty() Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index bf35457ce377..3c3231d8dabf 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -970,7 +970,7 @@ EXPORT_SYMBOL_GPL(gfn_to_page); void mark_page_dirty(struct kvm *kvm, gfn_t gfn) { int i; - struct kvm_memory_slot *memslot = NULL; + struct kvm_memory_slot *memslot; unsigned long rel_gfn; for (i = 0; i < kvm->nmemslots; ++i) { @@ -979,7 +979,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) if (gfn >= memslot->base_gfn && gfn < memslot->base_gfn + memslot->npages) { - if (!memslot || !memslot->dirty_bitmap) + if (!memslot->dirty_bitmap) return; rel_gfn = gfn - memslot->base_gfn; -- cgit v1.2.3 From 7b53aa56508479507c6e5667bb252ca7c2cd19cf Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 5 Jun 2007 12:17:03 +0300 Subject: KVM: Fix vcpu freeing for guest smp A vcpu can pin up to four mmu shadow pages, which means the freeing loop will never terminate. Fix by first unpinning shadow pages on all vcpus, then freeing shadow pages. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 3c3231d8dabf..3ff8ee56279c 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -381,6 +381,16 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu) } } +static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) +{ + if (!vcpu->vmcs) + return; + + vcpu_load(vcpu); + kvm_mmu_unload(vcpu); + vcpu_put(vcpu); +} + static void kvm_free_vcpu(struct kvm_vcpu *vcpu) { if (!vcpu->vmcs) @@ -401,6 +411,11 @@ static void kvm_free_vcpus(struct kvm *kvm) { unsigned int i; + /* + * Unpin any mmu pages first. + */ + for (i = 0; i < KVM_MAX_VCPUS; ++i) + kvm_unload_vcpu_mmu(&kvm->vcpus[i]); for (i = 0; i < KVM_MAX_VCPUS; ++i) kvm_free_vcpu(&kvm->vcpus[i]); } -- cgit v1.2.3 From 120e9a453b5e7d9a708caff7d97b71fc4ccd59e8 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 5 Jun 2007 14:36:10 +0300 Subject: KVM: Fix adding an smp virtual machine to the vm list If we add the vm once per vcpu, we corrupt the list if the guest has multiple vcpus. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 3ff8ee56279c..230b25aa469c 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -319,6 +319,9 @@ static struct kvm *kvm_create_vm(void) spin_lock_init(&kvm->lock); INIT_LIST_HEAD(&kvm->active_mmu_pages); + spin_lock(&kvm_lock); + list_add(&kvm->vm_list, &vm_list); + spin_unlock(&kvm_lock); for (i = 0; i < KVM_MAX_VCPUS; ++i) { struct kvm_vcpu *vcpu = &kvm->vcpus[i]; @@ -326,9 +329,6 @@ static struct kvm *kvm_create_vm(void) vcpu->cpu = -1; vcpu->kvm = kvm; vcpu->mmu.root_hpa = INVALID_PAGE; - spin_lock(&kvm_lock); - list_add(&kvm->vm_list, &vm_list); - spin_unlock(&kvm_lock); } return kvm; } -- cgit v1.2.3 From d3bef15f84f91c73a5515ad4c6a1749f8f63afcf Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 5 Jun 2007 15:53:05 +0300 Subject: KVM: Move duplicate halt handling code into kvm_main.c Will soon have a thid user. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 230b25aa469c..556416962541 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1285,6 +1285,17 @@ int emulate_instruction(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(emulate_instruction); +int kvm_emulate_halt(struct kvm_vcpu *vcpu) +{ + if (vcpu->irq_summary) + return 1; + + vcpu->run->exit_reason = KVM_EXIT_HLT; + ++vcpu->stat.halt_exits; + return 0; +} +EXPORT_SYMBOL_GPL(kvm_emulate_halt); + int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) { unsigned long nr, a0, a1, a2, a3, a4, a5, ret; -- cgit v1.2.3 From 39c3b86e5c193e09f69f0e99c93600a4999ffc60 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 7 Jun 2007 19:11:53 +0300 Subject: KVM: Keep an upper bound of initialized vcpus That way, we don't need to loop for KVM_MAX_VCPUS for a single vcpu vm. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 556416962541..4e1a017f3db7 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2391,6 +2391,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) if (r < 0) goto out_free_vcpus; + spin_lock(&kvm_lock); + if (n >= kvm->nvcpus) + kvm->nvcpus = n + 1; + spin_unlock(&kvm_lock); + return r; out_free_vcpus: -- cgit v1.2.3 From d9e368d61263055eceac2966bb7ea31b89da3425 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 7 Jun 2007 19:18:30 +0300 Subject: KVM: Flush remote tlbs when reducing shadow pte permissions When a vcpu causes a shadow tlb entry to have reduced permissions, it must also clear the tlb on remote vcpus. We do that by: - setting a bit on the vcpu that requests a tlb flush before the next entry - if the vcpu is currently executing, we send an ipi to make sure it exits before we continue Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 4e1a017f3db7..633c2eded08d 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -41,6 +41,8 @@ #include #include #include +#include +#include #include "x86_emulate.h" #include "segment_descriptor.h" @@ -309,6 +311,48 @@ static void vcpu_put(struct kvm_vcpu *vcpu) mutex_unlock(&vcpu->mutex); } +static void ack_flush(void *_completed) +{ + atomic_t *completed = _completed; + + atomic_inc(completed); +} + +void kvm_flush_remote_tlbs(struct kvm *kvm) +{ + int i, cpu, needed; + cpumask_t cpus; + struct kvm_vcpu *vcpu; + atomic_t completed; + + atomic_set(&completed, 0); + cpus_clear(cpus); + needed = 0; + for (i = 0; i < kvm->nvcpus; ++i) { + vcpu = &kvm->vcpus[i]; + if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) + continue; + cpu = vcpu->cpu; + if (cpu != -1 && cpu != raw_smp_processor_id()) + if (!cpu_isset(cpu, cpus)) { + cpu_set(cpu, cpus); + ++needed; + } + } + + /* + * We really want smp_call_function_mask() here. But that's not + * available, so ipi all cpus in parallel and wait for them + * to complete. + */ + for (cpu = first_cpu(cpus); cpu != NR_CPUS; cpu = next_cpu(cpu, cpus)) + smp_call_function_single(cpu, ack_flush, &completed, 1, 0); + while (atomic_read(&completed) != needed) { + cpu_relax(); + barrier(); + } +} + static struct kvm *kvm_create_vm(void) { struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); -- cgit v1.2.3 From 2eeb2e94eb6232f0895da696c10e6636093ff72b Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Thu, 31 May 2007 14:08:53 -0400 Subject: KVM: Adds support for in-kernel mmio handlers Signed-off-by: Gregory Haskins Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 94 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 82 insertions(+), 12 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 633c2eded08d..e157e282fcff 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -366,6 +366,7 @@ static struct kvm *kvm_create_vm(void) spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); + kvm_io_bus_init(&kvm->mmio_bus); for (i = 0; i < KVM_MAX_VCPUS; ++i) { struct kvm_vcpu *vcpu = &kvm->vcpus[i]; @@ -474,6 +475,7 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); + kvm_io_bus_destroy(&kvm->mmio_bus); kvm_free_vcpus(kvm); kvm_free_physmem(kvm); kfree(kvm); @@ -1097,12 +1099,25 @@ static int emulator_write_std(unsigned long addr, return X86EMUL_UNHANDLEABLE; } +static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, + gpa_t addr) +{ + /* + * Note that its important to have this wrapper function because + * in the very near future we will be checking for MMIOs against + * the LAPIC as well as the general MMIO bus + */ + return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); +} + static int emulator_read_emulated(unsigned long addr, void *val, unsigned int bytes, struct x86_emulate_ctxt *ctxt) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_io_device *mmio_dev; + gpa_t gpa; if (vcpu->mmio_read_completed) { memcpy(val, vcpu->mmio_data, bytes); @@ -1111,18 +1126,26 @@ static int emulator_read_emulated(unsigned long addr, } else if (emulator_read_std(addr, val, bytes, ctxt) == X86EMUL_CONTINUE) return X86EMUL_CONTINUE; - else { - gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); - if (gpa == UNMAPPED_GVA) - return X86EMUL_PROPAGATE_FAULT; - vcpu->mmio_needed = 1; - vcpu->mmio_phys_addr = gpa; - vcpu->mmio_size = bytes; - vcpu->mmio_is_write = 0; + gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); + if (gpa == UNMAPPED_GVA) + return X86EMUL_PROPAGATE_FAULT; - return X86EMUL_UNHANDLEABLE; + /* + * Is this MMIO handled locally? + */ + mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); + if (mmio_dev) { + kvm_iodevice_read(mmio_dev, gpa, bytes, val); + return X86EMUL_CONTINUE; } + + vcpu->mmio_needed = 1; + vcpu->mmio_phys_addr = gpa; + vcpu->mmio_size = bytes; + vcpu->mmio_is_write = 0; + + return X86EMUL_UNHANDLEABLE; } static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, @@ -1150,8 +1173,9 @@ static int emulator_write_emulated(unsigned long addr, unsigned int bytes, struct x86_emulate_ctxt *ctxt) { - struct kvm_vcpu *vcpu = ctxt->vcpu; - gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); + struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_io_device *mmio_dev; + gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); if (gpa == UNMAPPED_GVA) { kvm_arch_ops->inject_page_fault(vcpu, addr, 2); @@ -1161,6 +1185,15 @@ static int emulator_write_emulated(unsigned long addr, if (emulator_write_phys(vcpu, gpa, val, bytes)) return X86EMUL_CONTINUE; + /* + * Is this MMIO handled locally? + */ + mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); + if (mmio_dev) { + kvm_iodevice_write(mmio_dev, gpa, bytes, val); + return X86EMUL_CONTINUE; + } + vcpu->mmio_needed = 1; vcpu->mmio_phys_addr = gpa; vcpu->mmio_size = bytes; @@ -3031,6 +3064,43 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, return NOTIFY_OK; } +void kvm_io_bus_init(struct kvm_io_bus *bus) +{ + memset(bus, 0, sizeof(*bus)); +} + +void kvm_io_bus_destroy(struct kvm_io_bus *bus) +{ + int i; + + for (i = 0; i < bus->dev_count; i++) { + struct kvm_io_device *pos = bus->devs[i]; + + kvm_iodevice_destructor(pos); + } +} + +struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr) +{ + int i; + + for (i = 0; i < bus->dev_count; i++) { + struct kvm_io_device *pos = bus->devs[i]; + + if (pos->in_range(pos, addr)) + return pos; + } + + return NULL; +} + +void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev) +{ + BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1)); + + bus->devs[bus->dev_count++] = dev; +} + static struct notifier_block kvm_cpu_notifier = { .notifier_call = kvm_cpu_hotplug, .priority = 20, /* must be > scheduler priority */ -- cgit v1.2.3 From 74906345ff9f84f2b3b772d368c7e49f4ba27456 Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Tue, 19 Jun 2007 18:05:03 +0300 Subject: KVM: Add support for in-kernel pio handlers Useful for the PIC and PIT. Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index e157e282fcff..7826f16271e5 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -361,6 +361,7 @@ static struct kvm *kvm_create_vm(void) if (!kvm) return ERR_PTR(-ENOMEM); + kvm_io_bus_init(&kvm->pio_bus); spin_lock_init(&kvm->lock); INIT_LIST_HEAD(&kvm->active_mmu_pages); spin_lock(&kvm_lock); @@ -475,6 +476,7 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); + kvm_io_bus_destroy(&kvm->pio_bus); kvm_io_bus_destroy(&kvm->mmio_bus); kvm_free_vcpus(kvm); kvm_free_physmem(kvm); @@ -1110,6 +1112,12 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); } +static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, + gpa_t addr) +{ + return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr); +} + static int emulator_read_emulated(unsigned long addr, void *val, unsigned int bytes, @@ -1832,6 +1840,20 @@ static int complete_pio(struct kvm_vcpu *vcpu) return 0; } +void kernel_pio(struct kvm_io_device *pio_dev, struct kvm_vcpu *vcpu) +{ + /* TODO: String I/O for in kernel device */ + + if (vcpu->pio.in) + kvm_iodevice_read(pio_dev, vcpu->pio.port, + vcpu->pio.size, + vcpu->pio_data); + else + kvm_iodevice_write(pio_dev, vcpu->pio.port, + vcpu->pio.size, + vcpu->pio_data); +} + int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, int size, unsigned long count, int string, int down, gva_t address, int rep, unsigned port) @@ -1840,6 +1862,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, int i; int nr_pages = 1; struct page *page; + struct kvm_io_device *pio_dev; vcpu->run->exit_reason = KVM_EXIT_IO; vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; @@ -1851,17 +1874,27 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->pio.cur_count = count; vcpu->pio.size = size; vcpu->pio.in = in; + vcpu->pio.port = port; vcpu->pio.string = string; vcpu->pio.down = down; vcpu->pio.guest_page_offset = offset_in_page(address); vcpu->pio.rep = rep; + pio_dev = vcpu_find_pio_dev(vcpu, port); if (!string) { kvm_arch_ops->cache_regs(vcpu); memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); kvm_arch_ops->decache_regs(vcpu); + if (pio_dev) { + kernel_pio(pio_dev, vcpu); + complete_pio(vcpu); + return 1; + } return 0; } + /* TODO: String I/O for in kernel device */ + if (pio_dev) + printk(KERN_ERR "kvm_setup_pio: no string io support\n"); if (!count) { kvm_arch_ops->skip_emulated_instruction(vcpu); -- cgit v1.2.3 From a3c870bdce4d34332ebdba7eb9969592c4c6b243 Mon Sep 17 00:00:00 2001 From: Luca Tettamanti Date: Tue, 19 Jun 2007 22:41:38 +0200 Subject: KVM: Avoid useless memory write when possible When writing to normal memory and the memory area is unchanged the write can be safely skipped, avoiding the costly kvm_mmu_pte_write. Signed-Off-By: Luca Tettamanti Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 7826f16271e5..5603000573ec 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1170,8 +1170,10 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, return 0; mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); virt = kmap_atomic(page, KM_USER0); - kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes); - memcpy(virt + offset_in_page(gpa), val, bytes); + if (memcmp(virt + offset_in_page(gpa), val, bytes)) { + kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes); + memcpy(virt + offset_in_page(gpa), val, bytes); + } kunmap_atomic(virt, KM_USER0); return 1; } -- cgit v1.2.3 From d6d281684913dabb878e2f53219eed5df2cd867b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 28 Jun 2007 08:38:16 -0400 Subject: KVM: Remove kvmfs in favor of the anonymous inodes source kvm uses a pseudo filesystem, kvmfs, to generate inodes, a job that the new anonymous inodes source does much better. Cc: Davide Libenzi Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 143 ++++--------------------------------------------- 1 file changed, 11 insertions(+), 132 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 5603000573ec..26ca90f74fc7 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "x86_emulate.h" #include "segment_descriptor.h" @@ -81,8 +82,6 @@ static struct kvm_stats_debugfs_item { static struct dentry *debugfs_dir; -struct vfsmount *kvmfs_mnt; - #define MAX_IO_MSRS 256 #define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL @@ -104,55 +103,6 @@ struct segment_descriptor_64 { static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); -static struct inode *kvmfs_inode(struct file_operations *fops) -{ - int error = -ENOMEM; - struct inode *inode = new_inode(kvmfs_mnt->mnt_sb); - - if (!inode) - goto eexit_1; - - inode->i_fop = fops; - - /* - * Mark the inode dirty from the very beginning, - * that way it will never be moved to the dirty - * list because mark_inode_dirty() will think - * that it already _is_ on the dirty list. - */ - inode->i_state = I_DIRTY; - inode->i_mode = S_IRUSR | S_IWUSR; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - return inode; - -eexit_1: - return ERR_PTR(error); -} - -static struct file *kvmfs_file(struct inode *inode, void *private_data) -{ - struct file *file = get_empty_filp(); - - if (!file) - return ERR_PTR(-ENFILE); - - file->f_path.mnt = mntget(kvmfs_mnt); - file->f_path.dentry = d_alloc_anon(inode); - if (!file->f_path.dentry) - return ERR_PTR(-ENOMEM); - file->f_mapping = inode->i_mapping; - - file->f_pos = 0; - file->f_flags = O_RDWR; - file->f_op = inode->i_fop; - file->f_mode = FMODE_READ | FMODE_WRITE; - file->f_version = 0; - file->private_data = private_data; - return file; -} - unsigned long segment_base(u16 selector) { struct descriptor_table gdt; @@ -2413,34 +2363,12 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) struct inode *inode; struct file *file; + r = anon_inode_getfd(&fd, &inode, &file, + "kvm-vcpu", &kvm_vcpu_fops, vcpu); + if (r) + return r; atomic_inc(&vcpu->kvm->filp->f_count); - inode = kvmfs_inode(&kvm_vcpu_fops); - if (IS_ERR(inode)) { - r = PTR_ERR(inode); - goto out1; - } - - file = kvmfs_file(inode, vcpu); - if (IS_ERR(file)) { - r = PTR_ERR(file); - goto out2; - } - - r = get_unused_fd(); - if (r < 0) - goto out3; - fd = r; - fd_install(fd, file); - return fd; - -out3: - fput(file); -out2: - iput(inode); -out1: - fput(vcpu->kvm->filp); - return r; } /* @@ -2905,41 +2833,18 @@ static int kvm_dev_ioctl_create_vm(void) struct file *file; struct kvm *kvm; - inode = kvmfs_inode(&kvm_vm_fops); - if (IS_ERR(inode)) { - r = PTR_ERR(inode); - goto out1; - } - kvm = kvm_create_vm(); - if (IS_ERR(kvm)) { - r = PTR_ERR(kvm); - goto out2; + if (IS_ERR(kvm)) + return PTR_ERR(kvm); + r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm); + if (r) { + kvm_destroy_vm(kvm); + return r; } - file = kvmfs_file(inode, kvm); - if (IS_ERR(file)) { - r = PTR_ERR(file); - goto out3; - } kvm->filp = file; - r = get_unused_fd(); - if (r < 0) - goto out4; - fd = r; - fd_install(fd, file); - return fd; - -out4: - fput(file); -out3: - kvm_destroy_vm(kvm); -out2: - iput(inode); -out1: - return r; } static long kvm_dev_ioctl(struct file *filp, @@ -3211,18 +3116,6 @@ static struct sys_device kvm_sysdev = { hpa_t bad_page_address; -static int kvmfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) -{ - return get_sb_pseudo(fs_type, "kvm:", NULL, KVMFS_SUPER_MAGIC, mnt); -} - -static struct file_system_type kvm_fs_type = { - .name = "kvmfs", - .get_sb = kvmfs_get_sb, - .kill_sb = kill_anon_super, -}; - int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) { int r; @@ -3307,14 +3200,6 @@ static __init int kvm_init(void) if (r) goto out4; - r = register_filesystem(&kvm_fs_type); - if (r) - goto out3; - - kvmfs_mnt = kern_mount(&kvm_fs_type); - r = PTR_ERR(kvmfs_mnt); - if (IS_ERR(kvmfs_mnt)) - goto out2; kvm_init_debug(); kvm_init_msr_list(); @@ -3331,10 +3216,6 @@ static __init int kvm_init(void) out: kvm_exit_debug(); - mntput(kvmfs_mnt); -out2: - unregister_filesystem(&kvm_fs_type); -out3: kvm_mmu_module_exit(); out4: return r; @@ -3344,8 +3225,6 @@ static __exit void kvm_exit(void) { kvm_exit_debug(); __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); - mntput(kvmfs_mnt); - unregister_filesystem(&kvm_fs_type); kvm_mmu_module_exit(); } -- cgit v1.2.3 From e495606dd09d79f9fa496334ac3958f6ff179d82 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 28 Jun 2007 14:15:57 -0400 Subject: KVM: Clean up #includes Remove unnecessary ones, and rearange the remaining in the standard order. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 26ca90f74fc7..ea027190a658 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -16,37 +16,33 @@ */ #include "kvm.h" +#include "x86_emulate.h" +#include "segment_descriptor.h" #include #include #include -#include -#include #include #include -#include #include #include #include -#include #include -#include #include #include #include -#include #include #include -#include -#include -#include #include #include #include #include -#include "x86_emulate.h" -#include "segment_descriptor.h" +#include +#include +#include +#include +#include MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 1b6c016818a562aaea22b1a1b05b15c796b0c2f0 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 24 May 2007 13:03:52 +0300 Subject: KVM: Keep track of which cpus have virtualization enabled By keeping track of which cpus have virtualization enabled, we prevent double-enable or double-disable during hotplug, which is a very fatal oops. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index ea027190a658..3226ad4bce7c 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -50,8 +50,12 @@ MODULE_LICENSE("GPL"); static DEFINE_SPINLOCK(kvm_lock); static LIST_HEAD(vm_list); +static cpumask_t cpus_hardware_enabled; + struct kvm_arch_ops *kvm_arch_ops; +static void hardware_disable(void *ignored); + #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) static struct kvm_stats_debugfs_item { @@ -2930,7 +2934,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val, * in vmx root mode. */ printk(KERN_INFO "kvm: exiting hardware virtualization\n"); - on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); + on_each_cpu(hardware_disable, NULL, 0, 1); } return NOTIFY_OK; } @@ -2973,6 +2977,27 @@ static void decache_vcpus_on_cpu(int cpu) spin_unlock(&kvm_lock); } +static void hardware_enable(void *junk) +{ + int cpu = raw_smp_processor_id(); + + if (cpu_isset(cpu, cpus_hardware_enabled)) + return; + cpu_set(cpu, cpus_hardware_enabled); + kvm_arch_ops->hardware_enable(NULL); +} + +static void hardware_disable(void *junk) +{ + int cpu = raw_smp_processor_id(); + + if (!cpu_isset(cpu, cpus_hardware_enabled)) + return; + cpu_clear(cpu, cpus_hardware_enabled); + decache_vcpus_on_cpu(cpu); + kvm_arch_ops->hardware_disable(NULL); +} + static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, void *v) { @@ -2985,16 +3010,13 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, case CPU_UP_CANCELED_FROZEN: printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", cpu); - decache_vcpus_on_cpu(cpu); - smp_call_function_single(cpu, kvm_arch_ops->hardware_disable, - NULL, 0, 1); + smp_call_function_single(cpu, hardware_disable, NULL, 0, 1); break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", cpu); - smp_call_function_single(cpu, kvm_arch_ops->hardware_enable, - NULL, 0, 1); + smp_call_function_single(cpu, hardware_enable, NULL, 0, 1); break; } return NOTIFY_OK; @@ -3088,14 +3110,13 @@ static void kvm_exit_debug(void) static int kvm_suspend(struct sys_device *dev, pm_message_t state) { - decache_vcpus_on_cpu(raw_smp_processor_id()); - on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); + on_each_cpu(hardware_disable, NULL, 0, 0); return 0; } static int kvm_resume(struct sys_device *dev) { - on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1); + on_each_cpu(hardware_disable, NULL, 0, 0); return 0; } @@ -3136,7 +3157,7 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) if (r < 0) goto out; - on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1); + on_each_cpu(hardware_enable, NULL, 0, 1); r = register_cpu_notifier(&kvm_cpu_notifier); if (r) goto out_free_1; @@ -3168,7 +3189,7 @@ out_free_2: unregister_reboot_notifier(&kvm_reboot_notifier); unregister_cpu_notifier(&kvm_cpu_notifier); out_free_1: - on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); + on_each_cpu(hardware_disable, NULL, 0, 1); kvm_arch_ops->hardware_unsetup(); out: kvm_arch_ops = NULL; @@ -3182,7 +3203,7 @@ void kvm_exit_arch(void) sysdev_class_unregister(&kvm_sysdev_class); unregister_reboot_notifier(&kvm_reboot_notifier); unregister_cpu_notifier(&kvm_cpu_notifier); - on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); + on_each_cpu(hardware_disable, NULL, 0, 1); kvm_arch_ops->hardware_unsetup(); kvm_arch_ops = NULL; } -- cgit v1.2.3 From 4267c41a458cd7d287dc8031468fc385c2f5b2c3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 24 May 2007 13:09:41 +0300 Subject: KVM: Tune hotplug/suspend IPIs The hotplug IPIs can be called from the cpu on which we are currently running on, so use on_cpu(). Similarly, drop on_each_cpu() for the suspend/resume callbacks, as we're in atomic context here and only one cpu is up anyway. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 3226ad4bce7c..603e8ce3d65e 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -3110,13 +3110,13 @@ static void kvm_exit_debug(void) static int kvm_suspend(struct sys_device *dev, pm_message_t state) { - on_each_cpu(hardware_disable, NULL, 0, 0); + hardware_disable(NULL); return 0; } static int kvm_resume(struct sys_device *dev) { - on_each_cpu(hardware_disable, NULL, 0, 0); + hardware_enable(NULL); return 0; } -- cgit v1.2.3 From cec9ad279b66793bee0b5009b7ca311060061efd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 24 May 2007 13:11:41 +0300 Subject: KVM: Use CPU_DYING for disabling virtualization Only at the CPU_DYING stage can we be sure that no user process will be scheduled onto the cpu and oops when trying to use virtualization extensions. Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/kvm_main.c') diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 603e8ce3d65e..1b206f197c6b 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -3004,8 +3004,8 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, int cpu = (long)v; switch (val) { - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: + case CPU_DYING: + case CPU_DYING_FROZEN: case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", -- cgit v1.2.3