From fdef3ad1b38660d74a29abc990940b5dbaaf3fc9 Mon Sep 17 00:00:00 2001 From: "He, Qing" Date: Mon, 30 Apr 2007 09:45:24 +0300 Subject: KVM: VMX: Enable io bitmaps to avoid IO port 0x80 VMEXITs This patch enables IO bitmaps control on vmx and unmask the 0x80 port to avoid VMEXITs caused by accessing port 0x80. 0x80 is used as delays (see include/asm/io.h), and handling VMEXITs on its access is unnecessary but slows things down. This patch improves kernel build test at around 3%~5%. Because every VM uses the same io bitmap, it is shared between all VMs rather than a per-VM data structure. Signed-off-by: Qing He Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 4 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index c1ac106ace8c..52bd5f079df1 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -34,6 +34,9 @@ MODULE_LICENSE("GPL"); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); +static struct page *vmx_io_bitmap_a; +static struct page *vmx_io_bitmap_b; + #ifdef CONFIG_X86_64 #define HOST_IS_64 1 #else @@ -1129,8 +1132,8 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); /* I/O */ - vmcs_write64(IO_BITMAP_A, 0); - vmcs_write64(IO_BITMAP_B, 0); + vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a)); + vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b)); guest_write_tsc(0); @@ -1150,7 +1153,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) CPU_BASED_HLT_EXITING /* 20.6.2 */ | CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */ | CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */ - | CPU_BASED_UNCOND_IO_EXITING /* 20.6.2 */ + | CPU_BASED_ACTIVATE_IO_BITMAP /* 20.6.2 */ | CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */ ); @@ -2188,11 +2191,50 @@ static struct kvm_arch_ops vmx_arch_ops = { static int __init vmx_init(void) { - return kvm_init_arch(&vmx_arch_ops, THIS_MODULE); + void *iova; + int r; + + vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (!vmx_io_bitmap_a) + return -ENOMEM; + + vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (!vmx_io_bitmap_b) { + r = -ENOMEM; + goto out; + } + + /* + * Allow direct access to the PC debug port (it is often used for I/O + * delays, but the vmexits simply slow things down). + */ + iova = kmap(vmx_io_bitmap_a); + memset(iova, 0xff, PAGE_SIZE); + clear_bit(0x80, iova); + kunmap(iova); + + iova = kmap(vmx_io_bitmap_b); + memset(iova, 0xff, PAGE_SIZE); + kunmap(iova); + + r = kvm_init_arch(&vmx_arch_ops, THIS_MODULE); + if (r) + goto out1; + + return 0; + +out1: + __free_page(vmx_io_bitmap_b); +out: + __free_page(vmx_io_bitmap_a); + return r; } static void __exit vmx_exit(void) { + __free_page(vmx_io_bitmap_b); + __free_page(vmx_io_bitmap_a); + kvm_exit_arch(); } -- cgit v1.2.3 From e6adf28365b2fca0b5235cabff00c9f3d1e7bdf4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 30 Apr 2007 16:07:54 +0300 Subject: KVM: Avoid saving and restoring some host CPU state on lightweight vmexit Many msrs and the like will only be used by the host if we schedule() or return to userspace. Therefore, we avoid saving them if we handle the exit within the kernel, and if a reschedule is not requested. Based on a patch from Eddie Dong with a couple of fixes by me. Signed-off-by: Yaozu(Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 105 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 60 insertions(+), 45 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 52bd5f079df1..84ce0c0930a0 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -483,6 +483,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) case MSR_GS_BASE: vmcs_writel(GUEST_GS_BASE, data); break; + case MSR_LSTAR: + case MSR_SYSCALL_MASK: + msr = find_msr_entry(vcpu, msr_index); + if (msr) + msr->data = data; + load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); + break; #endif case MSR_IA32_SYSENTER_CS: vmcs_write32(GUEST_SYSENTER_CS, data); @@ -1820,7 +1827,7 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int fs_gs_ldt_reload_needed; int r; -again: +preempted: /* * Set host fs and gs selectors. Unfortunately, 22.2.3 does not * allow segment selectors with cpl > 0 or ti == 1. @@ -1851,13 +1858,6 @@ again: if (vcpu->guest_debug.enabled) kvm_guest_debug_pre(vcpu); - kvm_load_guest_fpu(vcpu); - - /* - * Loading guest fpu may have cleared host cr0.ts - */ - vmcs_writel(HOST_CR0, read_cr0()); - #ifdef CONFIG_X86_64 if (is_long_mode(vcpu)) { save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1); @@ -1865,6 +1865,14 @@ again: } #endif +again: + kvm_load_guest_fpu(vcpu); + + /* + * Loading guest fpu may have cleared host cr0.ts + */ + vmcs_writel(HOST_CR0, read_cr0()); + asm ( /* Store host registers */ "pushf \n\t" @@ -1984,36 +1992,8 @@ again: [cr2]"i"(offsetof(struct kvm_vcpu, cr2)) : "cc", "memory" ); - /* - * Reload segment selectors ASAP. (it's needed for a functional - * kernel: x86 relies on having __KERNEL_PDA in %fs and x86_64 - * relies on having 0 in %gs for the CPU PDA to work.) - */ - if (fs_gs_ldt_reload_needed) { - load_ldt(ldt_sel); - load_fs(fs_sel); - /* - * If we have to reload gs, we must take care to - * preserve our gs base. - */ - local_irq_disable(); - load_gs(gs_sel); -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); -#endif - local_irq_enable(); - - reload_tss(); - } ++vcpu->stat.exits; -#ifdef CONFIG_X86_64 - if (is_long_mode(vcpu)) { - save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); - load_msrs(vcpu->host_msrs, NR_BAD_MSRS); - } -#endif - vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); @@ -2035,24 +2015,59 @@ again: if (r > 0) { /* Give scheduler a change to reschedule. */ if (signal_pending(current)) { - ++vcpu->stat.signal_exits; - post_kvm_run_save(vcpu, kvm_run); + r = -EINTR; kvm_run->exit_reason = KVM_EXIT_INTR; - return -EINTR; + ++vcpu->stat.signal_exits; + goto out; } if (dm_request_for_irq_injection(vcpu, kvm_run)) { - ++vcpu->stat.request_irq_exits; - post_kvm_run_save(vcpu, kvm_run); + r = -EINTR; kvm_run->exit_reason = KVM_EXIT_INTR; - return -EINTR; + ++vcpu->stat.request_irq_exits; + goto out; + } + if (!need_resched()) { + ++vcpu->stat.light_exits; + goto again; } - - kvm_resched(vcpu); - goto again; } } +out: + /* + * Reload segment selectors ASAP. (it's needed for a functional + * kernel: x86 relies on having __KERNEL_PDA in %fs and x86_64 + * relies on having 0 in %gs for the CPU PDA to work.) + */ + if (fs_gs_ldt_reload_needed) { + load_ldt(ldt_sel); + load_fs(fs_sel); + /* + * If we have to reload gs, we must take care to + * preserve our gs base. + */ + local_irq_disable(); + load_gs(gs_sel); +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); +#endif + local_irq_enable(); + + reload_tss(); + } +#ifdef CONFIG_X86_64 + if (is_long_mode(vcpu)) { + save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); + load_msrs(vcpu->host_msrs, NR_BAD_MSRS); + } +#endif + + if (r > 0) { + kvm_resched(vcpu); + goto preempted; + } + post_kvm_run_save(vcpu, kvm_run); return r; } -- cgit v1.2.3 From 05e0c8c344dd356b42e81bdf0d47d2b884bf49b5 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 30 Apr 2007 16:15:58 +0300 Subject: KVM: Unindent some code Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 58 +++++++++++++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 84ce0c0930a0..9ebb18d07bde 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1998,39 +1998,39 @@ again: asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); - if (fail) { + if (unlikely(fail)) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason = vmcs_read32(VM_INSTRUCTION_ERROR); r = 0; - } else { - /* - * Profile KVM exit RIPs: - */ - if (unlikely(prof_on == KVM_PROFILING)) - profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); - - vcpu->launched = 1; - r = kvm_handle_exit(kvm_run, vcpu); - if (r > 0) { - /* Give scheduler a change to reschedule. */ - if (signal_pending(current)) { - r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; - ++vcpu->stat.signal_exits; - goto out; - } - - if (dm_request_for_irq_injection(vcpu, kvm_run)) { - r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; - ++vcpu->stat.request_irq_exits; - goto out; - } - if (!need_resched()) { - ++vcpu->stat.light_exits; - goto again; - } + goto out; + } + /* + * Profile KVM exit RIPs: + */ + if (unlikely(prof_on == KVM_PROFILING)) + profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); + + vcpu->launched = 1; + r = kvm_handle_exit(kvm_run, vcpu); + if (r > 0) { + /* Give scheduler a change to reschedule. */ + if (signal_pending(current)) { + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.signal_exits; + goto out; + } + + if (dm_request_for_irq_injection(vcpu, kvm_run)) { + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.request_irq_exits; + goto out; + } + if (!need_resched()) { + ++vcpu->stat.light_exits; + goto again; } } -- cgit v1.2.3 From 621358455ae043ab39bc3481f13b101bd6016c8d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 1 May 2007 11:32:28 +0300 Subject: KVM: Be more careful restoring fs on lightweight vmexit i386 wants fs for accessing the pda even on a lightweight exit, so ensure we can always restore it. This fixes a regression on i386 introduced by the lightweight vmexit patch. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 9ebb18d07bde..49cadd31120b 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1832,16 +1832,21 @@ preempted: * Set host fs and gs selectors. Unfortunately, 22.2.3 does not * allow segment selectors with cpl > 0 or ti == 1. */ - fs_sel = read_fs(); - gs_sel = read_gs(); ldt_sel = read_ldt(); - fs_gs_ldt_reload_needed = (fs_sel & 7) | (gs_sel & 7) | ldt_sel; - if (!fs_gs_ldt_reload_needed) { + fs_gs_ldt_reload_needed = ldt_sel; + fs_sel = read_fs(); + if (!(fs_sel & 7)) vmcs_write16(HOST_FS_SELECTOR, fs_sel); - vmcs_write16(HOST_GS_SELECTOR, gs_sel); - } else { + else { vmcs_write16(HOST_FS_SELECTOR, 0); + fs_gs_ldt_reload_needed = 1; + } + gs_sel = read_gs(); + if (!(gs_sel & 7)) + vmcs_write16(HOST_GS_SELECTOR, gs_sel); + else { vmcs_write16(HOST_GS_SELECTOR, 0); + fs_gs_ldt_reload_needed = 1; } #ifdef CONFIG_X86_64 @@ -2035,11 +2040,6 @@ again: } out: - /* - * Reload segment selectors ASAP. (it's needed for a functional - * kernel: x86 relies on having __KERNEL_PDA in %fs and x86_64 - * relies on having 0 in %gs for the CPU PDA to work.) - */ if (fs_gs_ldt_reload_needed) { load_ldt(ldt_sel); load_fs(fs_sel); -- cgit v1.2.3 From 33ed6329210f3ad0638306bfa46cd3aaf5a5f929 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 2 May 2007 16:54:03 +0300 Subject: KVM: Fix potential guest state leak into host The lightweight vmexit path avoids saving and reloading certain host state. However in certain cases lightweight vmexit handling can schedule() which requires reloading the host state. So we store the host state in the vcpu structure, and reloaded it if we relinquish the vcpu. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 160 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 89 insertions(+), 71 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 49cadd31120b..677b38c4444a 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -237,6 +237,93 @@ static void vmcs_set_bits(unsigned long field, u32 mask) vmcs_writel(field, vmcs_readl(field) | mask); } +static void reload_tss(void) +{ +#ifndef CONFIG_X86_64 + + /* + * VT restores TR but not its size. Useless. + */ + struct descriptor_table gdt; + struct segment_descriptor *descs; + + get_gdt(&gdt); + descs = (void *)gdt.base; + descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ + load_TR_desc(); +#endif +} + +static void vmx_save_host_state(struct kvm_vcpu *vcpu) +{ + struct vmx_host_state *hs = &vcpu->vmx_host_state; + + if (hs->loaded) + return; + + hs->loaded = 1; + /* + * Set host fs and gs selectors. Unfortunately, 22.2.3 does not + * allow segment selectors with cpl > 0 or ti == 1. + */ + hs->ldt_sel = read_ldt(); + hs->fs_gs_ldt_reload_needed = hs->ldt_sel; + hs->fs_sel = read_fs(); + if (!(hs->fs_sel & 7)) + vmcs_write16(HOST_FS_SELECTOR, hs->fs_sel); + else { + vmcs_write16(HOST_FS_SELECTOR, 0); + hs->fs_gs_ldt_reload_needed = 1; + } + hs->gs_sel = read_gs(); + if (!(hs->gs_sel & 7)) + vmcs_write16(HOST_GS_SELECTOR, hs->gs_sel); + else { + vmcs_write16(HOST_GS_SELECTOR, 0); + hs->fs_gs_ldt_reload_needed = 1; + } + +#ifdef CONFIG_X86_64 + vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); + vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); +#else + vmcs_writel(HOST_FS_BASE, segment_base(hs->fs_sel)); + vmcs_writel(HOST_GS_BASE, segment_base(hs->gs_sel)); +#endif +} + +static void vmx_load_host_state(struct kvm_vcpu *vcpu) +{ + struct vmx_host_state *hs = &vcpu->vmx_host_state; + + if (!hs->loaded) + return; + + hs->loaded = 0; + if (hs->fs_gs_ldt_reload_needed) { + load_ldt(hs->ldt_sel); + load_fs(hs->fs_sel); + /* + * If we have to reload gs, we must take care to + * preserve our gs base. + */ + local_irq_disable(); + load_gs(hs->gs_sel); +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); +#endif + local_irq_enable(); + + reload_tss(); + } +#ifdef CONFIG_X86_64 + if (is_long_mode(vcpu)) { + save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); + load_msrs(vcpu->host_msrs, NR_BAD_MSRS); + } +#endif +} + /* * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. @@ -283,6 +370,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) static void vmx_vcpu_put(struct kvm_vcpu *vcpu) { + vmx_load_host_state(vcpu); kvm_put_guest_fpu(vcpu); put_cpu(); } @@ -397,23 +485,6 @@ static void guest_write_tsc(u64 guest_tsc) vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc); } -static void reload_tss(void) -{ -#ifndef CONFIG_X86_64 - - /* - * VT restores TR but not its size. Useless. - */ - struct descriptor_table gdt; - struct segment_descriptor *descs; - - get_gdt(&gdt); - descs = (void *)gdt.base; - descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ - load_TR_desc(); -#endif -} - /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. @@ -1823,40 +1894,9 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u8 fail; - u16 fs_sel, gs_sel, ldt_sel; - int fs_gs_ldt_reload_needed; int r; preempted: - /* - * Set host fs and gs selectors. Unfortunately, 22.2.3 does not - * allow segment selectors with cpl > 0 or ti == 1. - */ - ldt_sel = read_ldt(); - fs_gs_ldt_reload_needed = ldt_sel; - fs_sel = read_fs(); - if (!(fs_sel & 7)) - vmcs_write16(HOST_FS_SELECTOR, fs_sel); - else { - vmcs_write16(HOST_FS_SELECTOR, 0); - fs_gs_ldt_reload_needed = 1; - } - gs_sel = read_gs(); - if (!(gs_sel & 7)) - vmcs_write16(HOST_GS_SELECTOR, gs_sel); - else { - vmcs_write16(HOST_GS_SELECTOR, 0); - fs_gs_ldt_reload_needed = 1; - } - -#ifdef CONFIG_X86_64 - vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); - vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); -#else - vmcs_writel(HOST_FS_BASE, segment_base(fs_sel)); - vmcs_writel(HOST_GS_BASE, segment_base(gs_sel)); -#endif - if (!vcpu->mmio_read_completed) do_interrupt_requests(vcpu, kvm_run); @@ -1871,6 +1911,7 @@ preempted: #endif again: + vmx_save_host_state(vcpu); kvm_load_guest_fpu(vcpu); /* @@ -2040,29 +2081,6 @@ again: } out: - if (fs_gs_ldt_reload_needed) { - load_ldt(ldt_sel); - load_fs(fs_sel); - /* - * If we have to reload gs, we must take care to - * preserve our gs base. - */ - local_irq_disable(); - load_gs(gs_sel); -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); -#endif - local_irq_enable(); - - reload_tss(); - } -#ifdef CONFIG_X86_64 - if (is_long_mode(vcpu)) { - save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); - load_msrs(vcpu->host_msrs, NR_BAD_MSRS); - } -#endif - if (r > 0) { kvm_resched(vcpu); goto preempted; -- cgit v1.2.3 From 707c08743060b6721b08df68f4fd546b106e7510 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 2 May 2007 17:33:43 +0300 Subject: KVM: Move some more msr mangling into vmx_save_host_state() Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 677b38c4444a..93c3abfc1e0a 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -290,6 +290,13 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) vmcs_writel(HOST_FS_BASE, segment_base(hs->fs_sel)); vmcs_writel(HOST_GS_BASE, segment_base(hs->gs_sel)); #endif + +#ifdef CONFIG_X86_64 + if (is_long_mode(vcpu)) { + save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1); + load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); + } +#endif } static void vmx_load_host_state(struct kvm_vcpu *vcpu) @@ -1903,13 +1910,6 @@ preempted: if (vcpu->guest_debug.enabled) kvm_guest_debug_pre(vcpu); -#ifdef CONFIG_X86_64 - if (is_long_mode(vcpu)) { - save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1); - load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); - } -#endif - again: vmx_save_host_state(vcpu); kvm_load_guest_fpu(vcpu); -- cgit v1.2.3 From abd3f2d622a810b7f6687f7ddb405e90e4cfb7ab Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 2 May 2007 17:57:40 +0300 Subject: KVM: Rationalize exception bitmap usage Everyone owns a piece of the exception bitmap, but they happily write to the entire thing like there's no tomorrow. Centralize handling in update_exception_bitmap() and have everyone call that. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 93c3abfc1e0a..2190020e055b 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -237,6 +237,20 @@ static void vmcs_set_bits(unsigned long field, u32 mask) vmcs_writel(field, vmcs_readl(field) | mask); } +static void update_exception_bitmap(struct kvm_vcpu *vcpu) +{ + u32 eb; + + eb = 1u << PF_VECTOR; + if (!vcpu->fpu_active) + eb |= 1u << NM_VECTOR; + if (vcpu->guest_debug.enabled) + eb |= 1u << 1; + if (vcpu->rmode.active) + eb = ~0; + vmcs_write32(EXCEPTION_BITMAP, eb); +} + static void reload_tss(void) { #ifndef CONFIG_X86_64 @@ -618,10 +632,8 @@ static void vcpu_put_rsp_rip(struct kvm_vcpu *vcpu) static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) { unsigned long dr7 = 0x400; - u32 exception_bitmap; int old_singlestep; - exception_bitmap = vmcs_read32(EXCEPTION_BITMAP); old_singlestep = vcpu->guest_debug.singlestep; vcpu->guest_debug.enabled = dbg->enabled; @@ -637,13 +649,9 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) dr7 |= 0 << (i*4+16); /* execution breakpoint */ } - exception_bitmap |= (1u << 1); /* Trap debug exceptions */ - vcpu->guest_debug.singlestep = dbg->singlestep; - } else { - exception_bitmap &= ~(1u << 1); /* Ignore debug exceptions */ + } else vcpu->guest_debug.singlestep = 0; - } if (old_singlestep && !vcpu->guest_debug.singlestep) { unsigned long flags; @@ -653,7 +661,7 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) vmcs_writel(GUEST_RFLAGS, flags); } - vmcs_write32(EXCEPTION_BITMAP, exception_bitmap); + update_exception_bitmap(vcpu); vmcs_writel(GUEST_DR7, dr7); return 0; @@ -767,14 +775,6 @@ static __exit void hardware_unsetup(void) free_kvm_area(); } -static void update_exception_bitmap(struct kvm_vcpu *vcpu) -{ - if (vcpu->rmode.active) - vmcs_write32(EXCEPTION_BITMAP, ~0); - else - vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR); -} - static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -942,7 +942,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (!(cr0 & CR0_TS_MASK)) { vcpu->fpu_active = 1; - vmcs_clear_bits(EXCEPTION_BITMAP, CR0_TS_MASK); + update_exception_bitmap(vcpu); } vmcs_writel(CR0_READ_SHADOW, cr0); @@ -958,7 +958,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (!(vcpu->cr0 & CR0_TS_MASK)) { vcpu->fpu_active = 0; vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); - vmcs_set_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); + update_exception_bitmap(vcpu); } } @@ -1243,7 +1243,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */ ); - vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR); vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ @@ -1329,6 +1328,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) #ifdef CONFIG_X86_64 vmx_set_efer(vcpu, 0); #endif + update_exception_bitmap(vcpu); return 0; @@ -1489,7 +1489,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (is_no_device(intr_info)) { vcpu->fpu_active = 1; - vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); + update_exception_bitmap(vcpu); if (!(vcpu->cr0 & CR0_TS_MASK)) vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); return 1; @@ -1684,7 +1684,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) case 2: /* clts */ vcpu_load_rsp_rip(vcpu); vcpu->fpu_active = 1; - vmcs_clear_bits(EXCEPTION_BITMAP, 1 << NM_VECTOR); + update_exception_bitmap(vcpu); vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); vcpu->cr0 &= ~CR0_TS_MASK; vmcs_writel(CR0_READ_SHADOW, vcpu->cr0); -- cgit v1.2.3 From 5fd86fcfc0dbdd42296b1182945f7a0a05578211 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 2 May 2007 20:40:00 +0300 Subject: KVM: Consolidate guest fpu activation and deactivation Easier to keep track of where the fpu is this way. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 50 +++++++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 19 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 2190020e055b..096cb6a1e899 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -396,6 +396,26 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) put_cpu(); } +static void vmx_fpu_activate(struct kvm_vcpu *vcpu) +{ + if (vcpu->fpu_active) + return; + vcpu->fpu_active = 1; + vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); + if (vcpu->cr0 & CR0_TS_MASK) + vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); + update_exception_bitmap(vcpu); +} + +static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) +{ + if (!vcpu->fpu_active) + return; + vcpu->fpu_active = 0; + vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); + update_exception_bitmap(vcpu); +} + static void vmx_vcpu_decache(struct kvm_vcpu *vcpu) { vcpu_clear(vcpu); @@ -925,6 +945,8 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { + vmx_fpu_deactivate(vcpu); + if (vcpu->rmode.active && (cr0 & CR0_PE_MASK)) enter_pmode(vcpu); @@ -940,26 +962,20 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } #endif - if (!(cr0 & CR0_TS_MASK)) { - vcpu->fpu_active = 1; - update_exception_bitmap(vcpu); - } - vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); vcpu->cr0 = cr0; + + if (!(cr0 & CR0_TS_MASK) || !(cr0 & CR0_PE_MASK)) + vmx_fpu_activate(vcpu); } static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { vmcs_writel(GUEST_CR3, cr3); - - if (!(vcpu->cr0 & CR0_TS_MASK)) { - vcpu->fpu_active = 0; - vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); - update_exception_bitmap(vcpu); - } + if (vcpu->cr0 & CR0_PE_MASK) + vmx_fpu_deactivate(vcpu); } static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) @@ -1328,6 +1344,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) #ifdef CONFIG_X86_64 vmx_set_efer(vcpu, 0); #endif + vmx_fpu_activate(vcpu); update_exception_bitmap(vcpu); return 0; @@ -1488,10 +1505,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } if (is_no_device(intr_info)) { - vcpu->fpu_active = 1; - update_exception_bitmap(vcpu); - if (!(vcpu->cr0 & CR0_TS_MASK)) - vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); + vmx_fpu_activate(vcpu); return 1; } @@ -1683,11 +1697,10 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) break; case 2: /* clts */ vcpu_load_rsp_rip(vcpu); - vcpu->fpu_active = 1; - update_exception_bitmap(vcpu); - vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); + vmx_fpu_deactivate(vcpu); vcpu->cr0 &= ~CR0_TS_MASK; vmcs_writel(CR0_READ_SHADOW, vcpu->cr0); + vmx_fpu_activate(vcpu); skip_emulated_instruction(vcpu); return 1; case 1: /*mov from cr*/ @@ -2158,7 +2171,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) vmcs_clear(vmcs); vcpu->vmcs = vmcs; vcpu->launched = 0; - vcpu->fpu_active = 1; return 0; -- cgit v1.2.3 From eff708bc2bacd4f22cf844871341bef341bd096a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 6 May 2007 16:10:01 +0300 Subject: KVM: VMX: Only reload guest msrs if they are already loaded If we set an msr via an ioctl() instead of by handling a guest exit, we have the host state loaded, so reloading the msrs would clobber host state instead of guest state. This fixes a host oops (and loss of a cpu) on a guest reboot. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 096cb6a1e899..b353eaa0a441 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -600,7 +600,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) msr = find_msr_entry(vcpu, msr_index); if (msr) msr->data = data; - load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); + if (vcpu->vmx_host_state.loaded) + load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); break; #endif case MSR_IA32_SYSENTER_CS: -- cgit v1.2.3 From 653e3108b7d6097d25089d25ab4e99bc58b28962 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 May 2007 10:55:37 +0300 Subject: KVM: Avoid corrupting tr in real mode The real mode tr needs to be set to a specific tss so that I/O instructions can function. Divert the new tr values to the real mode save area from where they will be restored on transition to protected mode. This fixes some crashes on reboot when the bios accesses an I/O instruction. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index b353eaa0a441..e39ebe0b6958 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1042,23 +1042,11 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->unusable = (ar >> 16) & 1; } -static void vmx_set_segment(struct kvm_vcpu *vcpu, - struct kvm_segment *var, int seg) +static u32 vmx_segment_access_rights(struct kvm_segment *var) { - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; u32 ar; - vmcs_writel(sf->base, var->base); - vmcs_write32(sf->limit, var->limit); - vmcs_write16(sf->selector, var->selector); - if (vcpu->rmode.active && var->s) { - /* - * Hack real-mode segments into vm86 compatibility. - */ - if (var->base == 0xffff0000 && var->selector == 0xf000) - vmcs_writel(sf->base, 0xf0000); - ar = 0xf3; - } else if (var->unusable) + if (var->unusable) ar = 1 << 16; else { ar = var->type & 15; @@ -1072,6 +1060,35 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, } if (ar == 0) /* a 0 value means unusable */ ar = AR_UNUSABLE_MASK; + + return ar; +} + +static void vmx_set_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) +{ + struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + u32 ar; + + if (vcpu->rmode.active && seg == VCPU_SREG_TR) { + vcpu->rmode.tr.selector = var->selector; + vcpu->rmode.tr.base = var->base; + vcpu->rmode.tr.limit = var->limit; + vcpu->rmode.tr.ar = vmx_segment_access_rights(var); + return; + } + vmcs_writel(sf->base, var->base); + vmcs_write32(sf->limit, var->limit); + vmcs_write16(sf->selector, var->selector); + if (vcpu->rmode.active && var->s) { + /* + * Hack real-mode segments into vm86 compatibility. + */ + if (var->base == 0xffff0000 && var->selector == 0xf000) + vmcs_writel(sf->base, 0xf0000); + ar = 0xf3; + } else + ar = vmx_segment_access_rights(var); vmcs_write32(sf->ar_bytes, ar); } -- cgit v1.2.3 From cd0536d7cb4d5d5c5aa37ccd3edd71c4b0524add Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 8 May 2007 11:34:07 +0300 Subject: KVM: Fix vmx I/O bitmap initialization on highmem systems kunmap() expects a struct page, not a virtual address. Fixes an oops loading kvm-intel.ko on i386 with CONFIG_HIGHMEM. Thanks to Michael Ivanov for reporting. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index e39ebe0b6958..34171d9008ff 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -2274,11 +2274,11 @@ static int __init vmx_init(void) iova = kmap(vmx_io_bitmap_a); memset(iova, 0xff, PAGE_SIZE); clear_bit(0x80, iova); - kunmap(iova); + kunmap(vmx_io_bitmap_a); iova = kmap(vmx_io_bitmap_b); memset(iova, 0xff, PAGE_SIZE); - kunmap(iova); + kunmap(vmx_io_bitmap_b); r = kvm_init_arch(&vmx_arch_ops, THIS_MODULE); if (r) -- cgit v1.2.3 From cd2276a795b013d1416c96b38eec90a66cdd10c4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 14 May 2007 20:41:13 +0300 Subject: KVM: VMX: Use local labels in inline assembly This makes oprofile dumps and disassebly easier to read. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 34171d9008ff..c4c553588a20 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1188,7 +1188,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) struct descriptor_table dt; int i; int ret = 0; - extern asmlinkage void kvm_vmx_return(void); + unsigned long kvm_vmx_return; if (!init_rmode_tss(vcpu->kvm)) { ret = -ENOMEM; @@ -1306,8 +1306,8 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) get_idt(&dt); vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ - - vmcs_writel(HOST_RIP, (unsigned long)kvm_vmx_return); /* 22.2.5 */ + asm ("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); + vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); @@ -1997,12 +1997,11 @@ again: "mov %c[rcx](%3), %%ecx \n\t" /* kills %3 (ecx) */ #endif /* Enter guest mode */ - "jne launched \n\t" + "jne .Llaunched \n\t" ASM_VMX_VMLAUNCH "\n\t" - "jmp kvm_vmx_return \n\t" - "launched: " ASM_VMX_VMRESUME "\n\t" - ".globl kvm_vmx_return \n\t" - "kvm_vmx_return: " + "jmp .Lkvm_vmx_return \n\t" + ".Llaunched: " ASM_VMX_VMRESUME "\n\t" + ".Lkvm_vmx_return: " /* Save guest registers, load host registers, keep flags */ #ifdef CONFIG_X86_64 "xchg %3, (%%rsp) \n\t" -- cgit v1.2.3 From b3f37707b05e9ce82d5bec660e9d0b15452ee9a0 Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Thu, 17 May 2007 15:50:34 +0300 Subject: KVM: VMX: Handle #SS faults from real mode Instructions with address size override prefix opcode 0x67 Cause the #SS fault with 0 error code in VM86 mode. Forward them to the emulator. Signed-Off-By: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index c4c553588a20..a05bfa085877 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1488,7 +1488,11 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, if (!vcpu->rmode.active) return 0; - if (vec == GP_VECTOR && err_code == 0) + /* + * Instruction with address size override prefix opcode 0x67 + * Cause the #SS fault with 0 error code in VM86 mode. + */ + if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) if (emulate_instruction(vcpu, NULL, 0, 0) == EMULATE_DONE) return 1; return 0; -- cgit v1.2.3 From a75beee6e4f5d2f0ae6e28cd626b2f157e93afd2 Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Thu, 17 May 2007 18:55:15 +0300 Subject: KVM: VMX: Avoid saving and restoring msrs on lightweight vmexit In a lightweight exit (where we exit and reenter the guest without scheduling or exiting to userspace in between), we don't need various msrs on the host, and avoiding shuffling them around reduces raw exit time by 8%. i386 compile fix by Daniel Hecken . Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 128 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 72 insertions(+), 56 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index a05bfa085877..872ca0381fbe 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -85,19 +85,6 @@ static const u32 vmx_msr_index[] = { }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) -#ifdef CONFIG_X86_64 -static unsigned msr_offset_kernel_gs_base; -#define NR_64BIT_MSRS 4 -/* - * avoid save/load MSR_SYSCALL_MASK and MSR_LSTAR by std vt - * mechanism (cpu bug AA24) - */ -#define NR_BAD_MSRS 2 -#else -#define NR_64BIT_MSRS 0 -#define NR_BAD_MSRS 0 -#endif - static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -118,13 +105,23 @@ static inline int is_external_interrupt(u32 intr_info) == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); } -static struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) +static int __find_msr_index(struct kvm_vcpu *vcpu, u32 msr) { int i; for (i = 0; i < vcpu->nmsrs; ++i) if (vcpu->guest_msrs[i].index == msr) - return &vcpu->guest_msrs[i]; + return i; + return -1; +} + +static struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) +{ + int i; + + i = __find_msr_index(vcpu, msr); + if (i >= 0) + return &vcpu->guest_msrs[i]; return NULL; } @@ -307,10 +304,10 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) #ifdef CONFIG_X86_64 if (is_long_mode(vcpu)) { - save_msrs(vcpu->host_msrs + msr_offset_kernel_gs_base, 1); - load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); + save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1); } #endif + load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); } static void vmx_load_host_state(struct kvm_vcpu *vcpu) @@ -337,12 +334,8 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) reload_tss(); } -#ifdef CONFIG_X86_64 - if (is_long_mode(vcpu)) { - save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); - load_msrs(vcpu->host_msrs, NR_BAD_MSRS); - } -#endif + save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); } /* @@ -463,6 +456,20 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) INTR_INFO_VALID_MASK); } +/* + * Swap MSR entry in host/guest MSR entry array. + */ +void move_msr_up(struct kvm_vcpu *vcpu, int from, int to) +{ + struct vmx_msr_entry tmp; + tmp = vcpu->guest_msrs[to]; + vcpu->guest_msrs[to] = vcpu->guest_msrs[from]; + vcpu->guest_msrs[from] = tmp; + tmp = vcpu->host_msrs[to]; + vcpu->host_msrs[to] = vcpu->host_msrs[from]; + vcpu->host_msrs[from] = tmp; +} + /* * Set up the vmcs to automatically save and restore system * msrs. Don't touch the 64-bit msrs if the guest is in legacy @@ -470,35 +477,54 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) */ static void setup_msrs(struct kvm_vcpu *vcpu) { - int nr_skip, nr_good_msrs; + int index, save_nmsrs; - if (is_long_mode(vcpu)) - nr_skip = NR_BAD_MSRS; - else - nr_skip = NR_64BIT_MSRS; - nr_good_msrs = vcpu->nmsrs - nr_skip; + save_nmsrs = 0; +#ifdef CONFIG_X86_64 + if (is_long_mode(vcpu)) { + index = __find_msr_index(vcpu, MSR_SYSCALL_MASK); + if (index >= 0) + move_msr_up(vcpu, index, save_nmsrs++); + index = __find_msr_index(vcpu, MSR_LSTAR); + if (index >= 0) + move_msr_up(vcpu, index, save_nmsrs++); + index = __find_msr_index(vcpu, MSR_CSTAR); + if (index >= 0) + move_msr_up(vcpu, index, save_nmsrs++); + index = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); + if (index >= 0) + move_msr_up(vcpu, index, save_nmsrs++); + /* + * MSR_K6_STAR is only needed on long mode guests, and only + * if efer.sce is enabled. + */ + index = __find_msr_index(vcpu, MSR_K6_STAR); + if ((index >= 0) && (vcpu->shadow_efer & EFER_SCE)) + move_msr_up(vcpu, index, save_nmsrs++); + } +#endif + vcpu->save_nmsrs = save_nmsrs; - /* - * MSR_K6_STAR is only needed on long mode guests, and only - * if efer.sce is enabled. - */ - if (find_msr_entry(vcpu, MSR_K6_STAR)) { - --nr_good_msrs; #ifdef CONFIG_X86_64 - if (is_long_mode(vcpu) && (vcpu->shadow_efer & EFER_SCE)) - ++nr_good_msrs; + vcpu->msr_offset_kernel_gs_base = + __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif + index = __find_msr_index(vcpu, MSR_EFER); + if (index >= 0) + save_nmsrs = 1; + else { + save_nmsrs = 0; + index = 0; } - vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, - virt_to_phys(vcpu->guest_msrs + nr_skip)); + virt_to_phys(vcpu->guest_msrs + index)); vmcs_writel(VM_EXIT_MSR_STORE_ADDR, - virt_to_phys(vcpu->guest_msrs + nr_skip)); + virt_to_phys(vcpu->guest_msrs + index)); vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, - virt_to_phys(vcpu->host_msrs + nr_skip)); - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, nr_good_msrs); /* 22.2.2 */ - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, nr_good_msrs); /* 22.2.2 */ + virt_to_phys(vcpu->host_msrs + index)); + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs); } /* @@ -595,14 +621,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) case MSR_GS_BASE: vmcs_writel(GUEST_GS_BASE, data); break; - case MSR_LSTAR: - case MSR_SYSCALL_MASK: - msr = find_msr_entry(vcpu, msr_index); - if (msr) - msr->data = data; - if (vcpu->vmx_host_state.loaded) - load_msrs(vcpu->guest_msrs, NR_BAD_MSRS); - break; #endif case MSR_IA32_SYSENTER_CS: vmcs_write32(GUEST_SYSENTER_CS, data); @@ -620,6 +638,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) msr = find_msr_entry(vcpu, msr_index); if (msr) { msr->data = data; + if (vcpu->vmx_host_state.loaded) + load_msrs(vcpu->guest_msrs,vcpu->save_nmsrs); break; } return kvm_set_msr_common(vcpu, msr_index, data); @@ -1331,10 +1351,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->host_msrs[j].reserved = 0; vcpu->host_msrs[j].data = data; vcpu->guest_msrs[j] = vcpu->host_msrs[j]; -#ifdef CONFIG_X86_64 - if (index == MSR_KERNEL_GS_BASE) - msr_offset_kernel_gs_base = j; -#endif ++vcpu->nmsrs; } -- cgit v1.2.3 From f2be4dd65437c60a4eb222bc40bc8caded62631a Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Sun, 20 May 2007 10:50:08 +0300 Subject: KVM: VMX: Cleanup redundant code in MSR set Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 872ca0381fbe..dc99191dbb4a 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -643,8 +643,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) break; } return kvm_set_msr_common(vcpu, msr_index, data); - msr->data = data; - break; } return 0; -- cgit v1.2.3 From 2cc51560aed0edb291341089d3475e1fbe8bfd04 Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Mon, 21 May 2007 07:28:09 +0300 Subject: KVM: VMX: Avoid saving and restoring msr_efer on lightweight vmexit MSR_EFER.LME/LMA bits are automatically save/restored by VMX hardware, KVM only needs to save NX/SCE bits at time of heavy weight VM Exit. But clearing NX bits in host envirnment may cause system hang if the host page table is using EXB bits, thus we leave NX bits as it is. If Host NX=1 and guest NX=0, we can do guest page table EXB bits check before inserting a shadow pte (though no guest is expecting to see this kind of gp fault). If host NX=0, we present guest no Execute-Disable feature to guest, thus no host NX=0, guest NX=1 combination. This patch reduces raw vmexit time by ~27%. Me: fix compile warnings on i386. Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 67 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 21 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index dc99191dbb4a..93e5bb2c40e3 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -42,6 +42,7 @@ static struct page *vmx_io_bitmap_b; #else #define HOST_IS_64 0 #endif +#define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE) static struct vmcs_descriptor { int size; @@ -85,6 +86,18 @@ static const u32 vmx_msr_index[] = { }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) +static inline u64 msr_efer_save_restore_bits(struct vmx_msr_entry msr) +{ + return (u64)msr.data & EFER_SAVE_RESTORE_BITS; +} + +static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) +{ + int efer_offset = vcpu->msr_offset_efer; + return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != + msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); +} + static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -265,6 +278,19 @@ static void reload_tss(void) #endif } +static void load_transition_efer(struct kvm_vcpu *vcpu) +{ + u64 trans_efer; + int efer_offset = vcpu->msr_offset_efer; + + trans_efer = vcpu->host_msrs[efer_offset].data; + trans_efer &= ~EFER_SAVE_RESTORE_BITS; + trans_efer |= msr_efer_save_restore_bits( + vcpu->guest_msrs[efer_offset]); + wrmsrl(MSR_EFER, trans_efer); + vcpu->stat.efer_reload++; +} + static void vmx_save_host_state(struct kvm_vcpu *vcpu) { struct vmx_host_state *hs = &vcpu->vmx_host_state; @@ -308,6 +334,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) } #endif load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_transition_efer(vcpu); } static void vmx_load_host_state(struct kvm_vcpu *vcpu) @@ -336,6 +364,8 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) } save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); + if (msr_efer_need_save_restore(vcpu)) + load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); } /* @@ -477,11 +507,13 @@ void move_msr_up(struct kvm_vcpu *vcpu, int from, int to) */ static void setup_msrs(struct kvm_vcpu *vcpu) { - int index, save_nmsrs; + int save_nmsrs; save_nmsrs = 0; #ifdef CONFIG_X86_64 if (is_long_mode(vcpu)) { + int index; + index = __find_msr_index(vcpu, MSR_SYSCALL_MASK); if (index >= 0) move_msr_up(vcpu, index, save_nmsrs++); @@ -509,22 +541,7 @@ static void setup_msrs(struct kvm_vcpu *vcpu) vcpu->msr_offset_kernel_gs_base = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif - index = __find_msr_index(vcpu, MSR_EFER); - if (index >= 0) - save_nmsrs = 1; - else { - save_nmsrs = 0; - index = 0; - } - vmcs_writel(VM_ENTRY_MSR_LOAD_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_STORE_ADDR, - virt_to_phys(vcpu->guest_msrs + index)); - vmcs_writel(VM_EXIT_MSR_LOAD_ADDR, - virt_to_phys(vcpu->host_msrs + index)); - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, save_nmsrs); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, save_nmsrs); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, save_nmsrs); + vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); } /* @@ -611,10 +628,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { struct vmx_msr_entry *msr; + int ret = 0; + switch (msr_index) { #ifdef CONFIG_X86_64 case MSR_EFER: - return kvm_set_msr_common(vcpu, msr_index, data); + ret = kvm_set_msr_common(vcpu, msr_index, data); + if (vcpu->vmx_host_state.loaded) + load_transition_efer(vcpu); + break; case MSR_FS_BASE: vmcs_writel(GUEST_FS_BASE, data); break; @@ -639,13 +661,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) if (msr) { msr->data = data; if (vcpu->vmx_host_state.loaded) - load_msrs(vcpu->guest_msrs,vcpu->save_nmsrs); + load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); break; } - return kvm_set_msr_common(vcpu, msr_index, data); + ret = kvm_set_msr_common(vcpu, msr_index, data); } - return 0; + return ret; } /* @@ -1326,6 +1348,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) asm ("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); -- cgit v1.2.3 From 17c3ba9d37dbda490792a2b52953f09d0dee30d6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 4 Jun 2007 15:58:30 +0300 Subject: KVM: Lazy guest cr3 switching Switch guest paging context may require us to allocate memory, which might fail. Instead of wiring up error paths everywhere, make context switching lazy and actually do the switch before the next guest entry, where we can return an error if allocation fails. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 93e5bb2c40e3..4d255493a57e 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1988,6 +1988,10 @@ again: vmx_save_host_state(vcpu); kvm_load_guest_fpu(vcpu); + r = kvm_mmu_reload(vcpu); + if (unlikely(r)) + goto out; + /* * Loading guest fpu may have cleared host cr0.ts */ -- cgit v1.2.3 From 50a3485c594d0d52196cde4d208b37cda779fbf3 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Sun, 3 Jun 2007 13:35:29 -0400 Subject: KVM: Replace C code with call to ARRAY_SIZE() macro. Signed-off-by: Robert P. J. Day Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 4d255493a57e..a534e6fe8184 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1932,7 +1932,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, }; static const int kvm_vmx_max_exit_handlers = - sizeof(kvm_vmx_exit_handlers) / sizeof(*kvm_vmx_exit_handlers); + ARRAY_SIZE(kvm_vmx_exit_handlers); /* * The guest has exited. See if we can fix it or if we need userspace -- cgit v1.2.3 From d3bef15f84f91c73a5515ad4c6a1749f8f63afcf Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 5 Jun 2007 15:53:05 +0300 Subject: KVM: Move duplicate halt handling code into kvm_main.c Will soon have a thid user. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index a534e6fe8184..90abd3c58c65 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1896,12 +1896,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { skip_emulated_instruction(vcpu); - if (vcpu->irq_summary) - return 1; - - kvm_run->exit_reason = KVM_EXIT_HLT; - ++vcpu->stat.halt_exits; - return 0; + return kvm_emulate_halt(vcpu); } static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -- cgit v1.2.3 From 72d6e5a08a8ba2105b3f36e32285e8fbfbed1f71 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 5 Jun 2007 16:15:51 +0300 Subject: KVM: Emulate hlt on real mode for Intel This has two use cases: the bios can't boot from disk, and guest smp bootstrap. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 90abd3c58c65..a1f51b9d482d 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1608,8 +1608,13 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vcpu->rmode.active && handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, - error_code)) + error_code)) { + if (vcpu->halt_request) { + vcpu->halt_request = 0; + return kvm_emulate_halt(vcpu); + } return 1; + } if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == (INTR_TYPE_EXCEPTION | 1)) { kvm_run->exit_reason = KVM_EXIT_DEBUG; -- cgit v1.2.3 From d9e368d61263055eceac2966bb7ea31b89da3425 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 7 Jun 2007 19:18:30 +0300 Subject: KVM: Flush remote tlbs when reducing shadow pte permissions When a vcpu causes a shadow tlb entry to have reduced permissions, it must also clear the tlb on remote vcpus. We do that by: - setting a bit on the vcpu that requests a tlb flush before the next entry - if the vcpu is currently executing, we send an ipi to make sure it exits before we continue Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index a1f51b9d482d..b969db1e0830 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1972,6 +1972,11 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); } +static void vmx_flush_tlb(struct kvm_vcpu *vcpu) +{ + vmcs_writel(GUEST_CR3, vmcs_readl(GUEST_CR3)); +} + static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u8 fail; @@ -1997,9 +2002,15 @@ again: */ vmcs_writel(HOST_CR0, read_cr0()); + local_irq_disable(); + + vcpu->guest_mode = 1; + if (vcpu->requests) + if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) + vmx_flush_tlb(vcpu); + asm ( /* Store host registers */ - "pushf \n\t" #ifdef CONFIG_X86_64 "push %%rax; push %%rbx; push %%rdx;" "push %%rsi; push %%rdi; push %%rbp;" @@ -2091,7 +2102,6 @@ again: "pop %%ecx; popa \n\t" #endif "setbe %0 \n\t" - "popf \n\t" : "=q" (fail) : "r"(vcpu->launched), "d"((unsigned long)HOST_RSP), "c"(vcpu), @@ -2115,6 +2125,9 @@ again: [cr2]"i"(offsetof(struct kvm_vcpu, cr2)) : "cc", "memory" ); + vcpu->guest_mode = 0; + local_irq_enable(); + ++vcpu->stat.exits; vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; @@ -2167,11 +2180,6 @@ out: return r; } -static void vmx_flush_tlb(struct kvm_vcpu *vcpu) -{ - vmcs_writel(GUEST_CR3, vmcs_readl(GUEST_CR3)); -} - static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, u32 err_code) -- cgit v1.2.3 From a3870c47891629dae1765358fbaba3c49460f47a Mon Sep 17 00:00:00 2001 From: Shani Moideen Date: Mon, 11 Jun 2007 09:31:33 +0530 Subject: KVM: VMX: Replace memset(, 0, PAGESIZE) with clear_page() Signed-off-by: Shani Moideen Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index b969db1e0830..b909b5455675 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1180,16 +1180,16 @@ static int init_rmode_tss(struct kvm* kvm) } page = kmap_atomic(p1, KM_USER0); - memset(page, 0, PAGE_SIZE); + clear_page(page); *(u16*)(page + 0x66) = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; kunmap_atomic(page, KM_USER0); page = kmap_atomic(p2, KM_USER0); - memset(page, 0, PAGE_SIZE); + clear_page(page); kunmap_atomic(page, KM_USER0); page = kmap_atomic(p3, KM_USER0); - memset(page, 0, PAGE_SIZE); + clear_page(page); *(page + RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1) = ~0; kunmap_atomic(page, KM_USER0); -- cgit v1.2.3 From 94cea1bb9d050c3200b36420cc03ba744dfd4338 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 13 Jun 2007 19:43:19 +0300 Subject: KVM: Initialize the BSP bit in the APIC_BASE msr correctly Needs to be set on vcpu 0 only. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index b909b5455675..0b2aace70aec 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1238,9 +1238,9 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) memset(vcpu->regs, 0, sizeof(vcpu->regs)); vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val(); vcpu->cr8 = 0; - vcpu->apic_base = 0xfee00000 | - /*for vcpu 0*/ MSR_IA32_APICBASE_BSP | - MSR_IA32_APICBASE_ENABLE; + vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; + if (vcpu == &vcpu->kvm->vcpus[0]) + vcpu->apic_base |= MSR_IA32_APICBASE_BSP; fx_init(vcpu); -- cgit v1.2.3 From 7700270ee3c1324c18f5b7c36ee5ba1a4165919a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 13 Jun 2007 19:55:28 +0300 Subject: KVM: VMX: Ensure vcpu time stamp counter is monotonous If the time stamp counter goes backwards, a guest delay loop can become infinite. This can happen if a vcpu is migrated to another cpu, where the counter has a lower value than the first cpu. Since we're doing an IPI to the first cpu anyway, we can use that to pick up the old tsc, and use that to calculate the adjustment we need to make to the tsc offset. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 0b2aace70aec..d06c3627f640 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -160,6 +160,7 @@ static void __vcpu_clear(void *arg) vmcs_clear(vcpu->vmcs); if (per_cpu(current_vmcs, cpu) == vcpu->vmcs) per_cpu(current_vmcs, cpu) = NULL; + rdtscll(vcpu->host_tsc); } static void vcpu_clear(struct kvm_vcpu *vcpu) @@ -376,6 +377,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) { u64 phys_addr = __pa(vcpu->vmcs); int cpu; + u64 tsc_this, delta; cpu = get_cpu(); @@ -409,6 +411,13 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ + + /* + * Make sure the time stamp counter is monotonous. + */ + rdtscll(tsc_this); + delta = vcpu->host_tsc - tsc_this; + vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta); } } -- cgit v1.2.3 From ff1dc7942ba8fa4a86619bcb37ed68afae1f69ca Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Thu, 31 May 2007 14:08:58 -0400 Subject: KVM: VMX: Fix interrupt checking on lightweight exit With kernel-injected interrupts, we need to check for interrupts on lightweight exits too. Signed-off-by: Gregory Haskins Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index d06c3627f640..b47ddccc7d7a 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1992,13 +1992,13 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; preempted: - if (!vcpu->mmio_read_completed) - do_interrupt_requests(vcpu, kvm_run); - if (vcpu->guest_debug.enabled) kvm_guest_debug_pre(vcpu); again: + if (!vcpu->mmio_read_completed) + do_interrupt_requests(vcpu, kvm_run); + vmx_save_host_state(vcpu); kvm_load_guest_fpu(vcpu); -- cgit v1.2.3 From 75880a01124c6aa5d428bdc14163039a87618be1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 20 Jun 2007 11:20:04 +0300 Subject: KVM: VMX: Reinitialize the real-mode tss when entering real mode Protected mode code may have corrupted the real-mode tss, so re-initialize it when switching to real mode. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index b47ddccc7d7a..42a916379ce2 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -31,6 +31,8 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +static int init_rmode_tss(struct kvm *kvm); + static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -951,6 +953,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) fix_rmode_seg(VCPU_SREG_DS, &vcpu->rmode.ds); fix_rmode_seg(VCPU_SREG_GS, &vcpu->rmode.gs); fix_rmode_seg(VCPU_SREG_FS, &vcpu->rmode.fs); + + init_rmode_tss(vcpu->kvm); } #ifdef CONFIG_X86_64 -- cgit v1.2.3 From 796fd1b23e463e98b3e2fc86ed571db06dc945bb Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 21 Jun 2007 11:54:45 +0300 Subject: KVM: VMX: Remove unnecessary code in vmx_tlb_flush() A vmexit implicitly flushes the tlb; the code is bogus. Noted by Shaohua Li. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 42a916379ce2..7d04ffaaf94a 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1987,7 +1987,6 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, static void vmx_flush_tlb(struct kvm_vcpu *vcpu) { - vmcs_writel(GUEST_CR3, vmcs_readl(GUEST_CR3)); } static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -- cgit v1.2.3 From e495606dd09d79f9fa496334ac3958f6ff179d82 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 28 Jun 2007 14:15:57 -0400 Subject: KVM: Clean up #includes Remove unnecessary ones, and rearange the remaining in the standard order. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 7d04ffaaf94a..80628f69916d 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -17,17 +17,18 @@ #include "kvm.h" #include "vmx.h" +#include "segment_descriptor.h" + #include #include #include #include #include #include + #include #include -#include "segment_descriptor.h" - MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -- cgit v1.2.3