diff options
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r-- | arch/x86/kvm/vmx.c | 372 |
1 files changed, 268 insertions, 104 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2d73807f0..83b7b5cd7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -28,7 +28,7 @@ #include <linux/sched.h> #include <linux/moduleparam.h> #include <linux/mod_devicetable.h> -#include <linux/ftrace_event.h> +#include <linux/trace_events.h> #include <linux/slab.h> #include <linux/tboot.h> #include <linux/hrtimer.h> @@ -40,14 +40,14 @@ #include <asm/vmx.h> #include <asm/virtext.h> #include <asm/mce.h> -#include <asm/i387.h> -#include <asm/xcr.h> +#include <asm/fpu/internal.h> #include <asm/perf_event.h> #include <asm/debugreg.h> #include <asm/kexec.h> #include <asm/apic.h> #include "trace.h" +#include "pmu.h" #define __ex(x) __kvm_handle_fault_on_reboot(x) #define __ex_clear(x, reg) \ @@ -786,7 +786,7 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr) { - struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT); + struct page *page = kvm_vcpu_gfn_to_page(vcpu, addr >> PAGE_SHIFT); if (is_error_page(page)) return NULL; @@ -1883,7 +1883,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) * If the FPU is not active (through the host task or * the guest vcpu), then restore the cr0.TS bit. */ - if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded) + if (!fpregs_active() && !vmx->vcpu.guest_fpu_loaded) stts(); load_gdt(this_cpu_ptr(&host_gdt)); } @@ -2170,8 +2170,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) if (is_guest_mode(vcpu)) msr_bitmap = vmx_msr_bitmap_nested; - else if (irqchip_in_kernel(vcpu->kvm) && - apic_x2apic_mode(vcpu->arch.apic)) { + else if (vcpu->arch.apic_base & X2APIC_ENABLE) { if (is_long_mode(vcpu)) msr_bitmap = vmx_msr_bitmap_longmode_x2apic; else @@ -2623,76 +2622,69 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) * Returns 0 on success, non-0 otherwise. * Assumes vcpu_load() was already called. */ -static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) +static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { - u64 data; struct shared_msr_entry *msr; - if (!pdata) { - printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); - return -EINVAL; - } - - switch (msr_index) { + switch (msr_info->index) { #ifdef CONFIG_X86_64 case MSR_FS_BASE: - data = vmcs_readl(GUEST_FS_BASE); + msr_info->data = vmcs_readl(GUEST_FS_BASE); break; case MSR_GS_BASE: - data = vmcs_readl(GUEST_GS_BASE); + msr_info->data = vmcs_readl(GUEST_GS_BASE); break; case MSR_KERNEL_GS_BASE: vmx_load_host_state(to_vmx(vcpu)); - data = to_vmx(vcpu)->msr_guest_kernel_gs_base; + msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base; break; #endif case MSR_EFER: - return kvm_get_msr_common(vcpu, msr_index, pdata); + return kvm_get_msr_common(vcpu, msr_info); case MSR_IA32_TSC: - data = guest_read_tsc(); + msr_info->data = guest_read_tsc(); break; case MSR_IA32_SYSENTER_CS: - data = vmcs_read32(GUEST_SYSENTER_CS); + msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); break; case MSR_IA32_SYSENTER_EIP: - data = vmcs_readl(GUEST_SYSENTER_EIP); + msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP); break; case MSR_IA32_SYSENTER_ESP: - data = vmcs_readl(GUEST_SYSENTER_ESP); + msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); break; case MSR_IA32_BNDCFGS: if (!vmx_mpx_supported()) return 1; - data = vmcs_read64(GUEST_BNDCFGS); + msr_info->data = vmcs_read64(GUEST_BNDCFGS); break; case MSR_IA32_FEATURE_CONTROL: if (!nested_vmx_allowed(vcpu)) return 1; - data = to_vmx(vcpu)->nested.msr_ia32_feature_control; + msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control; break; case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: if (!nested_vmx_allowed(vcpu)) return 1; - return vmx_get_vmx_msr(vcpu, msr_index, pdata); + return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data); case MSR_IA32_XSS: if (!vmx_xsaves_supported()) return 1; - data = vcpu->arch.ia32_xss; + msr_info->data = vcpu->arch.ia32_xss; break; case MSR_TSC_AUX: if (!to_vmx(vcpu)->rdtscp_enabled) return 1; /* Otherwise falls through */ default: - msr = find_msr_entry(to_vmx(vcpu), msr_index); + msr = find_msr_entry(to_vmx(vcpu), msr_info->index); if (msr) { - data = msr->data; + msr_info->data = msr->data; break; } - return kvm_get_msr_common(vcpu, msr_index, pdata); + return kvm_get_msr_common(vcpu, msr_info); } - *pdata = data; return 0; } @@ -4123,7 +4115,7 @@ static int alloc_apic_access_page(struct kvm *kvm) kvm_userspace_mem.flags = 0; kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); + r = __x86_set_memory_region(kvm, &kvm_userspace_mem); if (r) goto out; @@ -4158,7 +4150,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) kvm_userspace_mem.guest_phys_addr = kvm->arch.ept_identity_map_addr; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); + r = __x86_set_memory_region(kvm, &kvm_userspace_mem); return r; } @@ -4667,16 +4659,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { - u32 msr_low, msr_high; - u64 host_pat; - rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); - host_pat = msr_low | ((u64) msr_high << 32); - /* Write the default value follow host pat */ - vmcs_write64(GUEST_IA32_PAT, host_pat); - /* Keep arch.pat sync with GUEST_IA32_PAT */ - vmx->vcpu.arch.pat = host_pat; - } + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) + vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { u32 index = vmx_msr_index[i]; @@ -4708,22 +4692,27 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) return 0; } -static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) +static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct msr_data apic_base_msr; + u64 cr0; vmx->rmode.vm86_active = 0; vmx->soft_vnmi_blocked = 0; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); - kvm_set_cr8(&vmx->vcpu, 0); - apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_reset_bsp(&vmx->vcpu)) - apic_base_msr.data |= MSR_IA32_APICBASE_BSP; - apic_base_msr.host_initiated = true; - kvm_set_apic_base(&vmx->vcpu, &apic_base_msr); + kvm_set_cr8(vcpu, 0); + + if (!init_event) { + apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE; + if (kvm_vcpu_is_reset_bsp(vcpu)) + apic_base_msr.data |= MSR_IA32_APICBASE_BSP; + apic_base_msr.host_initiated = true; + kvm_set_apic_base(vcpu, &apic_base_msr); + } vmx_segment_cache_clear(vmx); @@ -4747,9 +4736,12 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_LDTR_LIMIT, 0xffff); vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); - vmcs_write32(GUEST_SYSENTER_CS, 0); - vmcs_writel(GUEST_SYSENTER_ESP, 0); - vmcs_writel(GUEST_SYSENTER_EIP, 0); + if (!init_event) { + vmcs_write32(GUEST_SYSENTER_CS, 0); + vmcs_writel(GUEST_SYSENTER_ESP, 0); + vmcs_writel(GUEST_SYSENTER_EIP, 0); + vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + } vmcs_writel(GUEST_RFLAGS, 0x02); kvm_rip_write(vcpu, 0xfff0); @@ -4764,18 +4756,15 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); - /* Special registers */ - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); - setup_msrs(vmx); vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ - if (cpu_has_vmx_tpr_shadow()) { + if (cpu_has_vmx_tpr_shadow() && !init_event) { vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); - if (vm_need_tpr_shadow(vmx->vcpu.kvm)) + if (vm_need_tpr_shadow(vcpu->kvm)) vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, - __pa(vmx->vcpu.arch.apic->regs)); + __pa(vcpu->arch.apic->regs)); vmcs_write32(TPR_THRESHOLD, 0); } @@ -4787,12 +4776,14 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) if (vmx->vpid != 0) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); - vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; - vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ - vmx_set_cr4(&vmx->vcpu, 0); - vmx_set_efer(&vmx->vcpu, 0); - vmx_fpu_activate(&vmx->vcpu); - update_exception_bitmap(&vmx->vcpu); + cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; + vmx_set_cr0(vcpu, cr0); /* enter rmode */ + vmx->vcpu.arch.cr0 = cr0; + vmx_set_cr4(vcpu, 0); + if (!init_event) + vmx_set_efer(vcpu, 0); + vmx_fpu_activate(vcpu); + update_exception_bitmap(vcpu); vpid_sync_context(vmx); } @@ -4965,7 +4956,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) .flags = 0, }; - ret = kvm_set_memory_region(kvm, &tss_mem); + ret = x86_set_memory_region(kvm, &tss_mem); if (ret) return ret; kvm->arch.tss_addr = addr; @@ -5475,19 +5466,21 @@ static int handle_cpuid(struct kvm_vcpu *vcpu) static int handle_rdmsr(struct kvm_vcpu *vcpu) { u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; - u64 data; + struct msr_data msr_info; - if (vmx_get_msr(vcpu, ecx, &data)) { + msr_info.index = ecx; + msr_info.host_initiated = false; + if (vmx_get_msr(vcpu, &msr_info)) { trace_kvm_msr_read_ex(ecx); kvm_inject_gp(vcpu, 0); return 1; } - trace_kvm_msr_read(ecx, data); + trace_kvm_msr_read(ecx, msr_info.data); /* FIXME: handling of bits 32:63 of rax, rdx */ - vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u; - vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u; + vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u; + vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u; skip_emulated_instruction(vcpu); return 1; } @@ -5710,9 +5703,6 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) return 0; } - /* clear all local breakpoint enable flags */ - vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155); - /* * TODO: What about debug traps on tss switch? * Are we supposed to inject them and update dr6? @@ -7333,7 +7323,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, bitmap += (port & 0x7fff) / 8; if (last_bitmap != bitmap) - if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1)) + if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1)) return true; if (b & (1 << (port & 7))) return true; @@ -7377,7 +7367,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, /* Then read the msr_index'th bit from this bitmap: */ if (msr_index < 1024*8) { unsigned char b; - if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1)) + if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1)) return true; return 1 & (b >> (msr_index & 7)); } else @@ -7642,9 +7632,9 @@ static void vmx_disable_pml(struct vcpu_vmx *vmx) vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); } -static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx) +static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vmx->vcpu.kvm; + struct vcpu_vmx *vmx = to_vmx(vcpu); u64 *pml_buf; u16 pml_idx; @@ -7666,7 +7656,7 @@ static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx) gpa = pml_buf[pml_idx]; WARN_ON(gpa & (PAGE_SIZE - 1)); - mark_page_dirty(kvm, gpa >> PAGE_SHIFT); + kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); } /* reset PML index */ @@ -7691,6 +7681,158 @@ static void kvm_flush_pml_buffers(struct kvm *kvm) kvm_vcpu_kick(vcpu); } +static void vmx_dump_sel(char *name, uint32_t sel) +{ + pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", + name, vmcs_read32(sel), + vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR), + vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR), + vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR)); +} + +static void vmx_dump_dtsel(char *name, uint32_t limit) +{ + pr_err("%s limit=0x%08x, base=0x%016lx\n", + name, vmcs_read32(limit), + vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT)); +} + +static void dump_vmcs(void) +{ + u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); + u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); + u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); + u32 secondary_exec_control = 0; + unsigned long cr4 = vmcs_readl(GUEST_CR4); + u64 efer = vmcs_readl(GUEST_IA32_EFER); + int i, n; + + if (cpu_has_secondary_exec_ctrls()) + secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + + pr_err("*** Guest State ***\n"); + pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", + vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW), + vmcs_readl(CR0_GUEST_HOST_MASK)); + pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n", + cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK)); + pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3)); + if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) && + (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA)) + { + pr_err("PDPTR0 = 0x%016lx PDPTR1 = 0x%016lx\n", + vmcs_readl(GUEST_PDPTR0), vmcs_readl(GUEST_PDPTR1)); + pr_err("PDPTR2 = 0x%016lx PDPTR3 = 0x%016lx\n", + vmcs_readl(GUEST_PDPTR2), vmcs_readl(GUEST_PDPTR3)); + } + pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", + vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); + pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n", + vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7)); + pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", + vmcs_readl(GUEST_SYSENTER_ESP), + vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP)); + vmx_dump_sel("CS: ", GUEST_CS_SELECTOR); + vmx_dump_sel("DS: ", GUEST_DS_SELECTOR); + vmx_dump_sel("SS: ", GUEST_SS_SELECTOR); + vmx_dump_sel("ES: ", GUEST_ES_SELECTOR); + vmx_dump_sel("FS: ", GUEST_FS_SELECTOR); + vmx_dump_sel("GS: ", GUEST_GS_SELECTOR); + vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT); + vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR); + vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT); + vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); + if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || + (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) + pr_err("EFER = 0x%016llx PAT = 0x%016lx\n", + efer, vmcs_readl(GUEST_IA32_PAT)); + pr_err("DebugCtl = 0x%016lx DebugExceptions = 0x%016lx\n", + vmcs_readl(GUEST_IA32_DEBUGCTL), + vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); + if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) + pr_err("PerfGlobCtl = 0x%016lx\n", + vmcs_readl(GUEST_IA32_PERF_GLOBAL_CTRL)); + if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) + pr_err("BndCfgS = 0x%016lx\n", vmcs_readl(GUEST_BNDCFGS)); + pr_err("Interruptibility = %08x ActivityState = %08x\n", + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), + vmcs_read32(GUEST_ACTIVITY_STATE)); + if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) + pr_err("InterruptStatus = %04x\n", + vmcs_read16(GUEST_INTR_STATUS)); + + pr_err("*** Host State ***\n"); + pr_err("RIP = 0x%016lx RSP = 0x%016lx\n", + vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP)); + pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n", + vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR), + vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR), + vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR), + vmcs_read16(HOST_TR_SELECTOR)); + pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n", + vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE), + vmcs_readl(HOST_TR_BASE)); + pr_err("GDTBase=%016lx IDTBase=%016lx\n", + vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE)); + pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n", + vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3), + vmcs_readl(HOST_CR4)); + pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n", + vmcs_readl(HOST_IA32_SYSENTER_ESP), + vmcs_read32(HOST_IA32_SYSENTER_CS), + vmcs_readl(HOST_IA32_SYSENTER_EIP)); + if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) + pr_err("EFER = 0x%016lx PAT = 0x%016lx\n", + vmcs_readl(HOST_IA32_EFER), vmcs_readl(HOST_IA32_PAT)); + if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + pr_err("PerfGlobCtl = 0x%016lx\n", + vmcs_readl(HOST_IA32_PERF_GLOBAL_CTRL)); + + pr_err("*** Control State ***\n"); + pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", + pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control); + pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl); + pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n", + vmcs_read32(EXCEPTION_BITMAP), + vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK), + vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH)); + pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n", + vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), + vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE), + vmcs_read32(VM_ENTRY_INSTRUCTION_LEN)); + pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n", + vmcs_read32(VM_EXIT_INTR_INFO), + vmcs_read32(VM_EXIT_INTR_ERROR_CODE), + vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); + pr_err(" reason=%08x qualification=%016lx\n", + vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION)); + pr_err("IDTVectoring: info=%08x errcode=%08x\n", + vmcs_read32(IDT_VECTORING_INFO_FIELD), + vmcs_read32(IDT_VECTORING_ERROR_CODE)); + pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET)); + if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) + pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); + if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) + pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); + if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) + pr_err("EPT pointer = 0x%016lx\n", vmcs_readl(EPT_POINTER)); + n = vmcs_read32(CR3_TARGET_COUNT); + for (i = 0; i + 1 < n; i += 4) + pr_err("CR3 target%u=%016lx target%u=%016lx\n", + i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2), + i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2)); + if (i < n) + pr_err("CR3 target%u=%016lx\n", + i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2)); + if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) + pr_err("PLE Gap=%08x Window=%08x\n", + vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW)); + if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID) + pr_err("Virtual processor ID = 0x%04x\n", + vmcs_read16(VIRTUAL_PROCESSOR_ID)); +} + /* * The guest has exited. See if we can fix it or if we need userspace * assistance. @@ -7709,7 +7851,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) * flushed already. */ if (enable_pml) - vmx_flush_pml_buffer(vmx); + vmx_flush_pml_buffer(vcpu); /* If guest state is invalid, start emulating */ if (vmx->emulation_required) @@ -7723,6 +7865,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) } if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { + dump_vmcs(); vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; vcpu->run->fail_entry.hardware_entry_failure_reason = exit_reason; @@ -7996,6 +8139,11 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) local_irq_enable(); } +static bool vmx_has_high_real_mode_segbase(void) +{ + return enable_unrestricted_guest || emulate_invalid_guest_state; +} + static bool vmx_mpx_supported(void) { return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) && @@ -8480,29 +8628,39 @@ static int get_ept_level(void) static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { - u64 ret; + u8 cache; + u64 ipat = 0; /* For VT-d and EPT combination - * 1. MMIO: always map as UC + * 1. MMIO: guest may want to apply WC, trust it. * 2. EPT with VT-d: * a. VT-d without snooping control feature: can't guarantee the - * result, try to trust guest. + * result, try to trust guest. So the same as item 1. * b. VT-d with snooping control feature: snooping control feature of * VT-d engine can guarantee the cache correctness. Just set it * to WB to keep consistent with host. So the same as item 3. * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep * consistent with host MTRR */ - if (is_mmio) - ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; - else if (kvm_arch_has_noncoherent_dma(vcpu->kvm)) - ret = kvm_get_guest_memory_type(vcpu, gfn) << - VMX_EPT_MT_EPTE_SHIFT; - else - ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) - | VMX_EPT_IPAT_BIT; + if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) { + ipat = VMX_EPT_IPAT_BIT; + cache = MTRR_TYPE_WRBACK; + goto exit; + } - return ret; + if (kvm_read_cr0(vcpu) & X86_CR0_CD) { + ipat = VMX_EPT_IPAT_BIT; + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) + cache = MTRR_TYPE_WRBACK; + else + cache = MTRR_TYPE_UNCACHABLE; + goto exit; + } + + cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn); + +exit: + return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat; } static int vmx_get_lpage_level(void) @@ -8924,7 +9082,7 @@ static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, struct vmx_msr_entry *e) { /* x2APIC MSR accesses are not allowed */ - if (apic_x2apic_mode(vcpu->arch.apic) && e->index >> 8 == 0x8) + if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8) return -EINVAL; if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */ e->index == MSR_IA32_UCODE_REV) @@ -8966,8 +9124,8 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) msr.host_initiated = false; for (i = 0; i < count; i++) { - if (kvm_read_guest(vcpu->kvm, gpa + i * sizeof(e), - &e, sizeof(e))) { + if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e), + &e, sizeof(e))) { pr_warn_ratelimited( "%s cannot read MSR entry (%u, 0x%08llx)\n", __func__, i, gpa + i * sizeof(e)); @@ -8999,9 +9157,10 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) struct vmx_msr_entry e; for (i = 0; i < count; i++) { - if (kvm_read_guest(vcpu->kvm, - gpa + i * sizeof(e), - &e, 2 * sizeof(u32))) { + struct msr_data msr_info; + if (kvm_vcpu_read_guest(vcpu, + gpa + i * sizeof(e), + &e, 2 * sizeof(u32))) { pr_warn_ratelimited( "%s cannot read MSR entry (%u, 0x%08llx)\n", __func__, i, gpa + i * sizeof(e)); @@ -9013,19 +9172,21 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) __func__, i, e.index, e.reserved); return -EINVAL; } - if (kvm_get_msr(vcpu, e.index, &e.value)) { + msr_info.host_initiated = false; + msr_info.index = e.index; + if (kvm_get_msr(vcpu, &msr_info)) { pr_warn_ratelimited( "%s cannot read MSR (%u, 0x%x)\n", __func__, i, e.index); return -EINVAL; } - if (kvm_write_guest(vcpu->kvm, - gpa + i * sizeof(e) + - offsetof(struct vmx_msr_entry, value), - &e.value, sizeof(e.value))) { + if (kvm_vcpu_write_guest(vcpu, + gpa + i * sizeof(e) + + offsetof(struct vmx_msr_entry, value), + &msr_info.data, sizeof(msr_info.data))) { pr_warn_ratelimited( "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", - __func__, i, e.index, e.value); + __func__, i, e.index, msr_info.data); return -EINVAL; } } @@ -10150,6 +10311,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .hardware_enable = hardware_enable, .hardware_disable = hardware_disable, .cpu_has_accelerated_tpr = report_flexpriority, + .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, .vcpu_create = vmx_create_vcpu, .vcpu_free = vmx_free_vcpu, @@ -10255,6 +10417,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .slot_disable_log_dirty = vmx_slot_disable_log_dirty, .flush_log_dirty = vmx_flush_log_dirty, .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, + + .pmu_ops = &intel_pmu_ops, }; static int __init vmx_init(void) |