summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/svm.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r--arch/x86/kvm/svm.c212
1 files changed, 163 insertions, 49 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4911bf191..8e0c0844c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -21,6 +21,7 @@
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"
+#include "pmu.h"
#include <linux/module.h>
#include <linux/mod_devicetable.h>
@@ -28,7 +29,7 @@
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/sched.h>
-#include <linux/ftrace_event.h>
+#include <linux/trace_events.h>
#include <linux/slab.h>
#include <asm/perf_event.h>
@@ -864,6 +865,64 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}
+#define MTRR_TYPE_UC_MINUS 7
+#define MTRR2PROTVAL_INVALID 0xff
+
+static u8 mtrr2protval[8];
+
+static u8 fallback_mtrr_type(int mtrr)
+{
+ /*
+ * WT and WP aren't always available in the host PAT. Treat
+ * them as UC and UC- respectively. Everything else should be
+ * there.
+ */
+ switch (mtrr)
+ {
+ case MTRR_TYPE_WRTHROUGH:
+ return MTRR_TYPE_UNCACHABLE;
+ case MTRR_TYPE_WRPROT:
+ return MTRR_TYPE_UC_MINUS;
+ default:
+ BUG();
+ }
+}
+
+static void build_mtrr2protval(void)
+{
+ int i;
+ u64 pat;
+
+ for (i = 0; i < 8; i++)
+ mtrr2protval[i] = MTRR2PROTVAL_INVALID;
+
+ /* Ignore the invalid MTRR types. */
+ mtrr2protval[2] = 0;
+ mtrr2protval[3] = 0;
+
+ /*
+ * Use host PAT value to figure out the mapping from guest MTRR
+ * values to nested page table PAT/PCD/PWT values. We do not
+ * want to change the host PAT value every time we enter the
+ * guest.
+ */
+ rdmsrl(MSR_IA32_CR_PAT, pat);
+ for (i = 0; i < 8; i++) {
+ u8 mtrr = pat >> (8 * i);
+
+ if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID)
+ mtrr2protval[mtrr] = __cm_idx2pte(i);
+ }
+
+ for (i = 0; i < 8; i++) {
+ if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) {
+ u8 fallback = fallback_mtrr_type(i);
+ mtrr2protval[i] = mtrr2protval[fallback];
+ BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID);
+ }
+ }
+}
+
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -930,6 +989,7 @@ static __init int svm_hardware_setup(void)
} else
kvm_disable_tdp();
+ build_mtrr2protval();
return 0;
err:
@@ -1084,7 +1144,40 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
return target_tsc - tsc;
}
-static void init_vmcb(struct vcpu_svm *svm)
+static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+
+ /* Unlike Intel, AMD takes the guest's CR0.CD into account.
+ *
+ * AMD does not have IPAT. To emulate it for the case of guests
+ * with no assigned devices, just set everything to WB. If guests
+ * have assigned devices, however, we cannot force WB for RAM
+ * pages only, so use the guest PAT directly.
+ */
+ if (!kvm_arch_has_assigned_device(vcpu->kvm))
+ *g_pat = 0x0606060606060606;
+ else
+ *g_pat = vcpu->arch.pat;
+}
+
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+ u8 mtrr;
+
+ /*
+ * 1. MMIO: trust guest MTRR, so same as item 3.
+ * 2. No passthrough: always map as WB, and force guest PAT to WB as well
+ * 3. Passthrough: can't guarantee the result, try to trust guest.
+ */
+ if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
+ return 0;
+
+ mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
+ return mtrr2protval[mtrr];
+}
+
+static void init_vmcb(struct vcpu_svm *svm, bool init_event)
{
struct vmcb_control_area *control = &svm->vmcb->control;
struct vmcb_save_area *save = &svm->vmcb->save;
@@ -1155,17 +1248,17 @@ static void init_vmcb(struct vcpu_svm *svm)
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
- svm_set_efer(&svm->vcpu, 0);
+ if (!init_event)
+ svm_set_efer(&svm->vcpu, 0);
save->dr6 = 0xffff0ff0;
kvm_set_rflags(&svm->vcpu, 2);
save->rip = 0x0000fff0;
svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
/*
- * This is the guest-visible cr0 value.
* svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
+ * It also updates the guest-visible cr0 value.
*/
- svm->vcpu.arch.cr0 = 0;
(void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
save->cr4 = X86_CR4_PAE;
@@ -1178,7 +1271,8 @@ static void init_vmcb(struct vcpu_svm *svm)
clr_exception_intercept(svm, PF_VECTOR);
clr_cr_intercept(svm, INTERCEPT_CR3_READ);
clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
- save->g_pat = 0x0007040600070406ULL;
+ save->g_pat = svm->vcpu.arch.pat;
+ svm_set_guest_pat(svm, &save->g_pat);
save->cr3 = 0;
save->cr4 = 0;
}
@@ -1197,13 +1291,19 @@ static void init_vmcb(struct vcpu_svm *svm)
enable_gif(svm);
}
-static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
+static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct vcpu_svm *svm = to_svm(vcpu);
u32 dummy;
u32 eax = 1;
- init_vmcb(svm);
+ if (!init_event) {
+ svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
+ MSR_IA32_APICBASE_ENABLE;
+ if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
+ svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
+ }
+ init_vmcb(svm, init_event);
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
@@ -1259,12 +1359,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
clear_page(svm->vmcb);
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
svm->asid_generation = 0;
- init_vmcb(svm);
-
- svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
- MSR_IA32_APICBASE_ENABLE;
- if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
- svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
+ init_vmcb(svm, false);
svm_init_osvw(&svm->vcpu);
@@ -1577,7 +1672,8 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
* does not do it - this results in some delay at
* reboot
*/
- cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+ cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
svm->vmcb->save.cr0 = cr0;
mark_dirty(svm->vmcb, VMCB_CR);
update_cr0_intercept(svm);
@@ -1885,7 +1981,7 @@ static int shutdown_interception(struct vcpu_svm *svm)
* so reinitialize it.
*/
clear_page(svm->vmcb);
- init_vmcb(svm);
+ init_vmcb(svm, false);
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
return 0;
@@ -1955,8 +2051,8 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
u64 pdpte;
int ret;
- ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte,
- offset_in_page(cr3) + index * 8, 8);
+ ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
+ offset_in_page(cr3) + index * 8, 8);
if (ret)
return 0;
return pdpte;
@@ -2114,7 +2210,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
might_sleep();
- page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
+ page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT);
if (is_error_page(page))
goto error;
@@ -2153,7 +2249,7 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
mask = (0xf >> (4 - size)) << start_bit;
val = 0;
- if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, iopm_len))
+ if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
return NESTED_EXIT_DONE;
return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
@@ -2178,7 +2274,7 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
/* Offset is in 32 bit units but need in 8 bit units */
offset *= 4;
- if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4))
+ if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
return NESTED_EXIT_DONE;
return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
@@ -2449,7 +2545,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
p = msrpm_offsets[i];
offset = svm->nested.vmcb_msrpm + (p * 4);
- if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
+ if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
return false;
svm->nested.msrpm[p] = svm->msrpm[p] | value;
@@ -3069,42 +3165,42 @@ static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
svm_scale_tsc(vcpu, host_tsc);
}
-static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
+static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_svm *svm = to_svm(vcpu);
- switch (ecx) {
+ switch (msr_info->index) {
case MSR_IA32_TSC: {
- *data = svm->vmcb->control.tsc_offset +
+ msr_info->data = svm->vmcb->control.tsc_offset +
svm_scale_tsc(vcpu, native_read_tsc());
break;
}
case MSR_STAR:
- *data = svm->vmcb->save.star;
+ msr_info->data = svm->vmcb->save.star;
break;
#ifdef CONFIG_X86_64
case MSR_LSTAR:
- *data = svm->vmcb->save.lstar;
+ msr_info->data = svm->vmcb->save.lstar;
break;
case MSR_CSTAR:
- *data = svm->vmcb->save.cstar;
+ msr_info->data = svm->vmcb->save.cstar;
break;
case MSR_KERNEL_GS_BASE:
- *data = svm->vmcb->save.kernel_gs_base;
+ msr_info->data = svm->vmcb->save.kernel_gs_base;
break;
case MSR_SYSCALL_MASK:
- *data = svm->vmcb->save.sfmask;
+ msr_info->data = svm->vmcb->save.sfmask;
break;
#endif
case MSR_IA32_SYSENTER_CS:
- *data = svm->vmcb->save.sysenter_cs;
+ msr_info->data = svm->vmcb->save.sysenter_cs;
break;
case MSR_IA32_SYSENTER_EIP:
- *data = svm->sysenter_eip;
+ msr_info->data = svm->sysenter_eip;
break;
case MSR_IA32_SYSENTER_ESP:
- *data = svm->sysenter_esp;
+ msr_info->data = svm->sysenter_esp;
break;
/*
* Nobody will change the following 5 values in the VMCB so we can
@@ -3112,31 +3208,31 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
* implemented.
*/
case MSR_IA32_DEBUGCTLMSR:
- *data = svm->vmcb->save.dbgctl;
+ msr_info->data = svm->vmcb->save.dbgctl;
break;
case MSR_IA32_LASTBRANCHFROMIP:
- *data = svm->vmcb->save.br_from;
+ msr_info->data = svm->vmcb->save.br_from;
break;
case MSR_IA32_LASTBRANCHTOIP:
- *data = svm->vmcb->save.br_to;
+ msr_info->data = svm->vmcb->save.br_to;
break;
case MSR_IA32_LASTINTFROMIP:
- *data = svm->vmcb->save.last_excp_from;
+ msr_info->data = svm->vmcb->save.last_excp_from;
break;
case MSR_IA32_LASTINTTOIP:
- *data = svm->vmcb->save.last_excp_to;
+ msr_info->data = svm->vmcb->save.last_excp_to;
break;
case MSR_VM_HSAVE_PA:
- *data = svm->nested.hsave_msr;
+ msr_info->data = svm->nested.hsave_msr;
break;
case MSR_VM_CR:
- *data = svm->nested.vm_cr_msr;
+ msr_info->data = svm->nested.vm_cr_msr;
break;
case MSR_IA32_UCODE_REV:
- *data = 0x01000065;
+ msr_info->data = 0x01000065;
break;
default:
- return kvm_get_msr_common(vcpu, ecx, data);
+ return kvm_get_msr_common(vcpu, msr_info);
}
return 0;
}
@@ -3144,16 +3240,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
static int rdmsr_interception(struct vcpu_svm *svm)
{
u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
- u64 data;
+ struct msr_data msr_info;
- if (svm_get_msr(&svm->vcpu, ecx, &data)) {
+ msr_info.index = ecx;
+ msr_info.host_initiated = false;
+ if (svm_get_msr(&svm->vcpu, &msr_info)) {
trace_kvm_msr_read_ex(ecx);
kvm_inject_gp(&svm->vcpu, 0);
} else {
- trace_kvm_msr_read(ecx, data);
+ trace_kvm_msr_read(ecx, msr_info.data);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX,
+ msr_info.data & 0xffffffff);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
+ msr_info.data >> 32);
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
skip_emulated_instruction(&svm->vcpu);
}
@@ -3247,6 +3347,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
+ case MSR_IA32_CR_PAT:
+ if (npt_enabled) {
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+ return 1;
+ vcpu->arch.pat = data;
+ svm_set_guest_pat(svm, &svm->vmcb->save.g_pat);
+ mark_dirty(svm->vmcb, VMCB_NPT);
+ break;
+ }
+ /* fall through */
default:
return kvm_set_msr_common(vcpu, msr);
}
@@ -3390,6 +3500,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_MWAIT] = mwait_interception,
[SVM_EXIT_XSETBV] = xsetbv_interception,
[SVM_EXIT_NPF] = pf_interception,
+ [SVM_EXIT_RSM] = emulate_on_interception,
};
static void dump_vmcb(struct kvm_vcpu *vcpu)
@@ -4075,9 +4186,9 @@ static bool svm_cpu_has_accelerated_tpr(void)
return false;
}
-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+static bool svm_has_high_real_mode_segbase(void)
{
- return 0;
+ return true;
}
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
@@ -4350,6 +4461,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable,
.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
+ .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,
.vcpu_create = svm_create_vcpu,
.vcpu_free = svm_free_vcpu,
@@ -4444,6 +4556,8 @@ static struct kvm_x86_ops svm_x86_ops = {
.handle_external_intr = svm_handle_external_intr,
.sched_in = svm_sched_in,
+
+ .pmu_ops = &amd_pmu_ops,
};
static int __init svm_init(void)