diff options
author | Radim Krčmář <rkrcmar@redhat.com> | 2016-08-08 20:16:22 +0200 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2018-12-17 21:55:12 +0100 |
commit | e90c6ad207bcb7a599c259596c9a9e1bb15eb7bb (patch) | |
tree | 7810ce9ad0a159460519e4ea7b8cc4403b813557 /arch | |
parent | 8420459f1d938a02b060bde1e161fdd1de212fac (diff) |
KVM: nVMX: fix msr bitmaps to prevent L2 from accessing L0 x2APIC
commit d048c098218e91ed0e10dfa1f0f80e2567fe4ef7 upstream.
msr bitmap can be used to avoid a VM exit (interception) on guest MSR
accesses. In some configurations of VMX controls, the guest can even
directly access host's x2APIC MSRs. See SDM 29.5 VIRTUALIZING MSR-BASED
APIC ACCESSES.
L2 could read all L0's x2APIC MSRs and write TPR, EOI, and SELF_IPI.
To do so, L1 would first trick KVM to disable all possible interceptions
by enabling APICv features and then would turn those features off;
nested_vmx_merge_msr_bitmap() only disabled interceptions, so VMX would
not intercept previously enabled MSRs even though they were not safe
with the new configuration.
Correctly re-enabling interceptions is not enough as a second bug would
still allow L1+L2 to access host's MSRs: msr bitmap was shared for all
VMCSs, so L1 could trigger a race to get the desired combination of msr
bitmap and VMX controls.
This fix allocates a msr bitmap for every L1 VCPU, allows only safe
x2APIC MSRs from L1's msr bitmap, and disables msr bitmaps if they would
have to intercept everything anyway.
Fixes: 3af18d9c5fe9 ("KVM: nVMX: Prepare for using hardware MSR bitmap")
Reported-by: Jim Mattson <jmattson@google.com>
Suggested-by: Wincy Van <fanwenyi0529@gmail.com>
Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
[bwh: Backported to 4.4:
- handle_vmon() doesn't allocate a cached vmcs12
- Adjust context]
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/kvm/vmx.c | 96 |
1 files changed, 38 insertions, 58 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c5a4b1978cbf..3df636400ec8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -431,6 +431,8 @@ struct nested_vmx { u16 posted_intr_nv; u64 msr_ia32_feature_control; + unsigned long *msr_bitmap; + struct hrtimer preemption_timer; bool preemption_timer_expired; @@ -912,7 +914,6 @@ static unsigned long *vmx_msr_bitmap_legacy; static unsigned long *vmx_msr_bitmap_longmode; static unsigned long *vmx_msr_bitmap_legacy_x2apic; static unsigned long *vmx_msr_bitmap_longmode_x2apic; -static unsigned long *vmx_msr_bitmap_nested; static unsigned long *vmx_vmread_bitmap; static unsigned long *vmx_vmwrite_bitmap; @@ -2358,7 +2359,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) unsigned long *msr_bitmap; if (is_guest_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_nested; + msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; else if (vcpu->arch.apic_base & X2APIC_ENABLE) { if (is_long_mode(vcpu)) msr_bitmap = vmx_msr_bitmap_longmode_x2apic; @@ -6192,13 +6193,6 @@ static __init int hardware_setup(void) if (!vmx_msr_bitmap_longmode_x2apic) goto out4; - if (nested) { - vmx_msr_bitmap_nested = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_nested) - goto out5; - } - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmread_bitmap) goto out6; @@ -6216,8 +6210,6 @@ static __init int hardware_setup(void) memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - if (nested) - memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE); if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; @@ -6354,9 +6346,6 @@ out8: out7: free_page((unsigned long)vmx_vmread_bitmap); out6: - if (nested) - free_page((unsigned long)vmx_msr_bitmap_nested); -out5: free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); out4: free_page((unsigned long)vmx_msr_bitmap_longmode); @@ -6382,8 +6371,6 @@ static __exit void hardware_unsetup(void) free_page((unsigned long)vmx_io_bitmap_a); free_page((unsigned long)vmx_vmwrite_bitmap); free_page((unsigned long)vmx_vmread_bitmap); - if (nested) - free_page((unsigned long)vmx_msr_bitmap_nested); free_kvm_area(); } @@ -6825,10 +6812,17 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } + if (cpu_has_vmx_msr_bitmap()) { + vmx->nested.msr_bitmap = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx->nested.msr_bitmap) + goto out_msr_bitmap; + } + if (enable_shadow_vmcs) { shadow_vmcs = alloc_vmcs(); if (!shadow_vmcs) - return -ENOMEM; + goto out_shadow_vmcs; /* mark vmcs as shadow */ shadow_vmcs->revision_id |= (1u << 31); /* init shadow vmcs */ @@ -6850,6 +6844,12 @@ static int handle_vmon(struct kvm_vcpu *vcpu) skip_emulated_instruction(vcpu); nested_vmx_succeed(vcpu); return 1; + +out_shadow_vmcs: + free_page((unsigned long)vmx->nested.msr_bitmap); + +out_msr_bitmap: + return -ENOMEM; } /* @@ -6919,6 +6919,10 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.vmxon = false; free_vpid(vmx->nested.vpid02); nested_release_vmcs12(vmx); + if (vmx->nested.msr_bitmap) { + free_page((unsigned long)vmx->nested.msr_bitmap); + vmx->nested.msr_bitmap = NULL; + } if (enable_shadow_vmcs) free_vmcs(vmx->nested.current_shadow_vmcs); /* Unpin physical memory we referred to in current vmcs02 */ @@ -9248,8 +9252,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, { int msr; struct page *page; - unsigned long *msr_bitmap; + unsigned long *msr_bitmap_l1; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; + /* This shortcut is ok because we support only x2APIC MSRs so far. */ if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) return false; @@ -9258,58 +9264,32 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, WARN_ON(1); return false; } - msr_bitmap = (unsigned long *)kmap(page); + msr_bitmap_l1 = (unsigned long *)kmap(page); + + memset(msr_bitmap_l0, 0xff, PAGE_SIZE); if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { if (nested_cpu_has_apic_reg_virt(vmcs12)) for (msr = 0x800; msr <= 0x8ff; msr++) nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, msr, MSR_TYPE_R); - /* TPR is allowed */ - nested_vmx_disable_intercept_for_msr(msr_bitmap, - vmx_msr_bitmap_nested, + + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_TASKPRI >> 4), MSR_TYPE_R | MSR_TYPE_W); + if (nested_cpu_has_vid(vmcs12)) { - /* EOI and self-IPI are allowed */ nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_EOI >> 4), MSR_TYPE_W); nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_SELF_IPI >> 4), MSR_TYPE_W); } - } else { - /* - * Enable reading intercept of all the x2apic - * MSRs. We should not rely on vmcs12 to do any - * optimizations here, it may have been modified - * by L1. - */ - for (msr = 0x800; msr <= 0x8ff; msr++) - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - msr, - MSR_TYPE_R); - - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_TASKPRI >> 4), - MSR_TYPE_W); - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_EOI >> 4), - MSR_TYPE_W); - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_SELF_IPI >> 4), - MSR_TYPE_W); } kunmap(page); nested_release_page_clean(page); @@ -9729,10 +9709,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) } if (cpu_has_vmx_msr_bitmap() && - exec_control & CPU_BASED_USE_MSR_BITMAPS) { - nested_vmx_merge_msr_bitmap(vcpu, vmcs12); - /* MSR_BITMAP will be set by following vmx_set_efer. */ - } else + exec_control & CPU_BASED_USE_MSR_BITMAPS && + nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) + ; /* MSR_BITMAP will be set by following vmx_set_efer. */ + else exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; /* |