Paolo Bonzini <pbonzini@xxxxxxxxxx> writes: > +static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu) > +{ > + struct kvm_cpuid_entry2 *best; > + > + best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); > + return best && (best->edx & bit(X86_FEATURE_LM)); > +} > + We could combine all guest_cpuid_has* functions into a single function guest_cpuid_has_feature(vcpu, feature) and avoid code duplication. Not relevant to this change - just a note (to self). > static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) > { > struct kvm_cpuid_entry2 *best; > diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c > index f6b641207416..a49606871277 100644 > --- a/arch/x86/kvm/emulate.c > +++ b/arch/x86/kvm/emulate.c > @@ -2262,12 +2262,253 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt) > return rc; > } > > +static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) > +{ > + u32 eax, ebx, ecx, edx; > + > + eax = 0x80000001; > + ecx = 0; > + ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); > + return edx & bit(X86_FEATURE_LM); > +} > + > +#define get_smstate(type, smbase, offset) \ > + ({ \ > + type __val; \ > + int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val, \ > + sizeof(__val), NULL); \ > + if (r != X86EMUL_CONTINUE) \ > + return X86EMUL_UNHANDLEABLE; \ > + __val; \ > + }) > + > +static void rsm_set_desc_flags(struct desc_struct *desc, u16 flags) > +{ > + desc->g = (flags >> 15) & 1; > + desc->d = (flags >> 14) & 1; > + desc->l = (flags >> 13) & 1; > + desc->avl = (flags >> 12) & 1; > + desc->p = (flags >> 7) & 1; > + desc->dpl = (flags >> 5) & 3; > + desc->s = (flags >> 4) & 1; > + desc->type = flags & 15; > +} > + > +static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) > +{ > + struct desc_struct desc; > + int offset; > + u16 selector; > + > + selector = get_smstate(u32, smbase, 0x7fa8 + n * 4); Probably a good idea to use #defines for all the offsets here and elsewhere. 
> + > + if (n < 3) > + offset = 0x7f84 + n * 12; > + else > + offset = 0x7f2c + (n - 3) * 12; > + > + set_desc_base(&desc, get_smstate(u32, smbase, offset + 8)); > + set_desc_limit(&desc, get_smstate(u32, smbase, offset + 4)); > + rsm_set_desc_flags(&desc, get_smstate(u32, smbase, offset)); > + ctxt->ops->set_segment(ctxt, selector, &desc, 0, n); > + return X86EMUL_CONTINUE; > +} > + > +static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) > +{ > + struct desc_struct desc; > + int offset; > + u16 selector; > + u32 base3; > + > + offset = 0x7e00 + n * 16; > + > + selector = get_smstate(u16, smbase, offset); > + rsm_set_desc_flags(&desc, get_smstate(u16, smbase, offset + 2)); > + set_desc_limit(&desc, get_smstate(u32, smbase, offset + 4)); > + set_desc_base(&desc, get_smstate(u32, smbase, offset + 8)); > + base3 = get_smstate(u32, smbase, offset + 12); > + > + ctxt->ops->set_segment(ctxt, selector, &desc, base3, n); > + return X86EMUL_CONTINUE; > +} > + > +static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, > + u64 cr0, u64 cr4) > +{ > + int bad; > + > + /* > + * First enable PAE, long mode needs it before CR0.PG = 1 is set. > + * Then enable protected mode. However, PCID cannot be enabled > + * if EFER.LMA=0, so set it separately. 
> + */ > + bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); > + if (bad) > + return X86EMUL_UNHANDLEABLE; > + > + bad = ctxt->ops->set_cr(ctxt, 0, cr0); > + if (bad) > + return X86EMUL_UNHANDLEABLE; > + > + if (cr4 & X86_CR4_PCIDE) { > + bad = ctxt->ops->set_cr(ctxt, 4, cr4); > + if (bad) > + return X86EMUL_UNHANDLEABLE; > + } > + > + return X86EMUL_CONTINUE; > +} > + > +static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) > +{ > + struct desc_struct desc; > + struct desc_ptr dt; > + u16 selector; > + u32 val, cr0, cr4; > + int i; > + > + cr0 = get_smstate(u32, smbase, 0x7ffc); > + ctxt->ops->set_cr(ctxt, 3, get_smstate(u32, smbase, 0x7ff8)); > + ctxt->eflags = get_smstate(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; > + ctxt->_eip = get_smstate(u32, smbase, 0x7ff0); > + > + for (i = 0; i < 8; i++) > + *reg_write(ctxt, i) = get_smstate(u32, smbase, 0x7fd0 + i * 4); > + > + val = get_smstate(u32, smbase, 0x7fcc); > + ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); > + val = get_smstate(u32, smbase, 0x7fc8); > + ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); > + > + selector = get_smstate(u32, smbase, 0x7fc4); > + set_desc_base(&desc, get_smstate(u32, smbase, 0x7f64)); > + set_desc_limit(&desc, get_smstate(u32, smbase, 0x7f60)); > + rsm_set_desc_flags(&desc, get_smstate(u32, smbase, 0x7f5c)); > + ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR); > + > + selector = get_smstate(u32, smbase, 0x7fc0); > + set_desc_base(&desc, get_smstate(u32, smbase, 0x7f80)); > + set_desc_limit(&desc, get_smstate(u32, smbase, 0x7f7c)); > + rsm_set_desc_flags(&desc, get_smstate(u32, smbase, 0x7f78)); > + ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR); > + > + dt.address = get_smstate(u32, smbase, 0x7f74); > + dt.size = get_smstate(u32, smbase, 0x7f70); > + ctxt->ops->set_gdt(ctxt, &dt); > + > + dt.address = get_smstate(u32, smbase, 0x7f58); > + dt.size = get_smstate(u32, smbase, 0x7f54); > + 
ctxt->ops->set_idt(ctxt, &dt); > + > + for (i = 0; i < 6; i++) { > + int r = rsm_load_seg_32(ctxt, smbase, i); > + if (r != X86EMUL_CONTINUE) > + return r; This return is redundant since rsm_load_seg_32 always returns success. Same for rsm_load_seg_64() > + } > + > + cr4 = get_smstate(u32, smbase, 0x7f14); > + > + ctxt->ops->set_smbase(ctxt, get_smstate(u32, smbase, 0x7ef8)); > + > + return rsm_enter_protected_mode(ctxt, cr0, cr4); > +} > + > +static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) > +{ > + struct desc_struct desc; > + struct desc_ptr dt; > + u64 val, cr0, cr4; > + u32 base3; > + u16 selector; > + int i; > + > + for (i = 0; i < 16; i++) > + *reg_write(ctxt, i) = get_smstate(u64, smbase, 0x7ff8 - i * 8); > + > + ctxt->_eip = get_smstate(u64, smbase, 0x7f78); > + ctxt->eflags = get_smstate(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED; > + > + val = get_smstate(u32, smbase, 0x7f68); > + ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); > + val = get_smstate(u32, smbase, 0x7f60); > + ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); > + > + cr0 = get_smstate(u64, smbase, 0x7f58); > + ctxt->ops->set_cr(ctxt, 3, get_smstate(u64, smbase, 0x7f50)); > + cr4 = get_smstate(u64, smbase, 0x7f48); > + ctxt->ops->set_smbase(ctxt, get_smstate(u32, smbase, 0x7f00)); > + val = get_smstate(u64, smbase, 0x7ed0); > + ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA); > + > + selector = get_smstate(u32, smbase, 0x7e90); > + rsm_set_desc_flags(&desc, get_smstate(u32, smbase, 0x7e92)); > + set_desc_limit(&desc, get_smstate(u32, smbase, 0x7e94)); > + set_desc_base(&desc, get_smstate(u32, smbase, 0x7e98)); > + base3 = get_smstate(u32, smbase, 0x7e9c); > + ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR); > + > + dt.size = get_smstate(u32, smbase, 0x7e84); > + dt.address = get_smstate(u64, smbase, 0x7e88); > + ctxt->ops->set_idt(ctxt, &dt); > + > + selector = get_smstate(u32, smbase, 0x7e70); > + 
rsm_set_desc_flags(&desc, get_smstate(u32, smbase, 0x7e72)); > + set_desc_limit(&desc, get_smstate(u32, smbase, 0x7e74)); > + set_desc_base(&desc, get_smstate(u32, smbase, 0x7e78)); > + base3 = get_smstate(u32, smbase, 0x7e7c); > + ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR); > + > + dt.size = get_smstate(u32, smbase, 0x7e64); > + dt.address = get_smstate(u64, smbase, 0x7e68); > + ctxt->ops->set_gdt(ctxt, &dt); > + > + for (i = 0; i < 6; i++) { > + int r = rsm_load_seg_64(ctxt, smbase, i); > + if (r != X86EMUL_CONTINUE) > + return r; > + } > + > + return rsm_enter_protected_mode(ctxt, cr0, cr4); > +} > + > static int em_rsm(struct x86_emulate_ctxt *ctxt) > { > + unsigned long cr0, cr4, efer; > + u64 smbase; > + int ret; > + > + printk("rsm\n"); > if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0) > return emulate_ud(ctxt); > > - return X86EMUL_UNHANDLEABLE; > + /* > + * Get back to real mode, to prepare a safe state in which > + * to load CR0/CR3/CR4/EFER. Also this will ensure that > + * addresses passed to read_std/write_std are not virtual. > + */ I am trying to understand this. Aren't we already in real mode here, since we are in SMM? Or am I missing something? 
> + cr0 = ctxt->ops->get_cr(ctxt, 0); > + if (cr0 & X86_CR0_PE) > + ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); > + cr4 = ctxt->ops->get_cr(ctxt, 4); > + if (cr0 & X86_CR4_PAE) > + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); > + efer = 0; > + ctxt->ops->set_msr(ctxt, MSR_EFER, efer); > + > + ctxt->ops->get_msr(ctxt, MSR_IA32_SMBASE, &smbase); > + if (emulator_has_longmode(ctxt)) > + ret = rsm_load_state_64(ctxt, smbase + 0x8000); > + else > + ret = rsm_load_state_32(ctxt, smbase + 0x8000); > + > + if (ret != X86EMUL_CONTINUE) { > + /* FIXME: should triple fault */ > + return X86EMUL_UNHANDLEABLE; > + } > + > + ctxt->emul_flags &= ~X86EMUL_SMM_MASK; > + return X86EMUL_CONTINUE; > } > > static void > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 4cd7a2a18e93..ab6a38617813 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -6232,12 +6232,226 @@ static void process_nmi(struct kvm_vcpu *vcpu) > kvm_make_request(KVM_REQ_EVENT, vcpu); > } > > +#define put_smstate(type, buf, offset, val) \ > + *(type *)((buf) + (offset) - 0x7e00) = val > + > +static u16 process_smi_get_segment_flags(struct kvm_segment *seg) > +{ > + u16 flags = 0; > + flags |= seg->g << 15; > + flags |= seg->db << 14; > + flags |= seg->l << 13; > + flags |= seg->avl << 12; > + flags |= seg->present << 7; > + flags |= seg->dpl << 5; > + flags |= seg->s << 4; > + flags |= seg->type; > + return flags; > +} > + > +static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n) > +{ > + struct kvm_segment seg; > + int offset; > + > + kvm_get_segment(vcpu, &seg, n); > + put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector); > + > + if (n < 3) > + offset = 0x7f84 + n * 12; > + else > + offset = 0x7f2c + (n - 3) * 12; > + > + put_smstate(u32, buf, offset + 8, seg.base); > + put_smstate(u32, buf, offset + 4, seg.limit); > + put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg)); > +} > + > +static void process_smi_save_seg_64(struct kvm_vcpu 
*vcpu, char *buf, int n) > +{ > + struct kvm_segment seg; > + int offset; > + > + kvm_get_segment(vcpu, &seg, n); > + offset = 0x7e00 + n * 16; > + > + put_smstate(u16, buf, offset, seg.selector); > + put_smstate(u16, buf, offset + 2, process_smi_get_segment_flags(&seg)); > + put_smstate(u32, buf, offset + 4, seg.limit); > + put_smstate(u64, buf, offset + 8, seg.base); > +} > + > +static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf) > +{ > + struct desc_ptr dt; > + struct kvm_segment seg; > + unsigned long val; > + int i; > + > + put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu)); > + put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu)); > + put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu)); > + put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu)); > + > + for (i = 0; i < 8; i++) > + put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i)); > + > + kvm_get_dr(vcpu, 6, &val); > + put_smstate(u32, buf, 0x7fcc, (u32)val); > + kvm_get_dr(vcpu, 7, &val); > + put_smstate(u32, buf, 0x7fc8, (u32)val); > + > + kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); > + put_smstate(u32, buf, 0x7fc4, seg.selector); > + put_smstate(u32, buf, 0x7f64, seg.base); > + put_smstate(u32, buf, 0x7f60, seg.limit); > + put_smstate(u32, buf, 0x7f5c, process_smi_get_segment_flags(&seg)); > + > + kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); > + put_smstate(u32, buf, 0x7fc0, seg.selector); > + put_smstate(u32, buf, 0x7f80, seg.base); > + put_smstate(u32, buf, 0x7f7c, seg.limit); > + put_smstate(u32, buf, 0x7f78, process_smi_get_segment_flags(&seg)); > + > + kvm_x86_ops->get_gdt(vcpu, &dt); > + put_smstate(u32, buf, 0x7f74, dt.address); > + put_smstate(u32, buf, 0x7f70, dt.size); > + > + kvm_x86_ops->get_idt(vcpu, &dt); > + put_smstate(u32, buf, 0x7f58, dt.address); > + put_smstate(u32, buf, 0x7f54, dt.size); > + > + for (i = 0; i < 6; i++) > + process_smi_save_seg_32(vcpu, buf, i); > + > + put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu)); > + > + /* revision id */ > + 
put_smstate(u32, buf, 0x7efc, 0x00020000); > + put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase); > +} > + > +static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf) > +{ > +#ifdef CONFIG_X86_64 > + struct desc_ptr dt; > + struct kvm_segment seg; > + unsigned long val; > + int i; > + > + for (i = 0; i < 16; i++) > + put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i)); > + > + put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu)); > + put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu)); > + > + kvm_get_dr(vcpu, 6, &val); > + put_smstate(u64, buf, 0x7f68, val); > + kvm_get_dr(vcpu, 7, &val); > + put_smstate(u64, buf, 0x7f60, val); > + > + put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu)); > + put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu)); > + put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu)); > + > + put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase); > + > + /* revision id */ > + put_smstate(u32, buf, 0x7efc, 0x00020064); Is the revision id (and 0x00020000 for process_smi*_32()) from the spec? I can't seem to find them. Bandan ... -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html