Re: [PATCH 09/13] KVM: x86: save/load state on SMM switch

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Paolo Bonzini <pbonzini@xxxxxxxxxx> writes:

> +static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_cpuid_entry2 *best;
> +
> +	best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
> +	return best && (best->edx & bit(X86_FEATURE_LM));
> +}
> +

We could combine all guest_cpuid_has* functions into a single
function guest_cpuid_has_feature(vcpu, feature) and avoid code duplication.
Not relevant to this change - just a note (to self).

>  static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_cpuid_entry2 *best;
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index f6b641207416..a49606871277 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -2262,12 +2262,253 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
>  	return rc;
>  }
>  
> +static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
> +{
> +	u32 eax, ebx, ecx, edx;
> +
> +	eax = 0x80000001;
> +	ecx = 0;
> +	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
> +	return edx & bit(X86_FEATURE_LM);
> +}
> +
> +#define get_smstate(type, smbase, offset)				  \
> +	({								  \
> +	 type __val;							  \
> +	 int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val,       \
> +				     sizeof(__val), NULL);		  \
> +	 if (r != X86EMUL_CONTINUE)					  \
> +		 return X86EMUL_UNHANDLEABLE;				  \
> +	 __val;								  \
> +	})
> +
> +static void rsm_set_desc_flags(struct desc_struct *desc, u16 flags)
> +{
> +	desc->g    = (flags >> 15) & 1;
> +	desc->d    = (flags >> 14) & 1;
> +	desc->l    = (flags >> 13) & 1;
> +	desc->avl  = (flags >> 12) & 1;
> +	desc->p    = (flags >> 7) & 1;
> +	desc->dpl  = (flags >> 5) & 3;
> +	desc->s    = (flags >> 4) & 1;
> +	desc->type = flags & 15;
> +}
> +
> +static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
> +{
> +	struct desc_struct desc;
> +	int offset;
> +	u16 selector;
> +
> +	selector = get_smstate(u32, smbase, 0x7fa8 + n * 4);
Probably a good idea to use #defines for all the offsets here
and elsewhere. 

> +
> +	if (n < 3)
> +		offset = 0x7f84 + n * 12;
> +	else
> +		offset = 0x7f2c + (n - 3) * 12;
> +
> +	set_desc_base(&desc,      get_smstate(u32, smbase, offset + 8));
> +	set_desc_limit(&desc,     get_smstate(u32, smbase, offset + 4));
> +	rsm_set_desc_flags(&desc, get_smstate(u32, smbase, offset));
> +	ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
> +	return X86EMUL_CONTINUE;
> +}
> +
> +static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
> +{
> +	struct desc_struct desc;
> +	int offset;
> +	u16 selector;
> +	u32 base3;
> +
> +	offset = 0x7e00 + n * 16;
> +
> +	selector =                get_smstate(u16, smbase, offset);
> +	rsm_set_desc_flags(&desc, get_smstate(u16, smbase, offset + 2));
> +	set_desc_limit(&desc,     get_smstate(u32, smbase, offset + 4));
> +	set_desc_base(&desc,      get_smstate(u32, smbase, offset + 8));
> +	base3 =                   get_smstate(u32, smbase, offset + 12);
> +
> +	ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
> +	return X86EMUL_CONTINUE;
> +}
> +
> +static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
> +				     u64 cr0, u64 cr4)
> +{
> +	int bad;
> +
> +	/*
> +	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
> +	 * Then enable protected mode.	However, PCID cannot be enabled
> +	 * if EFER.LMA=0, so set it separately.
> +	 */
> +	bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
> +	if (bad)
> +		return X86EMUL_UNHANDLEABLE;
> +
> +	bad = ctxt->ops->set_cr(ctxt, 0, cr0);
> +	if (bad)
> +		return X86EMUL_UNHANDLEABLE;
> +
> +	if (cr4 & X86_CR4_PCIDE) {
> +		bad = ctxt->ops->set_cr(ctxt, 4, cr4);
> +		if (bad)
> +			return X86EMUL_UNHANDLEABLE;
> +	}
> +
> +	return X86EMUL_CONTINUE;
> +}
> +
> +static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
> +{
> +	struct desc_struct desc;
> +	struct desc_ptr dt;
> +	u16 selector;
> +	u32 val, cr0, cr4;
> +	int i;
> +
> +	cr0 =                      get_smstate(u32, smbase, 0x7ffc);
> +	ctxt->ops->set_cr(ctxt, 3, get_smstate(u32, smbase, 0x7ff8));
> +	ctxt->eflags =             get_smstate(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
> +	ctxt->_eip =               get_smstate(u32, smbase, 0x7ff0);
> +
> +	for (i = 0; i < 8; i++)
> +		*reg_write(ctxt, i) = get_smstate(u32, smbase, 0x7fd0 + i * 4);
> +
> +	val = get_smstate(u32, smbase, 0x7fcc);
> +	ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
> +	val = get_smstate(u32, smbase, 0x7fc8);
> +	ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
> +
> +	selector =                 get_smstate(u32, smbase, 0x7fc4);
> +	set_desc_base(&desc,       get_smstate(u32, smbase, 0x7f64));
> +	set_desc_limit(&desc,      get_smstate(u32, smbase, 0x7f60));
> +	rsm_set_desc_flags(&desc,  get_smstate(u32, smbase, 0x7f5c));
> +	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
> +
> +	selector =                 get_smstate(u32, smbase, 0x7fc0);
> +	set_desc_base(&desc,       get_smstate(u32, smbase, 0x7f80));
> +	set_desc_limit(&desc,      get_smstate(u32, smbase, 0x7f7c));
> +	rsm_set_desc_flags(&desc,  get_smstate(u32, smbase, 0x7f78));
> +	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
> +
> +	dt.address =               get_smstate(u32, smbase, 0x7f74);
> +	dt.size =                  get_smstate(u32, smbase, 0x7f70);
> +	ctxt->ops->set_gdt(ctxt, &dt);
> +
> +	dt.address =               get_smstate(u32, smbase, 0x7f58);
> +	dt.size =                  get_smstate(u32, smbase, 0x7f54);
> +	ctxt->ops->set_idt(ctxt, &dt);
> +
> +	for (i = 0; i < 6; i++) {
> +		int r = rsm_load_seg_32(ctxt, smbase, i);
> +		if (r != X86EMUL_CONTINUE)
> +			return r;
This return is redundant since rsm_load_seg_32 always returns
success. Same for rsm_load_seg_64()

> +	}
> +
> +	cr4 = get_smstate(u32, smbase, 0x7f14);
> +
> +	ctxt->ops->set_smbase(ctxt, get_smstate(u32, smbase, 0x7ef8));
> +
> +	return rsm_enter_protected_mode(ctxt, cr0, cr4);
> +}
> +
> +static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
> +{
> +	struct desc_struct desc;
> +	struct desc_ptr dt;
> +	u64 val, cr0, cr4;
> +	u32 base3;
> +	u16 selector;
> +	int i;
> +
> +	for (i = 0; i < 16; i++)
> +		*reg_write(ctxt, i) = get_smstate(u64, smbase, 0x7ff8 - i * 8);
> +
> +	ctxt->_eip   = get_smstate(u64, smbase, 0x7f78);
> +	ctxt->eflags = get_smstate(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
> +
> +	val = get_smstate(u32, smbase, 0x7f68);
> +	ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
> +	val = get_smstate(u32, smbase, 0x7f60);
> +	ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
> +
> +	cr0 =                       get_smstate(u64, smbase, 0x7f58);
> +	ctxt->ops->set_cr(ctxt, 3,  get_smstate(u64, smbase, 0x7f50));
> +	cr4 =                       get_smstate(u64, smbase, 0x7f48);
> +	ctxt->ops->set_smbase(ctxt, get_smstate(u32, smbase, 0x7f00));
> +	val =                       get_smstate(u64, smbase, 0x7ed0);
> +	ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
> +
> +	selector =                  get_smstate(u32, smbase, 0x7e90);
> +	rsm_set_desc_flags(&desc,   get_smstate(u32, smbase, 0x7e92));
> +	set_desc_limit(&desc,       get_smstate(u32, smbase, 0x7e94));
> +	set_desc_base(&desc,        get_smstate(u32, smbase, 0x7e98));
> +	base3 =                     get_smstate(u32, smbase, 0x7e9c);
> +	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
> +
> +	dt.size =                   get_smstate(u32, smbase, 0x7e84);
> +	dt.address =                get_smstate(u64, smbase, 0x7e88);
> +	ctxt->ops->set_idt(ctxt, &dt);
> +
> +	selector =                  get_smstate(u32, smbase, 0x7e70);
> +	rsm_set_desc_flags(&desc,   get_smstate(u32, smbase, 0x7e72));
> +	set_desc_limit(&desc,       get_smstate(u32, smbase, 0x7e74));
> +	set_desc_base(&desc,        get_smstate(u32, smbase, 0x7e78));
> +	base3 =                     get_smstate(u32, smbase, 0x7e7c);
> +	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
> +
> +	dt.size =                   get_smstate(u32, smbase, 0x7e64);
> +	dt.address =                get_smstate(u64, smbase, 0x7e68);
> +	ctxt->ops->set_gdt(ctxt, &dt);
> +
> +	for (i = 0; i < 6; i++) {
> +		int r = rsm_load_seg_64(ctxt, smbase, i);
> +		if (r != X86EMUL_CONTINUE)
> +			return r;
> +	}
> +
> +	return rsm_enter_protected_mode(ctxt, cr0, cr4);
> +}
> +
>  static int em_rsm(struct x86_emulate_ctxt *ctxt)
>  {
> +	unsigned long cr0, cr4, efer;
> +	u64 smbase;
> +	int ret;
> +
> +	printk("rsm\n");
>  	if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
>  		return emulate_ud(ctxt);
>  
> -	return X86EMUL_UNHANDLEABLE;
> +	/*
> +	 * Get back to real mode, to prepare a safe state in which
> +	 * to load CR0/CR3/CR4/EFER.  Also this will ensure that
> +	 * addresses passed to read_std/write_std are not virtual.
> +	 */
I am trying to understand this. Aren't we are already in real mode
here since we are in smm or am I missing something..

> +	cr0 = ctxt->ops->get_cr(ctxt, 0);
> +	if (cr0 & X86_CR0_PE)
> +		ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
> +	cr4 = ctxt->ops->get_cr(ctxt, 4);
> +	if (cr0 & X86_CR4_PAE)
> +		ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
> +	efer = 0;
> +	ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
> +
> +	ctxt->ops->get_msr(ctxt, MSR_IA32_SMBASE, &smbase);
> +	if (emulator_has_longmode(ctxt))
> +		ret = rsm_load_state_64(ctxt, smbase + 0x8000);
> +	else
> +		ret = rsm_load_state_32(ctxt, smbase + 0x8000);
> +
> +	if (ret != X86EMUL_CONTINUE) {
> +		/* FIXME: should triple fault */
> +		return X86EMUL_UNHANDLEABLE;
> +	}
> +
> +	ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
> +	return X86EMUL_CONTINUE;
>  }
>  
>  static void
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 4cd7a2a18e93..ab6a38617813 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6232,12 +6232,226 @@ static void process_nmi(struct kvm_vcpu *vcpu)
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
>  }
>  
> +#define put_smstate(type, buf, offset, val)			  \
> +	*(type *)((buf) + (offset) - 0x7e00) = val
> +
> +static u16 process_smi_get_segment_flags(struct kvm_segment *seg)
> +{
> +	u16 flags = 0;
> +	flags |= seg->g       << 15;
> +	flags |= seg->db      << 14;
> +	flags |= seg->l       << 13;
> +	flags |= seg->avl     << 12;
> +	flags |= seg->present << 7;
> +	flags |= seg->dpl     << 5;
> +	flags |= seg->s       << 4;
> +	flags |= seg->type;
> +	return flags;
> +}
> +
> +static void process_smi_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
> +{
> +	struct kvm_segment seg;
> +	int offset;
> +
> +	kvm_get_segment(vcpu, &seg, n);
> +	put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
> +
> +	if (n < 3)
> +		offset = 0x7f84 + n * 12;
> +	else
> +		offset = 0x7f2c + (n - 3) * 12;
> +
> +	put_smstate(u32, buf, offset + 8, seg.base);
> +	put_smstate(u32, buf, offset + 4, seg.limit);
> +	put_smstate(u32, buf, offset, process_smi_get_segment_flags(&seg));
> +}
> +
> +static void process_smi_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
> +{
> +	struct kvm_segment seg;
> +	int offset;
> +
> +	kvm_get_segment(vcpu, &seg, n);
> +	offset = 0x7e00 + n * 16;
> +
> +	put_smstate(u16, buf, offset, seg.selector);
> +	put_smstate(u16, buf, offset + 2, process_smi_get_segment_flags(&seg));
> +	put_smstate(u32, buf, offset + 4, seg.limit);
> +	put_smstate(u64, buf, offset + 8, seg.base);
> +}
> +
> +static void process_smi_save_state_32(struct kvm_vcpu *vcpu, char *buf)
> +{
> +	struct desc_ptr dt;
> +	struct kvm_segment seg;
> +	unsigned long val;
> +	int i;
> +
> +	put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
> +	put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
> +	put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
> +	put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
> +
> +	for (i = 0; i < 8; i++)
> +		put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
> +
> +	kvm_get_dr(vcpu, 6, &val);
> +	put_smstate(u32, buf, 0x7fcc, (u32)val);
> +	kvm_get_dr(vcpu, 7, &val);
> +	put_smstate(u32, buf, 0x7fc8, (u32)val);
> +
> +	kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
> +	put_smstate(u32, buf, 0x7fc4, seg.selector);
> +	put_smstate(u32, buf, 0x7f64, seg.base);
> +	put_smstate(u32, buf, 0x7f60, seg.limit);
> +	put_smstate(u32, buf, 0x7f5c, process_smi_get_segment_flags(&seg));
> +
> +	kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
> +	put_smstate(u32, buf, 0x7fc0, seg.selector);
> +	put_smstate(u32, buf, 0x7f80, seg.base);
> +	put_smstate(u32, buf, 0x7f7c, seg.limit);
> +	put_smstate(u32, buf, 0x7f78, process_smi_get_segment_flags(&seg));
> +
> +	kvm_x86_ops->get_gdt(vcpu, &dt);
> +	put_smstate(u32, buf, 0x7f74, dt.address);
> +	put_smstate(u32, buf, 0x7f70, dt.size);
> +
> +	kvm_x86_ops->get_idt(vcpu, &dt);
> +	put_smstate(u32, buf, 0x7f58, dt.address);
> +	put_smstate(u32, buf, 0x7f54, dt.size);
> +
> +	for (i = 0; i < 6; i++)
> +		process_smi_save_seg_32(vcpu, buf, i);
> +
> +	put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
> +
> +	/* revision id */
> +	put_smstate(u32, buf, 0x7efc, 0x00020000);
> +	put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
> +}
> +
> +static void process_smi_save_state_64(struct kvm_vcpu *vcpu, char *buf)
> +{
> +#ifdef CONFIG_X86_64
> +	struct desc_ptr dt;
> +	struct kvm_segment seg;
> +	unsigned long val;
> +	int i;
> +
> +	for (i = 0; i < 16; i++)
> +		put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
> +
> +	put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
> +	put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
> +
> +	kvm_get_dr(vcpu, 6, &val);
> +	put_smstate(u64, buf, 0x7f68, val);
> +	kvm_get_dr(vcpu, 7, &val);
> +	put_smstate(u64, buf, 0x7f60, val);
> +
> +	put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
> +	put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
> +	put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
> +
> +	put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
> +
> +	/* revision id */
> +	put_smstate(u32, buf, 0x7efc, 0x00020064);
Is the revision id (and  0x00020000 for process_smi*_32()) from the
spec ? I can't seem to find them.

Bandan
...
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux