Quoting Michael Roth (2020-11-05 10:24:37) > Quoting Joerg Roedel (2020-09-07 08:15:02) > > From: Joerg Roedel <jroedel@xxxxxxx> > > > > Do not allocate a vmcb_control_area and a vmcb_save_area on the stack, > > as these structures will become larger with future extenstions of > > SVM and thus the svm_set_nested_state() function will become a too large > > stack frame. > > > > Signed-off-by: Joerg Roedel <jroedel@xxxxxxx> > > --- > > arch/x86/kvm/svm/nested.c | 47 +++++++++++++++++++++++++++------------ > > 1 file changed, 33 insertions(+), 14 deletions(-) > > > > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c > > index fb68467e6049..28036629abf8 100644 > > --- a/arch/x86/kvm/svm/nested.c > > +++ b/arch/x86/kvm/svm/nested.c > > @@ -1060,10 +1060,14 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, > > struct vmcb *hsave = svm->nested.hsave; > > struct vmcb __user *user_vmcb = (struct vmcb __user *) > > &user_kvm_nested_state->data.svm[0]; > > - struct vmcb_control_area ctl; > > - struct vmcb_save_area save; > > + struct vmcb_control_area *ctl; > > + struct vmcb_save_area *save; > > + int ret; > > u32 cr0; > > > > + BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) > > > + KVM_STATE_NESTED_SVM_VMCB_SIZE); > > + > > if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM) > > return -EINVAL; > > > > @@ -1095,13 +1099,22 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, > > return -EINVAL; > > if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE) > > return -EINVAL; > > - if (copy_from_user(&ctl, &user_vmcb->control, sizeof(ctl))) > > - return -EFAULT; > > - if (copy_from_user(&save, &user_vmcb->save, sizeof(save))) > > - return -EFAULT; > > > > - if (!nested_vmcb_check_controls(&ctl)) > > - return -EINVAL; > > + ret = -ENOMEM; > > + ctl = kzalloc(sizeof(*ctl), GFP_KERNEL); > > + save = kzalloc(sizeof(*save), GFP_KERNEL); > > + if (!ctl || !save) > > + goto out_free; > > + > > + ret = -EFAULT; > > + if (copy_from_user(ctl, &user_vmcb->control, sizeof(*ctl))) > > + goto out_free; > > + if (copy_from_user(save, &user_vmcb->save, sizeof(*save))) > > + goto out_free; > > + > > + ret = -EINVAL; > > + if (!nested_vmcb_check_controls(ctl)) > > + goto out_free; > > > > /* > > * Processor state contains L2 state. Check that it is > > @@ -1109,15 +1122,15 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, > > */ > > cr0 = kvm_read_cr0(vcpu); > > if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW)) > > - return -EINVAL; > > + goto out_free; > > > > /* > > * Validate host state saved from before VMRUN (see > > * nested_svm_check_permissions). > > * TODO: validate reserved bits for all saved state. > > */ > > - if (!(save.cr0 & X86_CR0_PG)) > > - return -EINVAL; > > + if (!(save->cr0 & X86_CR0_PG)) > > + goto out_free; > > > > /* > > * All checks done, we can enter guest mode. L1 control fields > > @@ -1126,15 +1139,21 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, > > * contains saved L1 state. > > */ > > copy_vmcb_control_area(&hsave->control, &svm->vmcb->control); > > - hsave->save = save; > > + hsave->save = *save; > > > > svm->nested.vmcb = kvm_state->hdr.svm.vmcb_pa; > > - load_nested_vmcb_control(svm, &ctl); > > + load_nested_vmcb_control(svm, ctl); > > nested_prepare_vmcb_control(svm); > > > > out_set_gif: > > svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); > > - return 0; > > + > > + ret = 0; > > +out_free: > > + kfree(save); > > + kfree(ctl); > > This change seems to trigger a crash via smm-test.c (and state-test.c) KVM > selftest when we call vcpu_load_state->KVM_SET_NESTED_STATE. I think what's > happening is we are hitting the 'goto out_set_gif;' and then attempting to > free save and ctl, which are still uninitialized at that point: Sorry, looks like this one was already fixed upstream by d5cd6f34014592a232ce79dc25e295778bd43c22 Please ignore. > > [ 1999.801176] APIC base relocation is unsupported by KVM > [ 1999.828562] BUG: unable to handle page fault for address: > fffff12379020288 > [ 1999.841968] #PF: supervisor read access in kernel mode > [ 1999.847693] #PF: error_code(0x0000) - not-present page > [ 1999.853426] PGD 0 P4D 0 > [ 1999.856252] Oops: 0000 [#1] SMP NOPTI > [ 1999.861366] CPU: 112 PID: 10162 Comm: smm_test Tainted: G > E 5.9.1-amdsos-build31t0+ #1 > [ 1999.871655] Hardware name: AMD Corporation ETHANOL_X/ETHANOL_X, BIOS > RXM0092B 10/27/2020 > [ 1999.880694] RIP: 0010:kfree+0x5b/0x3c0 > [ 1999.884876] Code: 80 49 01 dc 0f 82 70 03 00 00 48 c7 c0 00 00 00 80 > 48 2b 05 97 4a 1c 01 49 01 c4 49 c1 ec 0c 49 c1 e4 06 4c 03 25 75 4a 1c > 01 <49> 8b 44 24 08 48 8d 50 ff a8 01 4c 0f 45 e2 49 8b 54 24 08 48 8d > [ 1999.906674] RSP: 0018:ffffb7004d9d3cf8 EFLAGS: 00010282 > [ 1999.912502] RAX: 0000723d80000000 RBX: 000000004080aebf RCX: > 0000000002000000 > [ 1999.920464] RDX: 0000000000000000 RSI: 0000000000000000 RDI: > 000000004080aebf > [ 1999.929335] RBP: ffffb7004d9d3d28 R08: ffff8de1c2a43628 R09: > 0000000000000000 > [ 1999.937298] R10: 0000000000000000 R11: 0000000000000000 R12: > fffff12379020280 > [ 1999.945258] R13: ffffffffc0f6626a R14: 0000000000000000 R15: > ffffb7004d9d3db0 > [ 1999.953221] FS: 00007f231b4c1740(0000) GS:ffff8de20e800000(0000) > knlGS:0000000000000000 > [ 1999.963255] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 1999.969667] CR2: fffff12379020288 CR3: 0000001faf5aa006 CR4: > 0000000000770ee0 > [ 1999.977627] PKRU: 55555554 > [ 1999.980644] Call Trace: > [ 1999.983384] svm_leave_nested+0xea/0x2f0 [kvm_amd] > [ 1999.988743] kvm_arch_vcpu_ioctl+0x6fd/0x1260 [kvm] > [ 1999.995026] ? avic_vcpu_load+0x20/0x130 [kvm_amd] > [ 2000.000372] kvm_vcpu_kick+0x705/0xae0 [kvm] > [ 2000.005216] __x64_sys_ioctl+0x91/0xc0 > [ 2000.009470] do_syscall_64+0x38/0x90 > [ 2000.013461] entry_SYSCALL_64_after_hwframe+0x44/0xa9 > [ 2000.019090] RIP: 0033:0x7f231b5db50b > [ 2000.024052] Code: 0f 1e fa 48 8b 05 85 39 0d 00 64 c7 00 26 00 00 00 > 48 c7 c0 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa b8 10 00 00 00 0f > 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 55 39 0d 00 f7 d8 64 89 01 48 > [ 2000.045005] RSP: 002b:00007ffc01de6918 EFLAGS: 00000246 ORIG_RAX: > 0000000000000010 > [ 2000.053453] RAX: ffffffffffffffda RBX: 0000000002147880 RCX: > 00007f231b5db50b > [ 2000.062375] RDX: 0000000002148c98 RSI: 000000004080aebf RDI: > 0000000000000009 > [ 2000.070335] RBP: 000000000214d0c0 R08: 00000000004103f8 R09: > 0000000000000000 > [ 2000.078296] R10: 0000000000000000 R11: 0000000000000246 R12: > 00007ffc01de6960 > [ 2000.087278] R13: 0000000000000002 R14: 00007f231b711000 R15: > 0000000002147880 > [ 2000.095244] Modules linked in: nls_iso8859_1(E) dm_multipath(E) > scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E) intel_rapl_msr(E) > intel_rapl_common(E) amd64_edac_mod(E) kvm_amd(E) kvm(E) joydev(E) > input_leds(E) crct10dif_pclmul(E) crc32_pclmul(E) ghash_clmulni_intel(E) > aesni_intel(E) crypto_simd(E) cryptd(E) glue_helper(E) hid_generic(E) > efi_pstore(E) rapl(E) ipmi_si(E) ipmi_devintf(E) ipmi_msghandler(E) > usbhid(E) hid(E) ast(E) drm_vram_helper(E) drm_ttm_helper(E) ttm(E) > drm_kms_helper(E) cec(E) i2c_algo_bit(E) fb_sys_fops(E) syscopyarea(E) > sysfillrect(E) sysimgblt(E) wmi_bmof(E) e1000e(E) i2c_piix4(E) ccp(E) > wmi(E) mac_hid(E) sch_fq_codel(E) drm(E) ip_tables(E) x_tables(E) > autofs4(E) > [ 2000.164824] CR2: fffff12379020288 > [ 2000.168522] ---[ end trace c5975ced3c660340 ]--- > > > + > > + return ret; > > } > > > > struct kvm_x86_nested_ops svm_nested_ops = { > > -- > > 2.28.0 > > > > _______________________________________________ > > Virtualization mailing list > > Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx > > https://lists.linuxfoundation.org/mailman/listinfo/virtualization