Re: Stable bugfix backport request of "KVM: x86: smm: preserve interrupt shadow in SMRAM"?

Dongli Zhang <dongli.zhang@xxxxxxxxxx> · Fri, 26 Jan 2024 17:33:28 -0800

Hi Greg,

On 1/26/24 17:08, Greg KH wrote:
> On Fri, Jan 26, 2024 at 04:20:16PM -0800, Dongli Zhang wrote:
>> Hi Maxim and Paolo, 
>>
>> This is the linux-stable backport request regarding the below patch.
> 
> For what tree(s)?

It is linux-5.15.y as in the Subject of the patch.

However, more versions require this bugfix, e.g., 6.1 or 5.4.
I have a backport for 5.4 as well.

I just sent the version on top of 5.15 as a suggestion; perhaps there
is already a backport available.

> 
> And you forgot to sign off on the patch :(

I have a Signed-off-by after the commit message. There are some conflicts,
e.g., the smram buffer offsets used in function calls.

I have added a note to the commit message, between Paolo's Signed-off-by
and my own Signed-off-by, to explain the conflicts.

BTW, I have created a kvm selftest program to reproduce this issue. Although I
cannot reproduce on baremetal (perhaps it is too fast), I can always reproduce
on a KVM running on top of a VM.

$ ./smm_interrupt_window
Create thread for vcpu=0
Create thread for vcpu=1
Waiting for 2-second for test to start ...
vcpu=0: stage = 1
vcpu=1: stage = 2
Start the test!
==== Test Assertion Failure ====
  x86_64/mytest.c:96: exit_reason == (2)
  pid=5541 tid=5544 errno=0 - Success
     1	0x0000000000401dd3: vcpu_worker at mytest.c:96
     2	0x0000000000417cc9: start_thread at libpthread.o:?
     3	0x0000000000470d32: __clone at ??:?
  Wanted KVM exit reason: 2 (IO), got: 9 (FAIL_ENTRY)

The following appears in dmesg:

[  165.292990] VMCS 0000000088f567e4, last attempted VM-entry on CPU 14
... ...
[  165.304272] RFLAGS=0x00000002         DR7 = 0x0000000000000400
... ...
[  165.329264] Interruptibility = 00000009  ActivityState = 00000000

// SPDX-License-Identifier: GPL-2.0
/*
 * Reproduce the issue fixed by the commit fb28875fd7da ("KVM: x86: smm:
 * preserve interrupt shadow in SMRAM").
 *
 * vCPU#1 keeps sending SMIs to vCPU#0, which keeps executing cli/sti so that
 * an SMI can arrive while vCPU#0 is in the interrupt shadow following sti.
 *
 * Adapted from smm_test.c
 */
#include <pthread.h>

#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"

/* Size in bytes of the SMRAM region that main() zeroes: 64 KiB. */
#define SMRAM_SIZE 65536
/*
 * Memslot id passed to vm_userspace_mem_region_add() and vm_phy_pages_alloc().
 * NOTE(review): bit 16 presumably selects KVM's SMM address space (as_id 1);
 * confirm against the KVM memslot documentation.
 */
#define SMRAM_MEMSLOT ((1 << 16) | 1)
/* Number of guest pages covering SMRAM_SIZE. */
#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
/* Guest physical address of SMRAM; main() also writes it to MSR_IA32_SMBASE. */
#define SMRAM_GPA 0x1000000
/* Value the SMI handler loads into %al before executing rsm. */
#define SMRAM_STAGE 0xfe

/* Two-level stringification so that a macro argument is expanded first. */
#define STR(x) #x
#define XSTR(s) STR(s)

/* I/O port read by sync_with_host(); the host expects KVM_EXIT_IO for it. */
#define SYNC_PORT 0xe

/* Number of vCPUs, and of vCPU worker threads, created by main(). */
#define NR_VCPUS		2

/*
 * Raw machine code of the SMI handler. main() copies these bytes to offset
 * 0x8000 of the SMRAM region. The handler only loads SMRAM_STAGE into %al
 * and then leaves SMM again with rsm.
 */
uint8_t smi_handler[] = {
	0xb0, SMRAM_STAGE,    /* mov $SMRAM_STAGE, %al */
	0x0f, 0xaa,           /* rsm */
};

/*
 * Report @phase to the host: place it in the accumulator and execute an `in`
 * from SYNC_PORT. The host-side worker asserts that vcpu_run() returned with
 * KVM_EXIT_IO and then reads the low byte of RAX as the stage number.
 *
 * The "+a" constraint makes @phase both the input and the output of the
 * instruction; the value is not used after the asm statement.
 */
static inline void sync_with_host(uint64_t phase)
{
	asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
		     : "+a" (phase));
}

/*
 * Guest entry point shared by both vCPUs; @cpu selects the role.
 *
 * Both roles first set X2APIC_ENABLE in the APIC base MSR and then announce
 * themselves to the host through sync_with_host() (stage 1 for cpu 0, stage 2
 * for cpu 1). Neither role ever returns from its loop.
 */
static void guest_code(int cpu)
{
	int spin;

	wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) | X2APIC_ENABLE);

	switch (cpu) {
	case 0:
		sync_with_host(1);
		/*
		 * Victim: toggle interrupts off and on forever, with a few
		 * nops after each cli and each sti.
		 */
		for (;;) {
			asm volatile("cli");
			asm volatile("nop");
			asm volatile("nop");
			asm volatile("nop");
			asm volatile("sti");
			asm volatile("nop");
			asm volatile("nop");
			asm volatile("nop");
		}
		break;
	case 1:
		sync_with_host(2);
		/*
		 * Sender: write an SMI request to the x2APIC ICR, burn some
		 * time in a nop loop, and repeat forever.
		 */
		for (;;) {
			x2apic_write_reg(APIC_ICR, APIC_INT_ASSERT | APIC_DM_SMI);
			for (spin = 0; spin < 1000000; spin++)
				asm volatile("nop");
		}
		break;
	default:
		/* Any other vCPU index has no role and simply returns. */
		break;
	}
}

/*
 * Run @vcpu once, require that it exited with KVM_EXIT_IO, and print the
 * stage number found in the low byte of the guest's RAX.
 *
 * The guest reports a stage by calling sync_with_host(); any other exit
 * reason trips the assertion and fails the test.
 */
static void run_until_sync_io(struct kvm_vcpu *vcpu)
{
	struct kvm_regs regs;
	int stage_reported;

	vcpu_run(vcpu);
	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

	memset(&regs, 0, sizeof(regs));
	vcpu_regs_get(vcpu, &regs);
	stage_reported = regs.rax & 0xff;
	pr_info("vcpu=%u: stage = %d\n", vcpu->id, stage_reported);
}

/*
 * Thread body driving one vCPU; @data is the struct kvm_vcpu to run.
 *
 * vCPU 0 is run twice. The first run collects the guest's stage report. The
 * guest then enters its cli/sti loop, which performs no further port I/O, so
 * the second run acts as the failure detector: if it returns with anything
 * other than KVM_EXIT_IO (e.g. the FAIL_ENTRY seen when the bug triggers),
 * the assertion fires.
 *
 * vCPU 1 sleeps for two seconds before its first run, collects the stage
 * report, and is then run again without checking the result; the guest spends
 * that run sending SMIs.
 *
 * Always returns NULL.
 */
static void *vcpu_worker(void *data)
{
	struct kvm_vcpu *vcpu = data;

	pr_info("Create thread for vcpu=%u\n", vcpu->id);

	if (vcpu->id == 0) {
		run_until_sync_io(vcpu);
		run_until_sync_io(vcpu);
	}

	if (vcpu->id == 1) {
		pr_info("Waiting for 2-second for test to start ...\n");
		sleep(2);

		run_until_sync_io(vcpu);

		pr_info("Start the test!\n");
		vcpu_run(vcpu);
	}

	return NULL;
}

/*
 * Build a VM with NR_VCPUS vCPUs running guest_code(), install the SMI
 * handler into a dedicated SMRAM memslot, and drive every vCPU from its own
 * thread.
 *
 * Returns 0 once all worker threads have been joined.
 *
 * NOTE(review): both guest roles loop forever, so when the bug does not
 * reproduce the joins below presumably never return; consider adding a
 * timeout if this is to become a regular selftest.
 */
int main(int argc, char **argv)
{
	struct kvm_vcpu *vcpus[NR_VCPUS];
	pthread_t tids[NR_VCPUS];
	struct kvm_vm *vm;
	uint8_t *smram;
	int ret;
	int i;

	vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);

	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
				    SMRAM_MEMSLOT, SMRAM_PAGES, 0);

	TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
		    == SMRAM_GPA, "could not allocate guest physical addresses?");

	/* Clear SMRAM and place the handler at offset 0x8000 of the region. */
	smram = addr_gpa2hva(vm, SMRAM_GPA);
	memset(smram, 0x0, SMRAM_SIZE);
	memcpy(smram + 0x8000, smi_handler, sizeof(smi_handler));

	for (i = 0; i < NR_VCPUS; i++) {
		vcpu_set_msr(vcpus[i], MSR_IA32_SMBASE, SMRAM_GPA);
		/* guest_code() receives the vCPU index as its only argument. */
		vcpu_args_set(vcpus[i], 1, (uint64_t)i);
	}

	for (i = 0; i < NR_VCPUS; i++) {
		ret = pthread_create(&tids[i], NULL, vcpu_worker, vcpus[i]);
		TEST_ASSERT(!ret, "pthread_create() failed for vcpu=%d, ret=%d",
			    i, ret);
	}

	for (i = 0; i < NR_VCPUS; i++) {
		ret = pthread_join(tids[i], NULL);
		TEST_ASSERT(!ret, "pthread_join() failed for vcpu=%d, ret=%d",
			    i, ret);
	}

	return 0;
}

Dongli Zhang

> 
> thanks,
> 
> greg k-h