Re: [PATCH v1 1/3] x86/tdx: Add TDX Guest event notify interrupt support

"Huang, Kai" <kai.huang@xxxxxxxxx> · Tue, 28 Mar 2023 02:38:19 +0000

On Sat, 2023-03-25 at 23:20 -0700, Kuppuswamy Sathyanarayanan wrote:
> Host-guest event notification via configured interrupt vector is useful
> in cases where a guest makes an asynchronous request and needs a
> callback from the host to indicate the completion or to let the host
> notify the guest about events like device removal. One usage example is,
> callback requirement of GetQuote asynchronous hypercall.
> 
> In TDX guest, SetupEventNotifyInterrupt hypercall can be used by the
> guest to specify which interrupt vector to use as an event-notify
> vector to the VMM. 
> 

"to the VMM" -> "from the VMM"?

> Details about the SetupEventNotifyInterrupt
> hypercall can be found in TDX Guest-Host Communication Interface
> (GHCI) Specification, sec 3.5 "VP.VMCALL<SetupEventNotifyInterrupt>".

It seems we shouldn't mention the exact section number.

> 
> As per design, VMM will post the event completion IRQ using the same
> CPU in which SetupEventNotifyInterrupt hypercall request is received.

"in which" -> "on which"

> So allocate an IRQ vector from "x86_vector_domain", and set the CPU
> affinity of the IRQ vector to the current CPU.

IMHO "current CPU" is a little bit vague.  Perhaps just "to the CPU on which
the SetupEventNotifyInterrupt hypercall is made".

Also, perhaps it's better to mention that IRQF_NOBALANCING is used to prevent
the IRQ from being migrated to another CPU.

> 
> Add tdx_register_event_irq_cb()/tdx_unregister_event_irq_cb()
> interfaces to allow drivers register/unregister event noficiation
> handlers.
> 
> Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx>
> Reviewed-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
> Reviewed-by: Mika Westerberg <mika.westerberg@xxxxxxxxxxxxxxx>
> Acked-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> Acked-by: Wander Lairson Costa <wander@xxxxxxxxxx>
> Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@xxxxxxxxxxxxxxx>
> ---
>  arch/x86/coco/tdx/tdx.c    | 163 +++++++++++++++++++++++++++++++++++++
>  arch/x86/include/asm/tdx.h |   6 ++
>  2 files changed, 169 insertions(+)
> 
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index 055300e08fb3..d03985952d45 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -7,12 +7,18 @@
>  #include <linux/cpufeature.h>
>  #include <linux/export.h>
>  #include <linux/io.h>
> +#include <linux/string.h>
> +#include <linux/uaccess.h>

Do you need the above two headers?

Also, perhaps you should explicitly include <linux/list.h> and <linux/spinlock.h>.

> +#include <linux/interrupt.h>
> +#include <linux/irq.h>
> +#include <linux/numa.h>
>  #include <asm/coco.h>
>  #include <asm/tdx.h>
>  #include <asm/vmx.h>
>  #include <asm/insn.h>
>  #include <asm/insn-eval.h>
>  #include <asm/pgtable.h>
> +#include <asm/irqdomain.h>
>  
>  /* TDX module Call Leaf IDs */
>  #define TDX_GET_INFO			1
> @@ -27,6 +33,7 @@
>  /* TDX hypercall Leaf IDs */
>  #define TDVMCALL_MAP_GPA		0x10001
>  #define TDVMCALL_REPORT_FATAL_ERROR	0x10003
> +#define TDVMCALL_SETUP_NOTIFY_INTR	0x10004
>  
>  /* MMIO direction */
>  #define EPT_READ	0
> @@ -51,6 +58,16 @@
>  
>  #define TDREPORT_SUBTYPE_0	0
>  
> +struct event_irq_entry {
> +	tdx_event_irq_cb_t handler;
> +	void *data;
> +	struct list_head head;
> +};
> +
> +static int tdx_event_irq;

__ro_after_init?

> +static LIST_HEAD(event_irq_cb_list);
> +static DEFINE_SPINLOCK(event_irq_cb_lock);
> +
>  /*
>   * Wrapper for standard use of __tdx_hypercall with no output aside from
>   * return code.
> @@ -873,3 +890,149 @@ void __init tdx_early_init(void)
>  
>  	pr_info("Guest detected\n");
>  }
> +
> +static irqreturn_t tdx_event_irq_handler(int irq, void *dev_id)
> +{
> +	struct event_irq_entry *entry;
> +
> +	spin_lock(&event_irq_cb_lock);
> +	list_for_each_entry(entry, &event_irq_cb_list, head) {
> +		if (entry->handler)
> +			entry->handler(entry->data);
> +	}
> +	spin_unlock(&event_irq_cb_lock);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +/* Reserve an IRQ from x86_vector_domain for TD event notification */
> +static int __init tdx_event_irq_init(void)
> +{
> +	struct irq_alloc_info info;
> +	cpumask_t saved_cpumask;
> +	struct irq_cfg *cfg;
> +	int cpu, irq;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
> +		return 0;
> +
> +	init_irq_alloc_info(&info, NULL);
> +
> +	/*
> +	 * Event notification vector will be delivered to the CPU
> +	 * in which TDVMCALL_SETUP_NOTIFY_INTR hypercall is requested.
> +	 * So set the IRQ affinity to the current CPU.
> +	 */
> +	cpu = get_cpu();
> +	cpumask_copy(&saved_cpumask, current->cpus_ptr);
> +	info.mask = cpumask_of(cpu);
> +	put_cpu();

The 'saved_cpumask' related code is ugly.  If you move put_cpu() to the end of
this function, I think you can remove all related code:

	cpu = get_cpu();

	/*
	 * Set @info->mask to local cpu to make sure a valid vector is
	 * pre-allocated when TDX event notification IRQ is allocated
	 * from x86_vector_domain.
	 */
	init_irq_alloc_info(&info, cpumask_of(cpu));

	// the rest: request_irq(), hypercall ...

	put_cpu();

> +
> +	irq = irq_domain_alloc_irqs(x86_vector_domain, 1, NUMA_NO_NODE, &info);

Should you use cpu_to_node(cpu) instead of NUMA_NO_NODE?

> +	if (irq <= 0) {
> +		pr_err("Event notification IRQ allocation failed %d\n", irq);
> +		return -EIO;
> +	}
> +
> +	irq_set_handler(irq, handle_edge_irq);
> +
> +	cfg = irq_cfg(irq);
> +	if (!cfg) {
> +		pr_err("Event notification IRQ config not found\n");
> +		goto err_free_irqs;
> +	}

You are depending on irq_domain_alloc_irqs() to have already allocated a vector
on the local CPU.  So if !cfg here, it means the irq_domain_alloc_irqs() call
failed to allocate the vector as expected, i.e. the code is broken.

So, perhaps you should just WARN() if the vector hasn't been allocated, to catch
the bug:

	WARN_ON(!irq_cfg(irq)->vector);

> +
> +	if (request_irq(irq, tdx_event_irq_handler, IRQF_NOBALANCING,

It's better to add a comment to explain why IRQF_NOBALANCING is used.

> +			"tdx_event_irq", NULL)) {
> +		pr_err("Event notification IRQ request failed\n");
> +		goto err_free_irqs;
> +	}
> +
> +	set_cpus_allowed_ptr(current, cpumask_of(cpu));
> +
> +	/*
> +	 * Register callback vector address with VMM. More details
> +	 * about the ABI can be found in TDX Guest-Host-Communication
> +	 * Interface (GHCI), sec titled
> +	 * "TDG.VP.VMCALL<SetupEventNotifyInterrupt>".
> +	 */
> +	if (_tdx_hypercall(TDVMCALL_SETUP_NOTIFY_INTR, cfg->vector, 0, 0, 0)) {
> +		pr_err("Event notification hypercall failed\n");
> +		goto err_restore_cpus;
> +	}
> +
> +	set_cpus_allowed_ptr(current, &saved_cpumask);
> +
> +	tdx_event_irq = irq;
> +
> +	return 0;
> +
> +err_restore_cpus:
> +	set_cpus_allowed_ptr(current, &saved_cpumask);
> +	free_irq(irq, NULL);
> +err_free_irqs:
> +	irq_domain_free_irqs(irq, 1);
> +
> +	return -EIO;
> +}
> +arch_initcall(tdx_event_irq_init)
> +