Re: [PATCH v3 12/21] x86/virt/tdx: Create TDMRs to cover all system RAM

Dave Hansen <dave.hansen@xxxxxxxxx> · Thu, 28 Apr 2022 09:22:36 -0700

On 4/5/22 21:49, Kai Huang wrote:
> The kernel configures TDX usable memory regions to the TDX module via
> an array of "TD Memory Region" (TDMR). 

One bit of language that's repeated in these changelogs that I don't
like is "configure ... to".  I think that's a misuse of the word
configure.  I'd say something more like:

	The kernel configures TDX-usable memory regions by passing an
	array of "TD Memory Regions" (TDMRs) to the TDX module.

Could you please take a look over this series and reword those?

> Each TDMR entry (TDMR_INFO)
> contains the information of the base/size of a memory region, the
> base/size of the associated Physical Address Metadata Table (PAMT) and
> a list of reserved areas in the region.
> 
> Create a number of TDMRs according to the verified e820 RAM entries.
> As the first step only set up the base/size information for each TDMR.
> 
> TDMR must be 1G aligned and the size must be in 1G granularity.  This

 ^ Each

> implies that one TDMR could cover multiple e820 RAM entries.  If a RAM
> entry spans the 1GB boundary and the former part is already covered by
> the previous TDMR, just create a new TDMR for the latter part.
> 
> TDX only supports a limited number of TDMRs (currently 64).  Abort the
> TDMR construction process when the number of TDMRs exceeds this
> limitation.

... and what does this *MEAN*?  Is TDX disabled?  Does it throw away the
RAM?  Does it eat puppies?

>  arch/x86/virt/vmx/tdx/tdx.c | 138 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 138 insertions(+)
> 
> diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> index 6b0c51aaa7f2..82534e70df96 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.c
> +++ b/arch/x86/virt/vmx/tdx/tdx.c
> @@ -54,6 +54,18 @@
>  		((u32)(((_keyid_part) & 0xffffffffull) + 1))
>  #define TDX_KEYID_NUM(_keyid_part)	((u32)((_keyid_part) >> 32))
>  
> +/* TDMR must be 1gb aligned */
> +#define TDMR_ALIGNMENT		BIT_ULL(30)
> +#define TDMR_PFN_ALIGNMENT	(TDMR_ALIGNMENT >> PAGE_SHIFT)
> +
> +/* Align up and down the address to TDMR boundary */
> +#define TDMR_ALIGN_DOWN(_addr)	ALIGN_DOWN((_addr), TDMR_ALIGNMENT)
> +#define TDMR_ALIGN_UP(_addr)	ALIGN((_addr), TDMR_ALIGNMENT)
> +
> +/* TDMR's start and end address */
> +#define TDMR_START(_tdmr)	((_tdmr)->base)
> +#define TDMR_END(_tdmr)		((_tdmr)->base + (_tdmr)->size)

Make these 'static inline's please.  #defines are only for constants or
things that can't use real functions.

>  /*
>   * TDX module status during initialization
>   */
> @@ -813,6 +825,44 @@ static int e820_check_against_cmrs(void)
>  	return 0;
>  }
>  
> +/* The starting offset of reserved areas within TDMR_INFO */
> +#define TDMR_RSVD_START		64

				^ extra whitespace

> +static struct tdmr_info *__alloc_tdmr(void)
> +{
> +	int tdmr_sz;
> +
> +	/*
> +	 * TDMR_INFO's actual size depends on maximum number of reserved
> +	 * areas that one TDMR supports.
> +	 */
> +	tdmr_sz = TDMR_RSVD_START + tdx_sysinfo.max_reserved_per_tdmr *
> +		sizeof(struct tdmr_reserved_area);

You have a structure for this.  I know this because it's the return type
of the function.  You have TDMR_RSVD_START available via the structure
itself.  So, derive that 64 either via:

	sizeof(struct tdmr_info)

or,

	offsetof(struct tdmr_info, reserved_areas);

Which would make things look like this:

	tdmr_base_sz = sizeof(struct tdmr_info);
	tdmr_reserved_area_sz = sizeof(struct tdmr_reserved_area) *
				tdx_sysinfo.max_reserved_per_tdmr;

	tdmr_sz = tdmr_base_sz + tdmr_reserved_area_sz;

Could you explain why on earth you felt the need for the TDMR_RSVD_START
#define?

> +	/*
> +	 * TDX requires TDMR_INFO to be 512 aligned.  Always align up

Again, 512 what?  512 pages?  512 hippos?

> +	 * TDMR_INFO size to 512 so the memory allocated via kzalloc()
> +	 * can meet the alignment requirement.
> +	 */
> +	tdmr_sz = ALIGN(tdmr_sz, TDMR_INFO_ALIGNMENT);
> +
> +	return kzalloc(tdmr_sz, GFP_KERNEL);
> +}
> +
> +/* Create a new TDMR at given index in the TDMR array */
> +static struct tdmr_info *alloc_tdmr(struct tdmr_info **tdmr_array, int idx)
> +{
> +	struct tdmr_info *tdmr;
> +
> +	if (WARN_ON_ONCE(tdmr_array[idx]))
> +		return NULL;
> +
> +	tdmr = __alloc_tdmr();
> +	tdmr_array[idx] = tdmr;
> +
> +	return tdmr;
> +}
> +
>  static void free_tdmrs(struct tdmr_info **tdmr_array, int tdmr_num)
>  {
>  	int i;
> @@ -826,6 +876,89 @@ static void free_tdmrs(struct tdmr_info **tdmr_array, int tdmr_num)
>  	}
>  }
>  
> +/*
> + * Create TDMRs to cover all RAM entries in e820_table.  The created
> + * TDMRs are saved to @tdmr_array and @tdmr_num is set to the actual
> + * number of TDMRs.  All entries in @tdmr_array must be initially NULL.
> + */
> +static int create_tdmrs(struct tdmr_info **tdmr_array, int *tdmr_num)
> +{
> +	struct tdmr_info *tdmr;
> +	u64 start, end;
> +	int i, tdmr_idx;
> +	int ret = 0;
> +
> +	tdmr_idx = 0;
> +	tdmr = alloc_tdmr(tdmr_array, 0);
> +	if (!tdmr)
> +		return -ENOMEM;
> +	/*
> +	 * Loop over all RAM entries in e820 and create TDMRs to cover
> +	 * them.  To keep it simple, always try to use one TDMR to cover
> +	 * one RAM entry.
> +	 */
> +	e820_for_each_mem(i, start, end) {
> +		start = TDMR_ALIGN_DOWN(start);
> +		end = TDMR_ALIGN_UP(end);
			    ^ vertically align those ='s, please.

> +		/*
> +		 * If the current TDMR's size hasn't been initialized, it
> +		 * is a new allocated TDMR to cover the new RAM entry.
> +		 * Otherwise the current TDMR already covers the previous
> +		 * RAM entry.  In the latter case, check whether the
> +		 * current RAM entry has been fully or partially covered
> +		 * by the current TDMR, since TDMR is 1G aligned.
> +		 */
> +		if (tdmr->size) {
> +			/*
> +			 * Loop to next RAM entry if the current entry
> +			 * is already fully covered by the current TDMR.
> +			 */
> +			if (end <= TDMR_END(tdmr))
> +				continue;

This loop is actually pretty well commented and looks OK.  The
TDMR_END() construct even adds to readability.  *BUT*, the

> +			/*
> +			 * If part of current RAM entry has already been
> +			 * covered by current TDMR, skip the already
> +			 * covered part.
> +			 */
> +			if (start < TDMR_END(tdmr))
> +				start = TDMR_END(tdmr);
> +
> +			/*
> +			 * Create a new TDMR to cover the current RAM
> +			 * entry, or the remaining part of it.
> +			 */
> +			tdmr_idx++;
> +			if (tdmr_idx >= tdx_sysinfo.max_tdmrs) {
> +				ret = -E2BIG;
> +				goto err;
> +			}
> +			tdmr = alloc_tdmr(tdmr_array, tdmr_idx);
> +			if (!tdmr) {
> +				ret = -ENOMEM;
> +				goto err;
> +			}

This is a bit verbose for this loop.  Why not just hide the 'max_tdmrs'
inside the alloc_tdmr() function?  That will make this loop smaller and
easier to read.

> +		}
> +
> +		tdmr->base = start;
> +		tdmr->size = end - start;
> +	}
> +
> +	/* @tdmr_idx is always the index of last valid TDMR. */
> +	*tdmr_num = tdmr_idx + 1;
> +
> +	return 0;
> +err:
> +	/*
> +	 * Clean up already allocated TDMRs in case of error.  @tdmr_idx
> +	 * indicates the last TDMR that wasn't created successfully,
> +	 * therefore only needs to free @tdmr_idx TDMRs.
> +	 */
> +	free_tdmrs(tdmr_array, tdmr_idx);
> +	return ret;
> +}
> +
>  static int construct_tdmrs(struct tdmr_info **tdmr_array, int *tdmr_num)
>  {
>  	int ret;
> @@ -834,8 +967,13 @@ static int construct_tdmrs(struct tdmr_info **tdmr_array, int *tdmr_num)
>  	if (ret)
>  		goto err;
>  
> +	ret = create_tdmrs(tdmr_array, tdmr_num);
> +	if (ret)
> +		goto err;
> +
>  	/* Return -EFAULT until constructing TDMRs is done */
>  	ret = -EFAULT;
> +	free_tdmrs(tdmr_array, *tdmr_num);
>  err:
>  	return ret;
>  }