On Mon, Oct 16, 2023 at 08:27:43AM -0500, Michael Roth wrote: > +/* > + * SEV_DATA_RANGE_LIST: > + * Array containing range of pages that firmware transitions to HV-fixed > + * page state. > + */ > +struct sev_data_range_list *snp_range_list; > +static int __sev_snp_init_locked(int *error); Put the function above the caller instead of doing a forward declaration. > static inline bool sev_version_greater_or_equal(u8 maj, u8 min) > { > struct sev_device *sev = psp_master->sev_data; > @@ -466,9 +479,9 @@ static inline int __sev_do_init_locked(int *psp_ret) > return __sev_init_locked(psp_ret); > } > > -static int __sev_platform_init_locked(int *error) > +static int ___sev_platform_init_locked(int *error, bool probe) > { > - int rc = 0, psp_ret = SEV_RET_NO_FW_CALL; > + int rc, psp_ret = SEV_RET_NO_FW_CALL; > struct psp_device *psp = psp_master; > struct sev_device *sev; > > @@ -480,6 +493,34 @@ static int __sev_platform_init_locked(int *error) > if (sev->state == SEV_STATE_INIT) > return 0; > > + /* > + * Legacy guests cannot be running while SNP_INIT(_EX) is executing, > + * so perform SEV-SNP initialization at probe time. > + */ > + rc = __sev_snp_init_locked(error); > + if (rc && rc != -ENODEV) { > + /* > + * Don't abort the probe if SNP INIT failed, > + * continue to initialize the legacy SEV firmware. > + */ > + dev_err(sev->dev, "SEV-SNP: failed to INIT rc %d, error %#x\n", rc, *error); > + } > + > + /* Delay SEV/SEV-ES support initialization */ > + if (probe && !psp_init_on_probe) > + return 0; > + > + if (!sev_es_tmr) { > + /* Obtain the TMR memory area for SEV-ES use */ > + sev_es_tmr = sev_fw_alloc(SEV_ES_TMR_SIZE); > + if (sev_es_tmr) > + /* Must flush the cache before giving it to the firmware */ > + clflush_cache_range(sev_es_tmr, SEV_ES_TMR_SIZE); > + else > + dev_warn(sev->dev, > + "SEV: TMR allocation failed, SEV-ES support unavailable\n"); > + } > + > if (sev_init_ex_buffer) { > rc = sev_read_init_ex_file(); > if (rc) > @@ -522,6 +563,11 @@ static int __sev_platform_init_locked(int *error) > return 0; > } > > +static int __sev_platform_init_locked(int *error) > +{ > + return ___sev_platform_init_locked(error, false); > +} Uff, this is silly. And it makes the code hard to follow and that meat of the platform init functionality in the ___-prefixed function a mess. And the problem is that that "probe" functionality is replicated from the one place where it is actually needed - sev_pci_init() which calls that new sev_platform_init_on_probe() function - to everything that calls __sev_platform_init_locked() for which you've added a wrapper. What you should do, instead, is split the code around __sev_snp_init_locked() in a separate function which does only that and is called something like __sev_platform_init_snp_locked() or so which does that unconditional work. And then you define: _sev_platform_init_locked(int *error, bool probe) note the *one* '_' - i.e., first layer: _sev_platform_init_locked(int *error, bool probe): { __sev_platform_init_snp_locked(error); if (!probe) return 0; if (psp_init_on_probe) __sev_platform_init_locked(error); ... } and you do the probing in that function only so that it doesn't get lost in the bunch of things __sev_platform_init_locked() does. And then you call _sev_platform_init_locked() everywhere and no need for a second sev_platform_init_on_probe(). > + > int sev_platform_init(int *error) > { > int rc; > @@ -534,6 +580,17 @@ int sev_platform_init(int *error) > } > EXPORT_SYMBOL_GPL(sev_platform_init); > > +static int sev_platform_init_on_probe(int *error) > +{ > + int rc; > + > + mutex_lock(&sev_cmd_mutex); > + rc = ___sev_platform_init_locked(error, true); > + mutex_unlock(&sev_cmd_mutex); > + > + return rc; > +} > + > static int __sev_platform_shutdown_locked(int *error) > { > struct sev_device *sev = psp_master->sev_data; > @@ -838,6 +895,191 @@ static int sev_update_firmware(struct device *dev) > return ret; > } > > +static void snp_set_hsave_pa(void *arg) > +{ > + wrmsrl(MSR_VM_HSAVE_PA, 0); > +} > + > +static int snp_filter_reserved_mem_regions(struct resource *rs, void *arg) > +{ > + struct sev_data_range_list *range_list = arg; > + struct sev_data_range *range = &range_list->ranges[range_list->num_elements]; > + size_t size; > + > + if ((range_list->num_elements * sizeof(struct sev_data_range) + > + sizeof(struct sev_data_range_list)) > PAGE_SIZE) > + return -E2BIG; Why? A comment would be helpful like with the rest this patch adds. > + switch (rs->desc) { > + case E820_TYPE_RESERVED: > + case E820_TYPE_PMEM: > + case E820_TYPE_ACPI: > + range->base = rs->start & PAGE_MASK; > + size = (rs->end + 1) - rs->start; > + range->page_count = size >> PAGE_SHIFT; > + range_list->num_elements++; > + break; > + default: > + break; > + } > + > + return 0; > +} > + > +static int __sev_snp_init_locked(int *error) > +{ > + struct psp_device *psp = psp_master; > + struct sev_data_snp_init_ex data; > + struct sev_device *sev; > + int rc = 0; > + > + if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) > + return -ENODEV; > + > + if (!psp || !psp->sev_data) > + return -ENODEV; Only caller checks this already. > + sev = psp->sev_data; > + > + if (sev->snp_initialized) Do we really need this silly boolean or is there a way to query the platform whether SNP has been initialized? > + return 0; > + > + if (!sev_version_greater_or_equal(SNP_MIN_API_MAJOR, SNP_MIN_API_MINOR)) { > + dev_dbg(sev->dev, "SEV-SNP support requires firmware version >= %d:%d\n", > + SNP_MIN_API_MAJOR, SNP_MIN_API_MINOR); > + return 0; > + } > + > + /* > + * The SNP_INIT requires the MSR_VM_HSAVE_PA must be set to 0h > + * across all cores. > + */ > + on_each_cpu(snp_set_hsave_pa, NULL, 1); > + > + /* > + * Starting in SNP firmware v1.52, the SNP_INIT_EX command takes a list of > + * system physical address ranges to convert into the HV-fixed page states > + * during the RMP initialization. For instance, the memory that UEFI > + * reserves should be included in the range list. This allows system > + * components that occasionally write to memory (e.g. logging to UEFI > + * reserved regions) to not fail due to RMP initialization and SNP enablement. > + */ > + if (sev_version_greater_or_equal(SNP_MIN_API_MAJOR, 52)) { Is there a generic way to probe SNP_INIT_EX presence in the firmware or are FW version numbers the only way? > + /* > + * Firmware checks that the pages containing the ranges enumerated > + * in the RANGES structure are either in the Default page state or in the "default" > + * firmware page state. > + */ > + snp_range_list = kzalloc(PAGE_SIZE, GFP_KERNEL); > + if (!snp_range_list) { > + dev_err(sev->dev, > + "SEV: SNP_INIT_EX range list memory allocation failed\n"); > + return -ENOMEM; > + } > + > + /* > + * Retrieve all reserved memory regions setup by UEFI from the e820 memory map > + * to be setup as HV-fixed pages. > + */ > + ^ Superfluous newline. > + rc = walk_iomem_res_desc(IORES_DESC_NONE, IORESOURCE_MEM, 0, ~0, > + snp_range_list, snp_filter_reserved_mem_regions); > + if (rc) { > + dev_err(sev->dev, > + "SEV: SNP_INIT_EX walk_iomem_res_desc failed rc = %d\n", rc); > + return rc; > + } > + > + memset(&data, 0, sizeof(data)); > + data.init_rmp = 1; > + data.list_paddr_en = 1; > + data.list_paddr = __psp_pa(snp_range_list); > + > + /* > + * Before invoking SNP_INIT_EX with INIT_RMP=1, make sure that > + * all dirty cache lines containing the RMP are flushed. > + * > + * NOTE: that includes writes via RMPUPDATE instructions, which > + * are also cacheable writes. > + */ > + wbinvd_on_all_cpus(); > + > + rc = __sev_do_cmd_locked(SEV_CMD_SNP_INIT_EX, &data, error); > + if (rc) > + return rc; > + } else { > + /* > + * SNP_INIT is equivalent to SNP_INIT_EX with INIT_RMP=1, so > + * just as with that case, make sure all dirty cache lines > + * containing the RMP are flushed. > + */ > + wbinvd_on_all_cpus(); > + > + rc = __sev_do_cmd_locked(SEV_CMD_SNP_INIT, NULL, error); > + if (rc) > + return rc; > + } So instead of duplicating the code here at the end of the if-else branching, you can do: void *arg = &data; if () { ... cmd = SEV_CMD_SNP_INIT_EX; } else { cmd = SEV_CMD_SNP_INIT; arg = NULL; } wbinvd_on_all_cpus(); rc = __sev_do_cmd_locked(cmd, arg, error); if (rc) return rc; > + /* Prepare for first SNP guest launch after INIT */ > + wbinvd_on_all_cpus(); Why is that WBINVD needed? > + rc = __sev_do_cmd_locked(SEV_CMD_SNP_DF_FLUSH, NULL, error); > + if (rc) > + return rc; > + > + sev->snp_initialized = true; > + dev_dbg(sev->dev, "SEV-SNP firmware initialized\n"); > + > + return rc; > +} > + > +static int __sev_snp_shutdown_locked(int *error) > +{ > + struct sev_device *sev = psp_master->sev_data; > + struct sev_data_snp_shutdown_ex data; > + int ret; > + > + if (!sev->snp_initialized) > + return 0; > + > + memset(&data, 0, sizeof(data)); > + data.length = sizeof(data); > + data.iommu_snp_shutdown = 1; > + > + wbinvd_on_all_cpus(); > + > +retry: > + ret = __sev_do_cmd_locked(SEV_CMD_SNP_SHUTDOWN_EX, &data, error); > + /* SHUTDOWN may require DF_FLUSH */ > + if (*error == SEV_RET_DFFLUSH_REQUIRED) { > + ret = __sev_do_cmd_locked(SEV_CMD_SNP_DF_FLUSH, NULL, NULL); > + if (ret) { > + dev_err(sev->dev, "SEV-SNP DF_FLUSH failed\n"); > + return ret; When you return here, sev->snp_initialized is still true but, in reality, it probably is in some half-broken state after issuing those commands you it is not really initialized anymore. > + } > + goto retry; This needs an upper limit from which to break out and not potentially endless-loop. > + } > + if (ret) { > + dev_err(sev->dev, "SEV-SNP firmware shutdown failed\n"); > + return ret; > + } > + > + sev->snp_initialized = false; > + dev_dbg(sev->dev, "SEV-SNP firmware shutdown\n"); > + > + return ret; > +} > + > +static int sev_snp_shutdown(int *error) > +{ > + int rc; > + > + mutex_lock(&sev_cmd_mutex); > + rc = __sev_snp_shutdown_locked(error); Why is this "locked" version even there if it is called only here? IOW, put all the logic in here - no need for __sev_snp_shutdown_locked(). > + mutex_unlock(&sev_cmd_mutex); > + > + return rc; > +} ... -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette