On Wed, Jul 07, 2021 at 01:14:45PM -0500, Brijesh Singh wrote: > +struct __packed psc_hdr { > + u16 cur_entry; > + u16 end_entry; > + u32 reserved; > +}; > + > +struct __packed psc_entry { > + u64 cur_page : 12, > + gfn : 40, > + operation : 4, > + pagesize : 1, > + reserved : 7; > +}; > + > +struct __packed snp_psc_desc { > + struct psc_hdr hdr; > + struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY]; > +}; The majority of kernel code puts __packed after the struct definition, let's put it there too pls, out of the way. ... > +static int vmgexit_psc(struct snp_psc_desc *desc) > +{ > + int cur_entry, end_entry, ret; > + struct snp_psc_desc *data; > + struct ghcb_state state; > + struct ghcb *ghcb; > + struct psc_hdr *hdr; > + unsigned long flags; > + > + local_irq_save(flags); > + > + ghcb = __sev_get_ghcb(&state); > + if (unlikely(!ghcb)) > + panic("SEV-SNP: Failed to get GHCB\n"); > + > + /* Copy the input desc into GHCB shared buffer */ > + data = (struct snp_psc_desc *)ghcb->shared_buffer; > + memcpy(ghcb->shared_buffer, desc, sizeof(*desc)); > + > + hdr = &data->hdr; > + cur_entry = hdr->cur_entry; > + end_entry = hdr->end_entry; > + > + /* > + * As per the GHCB specification, the hypervisor can resume the guest > + * before processing all the entries. Checks whether all the entries > + * are processed. If not, then keep retrying. > + * > + * The strategy here is to wait for the hypervisor to change the page > + * state in the RMP table before guest access the memory pages. If the > + * page state was not successful, then later memory access will result > + * in the crash. > + */ > + while (hdr->cur_entry <= hdr->end_entry) { > + ghcb_set_sw_scratch(ghcb, (u64)__pa(data)); > + > + ret = sev_es_ghcb_hv_call(ghcb, NULL, SVM_VMGEXIT_PSC, 0, 0); > + > + /* > + * Page State Change VMGEXIT can pass error code through > + * exit_info_2. 
> + */ > + if (WARN(ret || ghcb->save.sw_exit_info_2, > + "SEV-SNP: page state change failed ret=%d exit_info_2=%llx\n", > + ret, ghcb->save.sw_exit_info_2)) > + return 1; Yikes, you return here and below with interrupts disabled. All your returns need to be "goto out;" instead where you do out: __sev_put_ghcb(&state); local_irq_restore(flags); Yap, you very likely need to put the GHCB too. > + /* > + * Lets do some sanity check that entry processing is not going > + * backward. This will happen only if hypervisor is tricking us. > + */ > + if (WARN((hdr->end_entry > end_entry) || (cur_entry > hdr->cur_entry), > + "SEV-SNP: page state change processing going backward, end_entry " > + "(expected %d got %d) cur_entry (expected %d got %d)\n", > + end_entry, hdr->end_entry, cur_entry, hdr->cur_entry)) > + return 1; WARNING: quoted string split across lines #293: FILE: arch/x86/kernel/sev.c:750: + "SEV-SNP: page state change processing going backward, end_entry " + "(expected %d got %d) cur_entry (expected %d got %d)\n", If you're wondering what to do, yes, you can really stretch that string and shorten it too: if (WARN((hdr->end_entry > end_entry) || (cur_entry > hdr->cur_entry), "SEV-SNP: PSC processing going backwards, end_entry %d (got %d) cur_entry: %d (got %d)\n", end_entry, hdr->end_entry, cur_entry, hdr->cur_entry)) return 1; so that it fits on a single line and grepping can find it. > + /* Lets verify that reserved bit is not set in the header*/ > + if (WARN(hdr->reserved, "Reserved bit is set in the PSC header\n")) psc_entry has a ->reserved field too and since we're iterating over the entries... 
> + return 1; > + } > + > + __sev_put_ghcb(&state); > + local_irq_restore(flags); > + > + return 0; > +} > + > +static void __set_page_state(struct snp_psc_desc *data, unsigned long vaddr, > + unsigned long vaddr_end, int op) > +{ > + struct psc_hdr *hdr; > + struct psc_entry *e; > + unsigned long pfn; > + int i; > + > + hdr = &data->hdr; > + e = data->entries; > + > + memset(data, 0, sizeof(*data)); > + i = 0; > + > + while (vaddr < vaddr_end) { > + if (is_vmalloc_addr((void *)vaddr)) > + pfn = vmalloc_to_pfn((void *)vaddr); > + else > + pfn = __pa(vaddr) >> PAGE_SHIFT; > + > + e->gfn = pfn; > + e->operation = op; > + hdr->end_entry = i; > + > + /* > + * The GHCB specification provides the flexibility to > + * use either 4K or 2MB page size in the RMP table. > + * The current SNP support does not keep track of the > + * page size used in the RMP table. To avoid the > + * overlap request, use the 4K page size in the RMP > + * table. > + */ > + e->pagesize = RMP_PG_SIZE_4K; > + > + vaddr = vaddr + PAGE_SIZE; > + e++; > + i++; > + } > + > + /* Terminate the guest on page state change failure. */ That comment is kinda obvious :) > + if (vmgexit_psc(data)) > + sev_es_terminate(1, GHCB_TERM_PSC); > +} > + > +static void set_page_state(unsigned long vaddr, unsigned int npages, int op) > +{ > + unsigned long vaddr_end, next_vaddr; > + struct snp_psc_desc *desc; > + > + vaddr = vaddr & PAGE_MASK; > + vaddr_end = vaddr + (npages << PAGE_SHIFT); > + > + desc = kmalloc(sizeof(*desc), GFP_KERNEL_ACCOUNT); kzalloc() so that you don't have to memset() later in __set_page_state(). > + if (!desc) > + panic("failed to allocate memory"); Make that error message more distinctive so that *if* it happens, one can pinpoint the place in the code where the panic comes from. > + while (vaddr < vaddr_end) { > + /* > + * Calculate the last vaddr that can be fit in one > + * struct snp_psc_desc. 
> + */ > + next_vaddr = min_t(unsigned long, vaddr_end, > + (VMGEXIT_PSC_MAX_ENTRY * PAGE_SIZE) + vaddr); > + > + __set_page_state(desc, vaddr, next_vaddr, op); > + > + vaddr = next_vaddr; > + } > + > + kfree(desc); > +} > + -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette