RE: [PATCH v5 5/6] Drivers: hv: vmbus: Support TDX guests

> From: Michael Kelley (LINUX) <mikelley@xxxxxxxxxxxxx>
> Sent: Monday, May 1, 2023 10:33 AM
> ...
> From: Dexuan Cui
> >
> > Add Hyper-V specific code so that a TDX guest can run on Hyper-V:
> >   No need to use hv_vp_assist_page.
> >   Don't use the unsafe Hyper-V TSC page.
> >   Don't try to use HV_REGISTER_CRASH_CTL.
> >   Don't trust Hyper-V's TLB-flushing hypercalls.
> >   Don't use lazy EOI.
> >   Share SynIC Event/Message pages and VMBus Monitor pages with the host.
> 
> This patch no longer does anything with the VMBus monitor pages.
Sorry, I forgot to update the commit log. Will drop this from the log.

> >   Use pgprot_decrypted(PAGE_KERNEL) in hv_ringbuffer_init().
> 
> The above line in the commit message is stale and can be dropped.
Will drop this from the commit log.

> > @@ -116,6 +117,7 @@ int hv_synic_alloc(void)
> >  {
> >  	int cpu;
> >  	struct hv_per_cpu_context *hv_cpu;
> > +	int ret = -ENOMEM;
> >
> >  	/*
> >  	 * First, zero all per-cpu memory areas so hv_synic_free() can
> > @@ -159,6 +161,28 @@ int hv_synic_alloc(void)
> >  				goto err;
> >  			}
> >  		}
> > +
> > +		/* It's better to leak the page if the decryption fails. */
> > +		if (hv_isolation_type_tdx()) {
> > +			ret = set_memory_decrypted(
> > +				(unsigned long)hv_cpu->synic_message_page, 1);
> > +			if (ret) {
> > +				pr_err("Failed to decrypt SYNIC msg page\n");
> > +				hv_cpu->synic_message_page = NULL;
> > +				goto err;
> > +			}
> > +
> > +			ret = set_memory_decrypted(
> > +				(unsigned long)hv_cpu->synic_event_page, 1);
> > +			if (ret) {
> > +				pr_err("Failed to decrypt SYNIC event page\n");
> > +				hv_cpu->synic_event_page = NULL;
> > +				goto err;
> > +			}
> 
> The error handling still doesn't work quite correctly.  In the TDX case,
> upon exiting this function, the synic_message_page and the
> synic_event_page must each either be mapped decrypted or be NULL.  This
> requirement is so that hv_synic_free() will do the right thing in
> changing the mapping back to encrypted.  hv_synic_free() can't handle a
> non-NULL page being encrypted.
>
> In the above code, if we fail to decrypt the synic_message_page, then
> setting it to NULL will leak the page (which we'll live with) and ensure
> that hv_synic_free() will handle it correctly.  But at that point we'll
> exit with synic_event_page non-NULL and in the encrypted state, which
> hv_synic_free() can't handle.
> 
> Michael

Thanks for spotting the issue! 
I think the below extra changes should do the job:

@@ -121,91 +121,102 @@ int hv_synic_alloc(void)

        /*
         * First, zero all per-cpu memory areas so hv_synic_free() can
         * detect what memory has been allocated and cleanup properly
         * after any failures.
         */
        for_each_present_cpu(cpu) {
                hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
                memset(hv_cpu, 0, sizeof(*hv_cpu));
        }

        hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
                                         GFP_KERNEL);
        if (hv_context.hv_numa_map == NULL) {
                pr_err("Unable to allocate NUMA map\n");
                goto err;
        }

        for_each_present_cpu(cpu) {
                hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);

                tasklet_init(&hv_cpu->msg_dpc,
                             vmbus_on_msg_dpc, (unsigned long) hv_cpu);

                /*
                 * Synic message and event pages are allocated by paravisor.
                 * Skip these pages allocation here.
                 */
                if (!hv_isolation_type_snp() && !hv_root_partition) {
                        hv_cpu->synic_message_page =
                                (void *)get_zeroed_page(GFP_ATOMIC);
                        if (hv_cpu->synic_message_page == NULL) {
                                pr_err("Unable to allocate SYNIC message page\n");
                                goto err;
                        }

                        hv_cpu->synic_event_page =
                                (void *)get_zeroed_page(GFP_ATOMIC);
                        if (hv_cpu->synic_event_page == NULL) {
                                pr_err("Unable to allocate SYNIC event page\n");
+
+                               free_page((unsigned long)hv_cpu->synic_message_page);
+                               hv_cpu->synic_message_page = NULL;
+
                                goto err;
                        }
                }

                /* It's better to leak the page if the decryption fails. */
                if (hv_isolation_type_tdx()) {
                        ret = set_memory_decrypted(
                                (unsigned long)hv_cpu->synic_message_page, 1);
                        if (ret) {
                                pr_err("Failed to decrypt SYNIC msg page\n");
                                hv_cpu->synic_message_page = NULL;
+
+                               /*
+                                * Free the event page so that a TDX VM won't
+                                * try to encrypt the page in hv_synic_free().
+                                */
+                               free_page((unsigned long)hv_cpu->synic_event_page);
+                               hv_cpu->synic_event_page = NULL;
                                goto err;
                        }

                        ret = set_memory_decrypted(
                                (unsigned long)hv_cpu->synic_event_page, 1);
                        if (ret) {
                                pr_err("Failed to decrypt SYNIC event page\n");
                                hv_cpu->synic_event_page = NULL;
                                goto err;
                        }

                        memset(hv_cpu->synic_message_page, 0, PAGE_SIZE);
                        memset(hv_cpu->synic_event_page, 0, PAGE_SIZE);
                }
        }

        return 0;
 err:
        /*
         * Any memory allocations that succeeded will be freed when
         * the caller cleans up by calling hv_synic_free()
         */
        return ret;
 }
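
FWIW, the repeated "decrypt it or drop the pointer" pattern could also be
factored into a small helper. Just a sketch (the helper name is made up and
this is not part of the patch; on failure the caller would still need to
free/NULL the other page as above):

/*
 * Hypothetical helper: decrypt a SynIC page and drop the pointer on
 * failure, so hv_synic_free() never sees a non-NULL page that is still
 * encrypted. The page is intentionally leaked because its state is
 * uncertain after a failed set_memory_decrypted().
 */
static int hv_synic_decrypt_page(void **page, const char *what)
{
        int ret = set_memory_decrypted((unsigned long)*page, 1);

        if (ret) {
                pr_err("Failed to decrypt SYNIC %s page: %d\n", what, ret);
                *page = NULL;
        }
        return ret;
}

e.g. ret = hv_synic_decrypt_page(&hv_cpu->synic_message_page, "msg");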

I'm going to use the below (i.e. v5 + the above extra changes) in v6.
Please let me know if there are still any bugs.

@@ -116,6 +117,7 @@ int hv_synic_alloc(void)
 {
        int cpu;
        struct hv_per_cpu_context *hv_cpu;
+       int ret = -ENOMEM;

        /*
         * First, zero all per-cpu memory areas so hv_synic_free() can
@@ -156,9 +158,42 @@ int hv_synic_alloc(void)
                                (void *)get_zeroed_page(GFP_ATOMIC);
                        if (hv_cpu->synic_event_page == NULL) {
                                pr_err("Unable to allocate SYNIC event page\n");
+
+                               free_page((unsigned long)hv_cpu->synic_message_page);
+                               hv_cpu->synic_message_page = NULL;
+
                                goto err;
                        }
                }
+
+               /* It's better to leak the page if the decryption fails. */
+               if (hv_isolation_type_tdx()) {
+                       ret = set_memory_decrypted(
+                               (unsigned long)hv_cpu->synic_message_page, 1);
+                       if (ret) {
+                               pr_err("Failed to decrypt SYNIC msg page\n");
+                               hv_cpu->synic_message_page = NULL;
+
+                               /*
+                                * Free the event page so that a TDX VM won't
+                                * try to encrypt the page in hv_synic_free().
+                                */
+                               free_page((unsigned long)hv_cpu->synic_event_page);
+                               hv_cpu->synic_event_page = NULL;
+                               goto err;
+                       }
+
+                       ret = set_memory_decrypted(
+                               (unsigned long)hv_cpu->synic_event_page, 1);
+                       if (ret) {
+                               pr_err("Failed to decrypt SYNIC event page\n");
+                               hv_cpu->synic_event_page = NULL;
+                               goto err;
+                       }
+
+                       memset(hv_cpu->synic_message_page, 0, PAGE_SIZE);
+                       memset(hv_cpu->synic_event_page, 0, PAGE_SIZE);
+               }
        }

        return 0;
@@ -167,18 +202,40 @@ int hv_synic_alloc(void)
         * Any memory allocations that succeeded will be freed when
         * the caller cleans up by calling hv_synic_free()
         */
-       return -ENOMEM;
+       return ret;
 }


 void hv_synic_free(void)
 {
        int cpu;
+       int ret;

        for_each_present_cpu(cpu) {
                struct hv_per_cpu_context *hv_cpu
                        = per_cpu_ptr(hv_context.cpu_context, cpu);

+               /* It's better to leak the page if the encryption fails. */
+               if (hv_isolation_type_tdx()) {
+                       if (hv_cpu->synic_message_page) {
+                               ret = set_memory_encrypted((unsigned long)
+                                       hv_cpu->synic_message_page, 1);
+                               if (ret) {
+                                       pr_err("Failed to encrypt SYNIC msg page\n");
+                                       hv_cpu->synic_message_page = NULL;
+                               }
+                       }
+
+                       if (hv_cpu->synic_event_page) {
+                               ret = set_memory_encrypted((unsigned long)
+                                       hv_cpu->synic_event_page, 1);
+                               if (ret) {
+                                       pr_err("Failed to encrypt SYNIC event page\n");
+                                       hv_cpu->synic_event_page = NULL;
+                               }
+                       }
+               }
+
                free_page((unsigned long)hv_cpu->synic_event_page);
                free_page((unsigned long)hv_cpu->synic_message_page);
        }
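
To spell out the reasoning behind the "leak the page" comments: if
set_memory_encrypted() (or set_memory_decrypted()) fails, the page may
still be shared with the host, so returning it to the page allocator could
hand host-visible memory to some other kernel user. Leaking the page is the
safer option. (That's my understanding of the usual rule for confidential
VMs; please correct me if I'm missing something here.)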


I'll post a separate patch for an existing leak: currently, if
get_zeroed_page() fails in hv_synic_alloc(), the err_alloc path in
vmbus_bus_init() skips hv_synic_free(), so hv_context.hv_numa_map is leaked:


--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1515,27 +1515,27 @@ static int vmbus_bus_init(void)
        }

        ret = hv_synic_alloc();
        if (ret)
                goto err_alloc;

        /*
         * Initialize the per-cpu interrupt state and stimer state.
         * Then connect to the host.
         */
        ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
                                hv_synic_init, hv_synic_cleanup);
        if (ret < 0)
-               goto err_cpuhp;
+               goto err_alloc;
        hyperv_cpuhp_online = ret;

        ret = vmbus_connect();
        if (ret)
                goto err_connect;

        if (hv_is_isolation_supported())
                sysctl_record_panic_msg = 0;

        /*
         * Only register if the crash MSRs are available
         */
        if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
@@ -1567,29 +1567,28 @@ static int vmbus_bus_init(void)
        /*
         * Always register the vmbus unload panic notifier because we
         * need to shut the VMbus channel connection on panic.
         */
        atomic_notifier_chain_register(&panic_notifier_list,
                               &hyperv_panic_vmbus_unload_block);

        vmbus_request_offers();

        return 0;

 err_connect:
        cpuhp_remove_state(hyperv_cpuhp_online);
-err_cpuhp:
-       hv_synic_free();
 err_alloc:
+       hv_synic_free();
        if (vmbus_irq == -1) {
                hv_remove_vmbus_handler();
        } else {
                free_percpu_irq(vmbus_irq, vmbus_evt);
                free_percpu(vmbus_evt);
        }
 err_setup:
        bus_unregister(&hv_bus);
        unregister_sysctl_table(hv_ctl_table_hdr);
        hv_ctl_table_hdr = NULL;
        return ret;
 }
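
Calling hv_synic_free() from err_alloc should be safe even when
hv_synic_alloc() fails early: free_page() on a zero address and kfree(NULL)
are no-ops, and IIRC hv_synic_free() ends by kfree()'ing
hv_context.hv_numa_map, which is exactly the allocation that's currently
leaked.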




