Wei Liu <wei.liu@xxxxxxxxxx> writes: > They are used to deposit pages into Microsoft Hypervisor and bring up > logical and virtual processors. > > Signed-off-by: Lillian Grassin-Drake <ligrassi@xxxxxxxxxxxxx> > Signed-off-by: Sunil Muthuswamy <sunilmut@xxxxxxxxxxxxx> > Signed-off-by: Nuno Das Neves <nunodasneves@xxxxxxxxxxxxxxxxxxx> > Co-Developed-by: Lillian Grassin-Drake <ligrassi@xxxxxxxxxxxxx> > Co-Developed-by: Sunil Muthuswamy <sunilmut@xxxxxxxxxxxxx> > Co-Developed-by: Nuno Das Neves <nunodasneves@xxxxxxxxxxxxxxxxxxx> > Signed-off-by: Wei Liu <wei.liu@xxxxxxxxxx> > --- > v2: > 1. Adapt to hypervisor side changes > 2. Address Vitaly's comments > --- > arch/x86/hyperv/Makefile | 2 +- > arch/x86/hyperv/hv_proc.c | 217 ++++++++++++++++++++++++++++++ > arch/x86/include/asm/mshyperv.h | 4 + > include/asm-generic/hyperv-tlfs.h | 67 +++++++++ > 4 files changed, 289 insertions(+), 1 deletion(-) > create mode 100644 arch/x86/hyperv/hv_proc.c > > diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile > index 89b1f74d3225..565358020921 100644 > --- a/arch/x86/hyperv/Makefile > +++ b/arch/x86/hyperv/Makefile > @@ -1,6 +1,6 @@ > # SPDX-License-Identifier: GPL-2.0-only > obj-y := hv_init.o mmu.o nested.o > -obj-$(CONFIG_X86_64) += hv_apic.o > +obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o > > ifdef CONFIG_X86_64 > obj-$(CONFIG_PARAVIRT_SPINLOCKS) += hv_spinlock.o > diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c > new file mode 100644 > index 000000000000..0fd972c9129a > --- /dev/null > +++ b/arch/x86/hyperv/hv_proc.c > @@ -0,0 +1,217 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#include <linux/types.h> > +#include <linux/version.h> > +#include <linux/vmalloc.h> > +#include <linux/mm.h> > +#include <linux/clockchips.h> > +#include <linux/acpi.h> > +#include <linux/hyperv.h> > +#include <linux/slab.h> > +#include <linux/cpuhotplug.h> > +#include <linux/minmax.h> > +#include <asm/hypervisor.h> > +#include <asm/mshyperv.h> > +#include <asm/apic.h> > + > +#include <asm/trace/hyperv.h> > + > +#define HV_DEPOSIT_MAX_ORDER (8) > +#define HV_DEPOSIT_MAX (1 << HV_DEPOSIT_MAX_ORDER) > + > +/* > + * Deposits exact number of pages > + * Must be called with interrupts enabled > + * Max 256 pages > + */ > +int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) > +{ > + struct page **pages; > + int *counts; > + int num_allocations; > + int i, j, page_count; > + int order; > + int desired_order; > + u16 status; > + int ret; > + u64 base_pfn; > + struct hv_deposit_memory *input_page; > + unsigned long flags; > + > + if (num_pages > HV_DEPOSIT_MAX) > + return -E2BIG; > + if (!num_pages) > + return 0; > + > + /* One buffer for page pointers and counts */ > + pages = page_address(alloc_page(GFP_KERNEL)); > + if (!pages) > + return -ENOMEM; > + > + counts = kcalloc(HV_DEPOSIT_MAX, sizeof(int), GFP_KERNEL); > + if (!counts) { > + free_page((unsigned long)pages); > + return -ENOMEM; > + } > + > + /* Allocate all the pages before disabling interrupts */ > + num_allocations = 0; > + i = 0; > + order = HV_DEPOSIT_MAX_ORDER; > + > + while (num_pages) { > + /* Find highest order we can actually allocate */ > + desired_order = 31 - __builtin_clz(num_pages); > + order = min(desired_order, order); > + do { > + pages[i] = alloc_pages_node(node, GFP_KERNEL, order); > + if (!pages[i]) { > + if (!order) { > + ret = -ENOMEM; > + goto err_free_allocations; > + } > + --order; > + } > + } while (!pages[i]); > + > + split_page(pages[i], order); > + counts[i] = 1 << order; > + num_pages -= counts[i]; > + i++; > + num_allocations++; > + } > + > + local_irq_save(flags); > + > + input_page = *this_cpu_ptr(hyperv_pcpu_input_arg); > + > + input_page->partition_id = partition_id; > + > + /* Populate gpa_page_list - these will fit on the input page */ > + for (i = 0, page_count = 0; i < num_allocations; ++i) { > + base_pfn = page_to_pfn(pages[i]); > + for (j = 0; j < counts[i]; ++j, ++page_count) > + input_page->gpa_page_list[page_count] = base_pfn + j; > + } > + status = hv_do_rep_hypercall(HVCALL_DEPOSIT_MEMORY, > + page_count, 0, input_page, > + NULL) & HV_HYPERCALL_RESULT_MASK; > + local_irq_restore(flags); > + > + if (status != HV_STATUS_SUCCESS) { > + pr_err("Failed to deposit pages: %d\n", status); > + ret = status; > + goto err_free_allocations; > + } > + > + ret = 0; > + goto free_buf; > + > +err_free_allocations: > + for (i = 0; i < num_allocations; ++i) { > + base_pfn = page_to_pfn(pages[i]); > + for (j = 0; j < counts[i]; ++j) > + __free_page(pfn_to_page(base_pfn + j)); > + } > + > +free_buf: > + free_page((unsigned long)pages); > + kfree(counts); > + return ret; > +} > +EXPORT_SYMBOL_GPL(hv_call_deposit_pages); > + > +int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) > +{ > + struct hv_add_logical_processor_in *input; > + struct hv_add_logical_processor_out *output; > + int status; > + unsigned long flags; > + int ret = 0; > + > + /* > + * When adding a logical processor, the hypervisor may return > + * HV_STATUS_INSUFFICIENT_MEMORY. When that happens, we deposit more > + * pages and retry. > + */ > + do { > + local_irq_save(flags); > + > + input = *this_cpu_ptr(hyperv_pcpu_input_arg); > + /* We don't do anything with the output right now */ > + output = *this_cpu_ptr(hyperv_pcpu_output_arg); > + > + input->lp_index = lp_index; > + input->apic_id = apic_id; > + input->flags = 0; > + input->proximity_domain_info.domain_id = node_to_pxm(node); > + input->proximity_domain_info.flags.reserved = 0; > + input->proximity_domain_info.flags.proximity_info_valid = 1; > + input->proximity_domain_info.flags.proximity_preferred = 1; > + status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR, > + input, output); > + local_irq_restore(flags); > + > + if (status != HV_STATUS_INSUFFICIENT_MEMORY) { > + if (status != HV_STATUS_SUCCESS) { > + pr_err("%s: cpu %u apic ID %u, %d\n", __func__, > + lp_index, apic_id, status); > + ret = status; > + } > + break; > + } > + ret = hv_call_deposit_pages(node, hv_current_partition_id, 1); > + } while (!ret); > + > + return ret; > +} > + > +int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) > +{ > + struct hv_create_vp *input; > + u16 status; > + unsigned long irq_flags; > + int ret = 0; > + > + /* Root VPs don't seem to need pages deposited */ > + if (partition_id != hv_current_partition_id) { > + ret = hv_call_deposit_pages(node, partition_id, 90); > + if (ret) > + return ret; > + } > + > + do { > + local_irq_save(irq_flags); > + > + input = *this_cpu_ptr(hyperv_pcpu_input_arg); > + > + input->partition_id = partition_id; > + input->vp_index = vp_index; > + input->flags = flags; > + input->subnode_type = HvSubnodeAny; > + if (node != NUMA_NO_NODE) { > + input->proximity_domain_info.domain_id = node_to_pxm(node); > + input->proximity_domain_info.flags.reserved = 0; > + input->proximity_domain_info.flags.proximity_info_valid = 1; > + input->proximity_domain_info.flags.proximity_preferred = 1; > + } else { > + input->proximity_domain_info.as_uint64 = 0; > + } > + status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); > + local_irq_restore(irq_flags); > + > + if (status != HV_STATUS_INSUFFICIENT_MEMORY) { > + if (status != HV_STATUS_SUCCESS) { > + pr_err("%s: vcpu %u, lp %u, %d\n", __func__, > + vp_index, flags, status); > + ret = status; > + } > + break; > + } > + ret = hv_call_deposit_pages(node, partition_id, 1); > + > + } while (!ret); > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(hv_call_create_vp); > + > diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h > index 67f5d35a73d3..4e590a167160 100644 > --- a/arch/x86/include/asm/mshyperv.h > +++ b/arch/x86/include/asm/mshyperv.h > @@ -80,6 +80,10 @@ extern void __percpu **hyperv_pcpu_output_arg; > > extern u64 hv_current_partition_id; > > +int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); > +int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); > +int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); You seem to be only doing EXPORT_SYMBOL_GPL() for hv_call_deposit_pages() and hv_call_create_vp() but not for hv_call_add_logical_proc() - is this intended? Also, I don't see hv_call_create_vp()/hv_call_add_logical_proc() usage outside of arch/x86/kernel/cpu/mshyperv.c so maybe we don't need to export them at all? > + > static inline u64 hv_do_hypercall(u64 control, void *input, void *output) > { > u64 input_address = input ? virt_to_phys(input) : 0; > diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h > index 87b1a79b19eb..b6c74e1a5524 100644 > --- a/include/asm-generic/hyperv-tlfs.h > +++ b/include/asm-generic/hyperv-tlfs.h > @@ -142,6 +142,8 @@ struct ms_hyperv_tsc_page { > #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 > #define HVCALL_SEND_IPI_EX 0x0015 > #define HVCALL_GET_PARTITION_ID 0x0046 > +#define HVCALL_DEPOSIT_MEMORY 0x0048 > +#define HVCALL_CREATE_VP 0x004e > #define HVCALL_GET_VP_REGISTERS 0x0050 > #define HVCALL_SET_VP_REGISTERS 0x0051 > #define HVCALL_POST_MESSAGE 0x005c > @@ -149,6 +151,7 @@ struct ms_hyperv_tsc_page { > #define HVCALL_POST_DEBUG_DATA 0x0069 > #define HVCALL_RETRIEVE_DEBUG_DATA 0x006a > #define HVCALL_RESET_DEBUG_SESSION 0x006b > +#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076 > #define HVCALL_RETARGET_INTERRUPT 0x007e > #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af > #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 > @@ -413,6 +416,70 @@ struct hv_get_partition_id { > u64 partition_id; > } __packed; > > +/* HvDepositMemory hypercall */ > +struct hv_deposit_memory { > + u64 partition_id; > + u64 gpa_page_list[]; > +} __packed; > + > +struct hv_proximity_domain_flags { > + u32 proximity_preferred : 1; > + u32 reserved : 30; > + u32 proximity_info_valid : 1; > +}; > + > +/* Not a union in windows but useful for zeroing */ > +union hv_proximity_domain_info { > + struct { > + u32 domain_id; > + struct hv_proximity_domain_flags flags; > + }; > + u64 as_uint64; > +}; > + > +struct hv_lp_startup_status { > + u64 hv_status; > + u64 substatus1; > + u64 substatus2; > + u64 substatus3; > + u64 substatus4; > + u64 substatus5; > + u64 substatus6; > +}; > + > +/* HvAddLogicalProcessor hypercall */ > +struct hv_add_logical_processor_in { > + u32 lp_index; > + u32 apic_id; > + union hv_proximity_domain_info proximity_domain_info; > + u64 flags; > +}; > + > +struct hv_add_logical_processor_out { > + struct hv_lp_startup_status startup_status; > +}; > + > +enum HV_SUBNODE_TYPE > +{ > + HvSubnodeAny = 0, > + HvSubnodeSocket, > + HvSubnodeAmdNode, > + HvSubnodeL3, > + HvSubnodeCount, > + HvSubnodeInvalid = -1 > +}; > + > +/* HvCreateVp hypercall */ > +struct hv_create_vp { > + u64 partition_id; > + u32 vp_index; > + u8 padding[3]; > + u8 subnode_type; > + u64 subnode_id; > + union hv_proximity_domain_info proximity_domain_info; > + u64 flags; > +}; You add '__packed' to 'struct hv_deposit_memory' but not to 'struct hv_create_vp'/'struct hv_add_logical_processor_in', this looks inconsistent. > + > /* HvRetargetDeviceInterrupt hypercall */ > union hv_msi_entry { > u64 as_uint64; -- Vitaly