The SEV TIO spec defines a new TIO_GUEST_MESSAGE message to provide a secure communication channel between an SNP VM and the PSP. The defined messages provide a way to read TDI info and do secure MMIO/DMA setup. On top of this, GHCB defines an extension to return certificates/measurements/report and TDI run status to the VM. The TIO_GUEST_MESSAGE handler also checks whether a specific TDI is bound to the VM and, if it is not, exits KVM to allow userspace to bind it. Skip adjust_direct_map() in rmpupdate() for now as it fails on MMIO. Signed-off-by: Alexey Kardashevskiy <aik@xxxxxxx> --- arch/x86/include/asm/kvm-x86-ops.h | 2 + arch/x86/include/asm/kvm_host.h | 2 + arch/x86/include/asm/sev.h | 1 + arch/x86/include/uapi/asm/svm.h | 2 + arch/x86/kvm/svm/svm.h | 2 + include/linux/kvm_host.h | 2 + include/uapi/linux/kvm.h | 29 +++ arch/x86/kvm/svm/sev.c | 217 ++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 3 + arch/x86/kvm/x86.c | 12 ++ arch/x86/virt/svm/sev.c | 23 ++- virt/kvm/vfio.c | 139 +++++++++++++ arch/x86/kvm/Kconfig | 1 + 13 files changed, 431 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 68ad4f923664..80e8176a4ea0 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -139,6 +139,8 @@ KVM_X86_OP_OPTIONAL(alloc_apic_backing_page) KVM_X86_OP_OPTIONAL_RET0(gmem_prepare) KVM_X86_OP_OPTIONAL_RET0(private_max_mapping_level) KVM_X86_OP_OPTIONAL(gmem_invalidate) +KVM_X86_OP_OPTIONAL(tsm_bind) +KVM_X86_OP_OPTIONAL(tsm_unbind) #undef KVM_X86_OP #undef KVM_X86_OP_OPTIONAL diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4a68cb3eba78..80bdac4e47ac 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1830,6 +1830,8 @@ struct kvm_x86_ops { int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end); int (*private_max_mapping_level)(struct 
kvm *kvm, kvm_pfn_t pfn); + int (*tsm_bind)(struct kvm *kvm, struct device *dev, u32 guest_rid); + void (*tsm_unbind)(struct kvm *kvm, struct device *dev); }; struct kvm_x86_nested_ops { diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 80d9aa16fe61..8edd7bccabf2 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -464,6 +464,7 @@ int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level); void snp_dump_hva_rmpentry(unsigned long address); int psmash(u64 pfn); int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable); +int rmp_make_private_mmio(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable); int rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); void kdump_sev_callback(void); diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 1814b413fd57..ac90a69e6327 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -116,6 +116,7 @@ #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 #define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 +#define SVM_VMGEXIT_SEV_TIO_GUEST_REQUEST 0x80000020 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ @@ -237,6 +238,7 @@ { SVM_VMGEXIT_GUEST_REQUEST, "vmgexit_guest_request" }, \ { SVM_VMGEXIT_EXT_GUEST_REQUEST, "vmgexit_ext_guest_request" }, \ { SVM_VMGEXIT_AP_CREATION, "vmgexit_ap_creation" }, \ + { SVM_VMGEXIT_SEV_TIO_GUEST_REQUEST, "vmgexit_sev_tio_guest_request" }, \ { SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \ { SVM_EXIT_ERR, "invalid_guest_state" } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 76107c7d0595..d04d583c1741 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -749,6 +749,8 @@ void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu); int sev_gmem_prepare(struct 
kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn); +int sev_tsm_bind(struct kvm *kvm, struct device *dev, u32 guest_rid); +void sev_tsm_unbind(struct kvm *kvm, struct device *dev); #else static inline struct page *snp_safe_alloc_page_node(int node, gfp_t gfp) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d004d96c2ace..fdb331b3e0d3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2497,5 +2497,7 @@ void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, struct kvm_pre_fault_memory *range); #endif +int kvm_arch_tsm_bind(struct kvm *kvm, struct device *dev, u32 guest_rid); +void kvm_arch_tsm_unbind(struct kvm *kvm, struct device *dev); #endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 637efc055145..37f76bbdfa9b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -135,6 +135,17 @@ struct kvm_xen_exit { } u; }; +struct kvm_user_vmgexit { +#define KVM_USER_VMGEXIT_TIO_REQ 4 + __u32 type; /* KVM_USER_VMGEXIT_* type */ + union { + struct { + __u32 guest_rid; + __u32 ret; + } tio_req; + }; +} __packed; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -178,6 +189,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_VMGEXIT 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -446,6 +458,7 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + struct kvm_user_vmgexit vmgexit; /* Fix the size of the union. 
*/ char padding[256]; }; @@ -1166,6 +1179,22 @@ struct kvm_vfio_spapr_tce { __s32 tablefd; }; +#define KVM_DEV_VFIO_DEVICE 2 +#define KVM_DEV_VFIO_DEVICE_TDI_BIND 1 +#define KVM_DEV_VFIO_DEVICE_TDI_UNBIND 2 + +/* + * struct kvm_vfio_tsm_bind + * + * @guest_rid: Hypervisor provided identifier used by the guest to identify + * the TDI in guest messages + * @devfd: a fd of VFIO device + */ +struct kvm_vfio_tsm_bind { + __u32 guest_rid; + __s32 devfd; +} __packed; + /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 9badf4fa7e1d..e36b93b9cc2b 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -20,6 +20,8 @@ #include <linux/processor.h> #include <linux/trace_events.h> #include <uapi/linux/sev-guest.h> +#include <linux/tsm.h> +#include <linux/pci.h> #include <asm/pkru.h> #include <asm/trapnr.h> @@ -3413,6 +3415,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) control->exit_info_1 == control->exit_info_2) goto vmgexit_err; break; + case SVM_VMGEXIT_SEV_TIO_GUEST_REQUEST: + break; default: reason = GHCB_ERR_INVALID_EVENT; goto vmgexit_err; @@ -4128,6 +4132,182 @@ static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t r return 1; /* resume guest */ } +static int tio_make_mmio_private(struct vcpu_svm *svm, struct pci_dev *pdev, + phys_addr_t mmio_gpa, phys_addr_t mmio_size, + unsigned int rangeid) +{ + int ret = 0; + + if (!mmio_gpa || !mmio_size || mmio_size != pci_resource_len(pdev, rangeid)) { + pci_err(pdev, "Invalid MMIO #%d gpa=%llx..%llx\n", + rangeid, mmio_gpa, mmio_gpa + mmio_size); + return SEV_RET_INVALID_PARAM; + } + + /* Could as well exit to the userspace and ioctl(KVM_MEMORY_ATTRIBUTE_PRIVATE) */ + ret = kvm_vm_set_mem_attributes(svm->vcpu.kvm, mmio_gpa >> PAGE_SHIFT, + (mmio_gpa + mmio_size) >> PAGE_SHIFT, + KVM_MEMORY_ATTRIBUTE_PRIVATE); + if (ret) + pci_err(pdev, "Failed to mark MMIO #%d gpa=%llx..%llx as 
private, ret=%d\n", + rangeid, mmio_gpa, mmio_gpa + mmio_size, ret); + else + pci_notice(pdev, "Marked MMIO#%d gpa=%llx..%llx as private\n", + rangeid, mmio_gpa, mmio_gpa + mmio_size); + + for (phys_addr_t off = 0; off < mmio_size; off += PAGE_SIZE) { + ret = rmp_make_private_mmio((pci_resource_start(pdev, rangeid) + off) >> PAGE_SHIFT, + (mmio_gpa + off), PG_LEVEL_4K, svm->asid, + false/*Immutable*/); + if (ret) + pci_err(pdev, "Failed to map TIO #%d %pR +%llx %llx -> gpa=%llx ret=%d\n", + rangeid, pci_resource_n(pdev, rangeid), off, mmio_size, + mmio_gpa + off, ret); + } + + return SEV_RET_SUCCESS; +} + +static int snp_complete_sev_tio_guest_request(struct kvm_vcpu *vcpu, struct tsm_tdi *tdi) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_control_area *control = &svm->vmcb->control; + struct kvm *kvm = vcpu->kvm; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + enum tsm_tdisp_state state = TDISP_STATE_UNAVAIL; + unsigned long exitcode = 0, data_npages; + struct tio_guest_request tioreq = { 0 }; + struct snp_guest_msg_hdr *req_hdr; + gpa_t req_gpa, resp_gpa; + struct fd sevfd; + u64 data_gpa; + int ret; + + if (!sev_snp_guest(kvm)) + return -EINVAL; + + mutex_lock(&sev->guest_req_mutex); + + req_gpa = control->exit_info_1; + resp_gpa = control->exit_info_2; + + ret = kvm_read_guest(kvm, req_gpa, sev->guest_req_buf, PAGE_SIZE); + if (ret) + goto out_unlock; + + tioreq.data.gctx_paddr = __psp_pa(sev->snp_context); + tioreq.data.req_paddr = __psp_pa(sev->guest_req_buf); + tioreq.data.res_paddr = __psp_pa(sev->guest_resp_buf); + + sevfd = fdget(sev->fd); + if (!sevfd.file) + goto out_unlock; + + req_hdr = sev->guest_req_buf; + if (req_hdr->msg_type == TIO_MSG_MMIO_VALIDATE_REQ) { + const u64 raw_gpa = vcpu->arch.regs[VCPU_REGS_RDX]; + + ret = tio_make_mmio_private(svm, tdi->pdev, + MMIO_VALIDATE_GPA(raw_gpa), + MMIO_VALIDATE_LEN(raw_gpa), + MMIO_VALIDATE_RANGEID(raw_gpa)); + if (ret != SEV_RET_SUCCESS) + goto put_unlock; + } + + ret = 
tsm_guest_request(tdi, + (req_hdr->msg_type == TIO_MSG_TDI_INFO_REQ) ? &state : NULL, + &tioreq); + if (ret) + goto put_unlock; + + struct tio_blob_table_entry t[4] = { + { .guid = TIO_GUID_MEASUREMENTS, + .offset = sizeof(t), + .length = tdi->tdev->meas ? tdi->tdev->meas->len : 0 }, + { .guid = TIO_GUID_CERTIFICATES, + .offset = sizeof(t) + t[0].length, + .length = tdi->tdev->certs ? tdi->tdev->certs->len : 0 }, + { .guid = TIO_GUID_REPORT, + .offset = sizeof(t) + t[0].length + t[1].length, + .length = tdi->report ? tdi->report->len : 0 }, + { .guid.b = { 0 } } + }; + void *tp[4] = { + tdi->tdev->meas ? tdi->tdev->meas->data : NULL, + tdi->tdev->certs ? tdi->tdev->certs->data : NULL, + tdi->report ? tdi->report->data : NULL + }; + + data_gpa = vcpu->arch.regs[VCPU_REGS_RAX]; + data_npages = vcpu->arch.regs[VCPU_REGS_RBX]; + vcpu->arch.regs[VCPU_REGS_RBX] = PAGE_ALIGN(t[0].length + t[1].length + + t[2].length + sizeof(t)) >> PAGE_SHIFT; + if (data_gpa && ((data_npages << PAGE_SHIFT) >= vcpu->arch.regs[VCPU_REGS_RBX])) { + if (kvm_write_guest(kvm, data_gpa + 0, &t, sizeof(t)) || + kvm_write_guest(kvm, data_gpa + t[0].offset, tp[0], t[0].length) || + kvm_write_guest(kvm, data_gpa + t[1].offset, tp[1], t[1].length) || + kvm_write_guest(kvm, data_gpa + t[2].offset, tp[2], t[2].length)) + exitcode = SEV_RET_INVALID_ADDRESS; + } + + if (req_hdr->msg_type == TIO_MSG_TDI_INFO_REQ) + vcpu->arch.regs[VCPU_REGS_RDX] = state; + + ret = kvm_write_guest(kvm, resp_gpa, sev->guest_resp_buf, PAGE_SIZE); + if (ret) + goto put_unlock; + + ret = 1; /* Resume guest */ + + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, SNP_GUEST_ERR(0, tioreq.fw_err)); + +put_unlock: + fdput(sevfd); +out_unlock: + mutex_unlock(&sev->guest_req_mutex); + + return ret; +} + +static int snp_try_complete_sev_tio_guest_request(struct kvm_vcpu *vcpu) +{ + struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; + u32 guest_rid = vcpu->arch.regs[VCPU_REGS_RCX]; + struct tsm_tdi *tdi = tsm_tdi_find(guest_rid, 
(u64) __psp_pa(sev->snp_context)); + + if (!tdi) { + pr_err("TDI is not bound to %x:%02x.%d\n", + PCI_BUS_NUM(guest_rid), PCI_SLOT(guest_rid), PCI_FUNC(guest_rid)); + return 1; /* Resume guest */ + } + + return snp_complete_sev_tio_guest_request(vcpu, tdi); +} + +static int snp_sev_tio_guest_request(struct kvm_vcpu *vcpu) +{ + u32 guest_rid = vcpu->arch.regs[VCPU_REGS_RCX]; + struct kvm *kvm = vcpu->kvm; + struct kvm_sev_info *sev; + struct tsm_tdi *tdi; + + if (!sev_snp_guest(kvm)) + return SEV_RET_INVALID_GUEST; + + sev = &to_kvm_svm(kvm)->sev_info; + tdi = tsm_tdi_find(guest_rid, (u64) __psp_pa(sev->snp_context)); + if (!tdi) { + vcpu->run->exit_reason = KVM_EXIT_VMGEXIT; + vcpu->run->vmgexit.type = KVM_USER_VMGEXIT_TIO_REQ; + vcpu->run->vmgexit.tio_req.guest_rid = guest_rid; + vcpu->arch.complete_userspace_io = snp_try_complete_sev_tio_guest_request; + return 0; /* Exit KVM */ + } + + return snp_complete_sev_tio_guest_request(vcpu, tdi); +} + static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -4408,6 +4588,9 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) case SVM_VMGEXIT_EXT_GUEST_REQUEST: ret = snp_handle_ext_guest_req(svm, control->exit_info_1, control->exit_info_2); break; + case SVM_VMGEXIT_SEV_TIO_GUEST_REQUEST: + ret = snp_sev_tio_guest_request(vcpu); + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", @@ -5000,3 +5183,37 @@ int sev_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn) return level; } + +int sev_tsm_bind(struct kvm *kvm, struct device *dev, u32 guest_rid) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct tsm_tdi *tdi = tsm_tdi_get(dev); + struct fd sevfd; + int ret; + + if (!tdi) + return -ENODEV; + + sevfd = fdget(sev->fd); + if (!sevfd.file) + return -EPERM; + + dev_info(dev, "Binding guest=%x:%02x.%d\n", + PCI_BUS_NUM(guest_rid), PCI_SLOT(guest_rid), 
PCI_FUNC(guest_rid)); + ret = tsm_tdi_bind(tdi, guest_rid, (u64) __psp_pa(sev->snp_context), sev->asid); + fdput(sevfd); + + return ret; +} + +void sev_tsm_unbind(struct kvm *kvm, struct device *dev) +{ + struct tsm_tdi *tdi = tsm_tdi_get(dev); + + if (!tdi) + return; + + dev_notice(dev, "Unbinding guest=%x:%02x.%d\n", + PCI_BUS_NUM(tdi->guest_rid), PCI_SLOT(tdi->guest_rid), PCI_FUNC(tdi->guest_rid)); + tsm_tdi_unbind(tdi); +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d6f252555ab3..ab6e41eed697 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5093,6 +5093,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .vm_copy_enc_context_from = sev_vm_copy_enc_context_from, .vm_move_enc_context_from = sev_vm_move_enc_context_from, + + .tsm_bind = sev_tsm_bind, + .tsm_unbind = sev_tsm_unbind, #endif .check_emulate_instruction = svm_check_emulate_instruction, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 70219e406987..97261cffa9ad 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -14055,3 +14055,15 @@ static void __exit kvm_x86_exit(void) WARN_ON_ONCE(static_branch_unlikely(&kvm_has_noapic_vcpu)); } module_exit(kvm_x86_exit); + +int kvm_arch_tsm_bind(struct kvm *kvm, struct device *dev, u32 guest_rid) +{ + return static_call(kvm_x86_tsm_bind)(kvm, dev, guest_rid); +} +EXPORT_SYMBOL_GPL(kvm_arch_tsm_bind); + +void kvm_arch_tsm_unbind(struct kvm *kvm, struct device *dev) +{ + static_call(kvm_x86_tsm_unbind)(kvm, dev); +} +EXPORT_SYMBOL_GPL(kvm_arch_tsm_unbind); diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index 44e7609c9bd6..91f5729dfcad 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -945,7 +945,7 @@ static int adjust_direct_map(u64 pfn, int rmp_level) * The optimal solution would be range locking to avoid locking disjoint * regions unnecessarily but there's no support for that yet. 
*/ -static int rmpupdate(u64 pfn, struct rmp_state *state) +static int rmpupdate(u64 pfn, struct rmp_state *state, bool mmio) { unsigned long paddr = pfn << PAGE_SHIFT; int ret, level; @@ -955,7 +955,7 @@ static int rmpupdate(u64 pfn, struct rmp_state *state) level = RMP_TO_PG_LEVEL(state->pagesize); - if (adjust_direct_map(pfn, level)) + if (!mmio && adjust_direct_map(pfn, level)) return -EFAULT; do { @@ -989,10 +989,25 @@ int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immut state.gpa = gpa; state.pagesize = PG_LEVEL_TO_RMP(level); - return rmpupdate(pfn, &state); + return rmpupdate(pfn, &state, false); } EXPORT_SYMBOL_GPL(rmp_make_private); +int rmp_make_private_mmio(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable) +{ + struct rmp_state state; + + memset(&state, 0, sizeof(state)); + state.assigned = 1; + state.asid = asid; + state.immutable = immutable; + state.gpa = gpa; + state.pagesize = PG_LEVEL_TO_RMP(level); + + return rmpupdate(pfn, &state, true); +} +EXPORT_SYMBOL_GPL(rmp_make_private_mmio); + /* Transition a page to hypervisor-owned/shared state in the RMP table. 
*/ int rmp_make_shared(u64 pfn, enum pg_level level) { @@ -1001,7 +1016,7 @@ int rmp_make_shared(u64 pfn, enum pg_level level) memset(&state, 0, sizeof(state)); state.pagesize = PG_LEVEL_TO_RMP(level); - return rmpupdate(pfn, &state); + return rmpupdate(pfn, &state, false); } EXPORT_SYMBOL_GPL(rmp_make_shared); diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 76b7f6085dcd..a4e9db212adc 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/vfio.h> +#include <linux/tsm.h> #include "vfio.h" #ifdef CONFIG_SPAPR_TCE_IOMMU @@ -29,8 +30,14 @@ struct kvm_vfio_file { #endif }; +struct kvm_vfio_tdi { + struct list_head node; + struct vfio_device *vdev; +}; + struct kvm_vfio { struct list_head file_list; + struct list_head tdi_list; struct mutex lock; bool noncoherent; }; @@ -80,6 +87,22 @@ static bool kvm_vfio_file_is_valid(struct file *file) return ret; } +static struct vfio_device *kvm_vfio_file_device(struct file *file) +{ + struct vfio_device *(*fn)(struct file *file); + struct vfio_device *ret; + + fn = symbol_get(vfio_file_device); + if (!fn) + return NULL; + + ret = fn(file); + + symbol_put(vfio_file_device); + + return ret; +} + #ifdef CONFIG_SPAPR_TCE_IOMMU static struct iommu_group *kvm_vfio_file_iommu_group(struct file *file) { @@ -297,6 +320,103 @@ static int kvm_vfio_set_file(struct kvm_device *dev, long attr, return -ENXIO; } +static int kvm_dev_tsm_bind(struct kvm_device *dev, void __user *arg) +{ + struct kvm_vfio *kv = dev->private; + struct kvm_vfio_tsm_bind tb; + struct kvm_vfio_tdi *ktdi; + struct vfio_device *vdev; + struct fd fdev; + int ret; + + if (copy_from_user(&tb, arg, sizeof(tb))) + return -EFAULT; + + ktdi = kzalloc(sizeof(*ktdi), GFP_KERNEL_ACCOUNT); + if (!ktdi) + return -ENOMEM; + + fdev = fdget(tb.devfd); + if (!fdev.file) + return -EBADF; + + ret = -ENOENT; + + mutex_lock(&kv->lock); + + vdev = kvm_vfio_file_device(fdev.file); + if (vdev) { + ret = 
kvm_arch_tsm_bind(dev->kvm, vdev->dev, tb.guest_rid); + if (!ret) { + ktdi->vdev = vdev; + list_add_tail(&ktdi->node, &kv->tdi_list); + } else { + vfio_put_device(vdev); + } + } + + fdput(fdev); + mutex_unlock(&kv->lock); + if (ret) + kfree(ktdi); + + return ret; +} + +static int kvm_dev_tsm_unbind(struct kvm_device *dev, void __user *arg) +{ + struct kvm_vfio *kv = dev->private; + struct kvm_vfio_tsm_bind tb; + struct kvm_vfio_tdi *ktdi; + struct vfio_device *vdev; + struct fd fdev; + int ret; + + if (copy_from_user(&tb, arg, sizeof(tb))) + return -EFAULT; + + fdev = fdget(tb.devfd); + if (!fdev.file) + return -EBADF; + + ret = -ENOENT; + + mutex_lock(&kv->lock); + + vdev = kvm_vfio_file_device(fdev.file); + if (vdev) { + list_for_each_entry(ktdi, &kv->tdi_list, node) { + if (ktdi->vdev != vdev) + continue; + + kvm_arch_tsm_unbind(dev->kvm, vdev->dev); + list_del(&ktdi->node); + kfree(ktdi); + vfio_put_device(vdev); + ret = 0; + break; + } + vfio_put_device(vdev); + } + + fdput(fdev); + mutex_unlock(&kv->lock); + return ret; +} + +static int kvm_vfio_set_device(struct kvm_device *dev, long attr, + void __user *arg) +{ + switch (attr) { + case KVM_DEV_VFIO_DEVICE_TDI_BIND: + return kvm_dev_tsm_bind(dev, arg); + case KVM_DEV_VFIO_DEVICE_TDI_UNBIND: + return kvm_dev_tsm_unbind(dev, arg); + } + + return -ENXIO; +} + static int kvm_vfio_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { @@ -304,6 +424,9 @@ static int kvm_vfio_set_attr(struct kvm_device *dev, case KVM_DEV_VFIO_FILE: return kvm_vfio_set_file(dev, attr->attr, u64_to_user_ptr(attr->addr)); + case KVM_DEV_VFIO_DEVICE: + return kvm_vfio_set_device(dev, attr->attr, + u64_to_user_ptr(attr->addr)); } return -ENXIO; @@ -323,6 +446,13 @@ static int kvm_vfio_has_attr(struct kvm_device *dev, return 0; } + break; + case KVM_DEV_VFIO_DEVICE: + switch (attr->attr) { + case KVM_DEV_VFIO_DEVICE_TDI_BIND: + case KVM_DEV_VFIO_DEVICE_TDI_UNBIND: + return 0; + } break; } @@ -332,8 +462,16 @@ static int 
kvm_vfio_has_attr(struct kvm_device *dev, static void kvm_vfio_release(struct kvm_device *dev) { struct kvm_vfio *kv = dev->private; + struct kvm_vfio_tdi *ktdi, *tmp2; struct kvm_vfio_file *kvf, *tmp; + list_for_each_entry_safe(ktdi, tmp2, &kv->tdi_list, node) { + kvm_arch_tsm_unbind(dev->kvm, ktdi->vdev->dev); + list_del(&ktdi->node); + vfio_put_device(ktdi->vdev); + kfree(ktdi); + } + list_for_each_entry_safe(kvf, tmp, &kv->file_list, node) { #ifdef CONFIG_SPAPR_TCE_IOMMU kvm_spapr_tce_release_vfio_group(dev->kvm, kvf); @@ -379,6 +517,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) INIT_LIST_HEAD(&kv->file_list); mutex_init(&kv->lock); + INIT_LIST_HEAD(&kv->tdi_list); dev->private = kv; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 472a1537b7a9..5e07a1fddb67 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -143,6 +143,7 @@ config KVM_AMD_SEV select KVM_GENERIC_PRIVATE_MEM select HAVE_KVM_ARCH_GMEM_PREPARE select HAVE_KVM_ARCH_GMEM_INVALIDATE + select KVM_VFIO help Provides support for launching Encrypted VMs (SEV) and Encrypted VMs with Encrypted State (SEV-ES) on AMD processors. -- 2.45.2