Reviewed-by: Andrew Walbran <qwandor@xxxxxxxxxx>

On Wed, 1 Dec 2021 at 17:04, 'Quentin Perret' via kernel-team <kernel-team@xxxxxxxxxxx> wrote:
> > From: Will Deacon <will@xxxxxxxxxx> > > By default, protected KVM isolates memory pages so that they are > accessible only to their owner: be it the host kernel, the hypervisor > at EL2 or (in future) the guest. Establishing shared-memory regions > between these components therefore involves a transition for each page > so that the owner can share memory with a borrower under a certain set > of permissions. > > Introduce a do_share() helper for safely sharing a memory region between > two components. Currently, only host-to-hyp sharing is implemented, but > the code is easily extended to handle other combinations and the > permission checks for each component are reusable. > > Signed-off-by: Will Deacon <will@xxxxxxxxxx> > Signed-off-by: Quentin Perret <qperret@xxxxxxxxxx> > --- > arch/arm64/kvm/hyp/nvhe/mem_protect.c | 237 ++++++++++++++++++++++++++ > 1 file changed, 237 insertions(+) > > diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c > index 757dfefe3aeb..74ca4043b08a 100644 > --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c > +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c > @@ -471,3 +471,240 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) > ret = host_stage2_idmap(addr); > BUG_ON(ret && ret != -EAGAIN); > } > + > +/* This corresponds to locking order */ > +enum pkvm_component_id { > + PKVM_ID_HOST, > + PKVM_ID_HYP, > +}; > + > +struct pkvm_mem_transition { > + u64 nr_pages; > + > + struct { > + enum pkvm_component_id id; > + /* Address in the initiator's address space */ > + u64 addr; > + > + union { > + struct { > + /* Address in the completer's address space */ > + u64 completer_addr; > + } host; > + }; > + } initiator; > + > + struct { > + enum pkvm_component_id id; > + } completer; > +}; > + > +struct pkvm_mem_share { > + const struct pkvm_mem_transition tx; > 
+ const enum kvm_pgtable_prot prot;

It would be helpful to add a comment documenting what this is used for (i.e. whether it is for the initiator or the completer), or even to rename it to something like completer_prot to make that clear.

> +}; > + > +struct check_walk_data { > + enum pkvm_page_state desired; > + enum pkvm_page_state (*get_page_state)(kvm_pte_t pte); > +}; > + > +static int __check_page_state_visitor(u64 addr, u64 end, u32 level, > + kvm_pte_t *ptep, > + enum kvm_pgtable_walk_flags flag, > + void * const arg) > +{ > + struct check_walk_data *d = arg; > + kvm_pte_t pte = *ptep; > + > + if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte))) > + return -EINVAL; > + > + return d->get_page_state(pte) == d->desired ? 0 : -EPERM; > +} > + > +static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, > + struct check_walk_data *data) > +{ > + struct kvm_pgtable_walker walker = { > + .cb = __check_page_state_visitor, > + .arg = data, > + .flags = KVM_PGTABLE_WALK_LEAF, > + }; > + > + return kvm_pgtable_walk(pgt, addr, size, &walker); > +} > + > +static enum pkvm_page_state host_get_page_state(kvm_pte_t pte) > +{ > + if (!kvm_pte_valid(pte) && pte) > + return PKVM_NOPAGE; > + > + return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); > +} > + > +static int __host_check_page_state_range(u64 addr, u64 size, > + enum pkvm_page_state state) > +{ > + struct check_walk_data d = { > + .desired = state, > + .get_page_state = host_get_page_state, > + }; > + > + hyp_assert_lock_held(&host_kvm.lock); > + return check_page_state_range(&host_kvm.pgt, addr, size, &d); > +} > + > +static int __host_set_page_state_range(u64 addr, u64 size, > + enum pkvm_page_state state) > +{ > + enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state); > + > + return host_stage2_idmap_locked(addr, size, prot); > +} > + > +static int host_request_owned_transition(u64 *completer_addr, > + const struct pkvm_mem_transition *tx) > +{ > + u64 size = 
tx->nr_pages * PAGE_SIZE; > + u64 addr = tx->initiator.addr; > + > + *completer_addr = tx->initiator.host.completer_addr; > + return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED); > +} > + > +static int host_initiate_share(u64 *completer_addr, > + const struct pkvm_mem_transition *tx) > +{ > + u64 size = tx->nr_pages * PAGE_SIZE; > + u64 addr = tx->initiator.addr; > + > + *completer_addr = tx->initiator.host.completer_addr; > + return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED); > +} > + > +static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte) > +{ > + if (!kvm_pte_valid(pte)) > + return PKVM_NOPAGE; > + > + return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); > +} > + > +static int __hyp_check_page_state_range(u64 addr, u64 size, > + enum pkvm_page_state state) > +{ > + struct check_walk_data d = { > + .desired = state, > + .get_page_state = hyp_get_page_state, > + }; > + > + hyp_assert_lock_held(&pkvm_pgd_lock); > + return check_page_state_range(&pkvm_pgtable, addr, size, &d); > +} > + > +static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) > +{ > + return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || > + tx->initiator.id != PKVM_ID_HOST); > +} > + > +static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx, > + enum kvm_pgtable_prot perms) > +{ > + u64 size = tx->nr_pages * PAGE_SIZE; > + > + if (perms != PAGE_HYP) > + return -EPERM; > + > + if (__hyp_ack_skip_pgtable_check(tx)) > + return 0; > + > + return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE); > +} > + > +static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx, > + enum kvm_pgtable_prot perms) > +{ > + void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE); > + enum kvm_pgtable_prot prot; > + > + prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED); > + return pkvm_create_mappings_locked(start, end, prot); > +} > + > +static int check_share(struct pkvm_mem_share *share) > +{ > + 
const struct pkvm_mem_transition *tx = &share->tx; > + u64 completer_addr; > + int ret; > + > + switch (tx->initiator.id) { > + case PKVM_ID_HOST: > + ret = host_request_owned_transition(&completer_addr, tx); > + break; > + default: > + ret = -EINVAL; > + } > + > + if (ret) > + return ret; > + > + switch (tx->completer.id) { > + case PKVM_ID_HYP: > + ret = hyp_ack_share(completer_addr, tx, share->prot); > + break; > + default: > + ret = -EINVAL; > + } > + > + return ret; > +} > + > +static int __do_share(struct pkvm_mem_share *share) > +{ > + const struct pkvm_mem_transition *tx = &share->tx; > + u64 completer_addr; > + int ret; > + > + switch (tx->initiator.id) { > + case PKVM_ID_HOST: > + ret = host_initiate_share(&completer_addr, tx); > + break; > + default: > + ret = -EINVAL; > + } > + > + if (ret) > + return ret; > + > + switch (tx->completer.id) { > + case PKVM_ID_HYP: > + ret = hyp_complete_share(completer_addr, tx, share->prot); > + break; > + default: > + ret = -EINVAL; > + } > + > + return ret; > +} > + > +/* > + * do_share(): > + * > + * The page owner grants access to another component with a given set > + * of permissions. > + * > + * Initiator: OWNED => SHARED_OWNED > + * Completer: NOPAGE => SHARED_BORROWED > + */ > +static int do_share(struct pkvm_mem_share *share) > +{ > + int ret; > + > + ret = check_share(share); > + if (ret) > + return ret; > + > + return WARN_ON(__do_share(share)); > +} > -- > 2.34.0.rc2.393.gf8c9666880-goog > > -- > To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe@xxxxxxxxxxx. > _______________________________________________ kvmarm mailing list kvmarm@xxxxxxxxxxxxxxxxxxxxx https://lists.cs.columbia.edu/mailman/listinfo/kvmarm