Hi Will,

On 8/25/20 10:39 AM, Will Deacon wrote:
> The generic page-table walker is pretty useless as it stands, because it
> doesn't understand enough to allocate anything. Teach it about stage-1
> page-tables, and hook up an API for allocating these for the hypervisor
> at EL2.
>
> Cc: Marc Zyngier <maz@xxxxxxxxxx>
> Cc: Quentin Perret <qperret@xxxxxxxxxx>
> Signed-off-by: Will Deacon <will@xxxxxxxxxx>
> ---
>  arch/arm64/include/asm/kvm_pgtable.h |  34 +++++++
>  arch/arm64/kvm/hyp/pgtable.c         | 131 +++++++++++++++++++++++++++
>  2 files changed, 165 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index 51ccbbb0efae..ec9f98527dcc 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -77,6 +77,40 @@ struct kvm_pgtable_walker {
>  	const enum kvm_pgtable_walk_flags	flags;
>  };
>
> +/**
> + * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
> + * @pgt:	Uninitialised page-table structure to initialise.
> + * @va_bits:	Maximum virtual address bits.
> + *
> + * Return: 0 on success, negative error code on failure.
> + */
> +int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits);
> +
> +/**
> + * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table.
> + * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
> + *
> + * The page-table is assumed to be unreachable by any hardware walkers prior
> + * to freeing and therefore no TLB invalidation is performed.
> + */
> +void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
> +
> +/**
> + * kvm_pgtable_hyp_map() - Install a mapping in a hypervisor stage-1 page-table.
> + * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
> + * @addr:	Virtual address at which to place the mapping.
> + * @size:	Size of the mapping.
> + * @phys:	Physical address of the memory to map.
> + * @prot:	Permissions and attributes for the mapping.
> + *
> + * If device attributes are not explicitly requested in @prot, then the
> + * mapping will be normal, cacheable.
> + *
> + * Return: 0 on success, negative error code on failure.

From my understanding of the code, when the caller replaces an existing leaf
entry or a table with a different one, KVM will print a warning instead of
doing break-before-make where that would be required. It might be worth
pointing out here that callers are expected not to do that, because it's not
immediately obvious.

> + */
> +int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
> +			enum kvm_pgtable_prot prot);
> +
>  /**
>   * kvm_pgtable_walk() - Walk a page-table.
>   * @pgt:	Page-table structure initialised by kvm_pgtable_*_init().
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 462001bbe028..d75166823ad9 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -24,8 +24,18 @@
>
>  #define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)
>
> +#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
> +#define KVM_PTE_LEAF_ATTR_LO_S1_AP	GENMASK(7, 6)
> +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO	3
> +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW	1
> +#define KVM_PTE_LEAF_ATTR_LO_S1_SH	GENMASK(9, 8)
> +#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS	3
> +#define KVM_PTE_LEAF_ATTR_LO_S1_AF	BIT(10)
> +
>  #define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 51)
>
> +#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)

I compared the macros to the Arm ARM attribute fields in stage 1 VMSAv8-64
block and page descriptors, and they match.
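As a purely illustrative aside (this is a standalone sketch, not kernel code
and not part of the patch), the field positions above can be combined to see
what a typical leaf attribute ends up looking like. The AttrIndx value below
is arbitrary, since it only indexes whatever MAIR_EL2 has been programmed
with:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Descriptor bit positions, mirroring the macros quoted above. */
#define S1_ATTRIDX(x)	((uint64_t)(x) << 2)	/* AttrIndx, bits [4:2] */
#define S1_AP_RW	((uint64_t)1 << 6)	/* AP, bits [7:6], 1 == read/write */
#define S1_SH_IS	((uint64_t)3 << 8)	/* SH, bits [9:8], 3 == inner shareable */
#define S1_AF		((uint64_t)1 << 10)	/* Access flag */
#define S1_XN		((uint64_t)1 << 54)	/* Execute-never */

int main(void)
{
	/* Normal, inner-shareable, read/write, non-executable mapping. */
	uint64_t attr = S1_ATTRIDX(0) | S1_AP_RW | S1_SH_IS | S1_AF | S1_XN;

	printf("leaf attr = 0x%016" PRIx64 "\n", attr);
	return 0;
}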
I looked at the algorithm below, and for what it's worth it looks alright to
me.

Thanks,
Alex

> +
>  struct kvm_pgtable_walk_data {
>  	struct kvm_pgtable		*pgt;
>  	struct kvm_pgtable_walker	*walker;
> @@ -288,3 +298,124 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
>
>  	return _kvm_pgtable_walk(&walk_data);
>  }
> +
> +struct hyp_map_data {
> +	u64		phys;
> +	kvm_pte_t	attr;
> +};
> +
> +static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
> +				 struct hyp_map_data *data)
> +{
> +	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
> +	u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
> +	kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
> +	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
> +	u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
> +					       KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;
> +
> +	if (!(prot & KVM_PGTABLE_PROT_R))
> +		return -EINVAL;
> +
> +	if (prot & KVM_PGTABLE_PROT_X) {
> +		if (prot & KVM_PGTABLE_PROT_W)
> +			return -EINVAL;
> +
> +		if (device)
> +			return -EINVAL;
> +	} else {
> +		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
> +	}
> +
> +	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
> +	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
> +	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
> +	data->attr = attr;
> +	return 0;
> +}
> +
> +static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
> +				    kvm_pte_t *ptep, struct hyp_map_data *data)
> +{
> +	u64 granule = kvm_granule_size(level), phys = data->phys;
> +
> +	if (!kvm_block_mapping_supported(addr, end, phys, level))
> +		return false;
> +
> +	WARN_ON(!kvm_set_valid_leaf_pte(ptep, phys, data->attr, level));
> +	data->phys += granule;
> +	return true;
> +}
> +
> +static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
> +			  enum kvm_pgtable_walk_flags flag, void * const arg)
> +{
> +	kvm_pte_t *childp;
> +
> +	if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
> +		return 0;
> +
> +	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
> +		return -EINVAL;
> +
> +	childp = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
> +	if (!childp)
> +		return -ENOMEM;
> +
> +	kvm_set_table_pte(ptep, childp);
> +	return 0;
> +}
> +
> +int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
> +			enum kvm_pgtable_prot prot)
> +{
> +	int ret;
> +	struct hyp_map_data map_data = {
> +		.phys	= ALIGN_DOWN(phys, PAGE_SIZE),
> +	};
> +	struct kvm_pgtable_walker walker = {
> +		.cb	= hyp_map_walker,
> +		.flags	= KVM_PGTABLE_WALK_LEAF,
> +		.arg	= &map_data,
> +	};
> +
> +	ret = hyp_map_set_prot_attr(prot, &map_data);
> +	if (ret)
> +		return ret;
> +
> +	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
> +	dsb(ishst);
> +	isb();
> +	return ret;
> +}
> +
> +int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits)
> +{
> +	pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
> +	if (!pgt->pgd)
> +		return -ENOMEM;
> +
> +	pgt->ia_bits		= va_bits;
> +	pgt->start_level	= kvm_start_level(va_bits);
> +	pgt->mmu		= NULL;
> +	return 0;
> +}
> +
> +static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
> +			   enum kvm_pgtable_walk_flags flag, void * const arg)
> +{
> +	free_page((unsigned long)kvm_pte_follow(*ptep));
> +	return 0;
> +}
> +
> +void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
> +{
> +	struct kvm_pgtable_walker walker = {
> +		.cb	= hyp_free_walker,
> +		.flags	= KVM_PGTABLE_WALK_TABLE_POST,
> +	};
> +
> +	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
> +	free_page((unsigned long)pgt->pgd);
> +	pgt->pgd = NULL;
> +}
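For completeness, here is a minimal, hypothetical sketch of how a caller might
drive the new API end to end. The helper name and all parameters are invented
for illustration; none of this is taken from the actual call sites in the
series:

#include <asm/kvm_pgtable.h>

/*
 * Hypothetical caller: build a hyp stage-1 table and install one normal,
 * cacheable, read/write, non-executable mapping. The caller picks
 * hyp_va_bits, va, pa and size.
 */
static int example_create_hyp_mapping(struct kvm_pgtable *pgt, u32 hyp_va_bits,
				       u64 va, u64 pa, u64 size)
{
	int ret;

	/* Allocates the pgd page and derives the start level from va_bits. */
	ret = kvm_pgtable_hyp_init(pgt, hyp_va_bits);
	if (ret)
		return ret;

	/* Normal, cacheable by default; XN is set because PROT_X is absent. */
	ret = kvm_pgtable_hyp_map(pgt, va, size, pa,
				  KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W);
	if (ret)
		kvm_pgtable_hyp_destroy(pgt);

	/*
	 * Note: calling kvm_pgtable_hyp_map() again for the same va with
	 * different attributes is the case discussed above; it would trip the
	 * WARN in hyp_map_walker_try_leaf() rather than go through
	 * break-before-make.
	 */
	return ret;
}

The destroy-on-failure is only there to keep the sketch self-contained; the
real callers may well handle partial failure differently.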