On Mon, Aug 05, 2024 at 11:32:40AM +0200, James Gowans wrote: > Make the file data usable to userspace by adding mmap. That's all that > QEMU needs for guest RAM, so that's all we bother implementing for now. > > When mmaping the file the VMA is marked as PFNMAP to indicate that there > are no struct pages for the memory in this VMA. remap_pfn_range() is > used to actually populate the page tables. All PTEs are pre-faulted into > the pgtables at mmap time so that the pgtables are usable when this > virtual address range is given to VFIO's MAP_DMA. Thanks for sending this out! I'm going through the series with the intention to see how it might fit within the existing guest_memfd work for pKVM/CoCo/Gunyah. It might've been mentioned in the MM alignment session -- you might be interested to join the guest_memfd bi-weekly call to see how we are overlapping [1]. [1]: https://lore.kernel.org/kvm/ae794891-fe69-411a-b82e-6963b594a62a@xxxxxxxxxx/T/ --- Was the decision to pre-fault everything made because it was convenient, or is it intentionally different from hugetlb? > > Signed-off-by: James Gowans <jgowans@xxxxxxxxxx> > --- > fs/guestmemfs/file.c | 43 +++++++++++++++++++++++++++++++++++++- > fs/guestmemfs/guestmemfs.c | 2 +- > fs/guestmemfs/guestmemfs.h | 3 +++ > 3 files changed, 46 insertions(+), 2 deletions(-) > > diff --git a/fs/guestmemfs/file.c b/fs/guestmemfs/file.c > index 618c93b12196..b1a52abcde65 100644 > --- a/fs/guestmemfs/file.c > +++ b/fs/guestmemfs/file.c > @@ -1,6 +1,7 @@ > // SPDX-License-Identifier: GPL-2.0-only > > #include "guestmemfs.h" > +#include <linux/mm.h> > > static int truncate(struct inode *inode, loff_t newsize) > { > @@ -41,6 +42,46 @@ static int inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct > return 0; > } > > +/* > + * To be able to use PFNMAP VMAs for VFIO DMA mapping we need the page tables > + * populated with mappings. Pre-fault everything. 
> + */ > +static int mmap(struct file *filp, struct vm_area_struct *vma) > +{ > + int rc; > + unsigned long *mappings_block; > + struct guestmemfs_inode *guestmemfs_inode; > + > + guestmemfs_inode = guestmemfs_get_persisted_inode(filp->f_inode->i_sb, > + filp->f_inode->i_ino); > + > + mappings_block = guestmemfs_inode->mappings; > + > + /* Remap-pfn-range will mark the range VM_IO */ > + for (unsigned long vma_addr_offset = vma->vm_start; > + vma_addr_offset < vma->vm_end; > + vma_addr_offset += PMD_SIZE) { > + int block, mapped_block; > + unsigned long map_size = min(PMD_SIZE, vma->vm_end - vma_addr_offset); > + > + block = (vma_addr_offset - vma->vm_start) / PMD_SIZE; > + mapped_block = *(mappings_block + block); > + /* > + * It's wrong to use remap_pfn_range; this will install PTE-level entries. > + * The whole point of 2 MiB allocs is to improve TLB perf! > + * We should use something like mm/huge_memory.c#insert_pfn_pmd > + * but that is currently static. > + * TODO: figure out the best way to install PMDs. 
> + */ > + rc = remap_pfn_range(vma, > + vma_addr_offset, > + (guestmemfs_base >> PAGE_SHIFT) + (mapped_block * 512), > + map_size, > + vma->vm_page_prot); > + } > + return 0; > +} > + > const struct inode_operations guestmemfs_file_inode_operations = { > .setattr = inode_setattr, > .getattr = simple_getattr, > @@ -48,5 +89,5 @@ const struct inode_operations guestmemfs_file_inode_operations = { > > const struct file_operations guestmemfs_file_fops = { > .owner = THIS_MODULE, > - .iterate_shared = NULL, > + .mmap = mmap, > }; > diff --git a/fs/guestmemfs/guestmemfs.c b/fs/guestmemfs/guestmemfs.c > index c45c796c497a..38f20ad25286 100644 > --- a/fs/guestmemfs/guestmemfs.c > +++ b/fs/guestmemfs/guestmemfs.c > @@ -9,7 +9,7 @@ > #include <linux/memblock.h> > #include <linux/statfs.h> > > -static phys_addr_t guestmemfs_base, guestmemfs_size; > +phys_addr_t guestmemfs_base, guestmemfs_size; > struct guestmemfs_sb *psb; > > static int statfs(struct dentry *root, struct kstatfs *buf) > diff --git a/fs/guestmemfs/guestmemfs.h b/fs/guestmemfs/guestmemfs.h > index 7ea03ac8ecca..0f2788ce740e 100644 > --- a/fs/guestmemfs/guestmemfs.h > +++ b/fs/guestmemfs/guestmemfs.h > @@ -8,6 +8,9 @@ > #define GUESTMEMFS_FILENAME_LEN 255 > #define GUESTMEMFS_PSB(sb) ((struct guestmemfs_sb *)sb->s_fs_info) > > +/* Units of bytes */ > +extern phys_addr_t guestmemfs_base, guestmemfs_size; > + > struct guestmemfs_sb { > /* Inode number */ > unsigned long next_free_ino; > -- > 2.34.1 > >