On 18/01/2022 13:21, Chao Peng wrote: > Introduce a new memfd_create() flag indicating the content of the > created memfd is inaccessible from userspace. It does this by force > setting F_SEAL_INACCESSIBLE seal when the file is created. It also set > F_SEAL_SEAL to prevent future sealing, which means, it can not coexist > with MFD_ALLOW_SEALING. > > The pages backed by such memfd will be used as guest private memory in > confidential computing environments such as Intel TDX/AMD SEV. Since > page migration/swapping is not yet supported for such usages so these > pages are currently marked as UNMOVABLE and UNEVICTABLE which makes > them behave like long-term pinned pages. > > Signed-off-by: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx> > --- > include/uapi/linux/memfd.h | 1 + > mm/memfd.c | 20 +++++++++++++++++++- > 2 files changed, 20 insertions(+), 1 deletion(-) > > diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h > index 7a8a26751c23..48750474b904 100644 > --- a/include/uapi/linux/memfd.h > +++ b/include/uapi/linux/memfd.h > @@ -8,6 +8,7 @@ > #define MFD_CLOEXEC 0x0001U > #define MFD_ALLOW_SEALING 0x0002U > #define MFD_HUGETLB 0x0004U > +#define MFD_INACCESSIBLE 0x0008U > > /* > * Huge page size encoding when MFD_HUGETLB is specified, and a huge page > diff --git a/mm/memfd.c b/mm/memfd.c > index 9f80f162791a..26998d96dc11 100644 > --- a/mm/memfd.c > +++ b/mm/memfd.c > @@ -245,16 +245,19 @@ long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg) > #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1) > #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN) > > -#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB) > +#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB | \ > + MFD_INACCESSIBLE) > > SYSCALL_DEFINE2(memfd_create, > const char __user *, uname, > unsigned int, flags) > { > + struct address_space *mapping; > unsigned int *file_seals; > struct file *file; > int fd, error; > char *name; > + gfp_t gfp; > long len; > > if (!(flags & MFD_HUGETLB)) { > @@ -267,6 +270,10 @@ SYSCALL_DEFINE2(memfd_create, > return -EINVAL; > } > > + /* Disallow sealing when MFD_INACCESSIBLE is set. */ > + if (flags & MFD_INACCESSIBLE && flags & MFD_ALLOW_SEALING) > + return -EINVAL; > + > /* length includes terminating zero */ > len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1); > if (len <= 0) > @@ -315,6 +322,17 @@ SYSCALL_DEFINE2(memfd_create, > *file_seals &= ~F_SEAL_SEAL; > } > > + if (flags & MFD_INACCESSIBLE) { > + mapping = file_inode(file)->i_mapping; > + gfp = mapping_gfp_mask(mapping); > + gfp &= ~__GFP_MOVABLE; > + mapping_set_gfp_mask(mapping, gfp); > + mapping_set_unevictable(mapping); > + > + file_seals = memfd_file_seals_ptr(file); > + *file_seals &= F_SEAL_SEAL | F_SEAL_INACCESSIBLE; This looks backwards - the flags should be set on *file_seals, but here you are unsetting all other flags. Steve > + } > + > fd_install(fd, file); > kfree(name); > return fd; >