Hi Fuad,
On 1/18/25 2:29 AM, Fuad Tabba wrote:
From: Ackerley Tng <ackerleytng@xxxxxxxxxx>
Track whether guest_memfd memory can be mapped within the inode,
since it is property of the guest_memfd's memory contents.
The guest_memfd PRIVATE memory attribute is not used for two
reasons. First because it reflects the userspace expectation for
that memory location, and therefore can be toggled by userspace.
The second is, although each guest_memfd file has a 1:1 binding
with a KVM instance, the plan is to allow multiple files per
inode, e.g. to allow intra-host migration to a new KVM instance,
without destroying guest_memfd.
Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx>
Co-developed-by: Vishal Annapurve <vannapurve@xxxxxxxxxx>
Signed-off-by: Vishal Annapurve <vannapurve@xxxxxxxxxx>
Co-developed-by: Fuad Tabba <tabba@xxxxxxxxxx>
Signed-off-by: Fuad Tabba <tabba@xxxxxxxxxx>
---
virt/kvm/guest_memfd.c | 56 ++++++++++++++++++++++++++++++++++++++----
1 file changed, 51 insertions(+), 5 deletions(-)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 6453658d2650..0a7b6cf8bd8f 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -18,6 +18,17 @@ struct kvm_gmem {
struct list_head entry;
};
+struct kvm_gmem_inode_private {
+#ifdef CONFIG_KVM_GMEM_MAPPABLE
+ struct xarray mappable_offsets;
+#endif
+};
+
+static struct kvm_gmem_inode_private *kvm_gmem_private(struct inode *inode)
+{
+ return inode->i_mapping->i_private_data;
+}
+
/**
* folio_file_pfn - like folio_file_page, but return a pfn.
* @folio: The folio which contains this index.
@@ -312,8 +323,28 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
return gfn - slot->base_gfn + slot->gmem.pgoff;
}
+static void kvm_gmem_evict_inode(struct inode *inode)
+{
+ struct kvm_gmem_inode_private *private = kvm_gmem_private(inode);
+
+#ifdef CONFIG_KVM_GMEM_MAPPABLE
+ /*
+ * .evict_inode can be called before private data is set up if there are
+ * issues during inode creation.
+ */
+ if (private)
+ xa_destroy(&private->mappable_offsets);
+#endif
+
+ truncate_inode_pages_final(inode->i_mapping);
+
+ kfree(private);
+ clear_inode(inode);
+}
+
static const struct super_operations kvm_gmem_super_operations = {
- .statfs = simple_statfs,
+ .statfs = simple_statfs,
+ .evict_inode = kvm_gmem_evict_inode,
};
As I understood, ->destroy_inode() may be more suitable place where the xarray is
released. ->evict_inode() usually detach the inode from the existing struct, to make
it offline. ->destroy_inode() is actually the place where the associated resource
(memory) is relased.
Another benefit with ->destroy_inode() is we're not concerned to truncate_inode_pages_final()
and clear_inode().
static int kvm_gmem_init_fs_context(struct fs_context *fc)
@@ -440,6 +471,7 @@ static struct inode *kvm_gmem_inode_make_secure_inode(const char *name,
loff_t size, u64 flags)
{
const struct qstr qname = QSTR_INIT(name, strlen(name));
+ struct kvm_gmem_inode_private *private;
struct inode *inode;
int err;
@@ -448,10 +480,19 @@ static struct inode *kvm_gmem_inode_make_secure_inode(const char *name,
return inode;
err = security_inode_init_security_anon(inode, &qname, NULL);
- if (err) {
- iput(inode);
- return ERR_PTR(err);
- }
+ if (err)
+ goto out;
+
+ err = -ENOMEM;
+ private = kzalloc(sizeof(*private), GFP_KERNEL);
+ if (!private)
+ goto out;
+
+#ifdef CONFIG_KVM_GMEM_MAPPABLE
+ xa_init(&private->mappable_offsets);
+#endif
+
+ inode->i_mapping->i_private_data = private;
The whole block of code needs to be guarded by CONFIG_KVM_GMEM_MAPPABLE because
kzalloc(sizeof(...)) is translated to kzalloc(0) when CONFIG_KVM_GMEM_MAPPABLE
is disabled, and kzalloc() will always fail. It will lead to unusable guest-memfd
if CONFIG_KVM_GMEM_MAPPABLE is disabled.
inode->i_private = (void *)(unsigned long)flags;
inode->i_op = &kvm_gmem_iops;
@@ -464,6 +505,11 @@ static struct inode *kvm_gmem_inode_make_secure_inode(const char *name,
WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
return inode;
+
+out:
+ iput(inode);
+
+ return ERR_PTR(err);
}
static struct file *kvm_gmem_inode_create_getfile(void *priv, loff_t size,
Thanks,
Gavin