On 18.10.23 14:25, Wedson Almeida Filho wrote: > From: Wedson Almeida Filho <walmeida@xxxxxxxxxxxxx> > > Allow Rust file systems to attach extra [typed] data to each inode. If > no data is needed, use the regular inode kmem_cache, otherwise we create > a new one. > > Signed-off-by: Wedson Almeida Filho <walmeida@xxxxxxxxxxxxx> > --- > rust/helpers.c | 7 +++ > rust/kernel/fs.rs | 128 +++++++++++++++++++++++++++++++++++--- > rust/kernel/mem_cache.rs | 2 - > samples/rust/rust_rofs.rs | 9 ++- > 4 files changed, 131 insertions(+), 15 deletions(-) > > diff --git a/rust/helpers.c b/rust/helpers.c > index bc19f3b7b93e..7b12a6d4cf5c 100644 > --- a/rust/helpers.c > +++ b/rust/helpers.c > @@ -222,6 +222,13 @@ void rust_helper_kunmap_local(const void *vaddr) > } > EXPORT_SYMBOL_GPL(rust_helper_kunmap_local); > > +void *rust_helper_alloc_inode_sb(struct super_block *sb, > + struct kmem_cache *cache, gfp_t gfp) > +{ > + return alloc_inode_sb(sb, cache, gfp); > +} > +EXPORT_SYMBOL_GPL(rust_helper_alloc_inode_sb); > + > void rust_helper_i_uid_write(struct inode *inode, uid_t uid) > { > i_uid_write(inode, uid); > diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs > index b1ad5c110dbb..b07203758674 100644 > --- a/rust/kernel/fs.rs > +++ b/rust/kernel/fs.rs > @@ -9,8 +9,12 @@ > use crate::error::{code::*, from_result, to_result, Error, Result}; > use crate::folio::{LockedFolio, UniqueFolio}; > use crate::types::{ARef, AlwaysRefCounted, Either, ForeignOwnable, Opaque, ScopeGuard}; > -use crate::{bindings, init::PinInit, str::CStr, time::Timespec, try_pin_init, ThisModule}; > -use core::{marker::PhantomData, marker::PhantomPinned, mem::ManuallyDrop, pin::Pin, ptr}; > +use crate::{ > + bindings, container_of, init::PinInit, mem_cache::MemCache, str::CStr, time::Timespec, > + try_pin_init, ThisModule, > +}; > +use core::mem::{size_of, ManuallyDrop, MaybeUninit}; > +use core::{marker::PhantomData, marker::PhantomPinned, pin::Pin, ptr}; > use macros::{pin_data, pinned_drop}; > > 
#[cfg(CONFIG_BUFFER_HEAD)] > @@ -35,6 +39,9 @@ pub trait FileSystem { > /// Data associated with each file system instance (super-block). > type Data: ForeignOwnable + Send + Sync; > > + /// Type of data associated with each inode. > + type INodeData: Send + Sync; > + > /// The name of the file system type. > const NAME: &'static CStr; > > @@ -165,6 +172,7 @@ fn try_from(v: u32) -> Result<Self> { > pub struct Registration { > #[pin] > fs: Opaque<bindings::file_system_type>, > + inode_cache: Option<MemCache>, > #[pin] > _pin: PhantomPinned, > } > @@ -182,6 +190,14 @@ impl Registration { > pub fn new<T: FileSystem + ?Sized>(module: &'static ThisModule) -> impl PinInit<Self, Error> { > try_pin_init!(Self { > _pin: PhantomPinned, > + inode_cache: if size_of::<T::INodeData>() == 0 { > + None > + } else { > + Some(MemCache::try_new::<INodeWithData<T::INodeData>>( > + T::NAME, > + Some(Self::inode_init_once_callback::<T>), > + )?) > + }, > fs <- Opaque::try_ffi_init(|fs_ptr: *mut bindings::file_system_type| { > // SAFETY: `try_ffi_init` guarantees that `fs_ptr` is valid for write. > unsafe { fs_ptr.write(bindings::file_system_type::default()) }; > @@ -239,6 +255,16 @@ pub fn new<T: FileSystem + ?Sized>(module: &'static ThisModule) -> impl PinInit< > unsafe { T::Data::from_foreign(ptr) }; > } > } > + > + unsafe extern "C" fn inode_init_once_callback<T: FileSystem + ?Sized>( > + outer_inode: *mut core::ffi::c_void, > + ) { > + let ptr = outer_inode.cast::<INodeWithData<T::INodeData>>(); > + > + // SAFETY: This is only used in `new`, so we know that we have a valid `INodeWithData` > + // instance whose inode part can be initialised. > + unsafe { bindings::inode_init_once(ptr::addr_of_mut!((*ptr).inode)) }; > + } > } > > #[pinned_drop] > @@ -280,6 +306,15 @@ pub fn super_block(&self) -> &SuperBlock<T> { > unsafe { &*(*self.0.get()).i_sb.cast() } > } > > + /// Returns the data associated with the inode. 
> + pub fn data(&self) -> &T::INodeData { > + let outerp = container_of!(self.0.get(), INodeWithData<T::INodeData>, inode); > + // SAFETY: `self` is guaranteed to be valid by the existence of a shared reference > + // (`&self`) to it. Additionally, we know `T::INodeData` is always initialised in an > + // `INode`. > + unsafe { &*(*outerp).data.as_ptr() } > + } > + > /// Returns the size of the inode contents. > pub fn size(&self) -> i64 { > // SAFETY: `self` is guaranteed to be valid by the existence of a shared reference. > @@ -300,15 +335,29 @@ unsafe fn dec_ref(obj: ptr::NonNull<Self>) { > } > } > > +struct INodeWithData<T> { > + data: MaybeUninit<T>, > + inode: bindings::inode, No `Opaque`? > +} > + > /// An inode that is locked and hasn't been initialised yet. > #[repr(transparent)] > pub struct NewINode<T: FileSystem + ?Sized>(ARef<INode<T>>); > > impl<T: FileSystem + ?Sized> NewINode<T> { > /// Initialises the new inode with the given parameters. > - pub fn init(self, params: INodeParams) -> Result<ARef<INode<T>>> { > - // SAFETY: This is a new inode, so it's safe to manipulate it mutably. > - let inode = unsafe { &mut *self.0 .0.get() }; > + pub fn init(self, params: INodeParams<T::INodeData>) -> Result<ARef<INode<T>>> { > + let outerp = container_of!(self.0 .0.get(), INodeWithData<T::INodeData>, inode); > + > + // SAFETY: This is a newly-created inode. No other references to it exist, so it is > + // safe to mutably dereference it. > + let outer = unsafe { &mut *outerp.cast_mut() }; > + > + // N.B. We must always write this to a newly allocated inode because the free callback > + // expects the data to be initialised and drops it. This should be an invariant. > + outer.data.write(params.value); > + > + let inode = &mut outer.inode; > > let mode = match params.typ { > INodeType::Dir => { > @@ -424,7 +473,7 @@ pub enum INodeType { > /// Required inode parameters. > /// > /// This is used when creating new inodes. 
> -pub struct INodeParams { > +pub struct INodeParams<T> { > /// The access mode. It's a mask that grants execute (1), write (2) and read (4) access to > /// everyone, the owner group, and the owner. > pub mode: u16, > @@ -459,6 +508,9 @@ pub struct INodeParams { > > /// Last access time. > pub atime: Timespec, > + > + /// Value to attach to this node. > + pub value: T, > } > > /// A file system super block. > @@ -735,8 +787,12 @@ impl<T: FileSystem + ?Sized> Tables<T> { > } > > const SUPER_BLOCK: bindings::super_operations = bindings::super_operations { > - alloc_inode: None, > - destroy_inode: None, > + alloc_inode: if size_of::<T::INodeData>() != 0 { > + Some(Self::alloc_inode_callback) > + } else { > + None > + }, > + destroy_inode: Some(Self::destroy_inode_callback), > free_inode: None, > dirty_inode: None, > write_inode: None, > @@ -766,6 +822,61 @@ impl<T: FileSystem + ?Sized> Tables<T> { > shutdown: None, > }; > > + unsafe extern "C" fn alloc_inode_callback( > + sb: *mut bindings::super_block, > + ) -> *mut bindings::inode { > + // SAFETY: The callback contract guarantees that `sb` is valid for read. > + let super_type = unsafe { (*sb).s_type }; > + > + // SAFETY: This callback is only used in `Registration`, so `super_type` is necessarily > + // embedded in a `Registration`, which is guaranteed to be valid because it has a > + // superblock associated to it. > + let reg = unsafe { &*container_of!(super_type, Registration, fs) }; > + > + // SAFETY: `sb` and `cache` are guaranteed to be valid by the callback contract and by > + // the existence of a superblock respectively. 
> + let ptr = unsafe { > + bindings::alloc_inode_sb(sb, MemCache::ptr(&reg.inode_cache), bindings::GFP_KERNEL) > + } > + .cast::<INodeWithData<T::INodeData>>(); > + if ptr.is_null() { > + return ptr::null_mut(); > + } > + ptr::addr_of_mut!((*ptr).inode) > + } > + > + unsafe extern "C" fn destroy_inode_callback(inode: *mut bindings::inode) { > + // SAFETY: By the C contract, `inode` is a valid pointer. > + let is_bad = unsafe { bindings::is_bad_inode(inode) }; > + > + // SAFETY: The inode is guaranteed to be valid by the callback contract. Additionally, the > + // superblock is also guaranteed to still be valid by the inode existence. > + let super_type = unsafe { (*(*inode).i_sb).s_type }; > + > + // SAFETY: This callback is only used in `Registration`, so `super_type` is necessarily > + // embedded in a `Registration`, which is guaranteed to be valid because it has a > + // superblock associated to it. > + let reg = unsafe { &*container_of!(super_type, Registration, fs) }; > + let ptr = container_of!(inode, INodeWithData<T::INodeData>, inode).cast_mut(); > + > + if !is_bad { > + // SAFETY: The code either initialises the data or marks the inode as bad. Since the Where exactly is it marked as "bad"? -- Cheers, Benno > + // inode is not bad, the data is initialised, and thus safe to drop. > + unsafe { ptr::drop_in_place((*ptr).data.as_mut_ptr()) }; > + } > + > + if size_of::<T::INodeData>() == 0 { > + // SAFETY: When the size of `INodeData` is zero, we don't use a separate mem_cache, so > + // it is allocated from the regular mem_cache, which is what `free_inode_nonrcu` uses > + // to free the inode. > + unsafe { bindings::free_inode_nonrcu(inode) }; > + } else { > + // The callback contract guarantees that the inode was previously allocated via the > + // `alloc_inode_callback` callback, so it is safe to free it back to the cache. 
> + unsafe { bindings::kmem_cache_free(MemCache::ptr(&reg.inode_cache), ptr.cast()) }; > + } > + } > + > unsafe extern "C" fn statfs_callback( > dentry: *mut bindings::dentry, > buf: *mut bindings::kstatfs, > @@ -1120,6 +1231,7 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> { > /// struct MyFs; > /// impl fs::FileSystem for MyFs { > /// type Data = (); > +/// type INodeData =(); > /// const NAME: &'static CStr = c_str!("myfs"); > /// fn super_params(_: &NewSuperBlock<Self>) -> Result<SuperParams<Self::Data>> { > /// todo!() > diff --git a/rust/kernel/mem_cache.rs b/rust/kernel/mem_cache.rs > index 05e5f2bc9781..bf6ce2d2d3e1 100644 > --- a/rust/kernel/mem_cache.rs > +++ b/rust/kernel/mem_cache.rs > @@ -20,7 +20,6 @@ impl MemCache { > /// Allocates a new `kmem_cache` for type `T`. > /// > /// `init` is called by the C code when entries are allocated. > - #[allow(dead_code)] > pub(crate) fn try_new<T>( > name: &'static CStr, > init: Option<unsafe extern "C" fn(*mut core::ffi::c_void)>, > @@ -43,7 +42,6 @@ pub(crate) fn try_new<T>( > /// Returns the pointer to the `kmem_cache` instance, or null if it's `None`. > /// > /// This is a helper for functions like `alloc_inode_sb` where the cache is optional. 
> - #[allow(dead_code)] > pub(crate) fn ptr(c: &Option<Self>) -> *mut bindings::kmem_cache { > match c { > Some(m) => m.ptr.as_ptr(), > diff --git a/samples/rust/rust_rofs.rs b/samples/rust/rust_rofs.rs > index 093425650f26..dfe745439842 100644 > --- a/samples/rust/rust_rofs.rs > +++ b/samples/rust/rust_rofs.rs > @@ -53,6 +53,7 @@ struct Entry { > struct RoFs; > impl fs::FileSystem for RoFs { > type Data = (); > + type INodeData = &'static Entry; > const NAME: &'static CStr = c_str!("rust-fs"); > > fn super_params(_sb: &NewSuperBlock<Self>) -> Result<SuperParams<Self::Data>> { > @@ -79,6 +80,7 @@ fn init_root(sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>> { > atime: UNIX_EPOCH, > ctime: UNIX_EPOCH, > mtime: UNIX_EPOCH, > + value: &ENTRIES[0], > }), > } > } > @@ -122,6 +124,7 @@ fn lookup(parent: &INode<Self>, name: &[u8]) -> Result<ARef<INode<Self>>> { > atime: UNIX_EPOCH, > ctime: UNIX_EPOCH, > mtime: UNIX_EPOCH, > + value: e, > }), > }; > } > @@ -131,11 +134,7 @@ fn lookup(parent: &INode<Self>, name: &[u8]) -> Result<ARef<INode<Self>>> { > } > > fn read_folio(inode: &INode<Self>, mut folio: LockedFolio<'_>) -> Result { > - let data = match inode.ino() { > - 2 => ENTRIES[2].contents, > - 3 => ENTRIES[3].contents, > - _ => return Err(EINVAL), > - }; > + let data = inode.data().contents; > > let pos = usize::try_from(folio.pos()).unwrap_or(usize::MAX); > let copied = if pos >= data.len() { > -- > 2.34.1 > >