On 18.10.23 14:25, Wedson Almeida Filho wrote: > From: Wedson Almeida Filho <walmeida@xxxxxxxxxxxxx> > > Allow Rust file systems to specify their root directory. Also allow them > to create (and do cache lookups of) directory inodes. (More types of > inodes are added in subsequent patches in the series.) > > The `NewINode` type ensures that a new inode is properly initialised > before it is marked so. It also facilitates error paths by automatically > marking inodes as failed if they're not properly initialised. > > Signed-off-by: Wedson Almeida Filho <walmeida@xxxxxxxxxxxxx> > --- > rust/helpers.c | 12 +++ > rust/kernel/fs.rs | 178 +++++++++++++++++++++++++++++++------- > samples/rust/rust_rofs.rs | 22 ++++- > 3 files changed, 181 insertions(+), 31 deletions(-) > > diff --git a/rust/helpers.c b/rust/helpers.c > index fe45f8ddb31f..c5a2bec6467d 100644 > --- a/rust/helpers.c > +++ b/rust/helpers.c > @@ -145,6 +145,18 @@ struct kunit *rust_helper_kunit_get_current_test(void) > } > EXPORT_SYMBOL_GPL(rust_helper_kunit_get_current_test); > > +void rust_helper_i_uid_write(struct inode *inode, uid_t uid) > +{ > + i_uid_write(inode, uid); > +} > +EXPORT_SYMBOL_GPL(rust_helper_i_uid_write); > + > +void rust_helper_i_gid_write(struct inode *inode, gid_t gid) > +{ > + i_gid_write(inode, gid); > +} > +EXPORT_SYMBOL_GPL(rust_helper_i_gid_write); > + > off_t rust_helper_i_size_read(const struct inode *inode) > { > return i_size_read(inode); > diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs > index 30fa1f312f33..f3a41cf57502 100644 > --- a/rust/kernel/fs.rs > +++ b/rust/kernel/fs.rs > @@ -7,9 +7,9 @@ > //! C headers: [`include/linux/fs.h`](../../include/linux/fs.h) > > use crate::error::{code::*, from_result, to_result, Error, Result}; > -use crate::types::{AlwaysRefCounted, Opaque}; > -use crate::{bindings, init::PinInit, str::CStr, try_pin_init, ThisModule}; > -use core::{marker::PhantomData, marker::PhantomPinned, pin::Pin, ptr}; > +use crate::types::{ARef, AlwaysRefCounted, Either, Opaque}; > +use crate::{bindings, init::PinInit, str::CStr, time::Timespec, try_pin_init, ThisModule}; > +use core::{marker::PhantomData, marker::PhantomPinned, mem::ManuallyDrop, pin::Pin, ptr}; > use macros::{pin_data, pinned_drop}; > > /// Maximum size of an inode. > @@ -22,6 +22,12 @@ pub trait FileSystem { > > /// Returns the parameters to initialise a super block. > fn super_params(sb: &NewSuperBlock<Self>) -> Result<SuperParams>; > + > + /// Initialises and returns the root inode of the given superblock. > + /// > + /// This is called during initialisation of a superblock after [`FileSystem::super_params`] has > + /// completed successfully. > + fn init_root(sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>>; > } > > /// A registration of a file system. > @@ -143,12 +149,136 @@ unsafe fn dec_ref(obj: ptr::NonNull<Self>) { > } > } > > +/// An inode that is locked and hasn't been initialised yet. > +#[repr(transparent)] > +pub struct NewINode<T: FileSystem + ?Sized>(ARef<INode<T>>); > + > +impl<T: FileSystem + ?Sized> NewINode<T> { > + /// Initialises the new inode with the given parameters. > + pub fn init(self, params: INodeParams) -> Result<ARef<INode<T>>> { > + // SAFETY: This is a new inode, so it's safe to manipulate it mutably. How do you know that this is a new inode? Maybe add a type invariant? > + let inode = unsafe { &mut *self.0 .0.get() }; > + > + let mode = match params.typ { > + INodeType::Dir => { > + // SAFETY: `simple_dir_operations` never changes, it's safe to reference it. > + inode.__bindgen_anon_3.i_fop = unsafe { &bindings::simple_dir_operations }; > + > + // SAFETY: `simple_dir_inode_operations` never changes, it's safe to reference it. > + inode.i_op = unsafe { &bindings::simple_dir_inode_operations }; > + bindings::S_IFDIR > + } > + }; > + > + inode.i_mode = (params.mode & 0o777) | u16::try_from(mode)?; > + inode.i_size = params.size; > + inode.i_blocks = params.blocks; > + > + inode.__i_ctime = params.ctime.into(); > + inode.i_mtime = params.mtime.into(); > + inode.i_atime = params.atime.into(); > + > + // SAFETY: inode is a new inode, so it is valid for write. > + unsafe { > + bindings::set_nlink(inode, params.nlink); > + bindings::i_uid_write(inode, params.uid); > + bindings::i_gid_write(inode, params.gid); > + bindings::unlock_new_inode(inode); > + } > + > + // SAFETY: We are manually destructuring `self` and preventing `drop` from being called. > + Ok(unsafe { (&ManuallyDrop::new(self).0 as *const ARef<INode<T>>).read() }) Add a comment that explains why you need to do this instead of `self.0`. > + } > +} > + > +impl<T: FileSystem + ?Sized> Drop for NewINode<T> { > + fn drop(&mut self) { > + // SAFETY: The new inode failed to be turned into an initialised inode, so it's safe (and > + // in fact required) to call `iget_failed` on it. > + unsafe { bindings::iget_failed(self.0 .0.get()) }; > + } > +} > + > +/// The type of the inode. > +#[derive(Copy, Clone)] > +pub enum INodeType { > + /// Directory type. > + Dir, > +} > + > +/// Required inode parameters. > +/// > +/// This is used when creating new inodes. > +pub struct INodeParams { > + /// The access mode. It's a mask that grants execute (1), write (2) and read (4) access to > + /// everyone, the owner group, and the owner. > + pub mode: u16, > + > + /// Type of inode. > + /// > + /// Also carries additional per-type data. > + pub typ: INodeType, > + > + /// Size of the contents of the inode. > + /// > + /// Its maximum value is [`MAX_LFS_FILESIZE`]. > + pub size: i64, > + > + /// Number of blocks. > + pub blocks: u64, > + > + /// Number of links to the inode. > + pub nlink: u32, > + > + /// User id. > + pub uid: u32, > + > + /// Group id. > + pub gid: u32, > + > + /// Creation time. > + pub ctime: Timespec, > + > + /// Last modification time. > + pub mtime: Timespec, > + > + /// Last access time. > + pub atime: Timespec, > +} > + > /// A file system super block. > /// > /// Wraps the kernel's `struct super_block`. > #[repr(transparent)] > pub struct SuperBlock<T: FileSystem + ?Sized>(Opaque<bindings::super_block>, PhantomData<T>); > > +impl<T: FileSystem + ?Sized> SuperBlock<T> { > + /// Tries to get an existing inode or create a new one if it doesn't exist yet. > + pub fn get_or_create_inode(&self, ino: Ino) -> Result<Either<ARef<INode<T>>, NewINode<T>>> { > + // SAFETY: The only initialisation missing from the superblock is the root, and this > + // function is needed to create the root, so it's safe to call it. This is a weird safety comment. Why is the superblock not fully initialized? Why is safe to call the function? This comment doesn't really explain anything. > + let inode = > + ptr::NonNull::new(unsafe { bindings::iget_locked(self.0.get(), ino) }).ok_or(ENOMEM)?; > + > + // SAFETY: `inode` is valid for read, but there could be concurrent writers (e.g., if it's > + // an already-initialised inode), so we use `read_volatile` to read its current state. > + let state = unsafe { ptr::read_volatile(ptr::addr_of!((*inode.as_ptr()).i_state)) }; Are you sure that `read_volatile` is sufficient for this use case? The documentation [1] clearly states that concurrent write operations are still UB: Just like in C, whether an operation is volatile has no bearing whatsoever on questions involving concurrent access from multiple threads. Volatile accesses behave exactly like non-atomic accesses in that regard. In particular, a race between a read_volatile and any write operation to the same location is undefined behavior. [1]: https://doc.rust-lang.org/core/ptr/fn.read_volatile.html -- Cheers, Benno > + if state & u64::from(bindings::I_NEW) == 0 { > + // The inode is cached. Just return it. > + // > + // SAFETY: `inode` had its refcount incremented by `iget_locked`; this increment is now > + // owned by `ARef`. > + Ok(Either::Left(unsafe { ARef::from_raw(inode.cast()) })) > + } else { > + // SAFETY: The new inode is valid but not fully initialised yet, so it's ok to create a > + // `NewINode`. > + Ok(Either::Right(NewINode(unsafe { > + ARef::from_raw(inode.cast()) > + }))) > + } > + } > +} > + > /// Required superblock parameters. > /// > /// This is returned by implementations of [`FileSystem::super_params`]. > @@ -215,41 +345,28 @@ impl<T: FileSystem + ?Sized> Tables<T> { > sb.0.s_blocksize = 1 << sb.0.s_blocksize_bits; > sb.0.s_flags |= bindings::SB_RDONLY; > > - // The following is scaffolding code that will be removed in a subsequent patch. It is > - // needed to build a root dentry, otherwise core code will BUG(). > - // SAFETY: `sb` is the superblock being initialised, it is valid for read and write. > - let inode = unsafe { bindings::new_inode(&mut sb.0) }; > - if inode.is_null() { > - return Err(ENOMEM); > - } > - > - // SAFETY: `inode` is valid for write. > - unsafe { bindings::set_nlink(inode, 2) }; > - > - { > - // SAFETY: This is a newly-created inode. No other references to it exist, so it is > - // safe to mutably dereference it. > - let inode = unsafe { &mut *inode }; > - inode.i_ino = 1; > - inode.i_mode = (bindings::S_IFDIR | 0o755) as _; > - > - // SAFETY: `simple_dir_operations` never changes, it's safe to reference it. > - inode.__bindgen_anon_3.i_fop = unsafe { &bindings::simple_dir_operations }; > + // SAFETY: The callback contract guarantees that `sb_ptr` is a unique pointer to a > + // newly-created (and initialised above) superblock. > + let sb = unsafe { &mut *sb_ptr.cast() }; > + let root = T::init_root(sb)?; > > - // SAFETY: `simple_dir_inode_operations` never changes, it's safe to reference it. > - inode.i_op = unsafe { &bindings::simple_dir_inode_operations }; > + // Reject root inode if it belongs to a different superblock. > + if !ptr::eq(root.super_block(), sb) { > + return Err(EINVAL); > } > > // SAFETY: `d_make_root` requires that `inode` be valid and referenced, which is the > // case for this call. > // > // It takes over the inode, even on failure, so we don't need to clean it up. > - let dentry = unsafe { bindings::d_make_root(inode) }; > + let dentry = unsafe { bindings::d_make_root(ManuallyDrop::new(root).0.get()) }; > if dentry.is_null() { > return Err(ENOMEM); > } > > - sb.0.s_root = dentry; > + // SAFETY: The callback contract guarantees that `sb_ptr` is a unique pointer to a > + // newly-created (and initialised above) superblock. > + unsafe { (*sb_ptr).s_root = dentry }; > > Ok(0) > }) > @@ -314,9 +431,9 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> { > /// > /// ``` > /// # mod module_fs_sample { > -/// use kernel::fs::{NewSuperBlock, SuperParams}; > +/// use kernel::fs::{INode, NewSuperBlock, SuperBlock, SuperParams}; > /// use kernel::prelude::*; > -/// use kernel::{c_str, fs}; > +/// use kernel::{c_str, fs, types::ARef}; > /// > /// kernel::module_fs! { > /// type: MyFs, > @@ -332,6 +449,9 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> { > /// fn super_params(_: &NewSuperBlock<Self>) -> Result<SuperParams> { > /// todo!() > /// } > +/// fn init_root(_sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>> { > +/// todo!() > +/// } > /// } > /// # } > /// ``` > diff --git a/samples/rust/rust_rofs.rs b/samples/rust/rust_rofs.rs > index 9878bf88b991..9e5f4c7d1c06 100644 > --- a/samples/rust/rust_rofs.rs > +++ b/samples/rust/rust_rofs.rs > @@ -2,9 +2,9 @@ > > //! Rust read-only file system sample. > > -use kernel::fs::{NewSuperBlock, SuperParams}; > +use kernel::fs::{INode, INodeParams, INodeType, NewSuperBlock, SuperBlock, SuperParams}; > use kernel::prelude::*; > -use kernel::{c_str, fs}; > +use kernel::{c_str, fs, time::UNIX_EPOCH, types::ARef, types::Either}; > > kernel::module_fs! { > type: RoFs, > @@ -26,4 +26,22 @@ fn super_params(_sb: &NewSuperBlock<Self>) -> Result<SuperParams> { > time_gran: 1, > }) > } > + > + fn init_root(sb: &SuperBlock<Self>) -> Result<ARef<INode<Self>>> { > + match sb.get_or_create_inode(1)? { > + Either::Left(existing) => Ok(existing), > + Either::Right(new) => new.init(INodeParams { > + typ: INodeType::Dir, > + mode: 0o555, > + size: 1, > + blocks: 1, > + nlink: 2, > + uid: 0, > + gid: 0, > + atime: UNIX_EPOCH, > + ctime: UNIX_EPOCH, > + mtime: UNIX_EPOCH, > + }), > + } > + } > } > -- > 2.34.1 > >