From: Wedson Almeida Filho <walmeida@xxxxxxxxxxxxx>

Allow Rust file systems that are backed by block devices (in addition to
in-memory ones).

Signed-off-by: Wedson Almeida Filho <walmeida@xxxxxxxxxxxxx>
---
 rust/helpers.c            |  14 +++
 rust/kernel/block.rs      |   1 -
 rust/kernel/fs.rs         |  60 ++++++++---
 rust/kernel/fs/inode.rs   | 221 +++++++++++++++++++++++++++++++++++++-
 rust/kernel/fs/sb.rs      |  49 ++++++++-
 samples/rust/rust_rofs.rs |   2 +-
 6 files changed, 328 insertions(+), 19 deletions(-)

diff --git a/rust/helpers.c b/rust/helpers.c
index 360a1d38ac19..6c6d18df055f 100644
--- a/rust/helpers.c
+++ b/rust/helpers.c
@@ -21,6 +21,7 @@
  */
 
 #include <kunit/test-bug.h>
+#include <linux/blkdev.h>
 #include <linux/bug.h>
 #include <linux/build_bug.h>
 #include <linux/cacheflush.h>
@@ -258,6 +259,13 @@ void rust_helper_kunmap_local(const void *vaddr)
 }
 EXPORT_SYMBOL_GPL(rust_helper_kunmap_local);
 
+struct folio *rust_helper_read_mapping_folio(struct address_space *mapping,
+					     pgoff_t index, struct file *file)
+{
+	return read_mapping_folio(mapping, index, file);
+}
+EXPORT_SYMBOL_GPL(rust_helper_read_mapping_folio);
+
 void rust_helper_i_uid_write(struct inode *inode, uid_t uid)
 {
 	i_uid_write(inode, uid);
@@ -294,6 +302,12 @@ unsigned int rust_helper_MKDEV(unsigned int major, unsigned int minor)
 }
 EXPORT_SYMBOL_GPL(rust_helper_MKDEV);
 
+sector_t rust_helper_bdev_nr_sectors(struct block_device *bdev)
+{
+	return bdev_nr_sectors(bdev);
+}
+EXPORT_SYMBOL_GPL(rust_helper_bdev_nr_sectors);
+
 unsigned long rust_helper_copy_to_user(void __user *to, const void *from,
 				       unsigned long n)
 {
diff --git a/rust/kernel/block.rs b/rust/kernel/block.rs
index 868623d7c873..4d669bd5dce9 100644
--- a/rust/kernel/block.rs
+++ b/rust/kernel/block.rs
@@ -31,7 +31,6 @@ impl Device {
     ///
     /// Callers must ensure that `ptr` is valid and remains so for the lifetime of the returned
     /// object.
-    #[allow(dead_code)]
     pub(crate) unsafe fn from_raw<'a>(ptr: *mut bindings::block_device) -> &'a Self {
         // SAFETY: The safety requirements guarantee that the cast below is ok.
         unsafe { &*ptr.cast::<Self>() }
diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs
index 387e87e3edaf..864aca24d12c 100644
--- a/rust/kernel/fs.rs
+++ b/rust/kernel/fs.rs
@@ -26,6 +26,11 @@
 /// This is C's `loff_t`.
 pub type Offset = i64;
 
+/// An index into the page cache.
+///
+/// This is C's `pgoff_t`.
+pub type PageOffset = usize;
+
 /// Maximum size of an inode.
 pub const MAX_LFS_FILESIZE: Offset = bindings::MAX_LFS_FILESIZE;
 
@@ -37,6 +42,9 @@ pub trait FileSystem {
     /// The name of the file system type.
     const NAME: &'static CStr;
 
+    /// Determines how superblocks for this file system type are keyed.
+    const SUPER_TYPE: sb::Type = sb::Type::Independent;
+
     /// Determines if an implementation doesn't specify the required types.
     ///
     /// This is meant for internal use only.
@@ -44,7 +52,10 @@
     const IS_UNSPECIFIED: bool = false;
 
     /// Initialises the new superblock and returns the data to attach to it.
-    fn fill_super(sb: &mut SuperBlock<Self, sb::New>) -> Result<Self::Data>;
+    fn fill_super(
+        sb: &mut SuperBlock<Self, sb::New>,
+        mapper: Option<inode::Mapper>,
+    ) -> Result<Self::Data>;
 
     /// Initialises and returns the root inode of the given superblock.
     ///
@@ -100,7 +111,7 @@ impl FileSystem for UnspecifiedFS {
     type Data = ();
     const NAME: &'static CStr = crate::c_str!("unspecified");
     const IS_UNSPECIFIED: bool = true;
-    fn fill_super(_: &mut SuperBlock<Self, sb::New>) -> Result {
+    fn fill_super(_: &mut SuperBlock<Self, sb::New>, _: Option<inode::Mapper>) -> Result {
         Err(ENOTSUPP)
     }
 
@@ -139,7 +150,9 @@ pub fn new<T: FileSystem + ?Sized>(module: &'static ThisModule) -> impl PinInit<
             fs.name = T::NAME.as_char_ptr();
             fs.init_fs_context = Some(Self::init_fs_context_callback::<T>);
             fs.kill_sb = Some(Self::kill_sb_callback::<T>);
-            fs.fs_flags = 0;
+            fs.fs_flags = if let sb::Type::BlockDev = T::SUPER_TYPE {
+                bindings::FS_REQUIRES_DEV as i32
+            } else { 0 };
 
             // SAFETY: Pointers stored in `fs` are static so will live for as long as the
             // registration is active (it is undone in `drop`).
@@ -162,9 +175,16 @@ pub fn new<T: FileSystem + ?Sized>(module: &'static ThisModule) -> impl PinInit<
     unsafe extern "C" fn kill_sb_callback<T: FileSystem + ?Sized>(
         sb_ptr: *mut bindings::super_block,
     ) {
-        // SAFETY: In `get_tree_callback` we always call `get_tree_nodev`, so `kill_anon_super` is
-        // the appropriate function to call for cleanup.
-        unsafe { bindings::kill_anon_super(sb_ptr) };
+        match T::SUPER_TYPE {
+            // SAFETY: In `get_tree_callback` we always call `get_tree_bdev` for
+            // `sb::Type::BlockDev`, so `kill_block_super` is the appropriate function to call
+            // for cleanup.
+            sb::Type::BlockDev => unsafe { bindings::kill_block_super(sb_ptr) },
+            // SAFETY: In `get_tree_callback` we always call `get_tree_nodev` for
+            // `sb::Type::Independent`, so `kill_anon_super` is the appropriate function to call
+            // for cleanup.
+            sb::Type::Independent => unsafe { bindings::kill_anon_super(sb_ptr) },
+        }
 
         // SAFETY: The C API contract guarantees that `sb_ptr` is valid for read.
         let ptr = unsafe { (*sb_ptr).s_fs_info };
@@ -200,9 +220,18 @@ impl<T: FileSystem + ?Sized> Tables<T> {
     };
 
     unsafe extern "C" fn get_tree_callback(fc: *mut bindings::fs_context) -> ffi::c_int {
-        // SAFETY: `fc` is valid per the callback contract. `fill_super_callback` also has
-        // the right type and is a valid callback.
-        unsafe { bindings::get_tree_nodev(fc, Some(Self::fill_super_callback)) }
+        match T::SUPER_TYPE {
+            // SAFETY: `fc` is valid per the callback contract. `fill_super_callback` also has
+            // the right type and is a valid callback.
+            sb::Type::BlockDev => unsafe {
+                bindings::get_tree_bdev(fc, Some(Self::fill_super_callback))
+            },
+            // SAFETY: `fc` is valid per the callback contract. `fill_super_callback` also has
+            // the right type and is a valid callback.
+            sb::Type::Independent => unsafe {
+                bindings::get_tree_nodev(fc, Some(Self::fill_super_callback))
+            },
+        }
     }
 
     unsafe extern "C" fn fill_super_callback(
@@ -221,7 +250,14 @@ impl<T: FileSystem + ?Sized> Tables<T> {
             sb.s_xattr = &Tables::<T>::XATTR_HANDLERS[0];
             sb.s_flags |= bindings::SB_RDONLY;
 
-            let data = T::fill_super(new_sb)?;
+            let mapper = if matches!(T::SUPER_TYPE, sb::Type::BlockDev) {
+                // SAFETY: This is the only mapper created for this inode, so it is unique.
+                Some(unsafe { new_sb.bdev().inode().mapper() })
+            } else {
+                None
+            };
+
+            let data = T::fill_super(new_sb, mapper)?;
 
             // N.B.: Even on failure, `kill_sb` is called and frees the data.
             sb.s_fs_info = data.into_foreign().cast_mut();
@@ -369,7 +405,7 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> {
 ///
 /// ```
 /// # mod module_fs_sample {
-/// use kernel::fs::{dentry, inode::INode, sb, sb::SuperBlock, self};
+/// use kernel::fs::{dentry, inode::INode, inode::Mapper, sb, sb::SuperBlock, self};
 /// use kernel::prelude::*;
 ///
 /// kernel::module_fs! {
@@ -384,7 +420,7 @@ fn init(module: &'static ThisModule) -> impl PinInit<Self, Error> {
 /// impl fs::FileSystem for MyFs {
 ///     type Data = ();
 ///     const NAME: &'static CStr = kernel::c_str!("myfs");
-///     fn fill_super(_: &mut SuperBlock<Self, sb::New>) -> Result {
+///     fn fill_super(_: &mut SuperBlock<Self, sb::New>, _: Option<Mapper>) -> Result {
 ///         todo!()
 ///     }
 ///     fn init_root(_sb: &SuperBlock<Self>) -> Result<dentry::Root<Self>> {
diff --git a/rust/kernel/fs/inode.rs b/rust/kernel/fs/inode.rs
index 75b68d697a6e..5b3602362521 100644
--- a/rust/kernel/fs/inode.rs
+++ b/rust/kernel/fs/inode.rs
@@ -7,13 +7,16 @@
 //! C headers: [`include/linux/fs.h`](srctree/include/linux/fs.h)
 
 use super::{
-    address_space, dentry, dentry::DEntry, file, sb::SuperBlock, FileSystem, Offset, UnspecifiedFS,
+    address_space, dentry, dentry::DEntry, file, sb::SuperBlock, FileSystem, Offset, PageOffset,
+    UnspecifiedFS,
 };
-use crate::error::{code::*, Result};
+use crate::error::{code::*, from_err_ptr, Result};
 use crate::types::{ARef, AlwaysRefCounted, Either, ForeignOwnable, Lockable, Locked, Opaque};
-use crate::{bindings, block, str::CStr, str::CString, time::Timespec};
+use crate::{
+    bindings, block, build_error, folio, folio::Folio, str::CStr, str::CString, time::Timespec,
+};
 use core::mem::ManuallyDrop;
-use core::{marker::PhantomData, ptr};
+use core::{cmp, marker::PhantomData, ops::Deref, ptr};
 use macros::vtable;
 
 /// The number of an inode.
@@ -93,6 +96,129 @@ pub fn size(&self) -> Offset {
         // SAFETY: `self` is guaranteed to be valid by the existence of a shared reference.
         unsafe { bindings::i_size_read(self.0.get()) }
     }
+
+    /// Returns a mapper for this inode.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that mappers are unique for a given inode and range. For inodes that
+    /// back a block device, a mapper is always created when the filesystem is mounted, so callers
+    /// in such situations must ensure that that mapper is never used.
+    pub unsafe fn mapper(&self) -> Mapper<T> {
+        Mapper {
+            inode: self.into(),
+            begin: 0,
+            end: Offset::MAX,
+        }
+    }
+
+    /// Returns a mapped folio at the given offset.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that there are no concurrent mutable mappings of the folio.
+    pub unsafe fn mapped_folio(
+        &self,
+        offset: Offset,
+    ) -> Result<folio::Mapped<'_, folio::PageCache<T>>> {
+        let page_index = offset >> bindings::PAGE_SHIFT;
+        let page_offset = offset & ((bindings::PAGE_SIZE - 1) as Offset);
+        let folio = self.read_mapping_folio(page_index.try_into()?)?;
+
+        // SAFETY: The safety requirements guarantee that there are no concurrent mutable mappings
+        // of the folio.
+        unsafe { Folio::map_owned(folio, page_offset.try_into()?) }
+    }
+
+    /// Returns the folio at the given page index.
+    pub fn read_mapping_folio(
+        &self,
+        index: PageOffset,
+    ) -> Result<ARef<Folio<folio::PageCache<T>>>> {
+        let folio = from_err_ptr(unsafe {
+            bindings::read_mapping_folio(
+                (*self.0.get()).i_mapping,
+                index.try_into()?,
+                ptr::null_mut(),
+            )
+        })?;
+        let ptr = ptr::NonNull::new(folio)
+            .ok_or(EIO)?
+            .cast::<Folio<folio::PageCache<T>>>();
+        // SAFETY: The folio returned by read_mapping_folio has had its refcount incremented.
+        Ok(unsafe { ARef::from_raw(ptr) })
+    }
+
+    /// Iterate over the given range, one folio at a time.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that there are no concurrent mutable mappings of the folio.
+    pub unsafe fn for_each_page<U>(
+        &self,
+        first: Offset,
+        len: Offset,
+        mut cb: impl FnMut(&[u8]) -> Result<Option<U>>,
+    ) -> Result<Option<U>> {
+        if first >= self.size() {
+            return Ok(None);
+        }
+        let mut remain = cmp::min(len, self.size() - first);
+        first.checked_add(remain).ok_or(EIO)?;
+
+        let mut next = first;
+        while remain > 0 {
+            // SAFETY: The safety requirements of this function satisfy those of `mapped_folio`.
+            let data = unsafe { self.mapped_folio(next)? };
+            let avail = cmp::min(data.len(), remain.try_into().unwrap_or(usize::MAX));
+            let ret = cb(&data[..avail])?;
+            if ret.is_some() {
+                return Ok(ret);
+            }
+
+            next += avail as Offset;
+            remain -= avail as Offset;
+        }
+
+        Ok(None)
+    }
+}
+
+impl<T: FileSystem + ?Sized, U: Deref<Target = INode<T>>> Locked<U, ReadSem> {
+    /// Returns a mapped folio at the given offset.
+    // TODO: This conflicts with Locked<Folio>::write. Once we settle on a way to handle reading
+    // the contents of certain inodes (e.g., directories, links), we'll switch to that and
+    // remove this.
+    pub fn mapped_folio<'a>(
+        &'a self,
+        offset: Offset,
+    ) -> Result<folio::Mapped<'a, folio::PageCache<T>>>
+    where
+        T: 'a,
+    {
+        if T::IS_UNSPECIFIED {
+            build_error!("unspecified file systems cannot safely map folios");
+        }
+
+        // SAFETY: The inode is locked in read mode, so it's ok to map its contents.
+        unsafe { self.deref().mapped_folio(offset) }
+    }
+
+    /// Iterate over the given range, one folio at a time.
+    // TODO: This has the same issue as mapped_folio above.
+    pub fn for_each_page<V>(
+        &self,
+        first: Offset,
+        len: Offset,
+        cb: impl FnMut(&[u8]) -> Result<Option<V>>,
+    ) -> Result<Option<V>> {
+        if T::IS_UNSPECIFIED {
+            build_error!("unspecified file systems cannot safely map folios");
+        }
+
+        // SAFETY: The inode is locked in read mode, so it's ok to map its contents.
+        unsafe { self.deref().for_each_page(first, len, cb) }
+    }
 }
 
 // SAFETY: The type invariants guarantee that `INode` is always ref-counted.
@@ -111,6 +237,7 @@ unsafe fn dec_ref(obj: ptr::NonNull<Self>) {
 /// Indicates that the an inode's rw semapahore is locked in read (shared) mode.
 pub struct ReadSem;
 
+// SAFETY: `raw_lock` calls `inode_lock_shared` which locks the inode in shared mode.
 unsafe impl<T: FileSystem + ?Sized> Lockable<ReadSem> for INode<T> {
     fn raw_lock(&self) {
         // SAFETY: Since there's a reference to the inode, it must be valid.
@@ -432,3 +559,89 @@ extern "C" fn drop_cstring(ptr: *mut core::ffi::c_void) {
         Self(&Table::<U>::TABLE, PhantomData)
     }
 }
+
+/// Allows mapping the contents of the inode.
+///
+/// # Invariants
+///
+/// Mappers are unique per range per inode.
+pub struct Mapper<T: FileSystem + ?Sized = UnspecifiedFS> {
+    inode: ARef<INode<T>>,
+    begin: Offset,
+    end: Offset,
+}
+
+// SAFETY: All inode and folio operations are safe from any thread.
+unsafe impl<T: FileSystem + ?Sized> Send for Mapper<T> {}
+
+// SAFETY: All inode and folio operations are safe from any thread.
+unsafe impl<T: FileSystem + ?Sized> Sync for Mapper<T> {}
+
+impl<T: FileSystem + ?Sized> Mapper<T> {
+    /// Splits the mapper into two ranges.
+    ///
+    /// The first range is from the beginning of `self` up to and including `offset - 1`. The
+    /// second range is from `offset` to the end of `self`.
+    pub fn split_at(mut self, offset: Offset) -> (Self, Self) {
+        let inode = self.inode.clone();
+        if offset <= self.begin {
+            (
+                Self {
+                    inode,
+                    begin: offset,
+                    end: offset,
+                },
+                self,
+            )
+        } else if offset >= self.end {
+            (
+                self,
+                Self {
+                    inode,
+                    begin: offset,
+                    end: offset,
+                },
+            )
+        } else {
+            let end = self.end;
+            self.end = offset;
+            (
+                self,
+                Self {
+                    inode,
+                    begin: offset,
+                    end,
+                },
+            )
+        }
+    }
+
+    /// Returns a mapped folio at the given offset.
+    pub fn mapped_folio(&self, offset: Offset) -> Result<folio::Mapped<'_, folio::PageCache<T>>> {
+        if offset < self.begin || offset >= self.end {
+            return Err(ERANGE);
+        }
+
+        // SAFETY: By the type invariant, there are no other mutable mappings of the folio.
+        let mut map = unsafe { self.inode.mapped_folio(offset) }?;
+        map.cap_len((self.end - offset).try_into()?);
+        Ok(map)
+    }
+
+    /// Iterate over the given range, one folio at a time.
+    pub fn for_each_page<U>(
+        &self,
+        first: Offset,
+        len: Offset,
+        cb: impl FnMut(&[u8]) -> Result<Option<U>>,
+    ) -> Result<Option<U>> {
+        if first < self.begin || first >= self.end {
+            return Err(ERANGE);
+        }
+
+        let actual_len = cmp::min(len, self.end - first);
+
+        // SAFETY: By the type invariant, there are no other mutable mappings of the folio.
+        unsafe { self.inode.for_each_page(first, actual_len, cb) }
+    }
+}
diff --git a/rust/kernel/fs/sb.rs b/rust/kernel/fs/sb.rs
index 7c0c52e6da0a..93c7b2770163 100644
--- a/rust/kernel/fs/sb.rs
+++ b/rust/kernel/fs/sb.rs
@@ -8,11 +8,22 @@
 
 use super::inode::{self, INode, Ino};
 use super::FileSystem;
-use crate::bindings;
 use crate::error::{code::*, Result};
 use crate::types::{ARef, Either, ForeignOwnable, Opaque};
+use crate::{bindings, block, build_error};
 use core::{marker::PhantomData, ptr};
 
+/// Type of superblock keying.
+///
+/// It determines how C's `fs_context_operations::get_tree` is implemented.
+pub enum Type {
+    /// Multiple independent superblocks may exist.
+    Independent,
+
+    /// Uses a block device.
+    BlockDev,
+}
+
 /// A typestate for [`SuperBlock`] that indicates that it's a new one, so not fully initialized
 /// yet.
 pub struct New;
@@ -75,6 +86,28 @@ pub fn rdonly(&self) -> bool {
         // SAFETY: `s_flags` only changes during init, so it is safe to read it.
         unsafe { (*self.0.get()).s_flags & bindings::SB_RDONLY != 0 }
     }
+
+    /// Returns the block device associated with the superblock.
+    pub fn bdev(&self) -> &block::Device {
+        if !matches!(T::SUPER_TYPE, Type::BlockDev) {
+            build_error!("bdev is only available in blockdev superblocks");
+        }
+
+        // SAFETY: The superblock is valid and given that it's a blockdev superblock it must have a
+        // valid `s_bdev` that remains valid while the superblock (`self`) is valid.
+        unsafe { block::Device::from_raw((*self.0.get()).s_bdev) }
+    }
+
+    /// Returns the number of sectors in the underlying block device.
+    pub fn sector_count(&self) -> block::Sector {
+        if !matches!(T::SUPER_TYPE, Type::BlockDev) {
+            build_error!("sector_count is only available in blockdev superblocks");
+        }
+
+        // SAFETY: The superblock is valid and given that it's a blockdev superblock it must have a
+        // valid `s_bdev`.
+        unsafe { bindings::bdev_nr_sectors((*self.0.get()).s_bdev) }
+    }
 }
 
 impl<T: FileSystem + ?Sized> SuperBlock<T, New> {
@@ -85,6 +118,20 @@ pub fn set_magic(&mut self, magic: usize) -> &mut Self {
         unsafe { (*self.0.get()).s_magic = magic as core::ffi::c_ulong };
         self
     }
+
+    /// Sets the device blocksize, subject to the minimum accepted by the device.
+    ///
+    /// Returns the actual value set.
+    pub fn min_blocksize(&mut self, size: i32) -> i32 {
+        if !matches!(T::SUPER_TYPE, Type::BlockDev) {
+            build_error!("min_blocksize is only available in blockdev superblocks");
+        }
+
+        // SAFETY: This is a new superblock that is being initialised, so it's ok to set the
+        // block size. Additionally, we've checked that this superblock is backed by a block
+        // device, so it is also valid.
+        unsafe { bindings::sb_min_blocksize(self.0.get(), size) }
+    }
 }
 
 impl<T: FileSystem + ?Sized, S: DataInited> SuperBlock<T, S> {
diff --git a/samples/rust/rust_rofs.rs b/samples/rust/rust_rofs.rs
index 7027ca067f8f..fea3360b6e7a 100644
--- a/samples/rust/rust_rofs.rs
+++ b/samples/rust/rust_rofs.rs
@@ -101,7 +101,7 @@ impl fs::FileSystem for RoFs {
     type Data = ();
     const NAME: &'static CStr = c_str!("rust_rofs");
 
-    fn fill_super(sb: &mut sb::SuperBlock<Self, sb::New>) -> Result {
+    fn fill_super(sb: &mut sb::SuperBlock<Self, sb::New>, _: Option<inode::Mapper>) -> Result {
        sb.set_magic(0x52555354);
         Ok(())
     }
-- 
2.34.1
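
For reference, the following is a minimal sketch (not part of the patch) of how a
file system might opt into the block-device path added above. It only uses items
introduced or shown in this patch: SUPER_TYPE, the new fill_super() signature,
min_blocksize(), sector_count() and inode::Mapper. The module boilerplate and the
remaining FileSystem items a real implementation must provide are omitted, and the
name BlkFs, the magic value and the 16-byte length check are made up for
illustration.

use kernel::fs::{dentry, inode::Mapper, sb, sb::SuperBlock, self};
use kernel::prelude::*;

struct BlkFs;

impl fs::FileSystem for BlkFs {
    type Data = ();
    const NAME: &'static CStr = kernel::c_str!("blkfs");
    // Keying superblocks by block device makes the registration code call
    // get_tree_bdev()/kill_block_super() and pass a `Mapper` to fill_super().
    const SUPER_TYPE: sb::Type = sb::Type::BlockDev;

    fn fill_super(sb: &mut SuperBlock<Self, sb::New>, mapper: Option<Mapper>) -> Result {
        // For `sb::Type::BlockDev` superblocks the mapper is always provided.
        let mapper = mapper.ok_or(EINVAL)?;

        // Pick the block size before touching the page cache; the device may raise
        // it to its own minimum, and the value actually used is returned.
        let _bs = sb.min_blocksize(512);
        let _nr_sectors = sb.sector_count();

        // Map the first page of the device and sanity-check the on-disk superblock.
        let data = mapper.mapped_folio(0)?;
        if data.len() < 16 {
            return Err(EIO);
        }
        // ... parse `&data[..]` and reject unknown layouts here ...

        sb.set_magic(0x424c4b46); // hypothetical magic for this sketch
        Ok(())
    }

    fn init_root(_sb: &SuperBlock<Self>) -> Result<dentry::Root<Self>> {
        todo!()
    }
}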
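
Also not part of the patch, a short sketch of the two Mapper helpers whose semantics
are easiest to misread: split_at() consumes the mapper and returns the [begin, offset)
and [offset, end) halves, and for_each_page() walks a byte range one mapped folio at a
time, stopping early as soon as the callback returns Ok(Some(_)). The helper names and
the zero-byte scan are hypothetical.

use kernel::fs::{inode::Mapper, Offset};
use kernel::prelude::*;

// Hypothetical helper: carve the whole-device mapper handed to fill_super() into
// a metadata region and a data region that can then be used independently.
fn split_regions(mapper: Mapper, data_start: Offset) -> (Mapper, Mapper) {
    // The first mapper covers [begin, data_start), the second [data_start, end).
    mapper.split_at(data_start)
}

// Hypothetical helper: find the offset of the first zero byte in a region,
// demonstrating the early-return convention of for_each_page().
fn find_zero(mapper: &Mapper, first: Offset, len: Offset) -> Result<Option<Offset>> {
    let mut pos = first;
    mapper.for_each_page(first, len, |data| {
        if let Some(i) = data.iter().position(|b| *b == 0) {
            // Returning Some(_) stops the iteration and propagates the value.
            return Ok(Some(pos + i as Offset));
        }
        pos += data.len() as Offset;
        Ok(None)
    })
}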