On 18.10.23 14:25, Wedson Almeida Filho wrote:
> From: Wedson Almeida Filho <walmeida@xxxxxxxxxxxxx>
> 
> Allow Rust file systems to handle ref-counted folios.
> 
> Provide the minimum needed to implement `read_folio` (part of `struct
> address_space_operations`) in read-only file systems and to read
> uncached blocks.
> 
> Signed-off-by: Wedson Almeida Filho <walmeida@xxxxxxxxxxxxx>
> ---
>  rust/bindings/bindings_helper.h |   3 +
>  rust/bindings/lib.rs            |   2 +
>  rust/helpers.c                  |  81 ++++++++++++
>  rust/kernel/folio.rs            | 215 ++++++++++++++++++++++++++++++++
>  rust/kernel/lib.rs              |   1 +
>  5 files changed, 302 insertions(+)
>  create mode 100644 rust/kernel/folio.rs
> 
> diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
> index ca1898ce9527..53a99ea512d1 100644
> --- a/rust/bindings/bindings_helper.h
> +++ b/rust/bindings/bindings_helper.h
> @@ -11,6 +11,7 @@
>  #include <linux/fs.h>
>  #include <linux/fs_context.h>
>  #include <linux/slab.h>
> +#include <linux/pagemap.h>
>  #include <linux/refcount.h>
>  #include <linux/wait.h>
>  #include <linux/sched.h>
> @@ -27,3 +28,5 @@ const slab_flags_t BINDINGS_SLAB_ACCOUNT = SLAB_ACCOUNT;
>  const unsigned long BINDINGS_SB_RDONLY = SB_RDONLY;
>  
>  const loff_t BINDINGS_MAX_LFS_FILESIZE = MAX_LFS_FILESIZE;
> +
> +const size_t BINDINGS_PAGE_SIZE = PAGE_SIZE;
> diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs
> index 426915d3fb57..a96b7f08e57d 100644
> --- a/rust/bindings/lib.rs
> +++ b/rust/bindings/lib.rs
> @@ -59,3 +59,5 @@ mod bindings_helper {
>  pub const SB_RDONLY: core::ffi::c_ulong = BINDINGS_SB_RDONLY;
>  
>  pub const MAX_LFS_FILESIZE: loff_t = BINDINGS_MAX_LFS_FILESIZE;
> +
> +pub const PAGE_SIZE: usize = BINDINGS_PAGE_SIZE;
> diff --git a/rust/helpers.c b/rust/helpers.c
> index c5a2bec6467d..f2ce3e7b688c 100644
> --- a/rust/helpers.c
> +++ b/rust/helpers.c
> @@ -23,10 +23,14 @@
>  #include <kunit/test-bug.h>
>  #include <linux/bug.h>
>  #include <linux/build_bug.h>
> +#include <linux/cacheflush.h>
>  #include <linux/err.h>
>  #include <linux/errname.h>
>  #include <linux/fs.h>
> +#include <linux/highmem.h>
> +#include <linux/mm.h>
>  #include <linux/mutex.h>
> +#include <linux/pagemap.h>
>  #include <linux/refcount.h>
>  #include <linux/sched/signal.h>
>  #include <linux/spinlock.h>
> @@ -145,6 +149,77 @@ struct kunit *rust_helper_kunit_get_current_test(void)
>  }
>  EXPORT_SYMBOL_GPL(rust_helper_kunit_get_current_test);
>  
> +void *rust_helper_kmap(struct page *page)
> +{
> +	return kmap(page);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_kmap);
> +
> +void rust_helper_kunmap(struct page *page)
> +{
> +	kunmap(page);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_kunmap);
> +
> +void rust_helper_folio_get(struct folio *folio)
> +{
> +	folio_get(folio);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_folio_get);
> +
> +void rust_helper_folio_put(struct folio *folio)
> +{
> +	folio_put(folio);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_folio_put);
> +
> +struct page *rust_helper_folio_page(struct folio *folio, size_t n)
> +{
> +	return folio_page(folio, n);
> +}
> +
> +loff_t rust_helper_folio_pos(struct folio *folio)
> +{
> +	return folio_pos(folio);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_folio_pos);
> +
> +size_t rust_helper_folio_size(struct folio *folio)
> +{
> +	return folio_size(folio);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_folio_size);
> +
> +void rust_helper_folio_mark_uptodate(struct folio *folio)
> +{
> +	folio_mark_uptodate(folio);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_folio_mark_uptodate);
> +
> +void rust_helper_folio_set_error(struct folio *folio)
> +{
> +	folio_set_error(folio);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_folio_set_error);
> +
> +void rust_helper_flush_dcache_folio(struct folio *folio)
> +{
> +	flush_dcache_folio(folio);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_flush_dcache_folio);
> +
> +void *rust_helper_kmap_local_folio(struct folio *folio, size_t offset)
> +{
> +	return kmap_local_folio(folio, offset);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_kmap_local_folio);
> +
> +void rust_helper_kunmap_local(const void *vaddr)
> +{
> +	kunmap_local(vaddr);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_kunmap_local);
> +
>  void rust_helper_i_uid_write(struct inode *inode, uid_t uid)
>  {
>  	i_uid_write(inode, uid);
> @@ -163,6 +238,12 @@ off_t rust_helper_i_size_read(const struct inode *inode)
>  }
>  EXPORT_SYMBOL_GPL(rust_helper_i_size_read);
>  
> +void rust_helper_mapping_set_large_folios(struct address_space *mapping)
> +{
> +	mapping_set_large_folios(mapping);
> +}
> +EXPORT_SYMBOL_GPL(rust_helper_mapping_set_large_folios);
> +
>  /*
>   * `bindgen` binds the C `size_t` type as the Rust `usize` type, so we can
>   * use it in contexts where Rust expects a `usize` like slice (array) indices.
> diff --git a/rust/kernel/folio.rs b/rust/kernel/folio.rs
> new file mode 100644
> index 000000000000..ef8a08b97962
> --- /dev/null
> +++ b/rust/kernel/folio.rs
> @@ -0,0 +1,215 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +//! Groups of contiguous pages, folios.
> +//!
> +//! C headers: [`include/linux/mm.h`](../../include/linux/mm.h)
> +
> +use crate::error::{code::*, Result};
> +use crate::types::{ARef, AlwaysRefCounted, Opaque, ScopeGuard};
> +use core::{cmp::min, ptr};
> +
> +/// Wraps the kernel's `struct folio`.
> +///
> +/// # Invariants
> +///
> +/// Instances of this type are always ref-counted, that is, a call to `folio_get` ensures that the
> +/// allocation remains valid at least until the matching call to `folio_put`.
> +#[repr(transparent)]
> +pub struct Folio(pub(crate) Opaque<bindings::folio>);
> +
> +// SAFETY: The type invariants guarantee that `Folio` is always ref-counted.
> +unsafe impl AlwaysRefCounted for Folio {
> +    fn inc_ref(&self) {
> +        // SAFETY: The existence of a shared reference means that the refcount is nonzero.
> +        unsafe { bindings::folio_get(self.0.get()) };
> +    }
> +
> +    unsafe fn dec_ref(obj: ptr::NonNull<Self>) {
> +        // SAFETY: The safety requirements guarantee that the refcount is nonzero.
> +        unsafe { bindings::folio_put(obj.cast().as_ptr()) }
> +    }
> +}
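An aside for readers following the series: this `unsafe impl` is what lets `ARef<Folio>` manage the refcount automatically, so users never call `folio_get`/`folio_put` by hand. A minimal sketch of the resulting semantics (hypothetical user code, not part of this patch):

    use kernel::folio::Folio;
    use kernel::types::ARef;

    // Each `clone` calls `inc_ref` (i.e. `folio_get`) and each drop calls
    // `dec_ref` (i.e. `folio_put`), so the returned reference keeps the
    // folio alive on its own.
    fn keep_for_later(folio: &ARef<Folio>) -> ARef<Folio> {
        folio.clone()
    }
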
> +
> +impl Folio {
> +    /// Tries to allocate a new folio.
> +    ///
> +    /// On success, returns a folio made up of 2^order pages.
> +    pub fn try_new(order: u32) -> Result<UniqueFolio> {
> +        if order > bindings::MAX_ORDER {
> +            return Err(EDOM);
> +        }
> +
> +        // SAFETY: We checked that `order` is within the max allowed value.
> +        let f = ptr::NonNull::new(unsafe { bindings::folio_alloc(bindings::GFP_KERNEL, order) })
> +            .ok_or(ENOMEM)?;
> +
> +        // SAFETY: The folio returned by `folio_alloc` is referenced. The ownership of the
> +        // reference is transferred to the `ARef` instance.
> +        Ok(UniqueFolio(unsafe { ARef::from_raw(f.cast()) }))
> +    }
> +
> +    /// Returns the byte position of this folio in its file.
> +    pub fn pos(&self) -> i64 {
> +        // SAFETY: The folio is valid because the shared reference implies a non-zero refcount.
> +        unsafe { bindings::folio_pos(self.0.get()) }
> +    }
> +
> +    /// Returns the byte size of this folio.
> +    pub fn size(&self) -> usize {
> +        // SAFETY: The folio is valid because the shared reference implies a non-zero refcount.
> +        unsafe { bindings::folio_size(self.0.get()) }
> +    }
> +
> +    /// Flushes the data cache for the pages that make up the folio.
> +    pub fn flush_dcache(&self) {
> +        // SAFETY: The folio is valid because the shared reference implies a non-zero refcount.
> +        unsafe { bindings::flush_dcache_folio(self.0.get()) }
> +    }
> +}
> +
> +/// A [`Folio`] that has a single reference to it.
> +pub struct UniqueFolio(pub(crate) ARef<Folio>);

This should be an invariant.

> +
> +impl UniqueFolio {
> +    /// Maps the contents of a folio page into a slice.
> +    pub fn map_page(&self, page_index: usize) -> Result<MapGuard<'_>> {
> +        if page_index >= self.0.size() / bindings::PAGE_SIZE {
> +            return Err(EDOM);
> +        }
> +
> +        // SAFETY: We just checked that the index is within bounds of the folio.
> +        let page = unsafe { bindings::folio_page(self.0 .0.get(), page_index) };
> +
> +        // SAFETY: `page` is valid because it was returned by `folio_page` above.
> +        let ptr = unsafe { bindings::kmap(page) };
> +
> +        // SAFETY: We just mapped `ptr`, so it's valid for read.
> +        let data = unsafe { core::slice::from_raw_parts(ptr.cast::<u8>(), bindings::PAGE_SIZE) };
> +
> +        Ok(MapGuard { data, page })
> +    }
> +}
> +
> +/// A mapped [`UniqueFolio`].
> +pub struct MapGuard<'a> {
> +    data: &'a [u8],
> +    page: *mut bindings::page,
> +}
> +
> +impl core::ops::Deref for MapGuard<'_> {
> +    type Target = [u8];
> +
> +    fn deref(&self) -> &Self::Target {
> +        self.data
> +    }
> +}
> +
> +impl Drop for MapGuard<'_> {
> +    fn drop(&mut self) {
> +        // SAFETY: A `MapGuard` instance is only created when `kmap` succeeds, so it's ok to unmap
> +        // it when the guard is dropped.
> +        unsafe { bindings::kunmap(self.page) };
> +    }
> +}
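Since these pieces are spread out, here is how `try_new`, `map_page`, and the guard's `Deref` fit together in practice. A sketch only; the function name and the `pr_info!` call are illustrative:

    use kernel::folio::Folio;
    use kernel::prelude::*;

    fn alloc_and_map() -> Result {
        // Order 0: a folio backed by a single page.
        let folio = Folio::try_new(0)?;

        // Maps page 0 via `kmap`; `kunmap` runs when `mapped` is dropped.
        let mapped = folio.map_page(0)?;
        pr_info!("mapped {} bytes\n", mapped.len());

        Ok(())
    }
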
> +
> +/// A locked [`Folio`].
> +pub struct LockedFolio<'a>(&'a Folio);

This should be an invariant.
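It may also be worth documenting the intended `read_folio` flow on `LockedFolio` itself. My understanding, as a sketch (the function and its `block` argument are hypothetical; `write`, `zero_out`, `mark_uptodate`, and `set_error` are from the code quoted below; this assumes `data` fits within the folio):

    use kernel::folio::LockedFolio;
    use kernel::prelude::*;

    // Completes a read into a locked folio.
    fn complete_read(mut folio: LockedFolio<'_>, block: Result<&[u8]>) -> Result {
        let size = folio.size();
        match block {
            Ok(data) => {
                folio.write(0, data)?;
                // Zero the tail so no stale data leaks past the block.
                folio.zero_out(data.len(), size - data.len())?;
                folio.mark_uptodate();
            }
            Err(_) => folio.set_error(),
        }
        // `folio` is dropped on every path above, which unlocks it.
        Ok(())
    }
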
-- 
Cheers,
Benno

> +
> +impl LockedFolio<'_> {
> +    /// Creates a new locked folio from a raw pointer.
> +    ///
> +    /// # Safety
> +    ///
> +    /// Callers must ensure that the folio is valid and locked. Additionally, that the
> +    /// responsibility of unlocking is transferred to the new instance of [`LockedFolio`]. Lastly,
> +    /// that the returned [`LockedFolio`] doesn't outlive the refcount that keeps it alive.
> +    #[allow(dead_code)]
> +    pub(crate) unsafe fn from_raw(folio: *const bindings::folio) -> Self {
> +        let ptr = folio.cast();
> +        // SAFETY: The safety requirements ensure that `folio` (from which `ptr` is derived) is
> +        // valid and will remain valid while the `LockedFolio` instance lives.
> +        Self(unsafe { &*ptr })
> +    }
> +
> +    /// Marks the folio as being up to date.
> +    pub fn mark_uptodate(&mut self) {
> +        // SAFETY: The folio is valid because the shared reference implies a non-zero refcount.
> +        unsafe { bindings::folio_mark_uptodate(self.0 .0.get()) }
> +    }
> +
> +    /// Sets the error flag on the folio.
> +    pub fn set_error(&mut self) {
> +        // SAFETY: The folio is valid because the shared reference implies a non-zero refcount.
> +        unsafe { bindings::folio_set_error(self.0 .0.get()) }
> +    }
> +
> +    fn for_each_page(
> +        &mut self,
> +        offset: usize,
> +        len: usize,
> +        mut cb: impl FnMut(&mut [u8]) -> Result,
> +    ) -> Result {
> +        let mut remaining = len;
> +        let mut next_offset = offset;
> +
> +        // Check that we don't overflow the folio.
> +        let end = offset.checked_add(len).ok_or(EDOM)?;
> +        if end > self.size() {
> +            return Err(EINVAL);
> +        }
> +
> +        while remaining > 0 {
> +            let page_offset = next_offset & (bindings::PAGE_SIZE - 1);
> +            let usable = min(remaining, bindings::PAGE_SIZE - page_offset);
> +            // SAFETY: The folio is valid because the shared reference implies a non-zero refcount;
> +            // `next_offset` is also guaranteed to be less than the folio size.
> +            let ptr = unsafe { bindings::kmap_local_folio(self.0 .0.get(), next_offset) };
> +
> +            // SAFETY: `ptr` was just returned by the `kmap_local_folio` above.
> +            let _guard = ScopeGuard::new(|| unsafe { bindings::kunmap_local(ptr) });
> +
> +            // SAFETY: `kmap_local_folio` maps a whole page, so we know it's mapped for at least
> +            // `usable` bytes.
> +            let s = unsafe { core::slice::from_raw_parts_mut(ptr.cast::<u8>(), usable) };
> +            cb(s)?;
> +
> +            next_offset += usable;
> +            remaining -= usable;
> +        }
> +
> +        Ok(())
> +    }
> +
> +    /// Writes the given slice into the folio.
> +    pub fn write(&mut self, offset: usize, data: &[u8]) -> Result {
> +        let mut remaining = data;
> +
> +        self.for_each_page(offset, data.len(), |s| {
> +            s.copy_from_slice(&remaining[..s.len()]);
> +            remaining = &remaining[s.len()..];
> +            Ok(())
> +        })
> +    }
> +
> +    /// Writes zeroes into the folio.
> +    pub fn zero_out(&mut self, offset: usize, len: usize) -> Result {
> +        self.for_each_page(offset, len, |s| {
> +            s.fill(0);
> +            Ok(())
> +        })
> +    }
> +}
> +
> +impl core::ops::Deref for LockedFolio<'_> {
> +    type Target = Folio;
> +    fn deref(&self) -> &Self::Target {
> +        self.0
> +    }
> +}
> +
> +impl Drop for LockedFolio<'_> {
> +    fn drop(&mut self) {
> +        // SAFETY: The folio is valid because the shared reference implies a non-zero refcount.
> +        unsafe { bindings::folio_unlock(self.0 .0.get()) }
> +    }
> +}
> diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
> index 00059b80c240..0e85b380da64 100644
> --- a/rust/kernel/lib.rs
> +++ b/rust/kernel/lib.rs
> @@ -34,6 +34,7 @@
>  mod allocator;
>  mod build_assert;
>  pub mod error;
> +pub mod folio;
>  pub mod fs;
>  pub mod init;
>  pub mod ioctl;
> -- 
> 2.34.1
> 
> 