On Fri, Sep 13, 2024 at 10:31:20PM +0200, Alice Ryhl wrote: > On Fri, Sep 13, 2024 at 10:15 PM Simona Vetter <simona.vetter@xxxxxxxx> wrote: > > > > On Thu, Sep 12, 2024 at 09:54:01AM +0000, Alice Ryhl wrote: > > > Rust Binder holds incoming transactions in a read-only mmap'd region > > > where it manually manages the pages. These pages are only in use until > > > the incoming transaction is consumed by userspace, but the kernel will > > > keep the pages around for future transactions. Rust Binder registers a > > > shrinker with the kernel so that it can give back these pages if the > > > system comes under memory pressure. > > > > > > Separate types are provided for registered and unregistered shrinkers. > > > The unregistered shrinker type can be used to configure the shrinker > > > before registering it. Separating it into two types also enables the > > > user to construct the private data between the calls to `shrinker_alloc` > > > and `shrinker_register` and avoid constructing the private data if > > > allocating the shrinker fails. > > > > > > The user specifies the callbacks in use by implementing the Shrinker > > > trait for the type used for the private data. This requires specifying > > > three things: implementations for count_objects and scan_objects, and > > > the pointer type that the private data will be wrapped in. > > > > > > The return values of count_objects and scan_objects are provided using > > > new types called CountObjects and ScanObjects respectively. These types > > > prevent the user from e.g. returning SHRINK_STOP from count_objects or > > > returning SHRINK_EMPTY from scan_objects. > > > > > > Signed-off-by: Alice Ryhl <aliceryhl@xxxxxxxxxx> > > > > Scary given that drm has a few, but I learned a few new thinks about > > shrinkers. Some comments below, but looks all really nice imo. > > > > Cheers, Sima > > > > > --- > > > rust/bindings/bindings_helper.h | 2 + > > > rust/kernel/lib.rs | 1 + > > > rust/kernel/shrinker.rs | 324 ++++++++++++++++++++++++++++++++++++++++ > > > 3 files changed, 327 insertions(+) > > > > > > diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h > > > index ae82e9c941af..7fc958e05dc5 100644 > > > --- a/rust/bindings/bindings_helper.h > > > +++ b/rust/bindings/bindings_helper.h > > > @@ -18,6 +18,7 @@ > > > #include <linux/phy.h> > > > #include <linux/refcount.h> > > > #include <linux/sched.h> > > > +#include <linux/shrinker.h> > > > #include <linux/slab.h> > > > #include <linux/wait.h> > > > #include <linux/workqueue.h> > > > @@ -31,4 +32,5 @@ const gfp_t RUST_CONST_HELPER_GFP_KERNEL_ACCOUNT = GFP_KERNEL_ACCOUNT; > > > const gfp_t RUST_CONST_HELPER_GFP_NOWAIT = GFP_NOWAIT; > > > const gfp_t RUST_CONST_HELPER___GFP_ZERO = __GFP_ZERO; > > > const gfp_t RUST_CONST_HELPER___GFP_HIGHMEM = ___GFP_HIGHMEM; > > > +const gfp_t RUST_CONST_HELPER___GFP_FS = ___GFP_FS; > > > const blk_features_t RUST_CONST_HELPER_BLK_FEAT_ROTATIONAL = BLK_FEAT_ROTATIONAL; > > > diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs > > > index f10b06a78b9d..f35eb290f2e0 100644 > > > --- a/rust/kernel/lib.rs > > > +++ b/rust/kernel/lib.rs > > > @@ -45,6 +45,7 @@ > > > pub mod prelude; > > > pub mod print; > > > pub mod rbtree; > > > +pub mod shrinker; > > > mod static_assert; > > > #[doc(hidden)] > > > pub mod std_vendor; > > > diff --git a/rust/kernel/shrinker.rs b/rust/kernel/shrinker.rs > > > new file mode 100644 > > > index 000000000000..9af726bfe0b1 > > > --- /dev/null > > > +++ b/rust/kernel/shrinker.rs > > > @@ -0,0 +1,324 @@ > > > +// SPDX-License-Identifier: GPL-2.0 > > > + > > > +// Copyright (C) 2024 Google LLC. > > > + > > > +//! Shrinker for handling memory pressure. > > > +//! > > > +//! C header: [`include/linux/shrinker.h`](srctree/include/linux/shrinker.h) > > > + > > > +use crate::{alloc::AllocError, bindings, c_str, str::CStr, types::ForeignOwnable}; > > > + > > > +use core::{ > > > + ffi::{c_int, c_ulong, c_void}, > > > + marker::PhantomData, > > > + ptr::NonNull, > > > +}; > > > + > > > +const SHRINK_STOP: c_ulong = bindings::SHRINK_STOP as c_ulong; > > > +const SHRINK_EMPTY: c_ulong = bindings::SHRINK_EMPTY as c_ulong; > > > + > > > +/// The default value for the number of seeks needed to recreate an object. > > > +pub const DEFAULT_SEEKS: u32 = bindings::DEFAULT_SEEKS; > > > + > > > +/// An unregistered shrinker. > > > +/// > > > +/// This type can be used to modify the settings of the shrinker before it is registered. > > > +/// > > > +/// # Invariants > > > +/// > > > +/// The `shrinker` pointer references an unregistered shrinker. > > > +pub struct UnregisteredShrinker { > > > + shrinker: NonNull<bindings::shrinker>, > > > +} > > > + > > > +// SAFETY: Moving an unregistered shrinker between threads is okay. > > > +unsafe impl Send for UnregisteredShrinker {} > > > +// SAFETY: An unregistered shrinker is thread safe. > > > +unsafe impl Sync for UnregisteredShrinker {} > > > + > > > +impl UnregisteredShrinker { > > > + /// Create a new shrinker. > > > + pub fn alloc(name: &CStr) -> Result<Self, AllocError> { > > > + // SAFETY: Passing `0` as flags is okay. Using `%s` as the format string is okay when we > > > > I'd elaborate here that we have to pass 0 as the only valid value because > > all the non-zero flags are for memcg and numa aware shrinkers, and we do > > not support those because we don't expose the relevant data from > > ShrinkControl. > > > > > + // pass a nul-terminated string as the string for `%s` to print. > > > + let ptr = > > > + unsafe { bindings::shrinker_alloc(0, c_str!("%s").as_char_ptr(), name.as_char_ptr()) }; > > > + > > > + let shrinker = NonNull::new(ptr).ok_or(AllocError)?; > > > + > > > + // INVARIANT: The creation of the shrinker was successful. > > > + Ok(Self { shrinker }) > > > + } > > > + > > > + /// Create a new shrinker using format arguments for the name. > > > + pub fn alloc_fmt(name: core::fmt::Arguments<'_>) -> Result<Self, AllocError> { > > > + // SAFETY: Passing `0` as flags is okay. Using `%pA` as the format string is okay when we > > > + // pass a `fmt::Arguments` as the value to print. > > > + let ptr = unsafe { > > > + bindings::shrinker_alloc( > > > + 0, > > > + c_str!("%pA").as_char_ptr(), > > > + &name as *const _ as *const c_void, > > > + ) > > > + }; > > > + > > > + let shrinker = NonNull::new(ptr).ok_or(AllocError)?; > > > + > > > + // INVARIANT: The creation of the shrinker was successful. > > > + Ok(Self { shrinker }) > > > + } > > > + > > > + /// Set the number of seeks needed to recreate an object. > > > + pub fn set_seeks(&mut self, seeks: u32) { > > > > Not sure we want to expose this, with ssd seeks kinda don't matter and > > it's just a bit about relative fairness. I think nowadays it's more > > important that the count_object values are normalized to size, if not all > > objects you shrink have the same fixed size. > > > > So not sure we need this, at least initially. > > Good point about keeping the objects the same fixed size. I'll > incorporate that in the docs. > > > > + unsafe { (*self.shrinker.as_ptr()).seeks = seeks as c_int }; > > > + } > > > + > > > + /// Register the shrinker. > > > + /// > > > + /// The provided pointer is used as the private data, and the type `T` determines the callbacks > > > + /// that the shrinker will use. > > > + pub fn register<T: Shrinker>(self, private_data: T::Ptr) -> RegisteredShrinker<T> { > > > + let shrinker = self.shrinker; > > > + let ptr = shrinker.as_ptr(); > > > + > > > + // The destructor of `self` calls `shrinker_free`, so skip the destructor. > > > + core::mem::forget(self); > > > + > > > + let private_data_ptr = <T::Ptr as ForeignOwnable>::into_foreign(private_data); > > > + > > > + // SAFETY: We own the private data, so we can assign to it. > > > + unsafe { (*ptr).private_data = private_data_ptr.cast_mut() }; > > > + // SAFETY: The shrinker is not yet registered, so we can update this field. > > > + unsafe { (*ptr).count_objects = Some(rust_count_objects::<T>) }; > > > + // SAFETY: The shrinker is not yet registered, so we can update this field. > > > + unsafe { (*ptr).scan_objects = Some(rust_scan_objects::<T>) }; > > > + > > > + // SAFETY: The shrinker is unregistered, so it's safe to register it. > > > + unsafe { bindings::shrinker_register(ptr) }; > > > + > > > + RegisteredShrinker { > > > + shrinker, > > > + _phantom: PhantomData, > > > + } > > > + } > > > +} > > > + > > > +impl Drop for UnregisteredShrinker { > > > + fn drop(&mut self) { > > > + // SAFETY: The shrinker is a valid but unregistered shrinker, and we will not use it > > > + // anymore. > > > + unsafe { bindings::shrinker_free(self.shrinker.as_ptr()) }; > > > + } > > > +} > > > + > > > +/// A shrinker that is registered with the kernel. > > > +/// > > > +/// # Invariants > > > +/// > > > +/// The `shrinker` pointer refers to a registered shrinker using `T` as the private data. > > > +pub struct RegisteredShrinker<T: Shrinker> { > > > + shrinker: NonNull<bindings::shrinker>, > > > + _phantom: PhantomData<T::Ptr>, > > > +} > > > + > > > +// SAFETY: This allows you to deregister the shrinker from a different thread, which means that > > > +// private data could be dropped from any thread. > > > +unsafe impl<T: Shrinker> Send for RegisteredShrinker<T> where T::Ptr: Send {} > > > +// SAFETY: The only thing you can do with an immutable reference is access the private data, which > > > +// is okay to access in parallel as the `Shrinker` trait requires the private data to be `Sync`. > > > +unsafe impl<T: Shrinker> Sync for RegisteredShrinker<T> {} > > > + > > > +impl<T: Shrinker> RegisteredShrinker<T> { > > > + /// Access the private data in this shrinker. > > > + pub fn private_data(&self) -> <T::Ptr as ForeignOwnable>::Borrowed<'_> { > > > + // SAFETY: We own the private data, so we can access it. > > > + let private = unsafe { (*self.shrinker.as_ptr()).private_data }; > > > + // SAFETY: By the type invariants, the private data is `T`. This access could happen in > > > + // parallel with a shrinker callback, but that's okay as the `Shrinker` trait ensures that > > > + // `T::Ptr` is `Sync`. > > > + unsafe { <T::Ptr as ForeignOwnable>::borrow(private) } > > > + } > > > +} > > > + > > > +impl<T: Shrinker> Drop for RegisteredShrinker<T> { > > > + fn drop(&mut self) { > > > + // SAFETY: We own the private data, so we can access it. > > > + let private = unsafe { (*self.shrinker.as_ptr()).private_data }; > > > + // SAFETY: We will not access the shrinker after this call. > > > + unsafe { bindings::shrinker_free(self.shrinker.as_ptr()) }; > > > + // SAFETY: The above call blocked until the completion of any shrinker callbacks, so there > > > + // are no longer any users of the private data. > > > + drop(unsafe { <T::Ptr as ForeignOwnable>::from_foreign(private) }); > > > + } > > > +} > > > + > > > +/// Callbacks for a shrinker. > > > +pub trait Shrinker { > > > + /// The pointer type used to store the private data of the shrinker. > > > + /// > > > + /// Needs to be `Sync` because the shrinker callback could access this value immutably from > > > + /// several thread in parallel. > > > + type Ptr: ForeignOwnable + Sync; > > > + > > > + /// Count the number of freeable items in the cache. > > > + /// > > > + /// May be called from atomic context. > > > > That's wrong, reclaim is allowed to block. Or my understanding of how this > > works is very busted. We do run in a pseudo locking context, the core > > code annotates that with fs_reclaim_acquire/release. > > Ah, ok. Every shrinker I've looked at uses try_lock everywhere so I > assumed this could happen. Thanks for verifying that. > > > > + fn count_objects( > > > + me: <Self::Ptr as ForeignOwnable>::Borrowed<'_>, > > > + sc: ShrinkControl<'_>, > > > + ) -> CountObjects; > > > + > > > + /// Count the number of freeable items in the cache. > > > + /// > > > + /// May be called from atomic context. > > > > Same here. > > > > > + fn scan_objects( > > > + me: <Self::Ptr as ForeignOwnable>::Borrowed<'_>, > > > + sc: ShrinkControl<'_>, > > > + ) -> ScanObjects; > > > +} > > > + > > > +/// How many objects are there in the cache? > > > +/// > > > +/// This is used as the return value of [`Shrinker::count_objects`]. > > > +pub struct CountObjects { > > > + inner: c_ulong, > > > +} > > > + > > > +impl CountObjects { > > > + /// Indicates that the number of objects is unknown. > > > + pub const UNKNOWN: Self = Self { inner: 0 }; > > > + > > > + /// Indicates that the number of objects is zero. > > > + pub const EMPTY: Self = Self { > > > + inner: SHRINK_EMPTY, > > > > So I spend way too much time looking at all this, and I think overflows > > don't matter to the core shrinker code (aside from maybe a bit of > > unfairness), as long as we don't accidently return SHRINK_EMPTY here. But > > that's only relevant for memcg aware shrinkers I think. > > Overflow is one thing. The current API automatically converts 0 to > SHRINK_EMPTY, whereas many C shrinkers just return the size directly, > which means they return UNKNOWN when it's really empty. Thoughts? I think the automatic conversion SHRINK_EMPTY and making sure implementations don't accidentally return SHRINK_STOP or one of the special is good, since that has a direct impact on the code flow. It was just the additional overflow prevention you're doing here where I'm not sure it's enough, and the C code clearly doesn't care. > > > + }; > > > + > > > + /// The maximum possible number of freeable objects. > > > + pub const MAX: Self = Self { > > > + // The shrinker code assumes that it can multiply this value by two without overflow. > > > + inner: c_ulong::MAX / 2, > > > > I think the limits is actually ulong_max/4, since priority can be 0 from > > drop_slabs() and we multiply by 4 is seeks are nonzero. But then I tried > > to look at what the limit for nr_to_scan and hence ScanObjects, and I > > think aside from the special values the mm/shrinker.c simply does not care > > about overflows at all. Both unsigned and signed integer math is well > > defined for overflow in linux, so no compiler license to do stupid stuff, > > and worst case if you do overflow you just end up shrinking a bit > > unfairly. But afaict nothing breaks. > > > > So not sure we should enforce this when core mm doesn't bother. > > > > Same for ScanObjects below. > > > > > + }; > > > + > > > + /// Creates a new `CountObjects` with the given value. > > > + pub fn from_count(count: usize) -> Self { > > > + if count == 0 { > > > + return Self::EMPTY; > > > + } > > > + > > > + if count > Self::MAX.inner as usize { > > > + return Self::MAX; > > > + } > > > + > > > + Self { > > > + inner: count as c_ulong, > > > + } > > > + } > > > +} > > > + > > > +/// How many objects were freed? > > > +/// > > > +/// This is used as the return value of [`Shrinker::scan_objects`]. > > > +pub struct ScanObjects { > > > + inner: c_ulong, > > > +} > > > + > > > +impl ScanObjects { > > > + /// Indicates that the shrinker should stop trying to free objects from this cache due to > > > + /// potential deadlocks. > > > + pub const STOP: Self = Self { inner: SHRINK_STOP }; > > > + > > > + /// The maximum possible number of freeable objects. > > > + pub const MAX: Self = Self { > > > + // The shrinker code assumes that it can multiply this value by two without overflow. > > > + inner: c_ulong::MAX / 2, > > > + }; > > > + > > > + /// Creates a new `CountObjects` with the given value. > > > + pub fn from_count(count: usize) -> Self { > > > + if count > Self::MAX.inner as usize { > > > + return Self::MAX; > > > + } > > > + > > > + Self { > > > + inner: count as c_ulong, > > > + } > > > + } > > > +} > > > + > > > +/// This struct is used to pass information from page reclaim to the shrinkers. > > > +pub struct ShrinkControl<'a> { > > > + ptr: NonNull<bindings::shrink_control>, > > > + _phantom: PhantomData<&'a bindings::shrink_control>, > > > +} > > > + > > > +impl<'a> ShrinkControl<'a> { > > > + /// Create a `ShrinkControl` from a raw pointer. > > > + /// > > > + /// # Safety > > > + /// > > > + /// The pointer should point at a valid `shrink_control` for the duration of 'a. > > > + pub unsafe fn from_raw(ptr: *mut bindings::shrink_control) -> Self { > > > + Self { > > > + // SAFETY: Caller promises that this pointer is valid. > > > + ptr: unsafe { NonNull::new_unchecked(ptr) }, > > > + _phantom: PhantomData, > > > + } > > > + } > > > + > > > + /// Determines whether it is safe to recurse into filesystem code. > > > + pub fn gfp_fs(&self) -> bool { > > > > I guess you need this in your new binder code, because the current one > > seems to side-step __GFP_FS recursion with just trylocking absolutely > > everything aside from the lru spinlock. At least I haven't found any > > gfp_fs tests, but I might be blind. > > Ah, yeah, I don't think C binder tests for that. > > Either way, it probably makes more sense to just expose a method to > get the flags directly, rather than have a dedicated method for > testing this particular flags. Or what do you think? I think we could be a bit more opionated here and just expose checks like this. Like some don't make sense in shrinkers in general (like the reclaim related flags, we're in a shrinker, so one of those is set). And some only make sense when it's a memcg/numa aware shrinker. Beyond that I think there's really only good reasons to look at gfp_fs/io for recursion control, but core mm is moving towards a world where those are per-thread flags controlled with memalloc_*_save/restore functions. Also I just realized we now need to filter the raw gfp flags through current_gfp_context (but not sure core mm does that for us already). So definitely leaning towards exposing meaningful query functions and not raw flags here. Cheers, Sima > > > > + // SAFETY: Okay by type invariants. > > > + let mask = unsafe { (*self.ptr.as_ptr()).gfp_mask }; > > > + > > > + (mask & bindings::__GFP_FS) != 0 > > > + } > > > + > > > + /// Returns the number of objects that `scan_objects` should try to reclaim. > > > + pub fn nr_to_scan(&self) -> usize { > > > + // SAFETY: Okay by type invariants. > > > + unsafe { (*self.ptr.as_ptr()).nr_to_scan as usize } > > > + } > > > + > > > + /// The callback should set this value to the number of objects processed. > > > + pub fn set_nr_scanned(&mut self, val: usize) { > > > > Unless my grep skills are really bad I think the drm/i915 shrinker is the > > only one that bothers with this. Not sure we want to bother either? > > > > > + let mut val = val as c_ulong; > > > + // SAFETY: Okay by type invariants. > > > + let max = unsafe { (*self.ptr.as_ptr()).nr_to_scan }; > > > + if val > max { > > > + val = max; > > > + } > > > + > > > + // SAFETY: Okay by type invariants. > > > + unsafe { (*self.ptr.as_ptr()).nr_scanned = val }; > > > + } > > > +} > > > + > > > +unsafe extern "C" fn rust_count_objects<T: Shrinker>( > > > + shrink: *mut bindings::shrinker, > > > + sc: *mut bindings::shrink_control, > > > +) -> c_ulong { > > > + // SAFETY: We own the private data, so we can access it. > > > + let private = unsafe { (*shrink).private_data }; > > > + // SAFETY: This function is only used with shrinkers where `T` is the type of the private data. > > > + let private = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) }; > > > + // SAFETY: The caller passes a valid `sc` pointer. > > > + let sc = unsafe { ShrinkControl::from_raw(sc) }; > > > + > > > + let ret = T::count_objects(private, sc); > > > + ret.inner > > > +} > > > + > > > +unsafe extern "C" fn rust_scan_objects<T: Shrinker>( > > > + shrink: *mut bindings::shrinker, > > > + sc: *mut bindings::shrink_control, > > > +) -> c_ulong { > > > + // SAFETY: We own the private data, so we can access it. > > > + let private = unsafe { (*shrink).private_data }; > > > + // SAFETY: This function is only used with shrinkers where `T` is the type of the private data. > > > + let private = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) }; > > > + // SAFETY: The caller passes a valid `sc` pointer. > > > + let sc = unsafe { ShrinkControl::from_raw(sc) }; > > > + > > > + let ret = T::scan_objects(private, sc); > > > + ret.inner > > > +} > > > > > > --- > > > base-commit: 93dc3be19450447a3a7090bd1dfb9f3daac3e8d2 > > > change-id: 20240911-shrinker-f8371af00b68 > > > > > > Best regards, > > > -- > > > Alice Ryhl <aliceryhl@xxxxxxxxxx> > > > > > > > -- > > Simona Vetter > > Software Engineer, Intel Corporation > > http://blog.ffwll.ch -- Simona Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch