From: Andreas Hindborg <a.hindborg@xxxxxxxxxxx> Signed-off-by: Andreas Hindborg <a.hindborg@xxxxxxxxxxx> --- drivers/block/Kconfig | 4 + drivers/block/Makefile | 3 + drivers/block/rnull.rs | 323 +++++++++++++++++++++++++++++++++++++++++ rust/helpers.c | 1 + scripts/Makefile.build | 2 +- 5 files changed, 332 insertions(+), 1 deletion(-) create mode 100644 drivers/block/rnull.rs diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 5b9d4aaebb81..fb877d4f8ddf 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -354,6 +354,10 @@ config VIRTIO_BLK This is the virtual block driver for virtio. It can be used with QEMU based VMMs (like KVM or Xen). Say Y or M. +config BLK_DEV_RUST_NULL + tristate "Rust null block driver" + depends on RUST + config BLK_DEV_RBD tristate "Rados block device (RBD)" depends on INET && BLOCK diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 101612cba303..1105a2d4fdcb 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -9,6 +9,9 @@ # needed for trace events ccflags-y += -I$(src) +obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull_mod.o +rnull_mod-y := rnull.o + obj-$(CONFIG_MAC_FLOPPY) += swim3.o obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o obj-$(CONFIG_BLK_DEV_FD) += floppy.o diff --git a/drivers/block/rnull.rs b/drivers/block/rnull.rs new file mode 100644 index 000000000000..05fef30e910c --- /dev/null +++ b/drivers/block/rnull.rs @@ -0,0 +1,323 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! This is a Rust implementation of the C null block driver. +//! +//! Supported features: +//! +//! - optional memory backing +//! - blk-mq interface +//! - direct completion +//! - softirq completion +//! - timer completion +//! +//! The driver is configured at module load time by parameters +//! `param_memory_backed`, `param_capacity_mib`, `param_irq_mode`, +//! `param_completion_time_nsec` and `param_block_size`.
+ +use core::ops::Deref; + +use kernel::{ + bindings, + block::{ + bio::Segment, + mq::{self, GenDisk, Operations, RequestDataRef, TagSet}, + }, + error::Result, + folio::*, + hrtimer::{RawTimer, TimerCallback}, + new_mutex, pr_info, + prelude::*, + sync::{Arc, Mutex}, + types::{ARef, ForeignOwnable}, + xarray::XArray, +}; + +use kernel::new_spinlock; +use kernel::CacheAligned; +use kernel::sync::SpinLock; + +module! { + type: NullBlkModule, + name: "rnull_mod", + author: "Andreas Hindborg", + license: "GPL v2", + params: { + param_memory_backed: bool { + default: true, + permissions: 0, + description: "Use memory backing", + }, + // Problems with pin_init when `irq_mode` + param_irq_mode: u8 { + default: 0, + permissions: 0, + description: "IRQ Mode (0: None, 1: Soft, 2: Timer)", + }, + param_capacity_mib: u64 { + default: 4096, + permissions: 0, + description: "Device capacity in MiB", + }, + param_completion_time_nsec: u64 { + default: 1_000_000, + permissions: 0, + description: "Completion time in nano seconds for timer mode", + }, + param_block_size: u16 { + default: 4096, + permissions: 0, + description: "Block size in bytes", + }, + }, +} + +#[derive(Debug)] +enum IRQMode { + None, + Soft, + Timer, +} + +impl TryFrom<u8> for IRQMode { + type Error = kernel::error::Error; + + fn try_from(value: u8) -> Result<Self> { + match value { + 0 => Ok(Self::None), + 1 => Ok(Self::Soft), + 2 => Ok(Self::Timer), + _ => Err(kernel::error::code::EINVAL), + } + } +} + +struct NullBlkModule { + _disk: Pin<Box<Mutex<GenDisk<NullBlkDevice>>>>, +} + +fn add_disk(tagset: Arc<TagSet<NullBlkDevice>>) -> Result<GenDisk<NullBlkDevice>> { + let block_size = *param_block_size.read(); + if block_size % 512 != 0 || !(512..=4096).contains(&block_size) { + return Err(kernel::error::code::EINVAL); + } + + let irq_mode = (*param_irq_mode.read()).try_into()?; + + let queue_data = Box::pin_init(pin_init!( + QueueData { + tree <- TreeContainer::new(), + completion_time_nsec: 
*param_completion_time_nsec.read(), + irq_mode, + memory_backed: *param_memory_backed.read(), + block_size, + } + ))?; + + let block_size = queue_data.block_size; + + let mut disk = GenDisk::try_new(tagset, queue_data)?; + disk.set_name(format_args!("rnullb{}", 0))?; + disk.set_capacity_sectors(*param_capacity_mib.read() << 11); + disk.set_queue_logical_block_size(block_size.into()); + disk.set_queue_physical_block_size(block_size.into()); + disk.set_rotational(false); + Ok(disk) +} + +impl kernel::Module for NullBlkModule { + fn init(_module: &'static ThisModule) -> Result<Self> { + pr_info!("Rust null_blk loaded\n"); + let tagset = Arc::pin_init(TagSet::try_new(1, (), 256, 1))?; + let disk = Box::pin_init(new_mutex!(add_disk(tagset)?, "nullb:disk"))?; + + disk.lock().add()?; + + Ok(Self { _disk: disk }) + } +} + +impl Drop for NullBlkModule { + fn drop(&mut self) { + pr_info!("Dropping rnullb\n"); + } +} + +struct NullBlkDevice; + +type Tree = XArray<Box<UniqueFolio>>; +type TreeRef<'a> = &'a Tree; + +#[pin_data] +struct TreeContainer { + // `XArray` is safe to use without a lock, as it applies internal locking. + // However, there are two reasons to use an external lock: a) cache line + // contention and b) we don't want to take the lock for each page we + // process. + // + // A: The `XArray` lock (xa_lock) is located on the same cache line as the + // xarray data pointer (xa_head). The effect of this arrangement is that + // under heavy contention, we often get a cache miss when we try to follow + // the data pointer after acquiring the lock. We would rather have consumers + // spinning on another lock, so we do not get a miss on xa_head. This issue + // can potentially be fixed by padding the C `struct xarray`. + // + // B: The current `XArray` Rust API requires that we take the `xa_lock` for + // each `XArray` operation. This is very inefficient when the lock is + // contended and we have many operations to perform. 
Eventually we should + // update the `XArray` API to allow multiple tree operations under a single + // lock acquisition. For now, serialize tree access with an external lock. + #[pin] + tree: CacheAligned<Tree>, + #[pin] + lock: CacheAligned<SpinLock<()>>, +} + +impl TreeContainer { + fn new() -> impl PinInit<Self> { + pin_init!(TreeContainer { + tree <- CacheAligned::new_initializer(XArray::new(0)), + lock <- CacheAligned::new_initializer(new_spinlock!((), "rnullb:mem")), + }) + } +} + +#[pin_data] +struct QueueData { + #[pin] + tree: TreeContainer, + completion_time_nsec: u64, + irq_mode: IRQMode, + memory_backed: bool, + block_size: u16, +} + +impl NullBlkDevice { + #[inline(always)] + fn write(tree: TreeRef<'_>, sector: usize, segment: &Segment<'_>) -> Result { + let idx = sector >> bindings::PAGE_SECTORS_SHIFT; + + let mut folio = if let Some(page) = tree.get_locked(idx) { + page + } else { + tree.set(idx, Box::try_new(Folio::try_new(0)?)?)?; + tree.get_locked(idx).unwrap() + }; + + segment.copy_to_folio(&mut folio)?; + + Ok(()) + } + + #[inline(always)] + fn read(tree: TreeRef<'_>, sector: usize, segment: &mut Segment<'_>) -> Result { + let idx = sector >> bindings::PAGE_SECTORS_SHIFT; + + if let Some(folio) = tree.get_locked(idx) { + segment.copy_from_folio(folio.deref())?; + } + + Ok(()) + } + + #[inline(never)] + fn transfer( + command: bindings::req_op, + tree: TreeRef<'_>, + sector: usize, + segment: &mut Segment<'_>, + ) -> Result { + match command { + bindings::req_op_REQ_OP_WRITE => Self::write(tree, sector, segment)?, + bindings::req_op_REQ_OP_READ => Self::read(tree, sector, segment)?, + _ => (), + } + Ok(()) + } +} + +#[pin_data] +struct Pdu { + #[pin] + timer: kernel::hrtimer::Timer<Self>, +} + +impl TimerCallback for Pdu { + type Receiver = RequestDataRef<NullBlkDevice>; + + fn run(this: Self::Receiver) { + this.request().end_ok(); + } +} + +kernel::impl_has_timer! 
{ + impl HasTimer<Self> for Pdu { self.timer } +} + +#[vtable] +impl Operations for NullBlkDevice { + type RequestData = Pdu; + type RequestDataInit = impl PinInit<Pdu>; + type QueueData = Pin<Box<QueueData>>; + type HwData = (); + type TagSetData = (); + + fn new_request_data( + _tagset_data: <Self::TagSetData as ForeignOwnable>::Borrowed<'_>, + ) -> Self::RequestDataInit { + pin_init!( Pdu { + timer <- kernel::hrtimer::Timer::new(), + }) + } + + #[inline(always)] + fn queue_rq( + _hw_data: (), + queue_data: &QueueData, + rq: ARef<mq::Request<Self>>, + _is_last: bool, + ) -> Result { + rq.start(); + if queue_data.memory_backed { + let guard = queue_data.tree.lock.lock(); + let tree = queue_data.tree.tree.deref(); + + let mut sector = rq.sector(); + for bio in rq.bio_iter() { + for mut segment in bio.segment_iter() { + Self::transfer(rq.command(), tree, sector, &mut segment)?; + sector += segment.len() >> bindings::SECTOR_SHIFT; + } + } + + drop(guard); + } + + + match queue_data.irq_mode { + IRQMode::None => rq.end_ok(), + IRQMode::Soft => rq.complete(), + IRQMode::Timer => { + mq::Request::owned_data_ref(rq).schedule(queue_data.completion_time_nsec) + } + } + + Ok(()) + } + + fn commit_rqs( + _hw_data: <Self::HwData as ForeignOwnable>::Borrowed<'_>, + _queue_data: <Self::QueueData as ForeignOwnable>::Borrowed<'_>, + ) { + } + + fn complete(rq: &mq::Request<Self>) { + rq.end_ok(); + } + + fn init_hctx( + _tagset_data: <Self::TagSetData as ForeignOwnable>::Borrowed<'_>, + _hctx_idx: u32, + ) -> Result<Self::HwData> { + Ok(()) + } +} diff --git a/rust/helpers.c b/rust/helpers.c index 017fa90366e6..9c8976629e90 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -200,6 +200,7 @@ struct page *rust_helper_folio_page(struct folio *folio, size_t n) { return folio_page(folio, n); } +EXPORT_SYMBOL_GPL(rust_helper_folio_page); loff_t rust_helper_folio_pos(struct folio *folio) { diff --git a/scripts/Makefile.build b/scripts/Makefile.build index dae447a1ad30..f64be2310010 
100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -262,7 +262,7 @@ $(obj)/%.lst: $(src)/%.c FORCE # Compile Rust sources (.rs) # --------------------------------------------------------------------------- -rust_allowed_features := new_uninit,offset_of +rust_allowed_features := new_uninit,offset_of,allocator_api,impl_trait_in_assoc_type # `--out-dir` is required to avoid temporaries being created by `rustc` in the # current working directory, which may be not accessible in the out-of-tree -- 2.44.0