[RFC PATCH 4/5] rust: block: add rnull, Rust null_blk implementation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Andreas Hindborg <a.hindborg@xxxxxxxxxxx>

Signed-off-by: Andreas Hindborg <a.hindborg@xxxxxxxxxxx>
---
 drivers/block/Kconfig  |   4 +
 drivers/block/Makefile |   3 +
 drivers/block/rnull.rs | 323 +++++++++++++++++++++++++++++++++++++++++
 rust/helpers.c         |   1 +
 scripts/Makefile.build |   2 +-
 5 files changed, 332 insertions(+), 1 deletion(-)
 create mode 100644 drivers/block/rnull.rs

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 5b9d4aaebb81..fb877d4f8ddf 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -354,6 +354,10 @@ config VIRTIO_BLK
 	  This is the virtual block driver for virtio.  It can be used with
           QEMU based VMMs (like KVM or Xen).  Say Y or M.
 
+config BLK_DEV_RUST_NULL
+	tristate "Rust null block driver"
+	depends on RUST
+
 config BLK_DEV_RBD
 	tristate "Rados block device (RBD)"
 	depends on INET && BLOCK
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 101612cba303..1105a2d4fdcb 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -9,6 +9,9 @@
 # needed for trace events
 ccflags-y				+= -I$(src)
 
+obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull_mod.o
+rnull_mod-y := rnull.o
+
 obj-$(CONFIG_MAC_FLOPPY)	+= swim3.o
 obj-$(CONFIG_BLK_DEV_SWIM)	+= swim_mod.o
 obj-$(CONFIG_BLK_DEV_FD)	+= floppy.o
diff --git a/drivers/block/rnull.rs b/drivers/block/rnull.rs
new file mode 100644
index 000000000000..05fef30e910c
--- /dev/null
+++ b/drivers/block/rnull.rs
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! This is a Rust implementation of the C null block driver.
+//!
+//! Supported features:
+//!
+//! - optional memory backing
+//! - blk-mq interface
+//! - direct completion
+//! - softirq completion
+//! - timer completion
+//!
+//! The driver is configured at module load time by parameters
+//! `param_memory_backed`, `param_capacity_mib`, `param_irq_mode`,
+//! `param_completion_time_nsec` and `param_block_size`.
+
+use core::ops::Deref;
+
+use kernel::{
+    bindings,
+    block::{
+        bio::Segment,
+        mq::{self, GenDisk, Operations, RequestDataRef, TagSet},
+    },
+    error::Result,
+    folio::*,
+    hrtimer::{RawTimer, TimerCallback},
+    new_mutex, pr_info,
+    prelude::*,
+    sync::{Arc, Mutex},
+    types::{ARef, ForeignOwnable},
+    xarray::XArray,
+};
+
+use kernel::new_spinlock;
+use kernel::CacheAligned;
+use kernel::sync::SpinLock;
+
+module! {
+    type: NullBlkModule,
+    name: "rnull_mod",
+    author: "Andreas Hindborg",
+    license: "GPL v2",
+    params: {
+        param_memory_backed: bool {
+            default: true,
+            permissions: 0,
+            description: "Use memory backing",
+        },
+        // NOTE: kept distinct from the field name `irq_mode`; `pin_init!`
+        // has problems when the parameter is also named `irq_mode`.
+        param_irq_mode: u8 {
+            default: 0,
+            permissions: 0,
+            description: "IRQ Mode (0: None, 1: Soft, 2: Timer)",
+        },
+        param_capacity_mib: u64 {
+            default: 4096,
+            permissions: 0,
+            description: "Device capacity in MiB",
+        },
+        param_completion_time_nsec: u64 {
+            default: 1_000_000,
+            permissions: 0,
+            description: "Completion time in nano seconds for timer mode",
+        },
+        param_block_size: u16 {
+            default: 4096,
+            permissions: 0,
+            description: "Block size in bytes",
+        },
+    },
+}
+
+#[derive(Debug)]
+enum IRQMode {
+    None,
+    Soft,
+    Timer,
+}
+
+impl TryFrom<u8> for IRQMode {
+    type Error = kernel::error::Error;
+
+    fn try_from(value: u8) -> Result<Self> {
+        match value {
+            0 => Ok(Self::None),
+            1 => Ok(Self::Soft),
+            2 => Ok(Self::Timer),
+            _ => Err(kernel::error::code::EINVAL),
+        }
+    }
+}
+
+struct NullBlkModule {
+    _disk: Pin<Box<Mutex<GenDisk<NullBlkDevice>>>>,
+}
+
+fn add_disk(tagset: Arc<TagSet<NullBlkDevice>>) -> Result<GenDisk<NullBlkDevice>> {
+    let block_size = *param_block_size.read();
+    if block_size % 512 != 0 || !(512..=4096).contains(&block_size) {
+        return Err(kernel::error::code::EINVAL);
+    }
+
+    let irq_mode = (*param_irq_mode.read()).try_into()?;
+
+    let queue_data = Box::pin_init(pin_init!(
+        QueueData {
+            tree <- TreeContainer::new(),
+            completion_time_nsec: *param_completion_time_nsec.read(),
+            irq_mode,
+            memory_backed: *param_memory_backed.read(),
+            block_size,
+        }
+    ))?;
+
+    let block_size = queue_data.block_size;
+
+    let mut disk = GenDisk::try_new(tagset, queue_data)?;
+    disk.set_name(format_args!("rnullb{}", 0))?;
+    disk.set_capacity_sectors(*param_capacity_mib.read() << 11);
+    disk.set_queue_logical_block_size(block_size.into());
+    disk.set_queue_physical_block_size(block_size.into());
+    disk.set_rotational(false);
+    Ok(disk)
+}
+
+impl kernel::Module for NullBlkModule {
+    fn init(_module: &'static ThisModule) -> Result<Self> {
+        pr_info!("Rust null_blk loaded\n");
+        let tagset = Arc::pin_init(TagSet::try_new(1, (), 256, 1))?;
+        let disk = Box::pin_init(new_mutex!(add_disk(tagset)?, "nullb:disk"))?;
+
+        disk.lock().add()?;
+
+        Ok(Self { _disk: disk })
+    }
+}
+
+impl Drop for NullBlkModule {
+    fn drop(&mut self) {
+        pr_info!("Dropping rnullb\n");
+    }
+}
+
+struct NullBlkDevice;
+
+type Tree = XArray<Box<UniqueFolio>>;
+type TreeRef<'a> = &'a Tree;
+
+#[pin_data]
+struct TreeContainer {
+    // `XArray` is safe to use without a lock, as it applies internal locking.
+    // However, there are two reasons to use an external lock: a) cache line
+    // contention and b) we don't want to take the lock for each page we
+    // process.
+    //
+    // A: The `XArray` lock (xa_lock) is located on the same cache line as the
+    // xarray data pointer (xa_head). The effect of this arrangement is that
+    // under heavy contention, we often get a cache miss when we try to follow
+    // the data pointer after acquiring the lock. We would rather have consumers
+    // spinning on another lock, so we do not get a miss on xa_head. This issue
+    // can potentially be fixed by padding the C `struct xarray`.
+    //
+    // B: The current `XArray` Rust API requires that we take the `xa_lock` for
+    // each `XArray` operation. This is very inefficient when the lock is
+    // contended and we have many operations to perform. Eventually we should
+    // update the `XArray` API to allow multiple tree operations under a single
+    // lock acquisition. For now, serialize tree access with an external lock.
+    #[pin]
+    tree: CacheAligned<Tree>,
+    #[pin]
+    lock: CacheAligned<SpinLock<()>>,
+}
+
+impl TreeContainer {
+    fn new() -> impl PinInit<Self> {
+        pin_init!(TreeContainer {
+            tree <- CacheAligned::new_initializer(XArray::new(0)),
+            lock <- CacheAligned::new_initializer(new_spinlock!((), "rnullb:mem")),
+        })
+    }
+}
+
+#[pin_data]
+struct QueueData {
+    #[pin]
+    tree: TreeContainer,
+    completion_time_nsec: u64,
+    irq_mode: IRQMode,
+    memory_backed: bool,
+    block_size: u16,
+}
+
+impl NullBlkDevice {
+    #[inline(always)]
+    fn write(tree: TreeRef<'_>, sector: usize, segment: &Segment<'_>) -> Result {
+        let idx = sector >> bindings::PAGE_SECTORS_SHIFT;
+
+        let mut folio = if let Some(page) = tree.get_locked(idx) {
+            page
+        } else {
+            tree.set(idx, Box::try_new(Folio::try_new(0)?)?)?;
+            tree.get_locked(idx).unwrap()
+        };
+
+        segment.copy_to_folio(&mut folio)?;
+
+        Ok(())
+    }
+
+    #[inline(always)]
+    fn read(tree: TreeRef<'_>, sector: usize, segment: &mut Segment<'_>) -> Result {
+        let idx = sector >> bindings::PAGE_SECTORS_SHIFT;
+
+        if let Some(folio) = tree.get_locked(idx) {
+            segment.copy_from_folio(folio.deref())?;
+        }
+
+        Ok(())
+    }
+
+    #[inline(never)]
+    fn transfer(
+        command: bindings::req_op,
+        tree: TreeRef<'_>,
+        sector: usize,
+        segment: &mut Segment<'_>,
+    ) -> Result {
+        match command {
+            bindings::req_op_REQ_OP_WRITE => Self::write(tree, sector, segment)?,
+            bindings::req_op_REQ_OP_READ => Self::read(tree, sector, segment)?,
+            _ => (),
+        }
+        Ok(())
+    }
+}
+
+#[pin_data]
+struct Pdu {
+    #[pin]
+    timer: kernel::hrtimer::Timer<Self>,
+}
+
+impl TimerCallback for Pdu {
+    type Receiver = RequestDataRef<NullBlkDevice>;
+
+    fn run(this: Self::Receiver) {
+        this.request().end_ok();
+    }
+}
+
+kernel::impl_has_timer! {
+    impl HasTimer<Self> for Pdu { self.timer }
+}
+
+#[vtable]
+impl Operations for NullBlkDevice {
+    type RequestData = Pdu;
+    type RequestDataInit = impl PinInit<Pdu>;
+    type QueueData = Pin<Box<QueueData>>;
+    type HwData = ();
+    type TagSetData = ();
+
+    fn new_request_data(
+        _tagset_data: <Self::TagSetData as ForeignOwnable>::Borrowed<'_>,
+    ) -> Self::RequestDataInit {
+        pin_init!( Pdu {
+            timer <- kernel::hrtimer::Timer::new(),
+        })
+    }
+
+    #[inline(always)]
+    fn queue_rq(
+        _hw_data: (),
+        queue_data: &QueueData,
+        rq: ARef<mq::Request<Self>>,
+        _is_last: bool,
+    ) -> Result {
+        rq.start();
+        if queue_data.memory_backed {
+            let guard = queue_data.tree.lock.lock();
+            let tree = queue_data.tree.tree.deref();
+
+            let mut sector = rq.sector();
+            for bio in rq.bio_iter() {
+                for mut segment in bio.segment_iter() {
+                    Self::transfer(rq.command(), tree, sector, &mut segment)?;
+                    sector += segment.len() >> bindings::SECTOR_SHIFT;
+                }
+            }
+
+            drop(guard);
+        }
+
+
+        match queue_data.irq_mode {
+            IRQMode::None => rq.end_ok(),
+            IRQMode::Soft => rq.complete(),
+            IRQMode::Timer => {
+                mq::Request::owned_data_ref(rq).schedule(queue_data.completion_time_nsec)
+            }
+        }
+
+        Ok(())
+    }
+
+    fn commit_rqs(
+        _hw_data: <Self::HwData as ForeignOwnable>::Borrowed<'_>,
+        _queue_data: <Self::QueueData as ForeignOwnable>::Borrowed<'_>,
+    ) {
+    }
+
+    fn complete(rq: &mq::Request<Self>) {
+        rq.end_ok();
+    }
+
+    fn init_hctx(
+        _tagset_data: <Self::TagSetData as ForeignOwnable>::Borrowed<'_>,
+        _hctx_idx: u32,
+    ) -> Result<Self::HwData> {
+        Ok(())
+    }
+}
diff --git a/rust/helpers.c b/rust/helpers.c
index 017fa90366e6..9c8976629e90 100644
--- a/rust/helpers.c
+++ b/rust/helpers.c
@@ -200,6 +200,7 @@ struct page *rust_helper_folio_page(struct folio *folio, size_t n)
 {
 	return folio_page(folio, n);
 }
+EXPORT_SYMBOL_GPL(rust_helper_folio_page);
 
 loff_t rust_helper_folio_pos(struct folio *folio)
 {
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index dae447a1ad30..f64be2310010 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -262,7 +262,7 @@ $(obj)/%.lst: $(src)/%.c FORCE
 # Compile Rust sources (.rs)
 # ---------------------------------------------------------------------------
 
-rust_allowed_features := new_uninit,offset_of
+rust_allowed_features := new_uninit,offset_of,allocator_api,impl_trait_in_assoc_type
 
 # `--out-dir` is required to avoid temporaries being created by `rustc` in the
 # current working directory, which may be not accessible in the out-of-tree
-- 
2.44.0





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux