Benno Lossin <benno.lossin@xxxxxxxxx> writes: > On 01.06.24 10:18, Andreas Hindborg wrote: >> From: Andreas Hindborg <a.hindborg@xxxxxxxxxxx> > > [...] > >> +impl<T: Operations> GenDisk<T, Initialized> { >> + /// Try to create a new `GenDisk`. >> + pub fn try_new(tagset: Arc<TagSet<T>>) -> Result<Self> { >> + let lock_class_key = crate::sync::LockClassKey::new(); >> + >> + // SAFETY: `tagset.raw_tag_set()` points to a valid and initialized tag set >> + let gendisk = from_err_ptr(unsafe { >> + bindings::__blk_mq_alloc_disk( >> + tagset.raw_tag_set(), >> + core::ptr::null_mut(), // TODO: We can pass queue limits right here >> + core::ptr::null_mut(), >> + lock_class_key.as_ptr(), >> + ) >> + })?; >> + >> + const TABLE: bindings::block_device_operations = bindings::block_device_operations { >> + submit_bio: None, >> + open: None, >> + release: None, >> + ioctl: None, >> + compat_ioctl: None, >> + check_events: None, >> + unlock_native_capacity: None, >> + getgeo: None, >> + set_read_only: None, >> + swap_slot_free_notify: None, >> + report_zones: None, >> + devnode: None, >> + alternative_gpt_sector: None, >> + get_unique_id: None, >> + // TODO: Set to THIS_MODULE. Waiting for const_refs_to_static feature to >> + // be merged (unstable in rustc 1.78 which is staged for linux 6.10) >> + // https://github.com/rust-lang/rust/issues/119618 > > AFAIK the 1.78 upgrade already is in rust-next (and also should appear > in v6.10-rc2, right?) do you have this on your radar? I am tracking this and I plan to add support in a later patch. > >> + owner: core::ptr::null_mut(), >> + pr_ops: core::ptr::null_mut(), >> + free_disk: None, >> + poll_bio: None, >> + }; >> + >> + // SAFETY: gendisk is a valid pointer as we initialized it above >> + unsafe { (*gendisk).fops = &TABLE }; >> + >> + // INVARIANT: `gendisk` was initialized above. >> + // INVARIANT: `gendisk.queue.queue_data` is set to `data` in the call to >> + // `__blk_mq_alloc_disk` above. >> + Ok(GenDisk { >> + tagset, >> + gendisk, >> + _phantom: PhantomData, >> + }) >> + } > > [...] > >> + /// This function is called by the C kernel. A pointer to this function is >> + /// installed in the `blk_mq_ops` vtable for the driver. >> + /// >> + /// # Safety >> + /// >> + /// This function may only be called by blk-mq C infrastructure. >> + unsafe extern "C" fn commit_rqs_callback(_hctx: *mut bindings::blk_mq_hw_ctx) { >> + T::commit_rqs() >> + } >> + >> + /// This function is called by the C kernel. It is not currently >> + /// implemented, and there is no way to exercise this code path. > > Is it also possible to completely remove it? ie use `None` in the > VTABLE, or will the C side error? No, this pointer is not allowed to be null. It must be a callable function, hence the stub. It will be populated soon enough when I send patches for the remote completion logic. > >> + /// >> + /// # Safety >> + /// >> + /// This function may only be called by blk-mq C infrastructure. >> + unsafe extern "C" fn complete_callback(_rq: *mut bindings::request) {} > > [...] > >> +impl<'a> RawWriter<'a> { >> + /// Create a new `RawWriter` instance. >> + fn new(buffer: &'a mut [u8]) -> Result<RawWriter<'a>> { >> + *(buffer.last_mut().ok_or(EINVAL)?) = 0; >> + >> + // INVARIANT: We null terminated the buffer above. >> + Ok(Self { buffer, pos: 0 }) >> + } >> + >> + pub(crate) fn from_array<const N: usize>( >> + a: &'a mut [core::ffi::c_char; N], >> + ) -> Result<RawWriter<'a>> { > > You could change the return type to be `RawWriter<'a>` and check using > `build_assert!` that `N > 0`. Then you can also call `unwrap_unchecked` > on the result that you get below. > > I don't know if we want that, but it is a possibility. I guess we could potentially make the type generic over a const buffer size. But let's put a pin in that for now. I'll look into that down the road. > >> + Self::new( >> + // SAFETY: the buffer of `a` is valid for read and write as `u8` for >> + // at least `N` bytes. >> + unsafe { core::slice::from_raw_parts_mut(a.as_mut_ptr().cast::<u8>(), N) }, >> + ) >> + } >> +} > > [...] > >> +/// Store the result of `op(target.load())` in target, returning new value of >> +/// taret. >> +fn atomic_relaxed_op_return(target: &AtomicU64, op: impl Fn(u64) -> u64) -> u64 { >> + let old = target.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| Some(op(x))); >> + >> + // SAFETY: Because the operation passed to `fetch_update` above always >> + // return `Some`, `old` will always be `Ok`. >> + let old = unsafe { old.unwrap_unchecked() }; >> + >> + op(old) >> +} >> + >> +/// Store the result of `op(target.load)` in `target` if `target.load() != >> +/// pred`, returning previous value of target > > The function returns a bool, not a u64 (value). From the body I read > that you return whether the value was updated. Thanks 👍 > >> +fn atomic_relaxed_op_unless(target: &AtomicU64, op: impl Fn(u64) -> u64, pred: u64) -> bool { >> + let x = target.load(Ordering::Relaxed); >> + loop { >> + if x == pred { >> + break; >> + } >> + if target >> + .compare_exchange_weak(x, op(x), Ordering::Relaxed, Ordering::Relaxed) >> + .is_ok() >> + { >> + break; >> + } > > If this fails, you are not re-reading the value of `target`, so if > someone else just set it to `pred`, you will still continue to try to > set it from `x` to `op(x)`, but it might never have the value `x` again. > This would lead to a potentially infinite loop, right? Yea, that is not good. I should have moved the assignment of `x` into the loop > >> + } > > Do you think you can also implement this using `fetch_update`? I guess > this would do what you want, right?: > > target.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |x| { > if x == pred { > None > } else { > Some(op(x)) > } > }).is_ok() Makes sense, I will steal that. > >> + >> + x == pred >> +} > > [...] > >> +impl<T: Operations> TagSet<T> { >> + /// Try to create a new tag set >> + pub fn try_new( >> + nr_hw_queues: u32, >> + num_tags: u32, >> + num_maps: u32, >> + ) -> impl PinInit<Self, error::Error> { >> + try_pin_init!( TagSet { >> + // INVARIANT: We initialize `inner` here and it is valid after the >> + // initializer has run. >> + inner <- unsafe {kernel::init::pin_init_from_closure(move |place: *mut Opaque<bindings::blk_mq_tag_set>| -> Result<()> { >> + let place = place.cast::<bindings::blk_mq_tag_set>(); >> + >> + // SAFETY: pin_init_from_closure promises that `place` is writable, and >> + // zeroes is a valid bit pattern for this structure. >> + core::ptr::write_bytes(place, 0, 1); >> + >> + /// For a raw pointer to a struct, write a struct field without >> + /// creating a reference to the field >> + macro_rules! write_ptr_field { >> + ($target:ident, $field:ident, $value:expr) => { >> + ::core::ptr::write(::core::ptr::addr_of_mut!((*$target).$field), $value) >> + }; >> + } >> + >> + // SAFETY: pin_init_from_closure promises that `place` is writable >> + write_ptr_field!(place, ops, OperationsVTable::<T>::build()); >> + write_ptr_field!(place, nr_hw_queues , nr_hw_queues); >> + write_ptr_field!(place, timeout , 0); // 0 means default which is 30 * HZ in C >> + write_ptr_field!(place, numa_node , bindings::NUMA_NO_NODE); >> + write_ptr_field!(place, queue_depth , num_tags); >> + write_ptr_field!(place, cmd_size , core::mem::size_of::<RequestDataWrapper>().try_into()?); >> + write_ptr_field!(place, flags , bindings::BLK_MQ_F_SHOULD_MERGE); >> + write_ptr_field!(place, driver_data , core::ptr::null_mut::<core::ffi::c_void>()); >> + write_ptr_field!(place, nr_maps , num_maps); > > Did something not work with my suggestion? I did not want to change it if we are rewriting it to `Opaque::init` in a cycle or two, which I think is a better solution. Best regards, Andreas