On Mon, Jul 15, 2024 at 11:12 AM Steven Price <steven.price@xxxxxxx> wrote: > >>> + > >>> +pub(crate) const GPU_ID: GpuRegister = GpuRegister(0x0); > >>> +pub(crate) const fn gpu_arch_major(x: u64) -> GpuRegister { > >>> + GpuRegister((x) >> 28) > >>> +} > >>> +pub(crate) const fn gpu_arch_minor(x: u64) -> GpuRegister { > >>> + GpuRegister((x) & genmask(27, 24) >> 24) > >>> +} > >>> +pub(crate) const fn gpu_arch_rev(x: u64) -> GpuRegister { > >>> + GpuRegister((x) & genmask(23, 20) >> 20) > >>> +} > >>> +pub(crate) const fn gpu_prod_major(x: u64) -> GpuRegister { > >>> + GpuRegister((x) & genmask(19, 16) >> 16) > >>> +} > >>> +pub(crate) const fn gpu_ver_major(x: u64) -> GpuRegister { > >>> + GpuRegister((x) & genmask(15, 12) >> 12) > >>> +} > >>> +pub(crate) const fn gpu_ver_minor(x: u64) -> GpuRegister { > >>> + GpuRegister((x) & genmask(11, 4) >> 4) > >>> +} > >>> +pub(crate) const fn gpu_ver_status(x: u64) -> GpuRegister { > >>> + GpuRegister(x & genmask(3, 0)) > >>> +} > >>> +pub(crate) const GPU_L2_FEATURES: GpuRegister = GpuRegister(0x4); > >>> +pub(crate) const fn gpu_l2_features_line_size(x: u64) -> GpuRegister { > >>> + GpuRegister(1 << ((x) & genmask(7, 0))) > >>> +} > >>> +pub(crate) const GPU_CORE_FEATURES: GpuRegister = GpuRegister(0x8); > >>> +pub(crate) const GPU_TILER_FEATURES: GpuRegister = GpuRegister(0xc); > >>> +pub(crate) const GPU_MEM_FEATURES: GpuRegister = GpuRegister(0x10); > >>> +pub(crate) const GROUPS_L2_COHERENT: GpuRegister = GpuRegister(bit(0)); > >>> +pub(crate) const GPU_MMU_FEATURES: GpuRegister = GpuRegister(0x14); > >>> +pub(crate) const fn gpu_mmu_features_va_bits(x: u64) -> GpuRegister { > >>> + GpuRegister((x) & genmask(7, 0)) > >>> +} > >>> +pub(crate) const fn gpu_mmu_features_pa_bits(x: u64) -> GpuRegister { > >>> + GpuRegister(((x) >> 8) & genmask(7, 0)) > >>> +} > >>> +pub(crate) const GPU_AS_PRESENT: GpuRegister = GpuRegister(0x18); > >>> +pub(crate) const GPU_CSF_ID: GpuRegister = GpuRegister(0x1c); > >>> +pub(crate) const GPU_INT_RAWSTAT: GpuRegister = GpuRegister(0x20); > >>> +pub(crate) const GPU_INT_CLEAR: GpuRegister = GpuRegister(0x24); > >>> +pub(crate) const GPU_INT_MASK: GpuRegister = GpuRegister(0x28); > >>> +pub(crate) const GPU_INT_STAT: GpuRegister = GpuRegister(0x2c); > >>> +pub(crate) const GPU_IRQ_FAULT: GpuRegister = GpuRegister(bit(0)); > >>> +pub(crate) const GPU_IRQ_PROTM_FAULT: GpuRegister = GpuRegister(bit(1)); > >>> +pub(crate) const GPU_IRQ_RESET_COMPLETED: GpuRegister = GpuRegister(bit(8)); > >>> +pub(crate) const GPU_IRQ_POWER_CHANGED: GpuRegister = GpuRegister(bit(9)); > >>> +pub(crate) const GPU_IRQ_POWER_CHANGED_ALL: GpuRegister = GpuRegister(bit(10)); > >>> +pub(crate) const GPU_IRQ_CLEAN_CACHES_COMPLETED: GpuRegister = GpuRegister(bit(17)); > >>> +pub(crate) const GPU_IRQ_DOORBELL_MIRROR: GpuRegister = GpuRegister(bit(18)); > >>> +pub(crate) const GPU_IRQ_MCU_STATUS_CHANGED: GpuRegister = GpuRegister(bit(19)); > >>> +pub(crate) const GPU_CMD: GpuRegister = GpuRegister(0x30); > >>> +const fn gpu_cmd_def(ty: u64, payload: u64) -> u64 { > >>> + (ty) | ((payload) << 8) > >>> +} > >>> +pub(crate) const fn gpu_soft_reset() -> GpuRegister { > >>> + GpuRegister(gpu_cmd_def(1, 1)) > >>> +} > >>> +pub(crate) const fn gpu_hard_reset() -> GpuRegister { > >>> + GpuRegister(gpu_cmd_def(1, 2)) > >>> +} > >>> +pub(crate) const CACHE_CLEAN: GpuRegister = GpuRegister(bit(0)); > >>> +pub(crate) const CACHE_INV: GpuRegister = GpuRegister(bit(1)); > >>> +pub(crate) const GPU_STATUS: GpuRegister = GpuRegister(0x34); > >>> +pub(crate) const GPU_STATUS_ACTIVE: GpuRegister = GpuRegister(bit(0)); > >>> +pub(crate) const GPU_STATUS_PWR_ACTIVE: GpuRegister = GpuRegister(bit(1)); > >>> +pub(crate) const GPU_STATUS_PAGE_FAULT: GpuRegister = GpuRegister(bit(4)); > >>> +pub(crate) const GPU_STATUS_PROTM_ACTIVE: GpuRegister = GpuRegister(bit(7)); > >>> +pub(crate) const GPU_STATUS_DBG_ENABLED: GpuRegister = GpuRegister(bit(8)); > >>> +pub(crate) const GPU_FAULT_STATUS: GpuRegister = GpuRegister(0x3c); > >>> +pub(crate) const GPU_FAULT_ADDR_LO: GpuRegister = GpuRegister(0x40); > >>> +pub(crate) const GPU_FAULT_ADDR_HI: GpuRegister = GpuRegister(0x44); > >>> +pub(crate) const GPU_PWR_KEY: GpuRegister = GpuRegister(0x50); > >>> +pub(crate) const GPU_PWR_KEY_UNLOCK: GpuRegister = GpuRegister(0x2968a819); > >>> +pub(crate) const GPU_PWR_OVERRIDE0: GpuRegister = GpuRegister(0x54); > >>> +pub(crate) const GPU_PWR_OVERRIDE1: GpuRegister = GpuRegister(0x58); > >>> +pub(crate) const GPU_TIMESTAMP_OFFSET_LO: GpuRegister = GpuRegister(0x88); > >>> +pub(crate) const GPU_TIMESTAMP_OFFSET_HI: GpuRegister = GpuRegister(0x8c); > >>> +pub(crate) const GPU_CYCLE_COUNT_LO: GpuRegister = GpuRegister(0x90); > >>> +pub(crate) const GPU_CYCLE_COUNT_HI: GpuRegister = GpuRegister(0x94); > >>> +pub(crate) const GPU_TIMESTAMP_LO: GpuRegister = GpuRegister(0x98); > >>> +pub(crate) const GPU_TIMESTAMP_HI: GpuRegister = GpuRegister(0x9c); > >>> +pub(crate) const GPU_THREAD_MAX_THREADS: GpuRegister = GpuRegister(0xa0); > >>> +pub(crate) const GPU_THREAD_MAX_WORKGROUP_SIZE: GpuRegister = GpuRegister(0xa4); > >>> +pub(crate) const GPU_THREAD_MAX_BARRIER_SIZE: GpuRegister = GpuRegister(0xa8); > >>> +pub(crate) const GPU_THREAD_FEATURES: GpuRegister = GpuRegister(0xac); > >>> +pub(crate) const fn gpu_texture_features(n: u64) -> GpuRegister { > >>> + GpuRegister(0xB0 + ((n) * 4)) > >>> +} > >>> +pub(crate) const GPU_SHADER_PRESENT_LO: GpuRegister = GpuRegister(0x100); > >>> +pub(crate) const GPU_SHADER_PRESENT_HI: GpuRegister = GpuRegister(0x104); > >>> +pub(crate) const GPU_TILER_PRESENT_LO: GpuRegister = GpuRegister(0x110); > >>> +pub(crate) const GPU_TILER_PRESENT_HI: GpuRegister = GpuRegister(0x114); > >>> +pub(crate) const GPU_L2_PRESENT_LO: GpuRegister = GpuRegister(0x120); > >>> +pub(crate) const GPU_L2_PRESENT_HI: GpuRegister = GpuRegister(0x124); > >>> +pub(crate) const SHADER_READY_LO: GpuRegister = GpuRegister(0x140); > >>> +pub(crate) const SHADER_READY_HI: GpuRegister = GpuRegister(0x144); > >>> +pub(crate) const TILER_READY_LO: GpuRegister = GpuRegister(0x150); > >>> +pub(crate) const TILER_READY_HI: GpuRegister = GpuRegister(0x154); > >>> +pub(crate) const L2_READY_LO: GpuRegister = GpuRegister(0x160); > >>> +pub(crate) const L2_READY_HI: GpuRegister = GpuRegister(0x164); > >>> +pub(crate) const SHADER_PWRON_LO: GpuRegister = GpuRegister(0x180); > >>> +pub(crate) const SHADER_PWRON_HI: GpuRegister = GpuRegister(0x184); > >>> +pub(crate) const TILER_PWRON_LO: GpuRegister = GpuRegister(0x190); > >>> +pub(crate) const TILER_PWRON_HI: GpuRegister = GpuRegister(0x194); > >>> +pub(crate) const L2_PWRON_LO: GpuRegister = GpuRegister(0x1a0); > >>> +pub(crate) const L2_PWRON_HI: GpuRegister = GpuRegister(0x1a4); > >>> +pub(crate) const SHADER_PWROFF_LO: GpuRegister = GpuRegister(0x1c0); > >>> +pub(crate) const SHADER_PWROFF_HI: GpuRegister = GpuRegister(0x1c4); > >>> +pub(crate) const TILER_PWROFF_LO: GpuRegister = GpuRegister(0x1d0); > >>> +pub(crate) const TILER_PWROFF_HI: GpuRegister = GpuRegister(0x1d4); > >>> +pub(crate) const L2_PWROFF_LO: GpuRegister = GpuRegister(0x1e0); > >>> +pub(crate) const L2_PWROFF_HI: GpuRegister = GpuRegister(0x1e4); > >>> +pub(crate) const SHADER_PWRTRANS_LO: GpuRegister = GpuRegister(0x200); > >>> +pub(crate) const SHADER_PWRTRANS_HI: GpuRegister = GpuRegister(0x204); > >>> +pub(crate) const TILER_PWRTRANS_LO: GpuRegister = GpuRegister(0x210); > >>> +pub(crate) const TILER_PWRTRANS_HI: GpuRegister = GpuRegister(0x214); > >>> +pub(crate) const L2_PWRTRANS_LO: GpuRegister = GpuRegister(0x220); > >>> +pub(crate) const L2_PWRTRANS_HI: GpuRegister = GpuRegister(0x224); > >>> +pub(crate) const SHADER_PWRACTIVE_LO: GpuRegister = GpuRegister(0x240); > >>> +pub(crate) const SHADER_PWRACTIVE_HI: GpuRegister = GpuRegister(0x244); > >>> +pub(crate) const TILER_PWRACTIVE_LO: GpuRegister = GpuRegister(0x250); > >>> +pub(crate) const TILER_PWRACTIVE_HI: GpuRegister = GpuRegister(0x254); > >>> +pub(crate) const L2_PWRACTIVE_LO: GpuRegister = GpuRegister(0x260); > >>> +pub(crate) const L2_PWRACTIVE_HI: GpuRegister = GpuRegister(0x264); > >>> +pub(crate) const GPU_REVID: GpuRegister = GpuRegister(0x280); > >>> +pub(crate) const GPU_COHERENCY_FEATURES: GpuRegister = GpuRegister(0x300); > >>> +pub(crate) const GPU_COHERENCY_PROTOCOL: GpuRegister = GpuRegister(0x304); > >>> +pub(crate) const GPU_COHERENCY_ACE: GpuRegister = GpuRegister(0); > >>> +pub(crate) const GPU_COHERENCY_ACE_LITE: GpuRegister = GpuRegister(1); > >>> +pub(crate) const GPU_COHERENCY_NONE: GpuRegister = GpuRegister(31); > >>> +pub(crate) const MCU_CONTROL: GpuRegister = GpuRegister(0x700); > >>> +pub(crate) const MCU_CONTROL_ENABLE: GpuRegister = GpuRegister(1); > >>> +pub(crate) const MCU_CONTROL_AUTO: GpuRegister = GpuRegister(2); > >>> +pub(crate) const MCU_CONTROL_DISABLE: GpuRegister = GpuRegister(0); > >> > >> From this I presume it was scripted. These MCU_CONTROL_xxx defines are > >> not GPU registers but values for the GPU registers. We might need to > >> make changes to the C header to make it easier to convert to Rust. Or > >> indeed generate both the C and Rust headers from a common source. > >> > >> Generally looks reasonable, although as it stands this would of course > >> be a much smaller patch in plain C ;) It would look better if you split > >> the Rust-enabling parts from the actual new code. I also think there > >> needs to be a little more thought into what registers are useful to dump > >> and some documentation on the dump format. > >> > >> Naïve Rust question: there are a bunch of unwrap() calls in the code > >> which to my C-trained brain look like BUG_ON()s - and in C I'd be > >> complaining about them. What is the Rust style here? AFAICT they are all > >> valid (they should never panic) but it makes me uneasy when I'm reading > >> the code. > >> > >> Steve > >> > > > > Yeah, the unwraps() have to go. I didn’t give much thought to error handling here. > > > > Although, as you pointed out, most of these should never panic, unless the size of the dump was miscomputed. > > > > What do you suggest instead? I guess that printing a warning and then returning from panthor_core_dump() would be a good course of action. I don’t think there’s a Rust equivalent to WARN_ONCE, though. > > In C I'd be handling at least the allocation failures and returning > errors up the stack - most likely with some sort of WARN_ON() or similar > (because these are 'should never happen' programming bugs - but trivial > to recover from). > > For the try_from(size).unwrap() type cases, I've no idea to be honest - > Ideally they would be compile time checks. I've very little clue about > Rust but on the surface it looks like you've got the wrong type because > it's checking that things don't overflow when changing type. Of course > the standard C approach is to just do the type conversion and pretend > you're sure that an overflow can never happen ;) Rust has infallible conversions (called from instead of try_from) for the cases where the conversion is infallible. Some thoughts on the various examples: if isize::try_from(size).unwrap() == isize::MAX { return Err(EINVAL); } This is saying: * If size is exactly isize::MAX, then return EINVAL. * If size is greater than isize::MAX, then BUG. It should probably instead be: if size >= isize::MAX as usize { return Err(EINVAL); } bindings::__vmalloc_noprof(size.try_into().unwrap(), ...) This should probably have handling for size being too big, but I guess it will go away when this code uses the Rust vmalloc wrappers. alloc.alloc_header(HeaderType::Registers, sz.try_into().unwrap()); Change alloc_header to take an usize instead of u32. Then the cast goes away. bos.push(bo, GFP_KERNEL).unwrap(); The error isn't possible because the vector is pre-allocated, but we can still handle it by returning ENOMEM. > In particular for alloc<T>() - core::mem::size_of::<T>() is returning a > value (of type usize) which is then being converted to isize. A C > programmer wouldn't have any qualms about assigning a sizeof() into an > int, even though theorectically that could overflow if the structure was > massive. But this should really be a compile time check as it's clearly > dead code at runtime. > > Steve > >