On Wed, Oct 2, 2024 at 12:41 AM Srinivasan Shanmugam <srinivasan.shanmugam@xxxxxxx> wrote:
>
> The patch modifies the gfx_v10_0_kiq_set_resources function to write
> the cleaner shader's memory controller address to the ring buffer. It
> also adds a new function, gfx_v10_0_ring_emit_cleaner_shader, which
> emits the PACKET3_RUN_CLEANER_SHADER packet to the ring buffer.
>
> This patch adds support for the PACKET3_RUN_CLEANER_SHADER packet in the
> gfx_v10_0 module. This packet is used to emit the cleaner shader, which
> is used to clear GPU memory before it's reused, helping to prevent data
> leakage between different processes.
>
> Finally, the patch updates the ring function structures to include the
> new gfx_v10_0_ring_emit_cleaner_shader function. This allows the
> cleaner shader to be emitted as part of the ring's operations.
>
> Cc: Christian König <christian.koenig@xxxxxxx>
> Cc: Alex Deucher <alexander.deucher@xxxxxxx>
> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@xxxxxxx>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 40 +++++++++++++++++++++++---
> 1 file changed, 36 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index c544ea2aea6e..2b230971c58a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -3677,13 +3677,19 @@ static int gfx_v10_0_set_powergating_state(void *handle,
> enum amd_powergating_state state);
> static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
> {
> + struct amdgpu_device *adev = kiq_ring->adev;
> + u64 shader_mc_addr;
> +
> + /* Cleaner shader MC address */
> + shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
> +
> amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
> amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
> PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
> amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
> amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
> - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
> - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
> + amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
> + amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
> amdgpu_ring_write(kiq_ring, 0); /* oac mask */
> amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
> }
> @@ -4557,6 +4563,7 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
> break;
> }
>
> + adev->gfx.xcc_mask = 1;

I think you can drop this. The xcc mask is calculated in
amdgpu_discovery.c for devices which have an IP discovery table.

Alex

> adev->gfx.config.gb_addr_config = gb_addr_config;
>
> adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
> @@ -4726,6 +4733,11 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
> adev->gfx.mec.num_queue_per_pipe = 8;
> break;
> }
> + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
> + default:
> + adev->gfx.enable_cleaner_shader = false;
> + break;
> + }
>
> /* KIQ event */
> r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
> @@ -4842,6 +4854,9 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
>
> gfx_v10_0_alloc_ip_dump(adev);
>
> + r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
> + if (r)
> + return r;
> return 0;
> }
>
> @@ -4881,6 +4896,8 @@ static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
> amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
> amdgpu_gfx_kiq_fini(adev, 0);
>
> + amdgpu_gfx_cleaner_shader_sw_fini(adev);
> +
> gfx_v10_0_pfp_fini(adev);
> gfx_v10_0_ce_fini(adev);
> gfx_v10_0_me_fini(adev);
> @@ -4891,6 +4908,7 @@ static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
> gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);
>
> gfx_v10_0_free_microcode(adev);
> + amdgpu_gfx_sysfs_isolation_shader_fini(adev);
>
> kfree(adev->gfx.ip_dump_core);
> kfree(adev->gfx.ip_dump_compute_queues);
> @@ -7374,6 +7392,9 @@ static int gfx_v10_0_hw_init(void *handle)
> if (!amdgpu_emu_mode)
> gfx_v10_0_init_golden_registers(adev);
>
> + amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
> + adev->gfx.cleaner_shader_ptr);
> +
> if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
> /**
> * For gfx 10, rlc firmware loading relies on smu firmware is
> @@ -9699,6 +9720,13 @@ static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block)
> amdgpu_gfx_off_ctrl(adev, true);
> }
>
> +static void gfx_v10_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
> +{
> + /* Emit the cleaner shader */
> + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
> + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
> +}
> +
> static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
> .name = "gfx_v10_0",
> .early_init = gfx_v10_0_early_init,
> @@ -9749,7 +9777,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
> 5 + /* HDP_INVL */
> 8 + 8 + /* FENCE x2 */
> 2 + /* SWITCH_BUFFER */
> - 8, /* gfx_v10_0_emit_mem_sync */
> + 8 + /* gfx_v10_0_emit_mem_sync */
> + 2, /* gfx_v10_0_ring_emit_cleaner_shader */
> .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
> .emit_ib = gfx_v10_0_ring_emit_ib_gfx,
> .emit_fence = gfx_v10_0_ring_emit_fence,
> @@ -9772,6 +9801,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
> .soft_recovery = gfx_v10_0_ring_soft_recovery,
> .emit_mem_sync = gfx_v10_0_emit_mem_sync,
> .reset = gfx_v10_0_reset_kgq,
> + .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
> };
>
> static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> @@ -9791,7 +9821,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> 2 + /* gfx_v10_0_ring_emit_vm_flush */
> 8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
> - 8, /* gfx_v10_0_emit_mem_sync */
> + 8 + /* gfx_v10_0_emit_mem_sync */
> + 2, /* gfx_v10_0_ring_emit_cleaner_shader */
> .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
> .emit_ib = gfx_v10_0_ring_emit_ib_compute,
> .emit_fence = gfx_v10_0_ring_emit_fence,
> @@ -9809,6 +9840,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> .soft_recovery = gfx_v10_0_ring_soft_recovery,
> .emit_mem_sync = gfx_v10_0_emit_mem_sync,
> .reset = gfx_v10_0_reset_kcq,
> + .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
> };
>
> static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
> --
> 2.34.1
>