Inlined: On 2022-09-08 21:50, jiadong.zhu@xxxxxxx wrote: > From: "Jiadong.Zhu" <Jiadong.Zhu@xxxxxxx> > > Trigger MCBP according to the priroty of the "priority" Spell out MCBP here, "Mid-Command Buffer Preemption." > software rings and the hw fence signaling > condition. "signalling" > > The muxer records some lastest locations from the "lastest"? ENOENT Please use an actual word. Run your patches through scripts/checkpatch.pl. > software ring which is used to resubmit packages > in preemption scenarios. > > v2: update comment style > > Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 + > drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c | 101 ++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h | 29 ++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 12 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 + > drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 163 ++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h | 16 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c | 26 +++ > 9 files changed, 351 insertions(+), 3 deletions(-) > create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c > create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h > > diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile > index 85224bc81ce5..24c5aa19bbf2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/Makefile > +++ b/drivers/gpu/drm/amd/amdgpu/Makefile > @@ -59,7 +59,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ > amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ > amdgpu_fw_attestation.o amdgpu_securedisplay.o \ > amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ > - amdgpu_sw_ring.o amdgpu_ring_mux.o > + amdgpu_sw_ring.o amdgpu_ring_mux.o amdgpu_mcbp.o > > amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > index 
258cffe3c06a..af86d87e2f3b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > @@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > } > } > > + amdgpu_ring_ib_begin(ring); > if (job && ring->funcs->init_cond_exec) > patch_offset = amdgpu_ring_init_cond_exec(ring); > > @@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH) > ring->funcs->emit_wave_limit(ring, false); > > + amdgpu_ring_ib_end(ring); > amdgpu_ring_commit(ring); > return 0; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c > new file mode 100644 > index 000000000000..2a12101a7699 > --- /dev/null > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.c > @@ -0,0 +1,101 @@ > +/* > + * Copyright 2022 Advanced Micro Devices, Inc. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. > + * > + */ > + > +#include <linux/delay.h> > +#include <linux/kernel.h> > +#include <linux/firmware.h> > +#include <linux/module.h> > +#include <linux/pci.h> > +#include <drm/gpu_scheduler.h> > + > +#include "amdgpu.h" > +#include "amdgpu_mcbp.h" > +#include "amdgpu_ring.h" > + > +/* trigger mcbp and find if we need resubmit */ > +int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux) > +{ > + struct amdgpu_mux_entry *e; > + struct amdgpu_ring *ring = NULL; > + int i; > + > + DRM_INFO("%s in\n", __func__); > + > + spin_lock(&mux->lock); > + > + amdgpu_ring_preempt_ib(mux->real_ring); > + > + ring = NULL; > + for (i = 0; i < mux->num_ring_entries; i++) { > + e = &mux->ring_entries[i]; > + if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) { > + ring = e->ring; > + break; > + } > + } > + > + if (!ring) { > + DRM_ERROR("cannot find low priority ring\n"); > + return -ENOENT; > + } > + > + amdgpu_fence_process(ring); > + > + DRM_INFO("after preempted ring_prio(%d) last_seq(%x) sync_seq(%x)\n", > + ring->hw_prio, atomic_read(&ring->fence_drv.last_seq), ring->fence_drv.sync_seq); > + > + if (atomic_read(&ring->fence_drv.last_seq) != > + ring->fence_drv.sync_seq) { > + DRM_INFO("schedule resubmit\n"); > + mux->s_resubmit = true; > + amdgpu_ring_mux_schedule_resubmit(mux); > + } > + > + spin_unlock(&mux->lock); > + return 0; > +} > + > + > +/*scan on low prio rings to have unsignaled fence and high ring has no fence.*/ > +int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux) > +{ > + struct amdgpu_ring *ring; > + uint32_t seq, last_seq; > + int i, need_preempt; > + > + need_preempt = 0; > + for (i = 0; i < mux->num_ring_entries; i++) { > + ring = mux->ring_entries[i].ring; > + last_seq = 
atomic_read(&ring->fence_drv.last_seq); > + seq = READ_ONCE(ring->fence_drv.sync_seq); > + DRM_INFO("ring(%p) prio(%d) last_seq(%x) seq(%x)\n", > + ring, ring->hw_prio, last_seq, seq); > + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT && last_seq < seq) > + return 0; > + if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && last_seq < seq) > + need_preempt = 1; > + } > + > + DRM_INFO("%s return %d\n", __func__, need_preempt && !mux->s_resubmit); > + return need_preempt && !mux->s_resubmit; > +} The DRM_INFO() calls here seem to be debug messages, and I feel they should be removed. Sometimes we enable INFO-level messages, and these would flood the log. > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h > new file mode 100644 > index 000000000000..0033bcba8d03 > --- /dev/null > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mcbp.h > @@ -0,0 +1,29 @@ > +/* > + * Copyright 2022 Advanced Micro Devices, Inc. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. > + * > + */ > + > +#ifndef __AMDGPU_MCBP_H__ > +#define __AMDGPU_MCBP_H__ > + > +int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux); > +int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux); > +#endif > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > index 5b70a2c36d81..6d7f8a40e308 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > @@ -583,3 +583,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring) > > return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop); > } > + > +void amdgpu_ring_ib_begin(struct amdgpu_ring *ring) > +{ > + if (ring->is_sw_ring) > + amdgpu_sw_ring_ib_begin(ring); > +} > + > +void amdgpu_ring_ib_end(struct amdgpu_ring *ring) > +{ > + if (ring->is_sw_ring) > + amdgpu_sw_ring_ib_end(ring); > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index d3155dc86c07..399037b0d6e6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -311,6 +311,9 @@ struct amdgpu_ring { > #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) > > int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); > +void amdgpu_ring_ib_begin(struct amdgpu_ring *ring); > +void amdgpu_ring_ib_end(struct amdgpu_ring *ring); > + > void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); > void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); > void amdgpu_ring_commit(struct amdgpu_ring *ring); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c > index 
ea4a3c66119a..0c9b639b844e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c > @@ -20,28 +20,60 @@ > * OTHER DEALINGS IN THE SOFTWARE. > * > */ > - > +#include <linux/slab.h> > #include <drm/drm_print.h> > > #include "amdgpu_ring_mux.h" > +#include "amdgpu_mcbp.h" > #include "amdgpu_ring.h" > > #define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ/2) > > +static struct kmem_cache *amdgpu_mux_chunk_slab; > + > static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, > u64 s_begin, u64 s_end); > +static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux); > +static void amdgpu_mux_resubmit_fallback(struct timer_list *t); > > int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) > { > mux->real_ring = ring; > + > memset(mux->ring_entries, 0, sizeof(mux->ring_entries)); > mux->num_ring_entries = 0; > + > + mux->s_resubmit = false; > + > + amdgpu_mux_chunk_slab = kmem_cache_create( > + "amdgpu_mux_chunk", sizeof(struct amdgpu_mux_chunk), 0, > + SLAB_HWCACHE_ALIGN, NULL); > + if (!amdgpu_mux_chunk_slab) { > + DRM_ERROR("create amdgpu_mux_chunk cache failed\n"); > + return -ENOMEM; > + } > + > spin_lock_init(&mux->lock); > + > + timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0); > + > return 0; > } > > void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux) > { > + struct amdgpu_mux_entry *e; > + struct amdgpu_mux_chunk *chunk, *chunk2; > + int i; > + > + for (i = 0; i < mux->num_ring_entries; i++) { > + e = &mux->ring_entries[i]; > + list_for_each_entry_safe(chunk, chunk2, &e->list, entry) { > + list_del(&chunk->entry); > + kmem_cache_free(amdgpu_mux_chunk_slab, chunk); > + } > + } > + kmem_cache_destroy(amdgpu_mux_chunk_slab); > memset(mux->ring_entries, 0, sizeof(mux->ring_entries)); > mux->num_ring_entries = 0; > } > @@ -64,6 +96,8 @@ int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring > e->sw_rptr = 0; 
> e->sw_wptr = 0; > > + INIT_LIST_HEAD(&e->list); > + > return 0; > } > > @@ -180,3 +214,130 @@ static int copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring > > return 0; > } > + > +void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux) > +{ > + mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT); > +} > + > +void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) > +{ > + struct amdgpu_mux_entry *e; > + struct amdgpu_mux_chunk *chunk; > + > + if (mux->s_resubmit) > + amdgpu_mux_resubmit_chunks(mux); > + > + e = amdgpu_get_sw_entry(mux, ring); > + if (!e) { > + DRM_ERROR("cannot find entry!\n"); > + return; > + } > + > + chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL); > + if (!chunk) { > + DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n"); > + return; > + } > + > + chunk->start = ring->wptr; > + list_add_tail(&chunk->entry, &e->list); > +} > + > +static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) > +{ > + uint32_t last_seq, size = 0; > + struct amdgpu_mux_entry *e; > + struct amdgpu_mux_chunk *chunk, *tmp; > + > + e = amdgpu_get_sw_entry(mux, ring); > + if (!e) { > + DRM_ERROR("cannot find entry!\n"); > + return; > + } > + > + last_seq = atomic_read(&ring->fence_drv.last_seq); > + > + list_for_each_entry_safe(chunk, tmp, &e->list, entry) { > + if (chunk->sync_seq <= last_seq) { > + list_del(&chunk->entry); > + kmem_cache_free(amdgpu_mux_chunk_slab, chunk); > + } else { > + size++; > + } > + } > +} > + > +void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) > +{ > + struct amdgpu_mux_entry *e; > + struct amdgpu_mux_chunk *chunk; > + > + e = amdgpu_get_sw_entry(mux, ring); > + if (!e) { > + DRM_ERROR("cannot find entry!\n"); > + return; > + } > + > + chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry); > + if (!chunk) { > + DRM_ERROR("cannot find chunk!\n"); > + return; > + } > 
+ > + chunk->end = ring->wptr; > + chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq); > + > + scan_and_remove_signaled_chunk(mux, ring); > +} > + > +static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux) > +{ > + struct amdgpu_mux_entry *e = NULL; > + struct amdgpu_mux_chunk *chunk; > + uint32_t seq, last_seq; > + int i; > + > + /*find low priority entries:*/ > + spin_lock(&mux->lock); > + > + for (i = 0; i < mux->num_ring_entries; i++) { > + if (mux->ring_entries[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) { > + e = &mux->ring_entries[i]; > + break; > + } > + } > + > + if (!e) { > + DRM_ERROR("%s no low priority ring found\n", __func__); > + return; > + } > + > + last_seq = atomic_read(&e->ring->fence_drv.last_seq); > + seq = READ_ONCE(e->ring->fence_drv.sync_seq); > + if (seq == last_seq) { > + DRM_INFO("skip as fence signaled seq=%x\n", seq); > + return; > + } > + DRM_INFO("begin to copy resubmit chunks\n"); Those two DRM_INFO() calls should be removed. > + > + /*resubmit all the fences between (last_seq, seq]*/ > + list_for_each_entry(chunk, &e->list, entry) { > + if (chunk->sync_seq > last_seq) { > + copy_pkt_from_sw_ring(mux, e->ring, chunk->start, chunk->end); > + amdgpu_ring_commit(mux->real_ring); > + } > + } > + spin_unlock(&mux->lock); > + > + del_timer(&mux->resubmit_timer); > + mux->s_resubmit = false; > +} > + > +static void amdgpu_mux_resubmit_fallback(struct timer_list *t) > +{ > + struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer); > + > + DRM_INFO("calling %s\n", __func__); > + amdgpu_mux_resubmit_chunks(mux); > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h > index d058c43bb063..1d91c235061a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h > @@ -44,17 +44,27 @@ struct amdgpu_mux_entry { > u64 sw_cptr; > u64 sw_rptr; > u64 sw_wptr; > + > + struct list_head list; > }; > > struct amdgpu_ring_mux { 
> struct amdgpu_ring *real_ring; > > struct amdgpu_mux_entry ring_entries[AMDGPU_MAX_GFX_RINGS]; > - > unsigned num_ring_entries; > > spinlock_t lock; > > + bool s_resubmit; > + struct timer_list resubmit_timer; > +}; > + > +struct amdgpu_mux_chunk { > + struct list_head entry; > + uint32_t sync_seq; > + u64 start; > + u64 end; > }; I'd generally include a comment on each struct member -- it makes it clear what the members are for. > > int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); > @@ -64,4 +74,8 @@ void amdgpu_ring_set_wptr_to_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring > u64 amdgpu_ring_get_wptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); > u64 amdgpu_ring_get_rptr_from_mux(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); > > +void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); > +void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); > +void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux); > + > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c > index 452d0ff37758..143a84c18534 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sw_ring.c > @@ -26,6 +26,7 @@ > > #include "amdgpu_sw_ring.h" > #include "amdgpu_ring_mux.h" > +#include "amdgpu_mcbp.h" > > #define amdgpu_ring_get_gpu_addr(ring, offset) \ > (ring->is_mes_queue ? 
\ > @@ -202,3 +203,28 @@ void amdgpu_sw_ring_fini(struct amdgpu_ring *ring) > ring->adev->rings[ring->idx] = NULL; > } > > +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring) > +{ > + struct amdgpu_device *adev = ring->adev; > + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; > + > + BUG_ON(!ring->is_sw_ring); > + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) { > + if (amdgpu_mcbp_scan(mux) > 0) > + amdgpu_mcbp_trigger_preempt(mux); > + return; > + } > + > + amdgpu_ring_mux_start_ib(mux, ring); > +} > + > +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring) > +{ > + struct amdgpu_device *adev = ring->adev; > + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; > + > + BUG_ON(!ring->is_sw_ring); > + if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) > + return; > + amdgpu_ring_mux_end_ib(mux, ring); > +} Regards, -- Luben