[PATCH 1/3] drm/amd/gfx: add instance field to select_se_sh

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Yeah, that should do it.

Alex

From: amd-gfx [mailto:amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx] On Behalf Of StDenis, Tom
Sent: Wednesday, June 29, 2016 5:48 AM
To: Nicolai Hähnle; amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH 1/3] drm/amd/gfx: add instance field to select_se_sh


Yeah, I see that in the gfx_v8_0 version:


u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);

data = REG_SET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);



So should I do something similar with 0xFFFFFFFF and change all the default "0" arguments throughout the driver to that? Then it becomes:

if (instance == 0xFFFFFFFF) {
	data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
} else {
	data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
}

That's an easy change.

Tom

________________________________
From: Nicolai Hähnle <nhaehnle at gmail.com>
Sent: Wednesday, June 29, 2016 03:16
To: Tom St Denis; amd-gfx at lists.freedesktop.org
Cc: StDenis, Tom
Subject: Re: [PATCH 1/3] drm/amd/gfx: add instance field to select_se_sh

On 28.06.2016 16:43, Tom St Denis wrote:
> Add ability to specify instance in select_se_sh callback.
> Defaults to 0 all over the driver.
>
> Signed-off-by: Tom St Denis <tom.stdenis at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h   |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/cik.c      |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 25 +++++++++++++------------
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 19 ++++++++++---------
>   drivers/gpu/drm/amd/amdgpu/vi.c       |  4 ++--
>   5 files changed, 29 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 780a830b2bbd..d7efbd0ee983 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1158,7 +1158,7 @@ struct amdgpu_cu_info {
>   struct amdgpu_gfx_funcs {
>        /* get the gpu clock counter */
>        uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
> -     void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num);
> +     void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
>   };
>
>   struct amdgpu_gfx {
> @@ -2295,7 +2295,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>   #define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev))
>   #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
>   #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
> -#define amdgpu_gfx_select_se_sh(adev, se, sh) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh))
> +#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
>
>   #define amdgpu_dpm_get_temperature(adev) \
>        ((adev)->pp_enabled ?                                           \
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
> index ebc8c256286b..1799612b8299 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik.c
> @@ -1036,12 +1036,12 @@ static uint32_t cik_read_indexed_register(struct amdgpu_device *adev,
>
>        mutex_lock(&adev->grbm_idx_mutex);
>        if (se_num != 0xffffffff || sh_num != 0xffffffff)
> -             amdgpu_gfx_select_se_sh(adev, se_num, sh_num);
> +             amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0);
>
>        val = RREG32(reg_offset);
>
>        if (se_num != 0xffffffff || sh_num != 0xffffffff)
> -             amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +             amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>        mutex_unlock(&adev->grbm_idx_mutex);
>        return val;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 9edec8a98ee2..44def1665f0c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -1584,10 +1584,11 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
>    * broadcast to all SEs or SHs (CIK).
>    */
>   static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
> -                               u32 se_num, u32 sh_num)
> +                               u32 se_num, u32 sh_num, u32 instance)
>   {
>        u32 data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK;
>
> +     data = REG_SET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

INSTANCE_INDEX and INSTANCE_BROADCAST_WRITES have the same relationship
as SH/SE_INDEX and SH/SE_BROADCAST_WRITES. It's actually kind of odd
that the current code either sets both of SH/SE_BROADCAST_WRITES or none
of them. It should logically be possible to broadcast writes to all SHs
in one SE, for example.

Nicolai

>        if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
>                data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
>                        GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
> @@ -1660,13 +1661,13 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
>        mutex_lock(&adev->grbm_idx_mutex);
>        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
>                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> -                     gfx_v7_0_select_se_sh(adev, i, j);
> +                     gfx_v7_0_select_se_sh(adev, i, j, 0);
>                        data = gfx_v7_0_get_rb_active_bitmap(adev);
>                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
>                                               rb_bitmap_width_per_sh);
>                }
>        }
> -     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>        mutex_unlock(&adev->grbm_idx_mutex);
>
>        adev->gfx.config.backend_enable_mask = active_rbs;
> @@ -1747,7 +1748,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
>         * making sure that the following register writes will be broadcasted
>         * to all the shaders
>         */
> -     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>
>        /* XXX SH_MEM regs */
>        /* where to put LDS, scratch, GPUVM in FSA64 space */
> @@ -3381,7 +3382,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
>        mutex_lock(&adev->grbm_idx_mutex);
>        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
>                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> -                     gfx_v7_0_select_se_sh(adev, i, j);
> +                     gfx_v7_0_select_se_sh(adev, i, j, 0);
>                        for (k = 0; k < adev->usec_timeout; k++) {
>                                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
>                                        break;
> @@ -3389,7 +3390,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
>                        }
>                }
>        }
> -     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>        mutex_unlock(&adev->grbm_idx_mutex);
>
>        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
> @@ -3549,7 +3550,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
>        WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
>
>        mutex_lock(&adev->grbm_idx_mutex);
> -     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>        WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
>        WREG32(mmRLC_LB_PARAMS, 0x00600408);
>        WREG32(mmRLC_LB_CNTL, 0x80000004);
> @@ -3589,7 +3590,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
>                tmp = gfx_v7_0_halt_rlc(adev);
>
>                mutex_lock(&adev->grbm_idx_mutex);
> -             gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +             gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>                WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
>                WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
>                tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
> @@ -3640,7 +3641,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
>                tmp = gfx_v7_0_halt_rlc(adev);
>
>                mutex_lock(&adev->grbm_idx_mutex);
> -             gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +             gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>                WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
>                WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
>                data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
> @@ -3691,7 +3692,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
>                tmp = gfx_v7_0_halt_rlc(adev);
>
>                mutex_lock(&adev->grbm_idx_mutex);
> -             gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +             gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>                WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
>                WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
>                data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
> @@ -5055,7 +5056,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
>                        mask = 1;
>                        ao_bitmap = 0;
>                        counter = 0;
> -                     gfx_v7_0_select_se_sh(adev, i, j);
> +                     gfx_v7_0_select_se_sh(adev, i, j, 0);
>                        bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
>                        cu_info->bitmap[i][j] = bitmap;
>
> @@ -5071,7 +5072,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
>                        ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
>                }
>        }
> -     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +     gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>        mutex_unlock(&adev->grbm_idx_mutex);
>
>        cu_info->number = active_cu_number;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 9aa30bc06e4a..45c3ad52d21c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -3447,10 +3447,11 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
>   }
>
>   static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
> -                               u32 se_num, u32 sh_num)
> +                               u32 se_num, u32 sh_num, u32 instance)
>   {
>        u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
>
> +     data = REG_SET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
>        if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
>                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
>                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
> @@ -3499,13 +3500,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
>        mutex_lock(&adev->grbm_idx_mutex);
>        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
>                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> -                     gfx_v8_0_select_se_sh(adev, i, j);
> +                     gfx_v8_0_select_se_sh(adev, i, j, 0);
>                        data = gfx_v8_0_get_rb_active_bitmap(adev);
>                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
>                                               rb_bitmap_width_per_sh);
>                }
>        }
> -     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>        mutex_unlock(&adev->grbm_idx_mutex);
>
>        adev->gfx.config.backend_enable_mask = active_rbs;
> @@ -3609,7 +3610,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
>         * making sure that the following register writes will be broadcasted
>         * to all the shaders
>         */
> -     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>
>        WREG32(mmPA_SC_FIFO_SIZE,
>                   (adev->gfx.config.sc_prim_fifo_size_frontend <<
> @@ -3632,7 +3633,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
>        mutex_lock(&adev->grbm_idx_mutex);
>        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
>                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> -                     gfx_v8_0_select_se_sh(adev, i, j);
> +                     gfx_v8_0_select_se_sh(adev, i, j, 0);
>                        for (k = 0; k < adev->usec_timeout; k++) {
>                                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
>                                        break;
> @@ -3640,7 +3641,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
>                        }
>                }
>        }
> -     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>        mutex_unlock(&adev->grbm_idx_mutex);
>
>        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
> @@ -5409,7 +5410,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
>   {
>        uint32_t data;
>
> -     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>
>        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
>        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
> @@ -6501,7 +6502,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
>                        mask = 1;
>                        ao_bitmap = 0;
>                        counter = 0;
> -                     gfx_v8_0_select_se_sh(adev, i, j);
> +                     gfx_v8_0_select_se_sh(adev, i, j, 0);
>                        bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
>                        cu_info->bitmap[i][j] = bitmap;
>
> @@ -6517,7 +6518,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
>                        ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
>                }
>        }
> -     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +     gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>        mutex_unlock(&adev->grbm_idx_mutex);
>
>        cu_info->number = active_cu_number;
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index c628a09f84d7..c7762a554e6b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -534,12 +534,12 @@ static uint32_t vi_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
>
>        mutex_lock(&adev->grbm_idx_mutex);
>        if (se_num != 0xffffffff || sh_num != 0xffffffff)
> -             amdgpu_gfx_select_se_sh(adev, se_num, sh_num);
> +             amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0);
>
>        val = RREG32(reg_offset);
>
>        if (se_num != 0xffffffff || sh_num != 0xffffffff)
> -             amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff);
> +             amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0);
>        mutex_unlock(&adev->grbm_idx_mutex);
>        return val;
>   }
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20160629/4cbe1214/attachment-0001.html>


[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux