Re: [PATCH v5 2/2] drm/amdgpu: Add address alignment support to DCC buffers

"Paneer Selvam, Arunpravin" <arunpravin.paneerselvam@xxxxxxx> · Wed, 17 Jul 2024 20:08:33 +0530



    Hi Christian,

    
    Can we use the below combination flags to kick in hardware
    workaround while pinning BO's for this specific hw generation.

    
    if (place->flags & TTM_PL_FLAG_CONTIGUOUS) &&

    (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||

    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1))) {

    }

    
    Regards,

    Arun.

    
    On 7/17/2024 2:38 PM, Christian König
      wrote:

    
      Well that approach was discussed before and seemed to be to
      complicated.

      
      But I totally agree that the AMDGPU_GEM_CREATE_GFX12_DCC flag is a
      bad idea. This isn't anything userspace should need to specify in
      the first place.

      
      All we need is a hardware workaround which kicks in all the time
      while pinning BOs for this specific hw generation and texture
      channel configuration.

      
      Please remove the AMDGPU_GEM_CREATE_GFX12_DCC flag again if
      possible or specify why it is actually necessary?

      
      Regards,

      Christian.

      
      Am 17.07.24 um 05:44 schrieb Marek
        Olšák:

      
          AMDGPU_GEM_CREATE_GFX12_DCC is set on 90% of all memory
            allocations, and almost all of them are not displayable.
            Shouldn't we use a different way to indicate that we need a
            non-power-of-two alignment, such as looking at the alignment
            field directly? 
          

          Marek

            
              On Tue, Jul 16, 2024,
                11:45 Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@xxxxxxx>
                wrote:

              
              Add
                address alignment support to the DCC VRAM buffers.

                
                v2:

                  - adjust size based on the max_texture_channel_caches
                values

                    only for GFX12 DCC buffers.

                  - used AMDGPU_GEM_CREATE_GFX12_DCC flag to apply
                change only

                    for DCC buffers.

                  - roundup non power of two DCC buffer adjusted size to
                nearest

                    power of two number as the buddy allocator does not
                support non

                    power of two alignments. This applies only to the
                contiguous

                    DCC buffers.

                
                v3:(Alex)

                  - rewrite the max texture channel caches comparison
                code in an

                    algorithmic way to determine the alignment size.

                
                v4:(Alex)

                  - Move the logic from amdgpu_vram_mgr_dcc_alignment()
                to gmc_v12_0.c

                    and add a new gmc func callback for dcc alignment.
                If the callback

                    is non-NULL, call it to get the alignment,
                otherwise, use the default.

                
                v5:(Alex)

                  - Set the Alignment to a default value if the callback
                doesn't exist.

                  - Add the callback to amdgpu_gmc_funcs.

                
                v6:

                  - Fix checkpatch error reported by Intel CI.

                
                Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@xxxxxxx>

                Acked-by: Alex Deucher <alexander.deucher@xxxxxxx>

                Acked-by: Christian König <christian.koenig@xxxxxxx>

                Reviewed-by: Frank Min <Frank.Min@xxxxxxx>

                ---

                 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h      |  6 ++++

                 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 36
                ++++++++++++++++++--

                 drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c       | 15
                ++++++++

                 3 files changed, 55 insertions(+), 2 deletions(-)

                
                diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
                b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h

                index febca3130497..654d0548a3f8 100644

                --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h

                +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h

                @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs {

                                                      uint64_t addr,
                uint64_t *flags);

                        /* get the amount of memory used by the vbios
                for pre-OS console */

                        unsigned int (*get_vbios_fb_size)(struct
                amdgpu_device *adev);

                +       /* get the DCC buffer alignment */

                +       u64 (*get_dcc_alignment)(struct amdgpu_device
                *adev);

                
                        enum amdgpu_memory_partition
                (*query_mem_partition_mode)(

                                struct amdgpu_device *adev);

                @@ -363,6 +365,10 @@ struct amdgpu_gmc {

                       
                (adev)->gmc.gmc_funcs->override_vm_pte_flags     
                              \

                                ((adev), (vm), (addr), (pte_flags))

                 #define amdgpu_gmc_get_vbios_fb_size(adev)
                (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))

                +#define amdgpu_gmc_get_dcc_alignment(_adev) ({         
                       \

                +       typeof(_adev) (adev) = (_adev);                 
                       \

                +     
                 ((adev)->gmc.gmc_funcs->get_dcc_alignment((adev))); 
                   \

                +})

                
                 /**

                  * amdgpu_gmc_vram_full_visible - Check if full VRAM is
                visible through the BAR

                diff --git
                a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
                b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

                index f91cc149d06c..aa9dca12371c 100644

                --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

                +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

                @@ -512,6 +512,16 @@ static int
                amdgpu_vram_mgr_new(struct ttm_resource_manager *man,

                                vres->flags |=
                DRM_BUDDY_RANGE_ALLOCATION;

                
                        remaining_size = (u64)vres->base.size;

                +       if (bo->flags &
                AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&

                +           bo->flags &
                AMDGPU_GEM_CREATE_GFX12_DCC) {

                +               u64 adjust_size;

                +

                +               if
                (adev->gmc.gmc_funcs->get_dcc_alignment) {

                +                       adjust_size =
                amdgpu_gmc_get_dcc_alignment(adev);

                +                       remaining_size =
                roundup_pow_of_two(remaining_size + adjust_size);

                +                       vres->flags |=
                DRM_BUDDY_TRIM_DISABLE;

                +               }

                +       }

                
                        mutex_lock(&mgr->lock);

                        while (remaining_size) {

                @@ -521,8 +531,12 @@ static int
                amdgpu_vram_mgr_new(struct ttm_resource_manager *man,

                                        min_block_size =
                mgr->default_page_size;

                
                                size = remaining_size;

                -               if ((size >= (u64)pages_per_block
                << PAGE_SHIFT) &&

                -                   !(size & (((u64)pages_per_block
                << PAGE_SHIFT) - 1)))

                +

                +               if (bo->flags &
                AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&

                +                   bo->flags &
                AMDGPU_GEM_CREATE_GFX12_DCC)

                +                       min_block_size = size;

                +               else if ((size >=
                (u64)pages_per_block << PAGE_SHIFT) &&

                +                        !(size &
                (((u64)pages_per_block << PAGE_SHIFT) - 1)))

                                        min_block_size =
                (u64)pages_per_block << PAGE_SHIFT;

                
                                BUG_ON(min_block_size <
                mm->chunk_size);

                @@ -553,6 +567,24 @@ static int
                amdgpu_vram_mgr_new(struct ttm_resource_manager *man,

                        }

                        mutex_unlock(&mgr->lock);

                
                +       if (bo->flags &
                AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&

                +           bo->flags &
                AMDGPU_GEM_CREATE_GFX12_DCC) {

                +               struct drm_buddy_block *dcc_block;

                +               u64 dcc_start, alignment;

                +

                +               dcc_block =
                amdgpu_vram_mgr_first_block(&vres->blocks);

                +               dcc_start =
                amdgpu_vram_mgr_block_start(dcc_block);

                +

                +               if
                (adev->gmc.gmc_funcs->get_dcc_alignment) {

                +                       alignment =
                amdgpu_gmc_get_dcc_alignment(adev);

                +                       /* Adjust the start address for
                DCC buffers only */

                +                       dcc_start = roundup(dcc_start,
                alignment);

                +                       drm_buddy_block_trim(mm,
                &dcc_start,

                +                                           
                (u64)vres->base.size,

                +                                           
                &vres->blocks);

                +               }

                +       }

                +

                        vres->base.start = 0;

                        size = max_t(u64,
                amdgpu_vram_mgr_blocks_size(&vres->blocks),

                                     vres->base.size);

                diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
                b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c

                index fd3ac483760e..4259edcdec8a 100644

                --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c

                +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c

                @@ -542,6 +542,20 @@ static unsigned
                gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)

                        return 0;

                 }

                
                +static u64 gmc_v12_0_get_dcc_alignment(struct
                amdgpu_device *adev)

                +{

                +       u64 max_tex_channel_caches, alignment;

                +

                +       max_tex_channel_caches =
                adev->gfx.config.max_texture_channel_caches;

                +       if (is_power_of_2(max_tex_channel_caches))

                +               alignment = (max_tex_channel_caches /
                SZ_4) * max_tex_channel_caches;

                +       else

                +               alignment =
                roundup_pow_of_two(max_tex_channel_caches) *

                +                               max_tex_channel_caches;

                +

                +       return (u64)alignment * SZ_1K;

                +}

                +

                 static const struct amdgpu_gmc_funcs
                gmc_v12_0_gmc_funcs = {

                        .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb,

                        .flush_gpu_tlb_pasid =
                gmc_v12_0_flush_gpu_tlb_pasid,

                @@ -551,6 +565,7 @@ static const struct amdgpu_gmc_funcs
                gmc_v12_0_gmc_funcs = {

                        .get_vm_pde = gmc_v12_0_get_vm_pde,

                        .get_vm_pte = gmc_v12_0_get_vm_pte,

                        .get_vbios_fb_size =
                gmc_v12_0_get_vbios_fb_size,

                +       .get_dcc_alignment =
                gmc_v12_0_get_dcc_alignment,

                 };

                
                 static void gmc_v12_0_set_gmc_funcs(struct
                amdgpu_device *adev)

                -- 

                2.25.1