On Fri, Mar 31, 2023 at 4:02 PM Felix Kuehling <felix.kuehling@xxxxxxx> wrote:
>
> There is a subsequent patch where amdgpu directly calls ttm_pool_init to
> create pools per NUMA node. That will depend on the updated function
> signature.

Then we probably want to take this through amdgpu then.

Alex

>
> Regards,
>   Felix
>
>
> On 2023-03-31 15:17, Alex Deucher wrote:
> > On Fri, Mar 31, 2023 at 2:54 AM Christian König
> > <ckoenig.leichtzumerken@xxxxxxxxx> wrote:
> >> Should I push this to drm-misc-next or do we take it through
> >> amd-staging-drm-next?
> > I think either way is fine.  We can carry it internally as needed for
> > testing if you want to commit it to drm-misc-next.  I don't think
> > there are any direct code dependencies, but you or Rajneesh can
> > correct me if I'm wrong.
> >
> > Alex
> >
> >> Christian.
> >>
> >> Am 30.03.23 um 21:50 schrieb Alex Deucher:
> >>> From: Rajneesh Bhardwaj <rajneesh.bhardwaj@xxxxxxx>
> >>>
> >>> This allows backing ttm_tt structure with pages from different NUMA
> >>> pools.
> >>>
> >>> Tested-by: Graham Sider <graham.sider@xxxxxxx>
> >>> Reviewed-by: Felix Kuehling <felix.kuehling@xxxxxxx>
> >>> Signed-off-by: Christian König <christian.koenig@xxxxxxx>
> >>> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@xxxxxxx>
> >>> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
> >>> ---
> >>>   drivers/gpu/drm/ttm/ttm_device.c |  2 +-
> >>>   drivers/gpu/drm/ttm/ttm_pool.c   | 13 ++++++++-----
> >>>   include/drm/ttm/ttm_pool.h       |  4 +++-
> >>>   3 files changed, 12 insertions(+), 7 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> >>> index e7147e304637..4a8164a5320f 100644
> >>> --- a/drivers/gpu/drm/ttm/ttm_device.c
> >>> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> >>> @@ -218,7 +218,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
> >>>       bdev->funcs = funcs;
> >>>
> >>>       ttm_sys_man_init(bdev);
> >>> -     ttm_pool_init(&bdev->pool, dev, use_dma_alloc, use_dma32);
> >>> +     ttm_pool_init(&bdev->pool, dev, NUMA_NO_NODE, use_dma_alloc, use_dma32);
> >>>
> >>>       bdev->vma_manager = vma_manager;
> >>>       INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
> >>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> >>> index 9f6764bf3b15..1068a41cded1 100644
> >>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> >>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> >>> @@ -92,7 +92,7 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
> >>>               __GFP_KSWAPD_RECLAIM;
> >>>
> >>>       if (!pool->use_dma_alloc) {
> >>> -             p = alloc_pages(gfp_flags, order);
> >>> +             p = alloc_pages_node(pool->nid, gfp_flags, order);
> >>>               if (p)
> >>>                       p->private = order;
> >>>               return p;
> >>> @@ -286,7 +286,7 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
> >>>                                                 enum ttm_caching caching,
> >>>                                                 unsigned int order)
> >>>   {
> >>> -     if (pool->use_dma_alloc)
> >>> +     if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
> >>>               return &pool->caching[caching].orders[order];
> >>>
> >>>   #ifdef CONFIG_X86
> >>> @@ -523,29 +523,32 @@ EXPORT_SYMBOL(ttm_pool_free);
> >>>    *
> >>>    * @pool: the pool to initialize
> >>>    * @dev: device for DMA allocations and mappings
> >>> + * @nid: NUMA node to use for allocations
> >>>    * @use_dma_alloc: true if coherent DMA alloc should be used
> >>>    * @use_dma32: true if GFP_DMA32 should be used
> >>>    *
> >>>    * Initialize the pool and its pool types.
> >>>    */
> >>>   void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
> >>> -                bool use_dma_alloc, bool use_dma32)
> >>> +                int nid, bool use_dma_alloc, bool use_dma32)
> >>>   {
> >>>       unsigned int i, j;
> >>>
> >>>       WARN_ON(!dev && use_dma_alloc);
> >>>
> >>>       pool->dev = dev;
> >>> +     pool->nid = nid;
> >>>       pool->use_dma_alloc = use_dma_alloc;
> >>>       pool->use_dma32 = use_dma32;
> >>>
> >>> -     if (use_dma_alloc) {
> >>> +     if (use_dma_alloc || nid != NUMA_NO_NODE) {
> >>>               for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> >>>                       for (j = 0; j < MAX_ORDER; ++j)
> >>>                               ttm_pool_type_init(&pool->caching[i].orders[j],
> >>>                                                  pool, i, j);
> >>>       }
> >>>   }
> >>> +EXPORT_SYMBOL(ttm_pool_init);
> >>>
> >>>   /**
> >>>    * ttm_pool_fini - Cleanup a pool
> >>> @@ -559,7 +562,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
> >>>   {
> >>>       unsigned int i, j;
> >>>
> >>> -     if (pool->use_dma_alloc) {
> >>> +     if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
> >>>               for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> >>>                       for (j = 0; j < MAX_ORDER; ++j)
> >>>                               ttm_pool_type_fini(&pool->caching[i].orders[j]);
> >>> diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h
> >>> index ef09b23d29e3..23bd8be6d4f8 100644
> >>> --- a/include/drm/ttm/ttm_pool.h
> >>> +++ b/include/drm/ttm/ttm_pool.h
> >>> @@ -61,12 +61,14 @@ struct ttm_pool_type {
> >>>    * struct ttm_pool - Pool for all caching and orders
> >>>    *
> >>>    * @dev: the device we allocate pages for
> >>> + * @nid: which numa node to use
> >>>    * @use_dma_alloc: if coherent DMA allocations should be used
> >>>    * @use_dma32: if GFP_DMA32 should be used
> >>>    * @caching: pools for each caching/order
> >>>    */
> >>>   struct ttm_pool {
> >>>       struct device *dev;
> >>> +     int nid;
> >>>
> >>>       bool use_dma_alloc;
> >>>       bool use_dma32;
> >>> @@ -81,7 +83,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
> >>>   void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt);
> >>>
> >>>   void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
> >>> -                bool use_dma_alloc, bool use_dma32);
> >>> +                int nid, bool use_dma_alloc, bool use_dma32);
> >>>   void ttm_pool_fini(struct ttm_pool *pool);
> >>>
> >>>   int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m);
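
For context, a minimal sketch of the per-NUMA-node usage this new
signature enables on the driver side. The follow-up amdgpu patch Felix
mentions is not part of this thread, so the names below
(example_init_numa_pools, example_fini_numa_pools, node_pools) and the
pool flag choices are hypothetical illustration only, not the actual
driver code:

  /*
   * Hypothetical illustration, not the real amdgpu change: create one
   * ttm_pool per online NUMA node. With nid != NUMA_NO_NODE each pool
   * keeps its own per-caching/per-order page lists (see
   * ttm_pool_select_type() above), and ttm_pool_alloc_page() backs
   * them with alloc_pages_node() on that node.
   */
  #include <linux/nodemask.h>
  #include <linux/numa.h>
  #include <drm/ttm/ttm_pool.h>

  static struct ttm_pool node_pools[MAX_NUMNODES];

  static void example_init_numa_pools(struct device *dev)
  {
          int nid;

          for_each_online_node(nid)
                  /* No coherent DMA alloc and no DMA32 in this sketch. */
                  ttm_pool_init(&node_pools[nid], dev, nid, false, false);
  }

  static void example_fini_numa_pools(void)
  {
          int nid;

          for_each_online_node(nid)
                  ttm_pool_fini(&node_pools[nid]);
  }

A caller would then pass &node_pools[target_nid] to ttm_pool_alloc()
for memory that should live close to a given device or CPU, which is
why the patch adds EXPORT_SYMBOL(ttm_pool_init) above.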