Yes, that's exactly the problem. See, when you want to prevent a process B
from allocating the memory process A has evicted, you need to prevent all
concurrent allocation. And we don't do that because it causes a major
performance drop.

Regards,
Christian.

On 26.01.2018 at 14:21, Zhou, David(ChunMing) wrote:
> Your patch will prevent concurrent allocation, and that will result in a
> large drop in allocation performance.
>
> Sent from JianGuo Pro
>
> On 26.01.2018 at 21:04, Christian König
> <ckoenig.leichtzumerken at gmail.com> wrote:
>
> Attached is a clean implementation of what you actually want to do. But
> as I said, this is a NO-GO.
>
> Regards,
> Christian.
>
> On 26.01.2018 at 13:43, Christian König wrote:
>>> After my investigation, this issue looks like a defect of the TTM
>>> design itself, which breaks scheduling balance.
>> Yeah, but again: this is intended design that we can't change easily.
>>
>> Regards,
>> Christian.
>>
>> On 26.01.2018 at 13:36, Zhou, David(ChunMing) wrote:
>>> I am off work, so I'm replying by phone; the mail may not be in
>>> plain-text format.
>>>
>>> Back to the topic itself:
>>> The problem does indeed happen with the amdgpu driver. Someone reported
>>> to me that when an application runs as two instances, their performance
>>> differs. I also reproduced the issue with a unit test
>>> (bo_eviction_test). People then think our scheduler isn't working as
>>> expected.
>>>
>>> After my investigation, this issue looks like a defect of the TTM
>>> design itself, which breaks scheduling balance.
>>>
>>> Further, if we run containers on our GPU, container A could get a high
>>> score while container B gets a low score on the same benchmark.
>>>
>>> So this is a bug that we need to fix.
>>>
>>> Regards,
>>> David Zhou
>>>
>>> Sent from JianGuo Pro
>>>
>>> On 26.01.2018 at 18:31, Christian König
>>> <ckoenig.leichtzumerken at gmail.com> wrote:
>>>
>>> On 26.01.2018 at 11:22, Chunming Zhou wrote:
>>> > There is a scheduling balance issue around get_node, e.g.:
>>> > a. Process A allocates all of memory and uses it for a submission.
>>> > b. Process B tries to allocate memory and, during eviction, waits for
>>> >    a process A BO to become idle.
>>> > c. Process A completes its job and process B's eviction frees the
>>> >    process A BO node. But in the meantime process C comes along to
>>> >    allocate a BO, gets a node directly, and submits work; process B
>>> >    then has to wait again for the process C BO to become idle.
>>> > d. Repeat the above steps and process B can be delayed much longer.
>>> >
>>> > A later allocation must not get ahead of an earlier one for the same
>>> > place.
>>>
>>> Again NAK to the whole approach.
>>>
>>> At least with amdgpu the problem you describe above never occurs,
>>> because evictions are pipelined operations. We would only block for
>>> deleted regions to become free.
>>>
>>> But independent of that, incoming memory requests that arrive while we
>>> make room through eviction are intended to be served first.
>>>
>>> Changing that is certainly a no-go because it would favor memory-hungry
>>> applications over small clients.
>>>
>>> Regards,
>>> Christian.
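For context, the allocation loop being discussed is ttm_bo_mem_force_space().
Sketched below in simplified form (without the changes from this patch, and
with surrounding code trimmed), it retries get_node() after every eviction;
the window in which a later request can overtake an earlier one lies between
a completed eviction and that retry:

/* Simplified sketch of the existing ttm_bo_mem_force_space() loop;
 * not the patched version.
 */
do {
        /* Try to grab a free node for this placement. */
        ret = (*man->func->get_node)(man, bo, place, mem);
        if (unlikely(ret != 0))
                return ret;
        if (mem->mm_node)
                break;  /* got space */

        /* No space yet: evict the first BO on the LRU and retry.
         * Between this eviction finishing and the retry of get_node()
         * above, any other process may call get_node() and take the
         * space that was just freed - the window process C exploits
         * in the scenario from the commit message.
         */
        ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
        if (unlikely(ret != 0))
                return ret;
} while (1);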
>>>
>>> >
>>> > Change-Id: I3daa892e50f82226c552cc008a29e55894a98f18
>>> > Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
>>> > ---
>>> >  drivers/gpu/drm/ttm/ttm_bo.c    | 69 +++++++++++++++++++++++++++++++++++++++--
>>> >  include/drm/ttm/ttm_bo_api.h    |  7 +++++
>>> >  include/drm/ttm/ttm_bo_driver.h |  7 +++++
>>> >  3 files changed, 80 insertions(+), 3 deletions(-)
>>> >
>>> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>> > index d33a6bb742a1..558ec2cf465d 100644
>>> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> > @@ -841,6 +841,58 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
>>> >          return 0;
>>> >  }
>>> >
>>> > +static void ttm_man_init_waiter(struct ttm_bo_waiter *waiter,
>>> > +                                struct ttm_buffer_object *bo,
>>> > +                                const struct ttm_place *place)
>>> > +{
>>> > +        waiter->tbo = bo;
>>> > +        memcpy((void *)&waiter->place, (void *)place, sizeof(*place));
>>> > +        INIT_LIST_HEAD(&waiter->list);
>>> > +}
>>> > +
>>> > +static void ttm_man_add_waiter(struct ttm_mem_type_manager *man,
>>> > +                               struct ttm_bo_waiter *waiter)
>>> > +{
>>> > +        if (!waiter)
>>> > +                return;
>>> > +        spin_lock(&man->wait_lock);
>>> > +        list_add_tail(&waiter->list, &man->waiter_list);
>>> > +        spin_unlock(&man->wait_lock);
>>> > +}
>>> > +
>>> > +static void ttm_man_del_waiter(struct ttm_mem_type_manager *man,
>>> > +                               struct ttm_bo_waiter *waiter)
>>> > +{
>>> > +        if (!waiter)
>>> > +                return;
>>> > +        spin_lock(&man->wait_lock);
>>> > +        if (!list_empty(&waiter->list))
>>> > +                list_del(&waiter->list);
>>> > +        spin_unlock(&man->wait_lock);
>>> > +        kfree(waiter);
>>> > +}
>>> > +
>>> > +int ttm_man_check_bo(struct ttm_mem_type_manager *man,
>>> > +                     struct ttm_buffer_object *bo,
>>> > +                     const struct ttm_place *place)
>>> > +{
>>> > +        struct ttm_bo_waiter *waiter, *tmp;
>>> > +
>>> > +        spin_lock(&man->wait_lock);
>>> > +        list_for_each_entry_safe(waiter, tmp, &man->waiter_list, list) {
>>> > +                if ((bo != waiter->tbo) &&
>>> > +                    ((place->fpfn >= waiter->place.fpfn &&
>>> > +                      place->fpfn <= waiter->place.lpfn) ||
>>> > +                     (place->lpfn <= waiter->place.lpfn && place->lpfn >=
>>> > +                      waiter->place.fpfn)))
>>> > +                        goto later_bo;
>>> > +        }
>>> > +        spin_unlock(&man->wait_lock);
>>> > +        return true;
>>> > +later_bo:
>>> > +        spin_unlock(&man->wait_lock);
>>> > +        return false;
>>> > +}
>>> >  /**
>>> >   * Repeatedly evict memory from the LRU for @mem_type until we create enough
>>> >   * space, or we've evicted everything and there isn't enough space.
>>> > @@ -853,17 +905,26 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
>>> >  {
>>> >          struct ttm_bo_device *bdev = bo->bdev;
>>> >          struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>>> > +        struct ttm_bo_waiter waiter;
>>> >          int ret;
>>> >
>>> > +        ttm_man_init_waiter(&waiter, bo, place);
>>> > +        ttm_man_add_waiter(man, &waiter);
>>> >          do {
>>> >                  ret = (*man->func->get_node)(man, bo, place, mem);
>>> > -                if (unlikely(ret != 0))
>>> > +                if (unlikely(ret != 0)) {
>>> > +                        ttm_man_del_waiter(man, &waiter);
>>> >                          return ret;
>>> > -                if (mem->mm_node)
>>> > +                }
>>> > +                if (mem->mm_node) {
>>> > +                        ttm_man_del_waiter(man, &waiter);
>>> >                          break;
>>> > +                }
>>> >                  ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
>>> > -                if (unlikely(ret != 0))
>>> > +                if (unlikely(ret != 0)) {
>>> > +                        ttm_man_del_waiter(man, &waiter);
>>> >                          return ret;
>>> > +                }
>>> >          } while (1);
>>> >          mem->mem_type = mem_type;
>>> >          return ttm_bo_add_move_fence(bo, man, mem);
>>> > @@ -1450,6 +1511,8 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
>>> >          man->use_io_reserve_lru = false;
>>> >          mutex_init(&man->io_reserve_mutex);
>>> >          spin_lock_init(&man->move_lock);
>>> > +        spin_lock_init(&man->wait_lock);
>>> > +        INIT_LIST_HEAD(&man->waiter_list);
>>> >          INIT_LIST_HEAD(&man->io_reserve_lru);
>>> >
>>> >          ret = bdev->driver->init_mem_type(bdev, type, man);
>>> > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
>>> > index 2cd025c2abe7..0fce4dbd02e7 100644
>>> > --- a/include/drm/ttm/ttm_bo_api.h
>>> > +++ b/include/drm/ttm/ttm_bo_api.h
>>> > @@ -40,6 +40,7 @@
>>> >  #include <linux/mm.h>
>>> >  #include <linux/bitmap.h>
>>> >  #include <linux/reservation.h>
>>> > +#include <drm/ttm/ttm_placement.h>
>>> >
>>> >  struct ttm_bo_device;
>>> >
>>> > @@ -232,6 +233,12 @@ struct ttm_buffer_object {
>>> >          struct mutex wu_mutex;
>>> >  };
>>> >
>>> > +struct ttm_bo_waiter {
>>> > +        struct ttm_buffer_object *tbo;
>>> > +        struct ttm_place place;
>>> > +        struct list_head list;
>>> > +};
>>> > +
>>> >  /**
>>> >   * struct ttm_bo_kmap_obj
>>> >   *
>>> > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
>>> > index 9b417eb2df20..dc6b8b4c9e06 100644
>>> > --- a/include/drm/ttm/ttm_bo_driver.h
>>> > +++ b/include/drm/ttm/ttm_bo_driver.h
>>> > @@ -293,6 +293,10 @@ struct ttm_mem_type_manager {
>>> >          bool io_reserve_fastpath;
>>> >          spinlock_t move_lock;
>>> >
>>> > +        /* waiters in list */
>>> > +        spinlock_t wait_lock;
>>> > +        struct list_head waiter_list;
>>> > +
>>> >          /*
>>> >           * Protected by @io_reserve_mutex:
>>> >           */
>>> > @@ -748,6 +752,9 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
>>> >                       struct ttm_mem_reg *mem,
>>> >                       struct ttm_operation_ctx *ctx);
>>> >
>>> > +int ttm_man_check_bo(struct ttm_mem_type_manager *man,
>>> > +                     struct ttm_buffer_object *bo,
>>> > +                     const struct ttm_place *place);
>>> >  void ttm_bo_mem_put(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem);
>>> >  void ttm_bo_mem_put_locked(struct ttm_buffer_object *bo,
>>> >                             struct ttm_mem_reg *mem);
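One thing the posted patch does not show is a caller for the exported
ttm_man_check_bo(); presumably a memory manager's get_node() implementation
is meant to consult it and back off while an earlier, overlapping request is
still waiting. Below is a rough, purely illustrative sketch of what such a
caller might look like - it is not part of the patch, and the function name
only mirrors the drm_mm-based range manager's hook for the sake of example:

/* Hypothetical illustration only - not part of the posted patch.
 * A get_node() implementation could refuse to hand out space while an
 * earlier waiter overlaps the requested range, preserving first-come,
 * first-served ordering.
 */
static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
                               struct ttm_buffer_object *bo,
                               const struct ttm_place *place,
                               struct ttm_mem_reg *mem)
{
        if (!ttm_man_check_bo(man, bo, place)) {
                /* An earlier request is still waiting for this range:
                 * report "no space" so the caller evicts or waits
                 * instead of jumping the queue.
                 */
                mem->mm_node = NULL;
                return 0;
        }

        /* ... the existing node allocation (the drm_mm insertion in the
         * range manager) would follow here unchanged ...
         */
        return 0;
}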