Attached is what you actually want to do, cleanly implemented. But as I
said, this is a NO-GO.

Regards,
Christian.

On 26.01.2018 13:43, Christian König wrote:
>> After my investigation, this issue appears to be a defect of the TTM
>> design itself, which breaks scheduling balance.
> Yeah, but again: this is intended design that we can't change easily.
>
> Regards,
> Christian.
>
> On 26.01.2018 13:36, Zhou, David(ChunMing) wrote:
>> I am off work, so I am replying from my phone; the formatting may not
>> be plain text.
>>
>> Back to the topic itself:
>> The problem does happen in the amdgpu driver. Someone reported to me
>> that when an application runs as two instances, their performance
>> differs. I also reproduced the issue with a unit test
>> (bo_eviction_test). People always conclude that our scheduler isn't
>> working as expected.
>>
>> After my investigation, this issue appears to be a defect of the TTM
>> design itself, which breaks scheduling balance.
>>
>> Further, if we run containers on our GPU, container A can get a high
>> score while container B gets a low score on the same benchmark.
>>
>> So this is a bug that we need to fix.
>>
>> Regards,
>> David Zhou
>>
>> Sent from Huawei Pro
>>
>> On 26.01.2018 18:31, Christian König
>> <ckoenig.leichtzumerken at gmail.com> wrote:
>>
>> On 26.01.2018 11:22, Chunming Zhou wrote:
>> > There is a scheduling balance issue around get_node, like this:
>> > a. Process A allocates all of memory and uses it for submissions.
>> > b. Process B tries to allocate memory and, in eviction, has to
>> >    wait for a process A BO to become idle.
>> > c. Process A completes its job, and process B's eviction frees the
>> >    process A BO node. But in the meantime process C comes along to
>> >    allocate a BO, directly gets a node, and submits work, so
>> >    process B again has to wait for the process C BO to become idle.
>> > d. Repeat the above steps, and process B can be delayed much more.
>> >
>> > A later allocation must not get ahead of an earlier one for the
>> > same place.
>>
>> Again NAK to the whole approach.
>>
>> At least with amdgpu the problem you describe above never occurs,
>> because evictions are pipelined operations. We could only block for
>> deleted regions to become free.
>>
>> But independent of that, incoming memory requests that arrive while
>> we make room by eviction are intended to be served first.
>>
>> Changing that is certainly a no-go, because it would favor
>> memory-hungry applications over small clients.
>>
>> Regards,
>> Christian.
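For reference, the loop under discussion is the eviction retry loop in
ttm_bo_mem_force_space() in drivers/gpu/drm/ttm/ttm_bo.c. Below is a
simplified sketch of its pre-patch shape (condensed from the diff
quoted further down, not the verbatim kernel code), with the window
David describes marked in comments:

    do {
        /* Whichever process reaches this call first wins the free
         * space, even if another process has been looping here for
         * much longer. */
        ret = (*man->func->get_node)(man, bo, place, mem);
        if (unlikely(ret != 0))
            return ret;
        if (mem->mm_node)   /* got space, done */
            break;
        /* No space: evict one BO and wait for it to become idle.
         * While this process sleeps here, a newcomer can call
         * get_node() above and take the space the eviction frees. */
        ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
        if (unlikely(ret != 0))
            return ret;
    } while (1);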
>>
>> > Change-Id: I3daa892e50f82226c552cc008a29e55894a98f18
>> > Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
>> > ---
>> >  drivers/gpu/drm/ttm/ttm_bo.c    | 69 +++++++++++++++++++++++++++++++++++++++--
>> >  include/drm/ttm/ttm_bo_api.h    |  7 +++++
>> >  include/drm/ttm/ttm_bo_driver.h |  7 +++++
>> >  3 files changed, 80 insertions(+), 3 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> > index d33a6bb742a1..558ec2cf465d 100644
>> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> > @@ -841,6 +841,58 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
>> >       return 0;
>> >  }
>> >
>> > +static void ttm_man_init_waiter(struct ttm_bo_waiter *waiter,
>> > +                            struct ttm_buffer_object *bo,
>> > +                            const struct ttm_place *place)
>> > +{
>> > +    waiter->tbo = bo;
>> > +    memcpy((void *)&waiter->place, (void *)place, sizeof(*place));
>> > +    INIT_LIST_HEAD(&waiter->list);
>> > +}
>> > +
>> > +static void ttm_man_add_waiter(struct ttm_mem_type_manager *man,
>> > +                           struct ttm_bo_waiter *waiter)
>> > +{
>> > +    if (!waiter)
>> > +            return;
>> > +    spin_lock(&man->wait_lock);
>> > +    list_add_tail(&waiter->list, &man->waiter_list);
>> > +    spin_unlock(&man->wait_lock);
>> > +}
>> > +
>> > +static void ttm_man_del_waiter(struct ttm_mem_type_manager *man,
>> > +                           struct ttm_bo_waiter *waiter)
>> > +{
>> > +    if (!waiter)
>> > +            return;
>> > +    spin_lock(&man->wait_lock);
>> > +    if (!list_empty(&waiter->list))
>> > +            list_del(&waiter->list);
>> > +    spin_unlock(&man->wait_lock);
>> > +    kfree(waiter);
>> > +}
>> > +
>> > +int ttm_man_check_bo(struct ttm_mem_type_manager *man,
>> > +                 struct ttm_buffer_object *bo,
>> > +                 const struct ttm_place *place)
>> > +{
>> > +    struct ttm_bo_waiter *waiter, *tmp;
>> > +
>> > +    spin_lock(&man->wait_lock);
>> > +    list_for_each_entry_safe(waiter, tmp, &man->waiter_list, list) {
>> > +            if ((bo != waiter->tbo) &&
>> > +                ((place->fpfn >= waiter->place.fpfn &&
>> > +                  place->fpfn <= waiter->place.lpfn) ||
>> > +                 (place->lpfn <= waiter->place.lpfn && place->lpfn >=
>> > +                  waiter->place.fpfn)))
>> > +                goto later_bo;
>> > +    }
>> > +    spin_unlock(&man->wait_lock);
>> > +    return true;
>> > +later_bo:
>> > +    spin_unlock(&man->wait_lock);
>> > +    return false;
>> > +}
>> > +
>> >  /**
>> >   * Repeatedly evict memory from the LRU for @mem_type until we create enough
>> >   * space, or we've evicted everything and there isn't enough space.
>> > @@ -853,17 +905,26 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
>> >  {
>> >       struct ttm_bo_device *bdev = bo->bdev;
>> >       struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>> > +    struct ttm_bo_waiter waiter;
>> >       int ret;
>> >
>> > +    ttm_man_init_waiter(&waiter, bo, place);
>> > +    ttm_man_add_waiter(man, &waiter);
>> >       do {
>> >               ret = (*man->func->get_node)(man, bo, place, mem);
>> > -            if (unlikely(ret != 0))
>> > +            if (unlikely(ret != 0)) {
>> > +                    ttm_man_del_waiter(man, &waiter);
>> >                       return ret;
>> > -            if (mem->mm_node)
>> > +            }
>> > +            if (mem->mm_node) {
>> > +                    ttm_man_del_waiter(man, &waiter);
>> >                       break;
>> > +            }
>> >               ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
>> > -            if (unlikely(ret != 0))
>> > +            if (unlikely(ret != 0)) {
>> > +                    ttm_man_del_waiter(man, &waiter);
>> >                       return ret;
>> > +            }
>> >       } while (1);
>> >       mem->mem_type = mem_type;
>> >       return ttm_bo_add_move_fence(bo, man, mem);
>> > @@ -1450,6 +1511,8 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
>> >       man->use_io_reserve_lru = false;
>> >       mutex_init(&man->io_reserve_mutex);
>> >       spin_lock_init(&man->move_lock);
>> > +    spin_lock_init(&man->wait_lock);
>> > +    INIT_LIST_HEAD(&man->waiter_list);
>> >       INIT_LIST_HEAD(&man->io_reserve_lru);
>> >
>> >       ret = bdev->driver->init_mem_type(bdev, type, man);
>> > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
>> > index 2cd025c2abe7..0fce4dbd02e7 100644
>> > --- a/include/drm/ttm/ttm_bo_api.h
>> > +++ b/include/drm/ttm/ttm_bo_api.h
>> > @@ -40,6 +40,7 @@
>> >  #include <linux/mm.h>
>> >  #include <linux/bitmap.h>
>> >  #include <linux/reservation.h>
>> > +#include <drm/ttm/ttm_placement.h>
>> >
>> >  struct ttm_bo_device;
>> >
>> > @@ -232,6 +233,12 @@ struct ttm_buffer_object {
>> >       struct mutex wu_mutex;
>> >  };
>> >
>> > +struct ttm_bo_waiter {
>> > +    struct ttm_buffer_object *tbo;
>> > +    struct ttm_place place;
>> > +    struct list_head list;
>> > +};
>> > +
>> >  /**
>> >   * struct ttm_bo_kmap_obj
>> >   *
>> > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
>> > index 9b417eb2df20..dc6b8b4c9e06 100644
>> > --- a/include/drm/ttm/ttm_bo_driver.h
>> > +++ b/include/drm/ttm/ttm_bo_driver.h
>> > @@ -293,6 +293,10 @@ struct ttm_mem_type_manager {
>> >       bool io_reserve_fastpath;
>> >       spinlock_t move_lock;
>> >
>> > +    /* waiters in list */
>> > +    spinlock_t wait_lock;
>> > +    struct list_head waiter_list;
>> > +
>> >       /*
>> >        * Protected by @io_reserve_mutex:
>> >        */
>> > @@ -748,6 +752,9 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
>> >                    struct ttm_mem_reg *mem,
>> >                    struct ttm_operation_ctx *ctx);
>> >
>> > +int ttm_man_check_bo(struct ttm_mem_type_manager *man,
>> > +                 struct ttm_buffer_object *bo,
>> > +                 const struct ttm_place *place);
>> >  void ttm_bo_mem_put(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem);
>> >  void ttm_bo_mem_put_locked(struct ttm_buffer_object *bo,
>> >                          struct ttm_mem_reg *mem);
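Note that the patch exports ttm_man_check_bo() in ttm_bo_driver.h but
includes no call site. Presumably each memory manager's get_node()
implementation is expected to consult it and report "no space" while an
earlier overlapping request is still queued, so that a newcomer falls
back to eviction instead of jumping the queue. A hypothetical sketch of
such a call site (my_manager_get_node() and my_manager_alloc_range()
are illustrative names, not part of the patch):

    static int my_manager_get_node(struct ttm_mem_type_manager *man,
                                   struct ttm_buffer_object *bo,
                                   const struct ttm_place *place,
                                   struct ttm_mem_reg *mem)
    {
        /* Another BO's waiter wants an overlapping range: report the
         * space as exhausted (ret == 0, mm_node == NULL) so the
         * caller tries eviction and this request keeps its place in
         * line.  Illustrative only; not part of the patch. */
        if (!ttm_man_check_bo(man, bo, place)) {
            mem->mm_node = NULL;
            return 0;
        }
        /* Otherwise allocate from the manager's range as usual. */
        return my_manager_alloc_range(man, bo, place, mem);
    }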
[Attachment: 0001-drm-ttm-alloc-only-one-memory-allocation-operation-a.patch
 (text/x-patch, 1687 bytes):
 https://lists.freedesktop.org/archives/amd-gfx/attachments/20180126/10a5dc90/attachment-0001.bin]