From: Aharon Landau <aharonl@xxxxxxxxxx>

Currently, the cache structure is a static linear array, so its size is
fixed at compile time and cannot be expanded at run-time. Each entry is
dedicated to mkeys of size 2^x with no access_flags, so mkeys with
different properties are not cacheable.

In this patch, we change the cache structure to an RB-tree. This enables
caching all user mkeys that can use UMR.

Signed-off-by: Aharon Landau <aharonl@xxxxxxxxxx>
Reviewed-by: Michael Guralnik <michaelgur@xxxxxxxxxx>
---
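For reviewers: below is a minimal, userspace-only sketch of the lookup
policy described above, to make the new keying easier to follow. The key
fields mirror struct mlx5r_cache_rb_key from this patch, but everything
else (struct demo_key, find_entry(), main(), the linear scan standing in
for the RB-tree, and the field-by-field compare - the patch itself orders
entries with memcmp() over the whole key) is a simplified illustration,
not driver code.

#include <stdio.h>

struct demo_key {
        unsigned int access_mode;
        unsigned int access_flags;
        unsigned int ndescs;
};

/* Field-by-field total order; stands in for the patch's memcmp(). */
static int key_cmp(struct demo_key a, struct demo_key b)
{
        if (a.access_mode != b.access_mode)
                return a.access_mode < b.access_mode ? -1 : 1;
        if (a.access_flags != b.access_flags)
                return a.access_flags < b.access_flags ? -1 : 1;
        if (a.ndescs != b.ndescs)
                return a.ndescs < b.ndescs ? -1 : 1;
        return 0;
}

/*
 * Smallest entry with key >= rq whose access_mode/access_flags still
 * match the request (cf. mlx5_cache_find_smallest_ent() and
 * mlx5_cache_get_mkey() in this patch).
 */
static const struct demo_key *
find_entry(const struct demo_key *ents, int n, struct demo_key rq)
{
        const struct demo_key *best = NULL;
        int i;

        for (i = 0; i < n; i++) {
                if (key_cmp(ents[i], rq) < 0)
                        continue;
                if (ents[i].access_mode != rq.access_mode ||
                    ents[i].access_flags != rq.access_flags)
                        continue;
                if (!best || key_cmp(ents[i], *best) < 0)
                        best = &ents[i];
        }
        return best;
}

int main(void)
{
        /* Entries for 4..16 descriptors, two access_flags variants. */
        const struct demo_key ents[] = {
                { 1, 0, 4 }, { 1, 0, 8 }, { 1, 0, 16 },
                { 1, 2, 4 }, { 1, 2, 8 },
        };
        const struct demo_key rq = { 1, 2, 5 }; /* 5 descs, flags=2 */
        const struct demo_key *e = find_entry(ents, 5, rq);

        if (e)  /* expected: the { 1, 2, 8 } entry */
                printf("served by entry ndescs=%u flags=%u\n",
                       e->ndescs, e->access_flags);
        return 0;
}

A request is thus served by the smallest cached entry whose ndescs covers
the request while access_mode and access_flags still match, which is what
the RB-tree walk in mlx5_cache_get_mkey() below implements.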
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  17 +-
 drivers/infiniband/hw/mlx5/mr.c      | 293 +++++++++++++++++++--------
 drivers/infiniband/hw/mlx5/odp.c     |   9 +-
 3 files changed, 223 insertions(+), 96 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 5b57b2a24b47..7fd3b47190b1 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -741,10 +741,10 @@ struct mlx5_cache_ent {
         unsigned long           stored;
         unsigned long           reserved;
 
+        struct rb_node          node;
         struct mlx5r_cache_rb_key rb_key;
 
         char                    name[4];
-        u32                     order;
 
         u8 disabled:1;
         u8 fill_to_high_water:1;
@@ -775,8 +775,9 @@ struct mlx5r_async_create_mkey {
 
 struct mlx5_mkey_cache {
         struct workqueue_struct *wq;
-        struct mlx5_cache_ent   ent[MAX_MKEY_CACHE_ENTRIES];
-        struct dentry           *root;
+        struct rb_root          rb_root;
+        struct mutex            rb_lock;
+        struct dentry           *fs_root;
         unsigned long           last_add;
 };
 
@@ -1321,6 +1322,8 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
 int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
 int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
+struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
+                                              struct mlx5r_cache_rb_key rb_key);
 
 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, u8 access_mode,
                                        unsigned int access_flags,
@@ -1348,7 +1351,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq);
 void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
 int __init mlx5_ib_odp_init(void);
 void mlx5_ib_odp_cleanup(void);
-void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent);
+struct mlx5_cache_ent *mlx5_odp_init_mkey_cache_entry(struct mlx5_ib_dev *dev);
 void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
                            struct mlx5_ib_mr *mr, int flags);
@@ -1367,7 +1370,11 @@ static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev,
 static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
 static inline int mlx5_ib_odp_init(void) { return 0; }
 static inline void mlx5_ib_odp_cleanup(void) {}
-static inline void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) {}
+static inline struct mlx5_cache_ent *
+mlx5_odp_init_mkey_cache_entry(struct mlx5_ib_dev *dev)
+{
+        return NULL;
+}
 static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
                                          struct mlx5_ib_mr *mr, int flags) {}
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index bb3d5a766cb8..6977d0cbbe6f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -514,18 +514,22 @@ static const struct file_operations limit_fops = {
 
 static bool someone_adding(struct mlx5_mkey_cache *cache)
 {
-        unsigned int i;
-
-        for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
-                struct mlx5_cache_ent *ent = &cache->ent[i];
-                bool ret;
+        struct mlx5_cache_ent *ent;
+        struct rb_node *node;
+        bool ret;
 
+        mutex_lock(&cache->rb_lock);
+        for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) {
+                ent = rb_entry(node, struct mlx5_cache_ent, node);
                 xa_lock_irq(&ent->mkeys);
                 ret = ent->stored < ent->limit;
                 xa_unlock_irq(&ent->mkeys);
-                if (ret)
+                if (ret) {
+                        mutex_unlock(&cache->rb_lock);
                         return true;
+                }
         }
+        mutex_unlock(&cache->rb_lock);
         return false;
 }
 
@@ -589,8 +593,8 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
                         if (err != -EAGAIN) {
                                 mlx5_ib_warn(
                                         dev,
-                                        "command failed order %d, err %d\n",
-                                        ent->order, err);
+                                        "command failed order %s, err %d\n",
+                                        ent->name, err);
                                 queue_delayed_work(cache->wq, &ent->dwork,
                                                    msecs_to_jiffies(1000));
                         }
@@ -636,6 +640,72 @@ static void delayed_cache_work_func(struct work_struct *work)
         __cache_work_func(ent);
 }
 
+static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
+                                 struct mlx5_cache_ent *ent)
+{
+        struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL;
+        struct mlx5_cache_ent *cur;
+        int cmp;
+
+        mutex_lock(&cache->rb_lock);
+        /* Figure out where to put new node */
+        while (*new) {
+                cur = rb_entry(*new, struct mlx5_cache_ent, node);
+                parent = *new;
+                cmp = memcmp(&ent->rb_key, &cur->rb_key,
+                             sizeof(struct mlx5r_cache_rb_key));
+                if (cmp < 0)
+                        new = &((*new)->rb_left);
+                if (cmp > 0)
+                        new = &((*new)->rb_right);
+                if (cmp == 0) {
+                        mutex_unlock(&cache->rb_lock);
+                        return -EEXIST;
+                }
+        }
+
+        /* Add new node and rebalance tree. */
+        rb_link_node(&ent->node, parent, new);
+        rb_insert_color(&ent->node, &cache->rb_root);
+
+        mutex_unlock(&cache->rb_lock);
+        return 0;
+}
+
+static struct rb_node *
+mlx5_cache_find_smallest_ent(struct mlx5_mkey_cache *cache,
+                             struct mlx5r_cache_rb_key rb_key)
+{
+        struct rb_node *node = cache->rb_root.rb_node;
+        struct mlx5_cache_ent *cur, *smallest = NULL;
+        int cmp;
+
+        /*
+         * Find the smallest ent with ent.rb_key >= rb_key.
+         */
+        while (node) {
+                cur = rb_entry(node, struct mlx5_cache_ent, node);
+
+                cmp = memcmp(&rb_key, &cur->rb_key,
+                             sizeof(struct mlx5r_cache_rb_key));
+                if (cmp < 0) {
+                        /* cur.rb_key > rb_key */
+                        smallest = cur;
+                        node = node->rb_left;
+                }
+                if (cmp > 0)
+                        node = node->rb_right;
+                if (cmp == 0)
+                        return &cur->node;
+        }
+
+        return (smallest &&
+                smallest->rb_key.access_mode == rb_key.access_mode &&
+                smallest->rb_key.access_flags == rb_key.access_flags) ?
+                       &smallest->node :
+                       NULL;
+}
+
 static bool mlx5_ent_get_mkey(struct mlx5_cache_ent *ent, struct mlx5_ib_mr *mr)
 {
         xa_lock_irq(&ent->mkeys);
@@ -655,36 +725,41 @@ static bool mlx5_ent_get_mkey(struct mlx5_cache_ent *ent, struct mlx5_ib_mr *mr)
         return true;
 }
 
-static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
-                                                         unsigned int order)
-{
-        struct mlx5_mkey_cache *cache = &dev->cache;
-
-        if (order < cache->ent[0].order)
-                return &cache->ent[0];
-        order = order - cache->ent[0].order;
-        if (order > MKEY_CACHE_LAST_STD_ENTRY)
-                return NULL;
-        return &cache->ent[order];
-}
-
 static bool mlx5_cache_get_mkey(struct mlx5_ib_dev *dev,
                                 struct mlx5r_cache_rb_key rb_key,
                                 struct mlx5_ib_mr *mr)
 {
+        struct mlx5_mkey_cache *cache = &dev->cache;
+        unsigned int order, upper_bound;
         struct mlx5_cache_ent *ent;
+        struct rb_node *node;
 
-        if (!mlx5r_umr_can_reconfig(dev, 0, rb_key.access_flags))
-                return false;
+        order = order_base_2(rb_key.ndescs) > 2 ?
+                        order_base_2(rb_key.ndescs) : 2;
+        upper_bound = 1 << order;
+
+        /*
+         * Find the smallest node within the range with available mkeys.
+         */
+        mutex_lock(&cache->rb_lock);
+        node = mlx5_cache_find_smallest_ent(cache, rb_key);
+        while (node) {
+                ent = rb_entry(node, struct mlx5_cache_ent, node);
+                if (ent->rb_key.access_mode != rb_key.access_mode ||
+                    ent->rb_key.access_flags != rb_key.access_flags ||
+                    ent->rb_key.ndescs > upper_bound)
+                        break;
 
-        if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
-                ent = &dev->cache.ent[MLX5_IMR_KSM_CACHE_ENTRY];
+                if (mlx5_ent_get_mkey(ent, mr)) {
+                        mutex_unlock(&cache->rb_lock);
+                        return true;
+                }
 
-        ent = mkey_cache_ent_from_order(dev, order_base_2(rb_key.ndescs));
-        if (!ent)
-                return false;
+                node = rb_next(node);
+        }
+        mutex_unlock(&cache->rb_lock);
 
-        return mlx5_ent_get_mkey(ent, mr);
+        return false;
 }
 
 static int get_uchangeable_access_flags(struct mlx5_ib_dev *dev,
@@ -743,10 +818,8 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, u8 access_mode,
         return mr;
 }
 
-static void clean_keys(struct mlx5_ib_dev *dev, int c)
+static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
 {
-        struct mlx5_mkey_cache *cache = &dev->cache;
-        struct mlx5_cache_ent *ent = &cache->ent[c];
         u32 mkey;
 
         cancel_delayed_work(&ent->dwork);
@@ -765,26 +838,19 @@ static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
         if (!mlx5_debugfs_root || dev->is_rep)
                 return;
 
-        debugfs_remove_recursive(dev->cache.root);
-        dev->cache.root = NULL;
+        debugfs_remove_recursive(dev->cache.fs_root);
+        dev->cache.fs_root = NULL;
 }
 
-static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
+static void mlx5_cache_ent_debugfs_init(struct mlx5_ib_dev *dev,
+                                        struct mlx5_cache_ent *ent, int order)
 {
         struct mlx5_mkey_cache *cache = &dev->cache;
-        struct mlx5_cache_ent *ent;
         struct dentry *dir;
-        int i;
 
-        if (!mlx5_debugfs_root || dev->is_rep)
-                return;
-
-        cache->root = debugfs_create_dir("mr_cache", mlx5_debugfs_get_dev_root(dev->mdev));
-
-        for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
-                ent = &cache->ent[i];
-                sprintf(ent->name, "%d", ent->order);
-                dir = debugfs_create_dir(ent->name, cache->root);
+        if (cache->fs_root) {
+                sprintf(ent->name, "%d", order);
+                dir = debugfs_create_dir(ent->name, cache->fs_root);
                 debugfs_create_file("size", 0600, dir, ent, &size_fops);
                 debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
                 debugfs_create_ulong("cur", 0400, dir, &ent->stored);
@@ -799,68 +865,114 @@ static void delay_time_func(struct timer_list *t)
         WRITE_ONCE(dev->fill_delay, 0);
 }
 
-int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
+struct mlx5_cache_ent *mlx5r_cache_create_ent(struct mlx5_ib_dev *dev,
+                                              struct mlx5r_cache_rb_key rb_key)
+{
+        struct mlx5_cache_ent *ent;
+        int ret;
+
+        ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+        if (!ent)
+                return ERR_PTR(-ENOMEM);
+
+        xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
+        ent->rb_key = rb_key;
+        ent->dev = dev;
+
+        INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
+
+        ret = mlx5_cache_ent_insert(&dev->cache, ent);
+        if (ret) {
+                kfree(ent);
+                return ERR_PTR(ret);
+        }
+        return ent;
+}
+
+static int mlx5_cache_init_default_entries(struct mlx5_ib_dev *dev)
 {
         struct mlx5r_cache_rb_key rb_key = {
                 .access_mode = MLX5_MKC_ACCESS_MODE_MTT };
         struct mlx5_mkey_cache *cache = &dev->cache;
+        bool can_use_cache, need_cache;
         struct mlx5_cache_ent *ent;
-        int i;
+        int order;
+
+        if (mlx5_debugfs_root && !dev->is_rep)
+                cache->fs_root = debugfs_create_dir(
+                        "mr_cache", mlx5_debugfs_get_dev_root(dev->mdev));
+
+        can_use_cache = !dev->is_rep && mlx5r_umr_can_load_pas(dev, 0);
+        need_cache = (dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
+                     mlx5_core_is_pf(dev->mdev);
+
+        for (order = 2; order <= MKEY_CACHE_LAST_STD_ENTRY + 2; order++) {
+                rb_key.ndescs = 1 << order;
+                ent = mlx5r_cache_create_ent(dev, rb_key);
+                if (IS_ERR(ent))
+                        return PTR_ERR(ent);
+
+                mlx5_cache_ent_debugfs_init(dev, ent, order);
+
+                if (can_use_cache && need_cache &&
+                    order <= mkey_cache_max_order(dev)) {
+                        ent->limit =
+                                dev->mdev->profile.mr_cache[order - 2].limit;
+                        xa_lock_irq(&ent->mkeys);
+                        queue_adjust_cache_locked(ent);
+                        xa_unlock_irq(&ent->mkeys);
+                }
+        }
+
+        ent = mlx5_odp_init_mkey_cache_entry(dev);
+        if (ent) {
+                if (IS_ERR(ent))
+                        return PTR_ERR(ent);
+
+                mlx5_cache_ent_debugfs_init(dev, ent,
+                                            MLX5_IMR_KSM_CACHE_ENTRY + 2);
+        }
+
+        return 0;
+}
+
+int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
+{
+        int err;
 
         mutex_init(&dev->slow_path_mutex);
-        cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
-        if (!cache->wq) {
+        mutex_init(&dev->cache.rb_lock);
+        dev->cache.rb_root = RB_ROOT;
+        dev->cache.wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
+        if (!dev->cache.wq) {
                 mlx5_ib_warn(dev, "failed to create work queue\n");
                 return -ENOMEM;
         }
 
         mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
         timer_setup(&dev->delay_timer, delay_time_func, 0);
-        for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
-                ent = &cache->ent[i];
-                xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
-                ent->order = i + 2;
-                ent->dev = dev;
-                ent->limit = 0;
-
-                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
-
-                if (i > MKEY_CACHE_LAST_STD_ENTRY) {
-                        mlx5_odp_init_mkey_cache_entry(ent);
-                        continue;
-                }
-
-                if (ent->order > mkey_cache_max_order(dev))
-                        continue;
-
-                rb_key.ndescs = 1 << ent->order;
-                ent->rb_key = rb_key;
-                if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
-                    !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
-                    mlx5r_umr_can_load_pas(dev, 0))
-                        ent->limit = dev->mdev->profile.mr_cache[i].limit;
-                else
-                        ent->limit = 0;
-                xa_lock_irq(&ent->mkeys);
-                queue_adjust_cache_locked(ent);
-                xa_unlock_irq(&ent->mkeys);
-        }
-
-        mlx5_mkey_cache_debugfs_init(dev);
+        err = mlx5_cache_init_default_entries(dev);
+        if (err)
+                goto err;
 
         return 0;
+err:
+        mlx5_mkey_cache_cleanup(dev);
+        return err;
 }
 
 int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
 {
-        unsigned int i;
+        struct rb_root *root = &dev->cache.rb_root;
+        struct mlx5_cache_ent *ent;
+        struct rb_node *node;
 
         if (!dev->cache.wq)
                 return 0;
 
-        for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
-                struct mlx5_cache_ent *ent = &dev->cache.ent[i];
-
+        mutex_lock(&dev->cache.rb_lock);
+        for (node = rb_first(root); node; node = rb_next(node)) {
+                ent = rb_entry(node, struct mlx5_cache_ent, node);
                 xa_lock_irq(&ent->mkeys);
                 ent->disabled = true;
                 xa_unlock_irq(&ent->mkeys);
@@ -870,8 +982,15 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
         mlx5_mkey_cache_debugfs_cleanup(dev);
         mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
 
-        for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++)
-                clean_keys(dev, i);
+        node = rb_first(root);
+        while (node) {
+                ent = rb_entry(node, struct mlx5_cache_ent, node);
+                node = rb_next(node);
+                clean_keys(dev, ent);
+                rb_erase(&ent->node, root);
+                kfree(ent);
+        }
+        mutex_unlock(&dev->cache.rb_lock);
 
         destroy_workqueue(dev->cache.wq);
         del_timer_sync(&dev->delay_timer);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 90339edddfed..b29ec4e0d8ff 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1587,16 +1587,17 @@ mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
         return err;
 }
 
-void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent)
+struct mlx5_cache_ent *mlx5_odp_init_mkey_cache_entry(struct mlx5_ib_dev *dev)
 {
         struct mlx5r_cache_rb_key rb_key = {
                 .access_mode = MLX5_MKC_ACCESS_MODE_KSM,
                 .ndescs = mlx5_imr_ksm_entries };
 
-        if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
-                return;
-        ent->rb_key = rb_key;
+        if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+                return NULL;
+
+        return mlx5r_cache_create_ent(dev, rb_key);
 }
 
 static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
-- 
2.17.2