On Mon, Dec 06, 2021 at 11:10:49AM +0200, Leon Romanovsky wrote:

> +static struct mlx5_cache_ent *ent_search(struct mlx5_mr_cache *cache, void *mkc)

mlx5_cache_ent_find()?

This search isn't really what I would expect; it should stop if it
finds a (somewhat) larger and otherwise compatible entry instead of
continuing to bin on power-of-two sizes.

> +	struct rb_node *node = cache->cache_root.rb_node;
> +	int size = MLX5_ST_SZ_BYTES(mkc);

size_t not int

> +	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
> +	if (dev->cache.maintained_cache && !force) {
> +		int order;

unsigned int

> +		/*
> +		 * Try to get an mkey from pool.
> +		 */
> +		order = order_base_2(ndescs) > 2 ? order_base_2(ndescs) : 2;
> +		MLX5_SET(mkc, mkc, translations_octword_size,
> +			 get_mkc_octo_size(access_mode, 1 << order));
> +		mutex_lock(&dev->cache.cache_lock);
> +		ent = ent_search(&dev->cache, mkc);
> +		mutex_unlock(&dev->cache.cache_lock);

Why is it OK to drop the lock here? What is protecting the ent
pointer against free?

> +		if (ent && (ent->limit || force)) {

What locking protects ent->limit?

> +			xa_lock_irq(&ent->mkeys);
> +			if (!ent->stored) {
> +				if (ent->limit) {
> +					queue_adjust_cache_locked(ent);
> +					ent->miss++;
> +				}
> +				xa_unlock_irq(&ent->mkeys);
> +
> +				err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
> +				if (err)
> +					goto err;

So if there is no entry we create a bigger one rounded up to pow2?

> +
> +				WRITE_ONCE(ent->dev->cache.last_add, jiffies);
> +				xa_lock_irq(&ent->mkeys);
> +				ent->total_mrs++;
> +				xa_unlock_irq(&ent->mkeys);

May as well optimize for the normal case: do the total_mrs++ before
create_mkey while we still have the lock, and undo it if it fails. It
is just minor stat reporting, right?

> +			} else {
> +				xa_ent = __xa_store(&ent->mkeys, --ent->stored,
> +						    NULL, GFP_KERNEL);
> +				WARN_ON(xa_ent == NULL || xa_is_err(xa_ent));
> +				WARN_ON(__xa_erase(&ent->mkeys, --ent->reserved) !=
> +					NULL);

This whole bigger block wants to be in its own function,
'mlx5_ent_get_mkey()'. Then you can write it more simply, without all
the duplicated error handling and deep indenting.
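Something like this, maybe (completely untested, just to sketch the
shape; the name and the bool-returning signature are made up):

static bool mlx5_ent_get_mkey(struct mlx5_cache_ent *ent, u32 *mkey)
{
	void *xa_ent;

	xa_lock_irq(&ent->mkeys);
	if (!ent->stored) {
		/* Pool is empty, caller has to create the mkey itself */
		if (ent->limit) {
			queue_adjust_cache_locked(ent);
			ent->miss++;
		}
		xa_unlock_irq(&ent->mkeys);
		return false;
	}
	/* Pop a stored mkey and release its reserved slot */
	xa_ent = __xa_store(&ent->mkeys, --ent->stored, NULL, GFP_KERNEL);
	WARN_ON(xa_ent == NULL || xa_is_err(xa_ent));
	WARN_ON(__xa_erase(&ent->mkeys, --ent->reserved) != NULL);
	queue_adjust_cache_locked(ent);
	xa_unlock_irq(&ent->mkeys);
	*mkey = (u32)xa_to_value(xa_ent);
	return true;
}

Then the caller collapses to a single
'if (!mlx5_ent_get_mkey(ent, &mr->mmkey.key))' around the
create_mkey fallback, and the error handling lives in one place.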
>  			queue_adjust_cache_locked(ent);
> -			ent->miss++;
> -		}
> -		xa_unlock_irq(&ent->mkeys);
> -		err = create_cache_mkey(ent, &mr->mmkey.key);
> -		if (err) {
> -			kfree(mr);
> -			return ERR_PTR(err);
> +			xa_unlock_irq(&ent->mkeys);
> +			mr->mmkey.key = (u32)xa_to_value(xa_ent);
>  		}
> +		mr->cache_ent = ent;
>  	} else {
> -		mr = __xa_store(&ent->mkeys, --ent->stored, NULL,
> -				GFP_KERNEL);
> -		WARN_ON(mr == NULL || xa_is_err(mr));
> -		WARN_ON(__xa_erase(&ent->mkeys, --ent->reserved) != NULL);
> -		queue_adjust_cache_locked(ent);
> -		xa_unlock_irq(&ent->mkeys);
> -
> -		mr->mmkey.key = (u32)xa_to_value(xa_ent);
> +		/*
> +		 * Can not use a cache mkey.
> +		 * Create an mkey with the exact needed size.
> +		 */
> +		MLX5_SET(mkc, mkc, translations_octword_size,
> +			 get_mkc_octo_size(access_mode, ndescs));
> +		err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
> +		if (err)
> +			goto err;
>  	}

I think this needs to be broken into functions; it is hard to read
with all these ifs and inlined locking.

> 
> +static int ent_insert(struct mlx5_mr_cache *cache, struct mlx5_cache_ent *ent)
> +{
> +	struct rb_node **new = &cache->cache_root.rb_node, *parent = NULL;
> +	int size = MLX5_ST_SZ_BYTES(mkc);
> +	struct mlx5_cache_ent *cur;
> +	int cmp;
> +
> +	/* Figure out where to put new node */
> +	while (*new) {
> +		cur = container_of(*new, struct mlx5_cache_ent, node);
> +		parent = *new;
> +		cmp = memcmp(ent->mkc, cur->mkc, size);
> +		if (cmp < 0)
> +			new = &((*new)->rb_left);
> +		else if (cmp > 0)
> +			new = &((*new)->rb_right);
> +		else
> +			return -EEXIST;
> +	}
> +
> +	/* Add new node and rebalance tree. */
> +	rb_link_node(&ent->node, parent, new);
> +	rb_insert_color(&ent->node, &cache->cache_root);
> +
> +	return 0;
> +}

This should be near the find.

> +
> +static struct mlx5_cache_ent *mlx5_ib_create_cache_ent(struct mlx5_ib_dev *dev,
> +							int order)
> +{
> +	struct mlx5_cache_ent *ent;
> +	int ret;
> +
> +	ent = kzalloc(sizeof(*ent), GFP_KERNEL);
> +	if (!ent)
> +		return ERR_PTR(-ENOMEM);
> +
> +	ent->mkc = kzalloc(MLX5_ST_SZ_BYTES(mkc), GFP_KERNEL);
> +	if (!ent->mkc) {
> +		kfree(ent);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	ent->ndescs = 1 << order;
> +	mlx5_set_cache_mkc(dev, ent->mkc, 0, MLX5_MKC_ACCESS_MODE_MTT,
> +			   PAGE_SHIFT);
> +	MLX5_SET(mkc, ent->mkc, translations_octword_size,
> +		 get_mkc_octo_size(MLX5_MKC_ACCESS_MODE_MTT, ent->ndescs));
> +	mutex_lock(&dev->cache.cache_lock);
> +	ret = ent_insert(&dev->cache, ent);
> +	mutex_unlock(&dev->cache.cache_lock);
> +	if (ret) {
> +		kfree(ent->mkc);
> +		kfree(ent);
> +		return ERR_PTR(ret);
> +	}
> +
> +	xa_init_flags(&ent->mkeys, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
> +	ent->dev = dev;
> +
> +	INIT_WORK(&ent->work, cache_work_func);
> +	INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);

And the ent should be fully set up before adding it to the cache.
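Ie move the xa_init/INIT_WORK/etc ahead of the ent_insert() so
nothing can observe a half-initialized ent. Something like (untested,
same error unwind as the patch already has):

	ent->ndescs = 1 << order;
	ent->dev = dev;
	xa_init_flags(&ent->mkeys, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
	INIT_WORK(&ent->work, cache_work_func);
	INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
	mlx5_set_cache_mkc(dev, ent->mkc, 0, MLX5_MKC_ACCESS_MODE_MTT,
			   PAGE_SHIFT);
	MLX5_SET(mkc, ent->mkc, translations_octword_size,
		 get_mkc_octo_size(MLX5_MKC_ACCESS_MODE_MTT, ent->ndescs));

	/* Publish the ent only once it is fully usable */
	mutex_lock(&dev->cache.cache_lock);
	ret = ent_insert(&dev->cache, ent);
	mutex_unlock(&dev->cache.cache_lock);
	if (ret) {
		kfree(ent->mkc);
		kfree(ent);
		return ERR_PTR(ret);
	}

	return ent;

Jason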