On Thu, Dec 30, 2021 at 01:23:19PM +0200, Leon Romanovsky wrote:

> diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
> index 2cba55bb7825..8936b504ff99 100644
> +++ b/drivers/infiniband/hw/mlx5/mr.c
> @@ -147,14 +147,17 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
>          struct mlx5_cache_ent *ent = mr->cache_ent;
>          struct mlx5_ib_dev *dev = ent->dev;
>          unsigned long flags;
> +        void *old;
>
>          if (status) {
>                  mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
>                  kfree(mr);
> -                spin_lock_irqsave(&ent->lock, flags);
> -                ent->pending--;
> +                xa_lock_irqsave(&ent->mkeys, flags);
> +                ent->reserved--;
> +                old = __xa_erase(&ent->mkeys, ent->reserved);
> +                WARN_ON(old != NULL);
>                  WRITE_ONCE(dev->fill_delay, 1);
> -                spin_unlock_irqrestore(&ent->lock, flags);
> +                xa_unlock_irqrestore(&ent->mkeys, flags);
>                  mod_timer(&dev->delay_timer, jiffies + HZ);
>                  return;
>          }

Since push_reserve_mkey was put in a function, these stack
manipulations should be too, especially since the same thing is open
coded again in the err_undo_reserve label below.

> @@ -166,14 +169,14 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
>
>          WRITE_ONCE(dev->cache.last_add, jiffies);
>
> -        spin_lock_irqsave(&ent->lock, flags);
> -        list_add_tail(&mr->list, &ent->head);
> -        ent->available_mrs++;
> +        xa_lock_irqsave(&ent->mkeys, flags);
> +        old = __xa_store(&ent->mkeys, ent->stored, mr, GFP_ATOMIC);
> +        WARN_ON(old != NULL);
> +        ent->stored++;
>          ent->total_mrs++;
>          /* If we are doing fill_to_high_water then keep going. */
>          queue_adjust_cache_locked(ent);
> -        ent->pending--;
> -        spin_unlock_irqrestore(&ent->lock, flags);
> +        xa_unlock_irqrestore(&ent->mkeys, flags);
>  }
>
>  static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
> @@ -196,12 +199,48 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
>          return mr;
>  }
>
> +static int _push_reserve_mkey(struct mlx5_cache_ent *ent)
> +{
> +        unsigned long to_reserve;
> +        void *old;
> +
> +        while (true) {
> +                to_reserve = ent->reserved;
> +                old = __xa_cmpxchg(&ent->mkeys, to_reserve, NULL, XA_ZERO_ENTRY,
> +                                   GFP_KERNEL);
> +
> +                if (xa_is_err(old))
> +                        return xa_err(old);

Add a comment that the check below is needed because __xa_cmpxchg() can
drop the lock to allocate, and thus ent->reserved can change.

> +                if (to_reserve != ent->reserved || old != NULL) {

WARN_ON(old != NULL)? That can't happen, right?

> +static int push_reserve_mkey(struct mlx5_cache_ent *ent)
> +{
> +        int ret;
> +
> +        xa_lock_irq(&ent->mkeys);
> +        ret = _push_reserve_mkey(ent);
> +        xa_unlock_irq(&ent->mkeys);
> +
> +        return ret;
> +}

Why not just use one function and a simple goto unwind?

> +
> +        xa_lock_irq(&ent->mkeys);
> +        err = _push_reserve_mkey(ent);
> +        if (err)
> +                goto err_unlock;
> +        if ((ent->reserved - ent->stored) > MAX_PENDING_REG_MR) {
>                  err = -EAGAIN;

Maybe this check should be inside push_reserve_mkey, before it does any
xarray operation?
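
Eg something like this completely untested sketch - the limit_pendings
argument is something I just made up for callers that don't want the
-EAGAIN limit, and the undo in the retry branch is my guess at what the
cut-off hunk above is doing:

static int push_reserve_mkey(struct mlx5_cache_ent *ent, bool limit_pendings)
{
        unsigned long to_reserve;
        void *old;
        int ret = 0;

        xa_lock_irq(&ent->mkeys);

        /* Do the cheap check before any xarray operation */
        if (limit_pendings &&
            (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) {
                ret = -EAGAIN;
                goto out_unlock;
        }

        while (true) {
                to_reserve = ent->reserved;
                old = __xa_cmpxchg(&ent->mkeys, to_reserve, NULL,
                                   XA_ZERO_ENTRY, GFP_KERNEL);
                if (xa_is_err(old)) {
                        ret = xa_err(old);
                        goto out_unlock;
                }

                /*
                 * __xa_cmpxchg() can drop the lock to allocate, so
                 * ent->reserved may have moved under us. Undo a store
                 * that landed on a now-stale index, then retry.
                 */
                if (to_reserve != ent->reserved || old != NULL) {
                        if (!old && to_reserve > ent->reserved)
                                __xa_erase(&ent->mkeys, to_reserve);
                        continue;
                }
                break;
        }
        ent->reserved++;

out_unlock:
        xa_unlock_irq(&ent->mkeys);
        return ret;
}

That gets rid of the wrapper/_locked split entirely and keeps the
-EAGAIN check ahead of any xarray work.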
> @@ -286,40 +335,42 @@ static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
>  static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
>  {
>          struct mlx5_ib_mr *mr;
> +        void *old;
>
> +        if (!ent->stored)
>                  return;
> +        ent->stored--;
> +        mr = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY, GFP_KERNEL);
> +        WARN_ON(mr == NULL || xa_is_err(mr));
> +        ent->reserved--;
> +        old = __xa_erase(&ent->mkeys, ent->reserved);
> +        WARN_ON(old != NULL);
>          ent->total_mrs--;
> +        xa_unlock_irq(&ent->mkeys);

This should avoid the double XA operations when reserved == stored.

> +                mr = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY,
> +                                GFP_KERNEL);
> +                WARN_ON(mr == NULL || xa_is_err(mr));
> +                ent->reserved--;
> +                old = __xa_erase(&ent->mkeys, ent->reserved);
> +                WARN_ON(old != NULL);

'pop' should really be its own function too, eg like the sketch at the
end of this mail.

> @@ -601,41 +655,35 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
>  static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
>  {
>          struct mlx5_cache_ent *ent = mr->cache_ent;
> +        void *old;
>
> -        spin_lock_irq(&ent->lock);
> -        list_add_tail(&mr->list, &ent->head);
> -        ent->available_mrs++;
> +        xa_lock_irq(&ent->mkeys);
> +        old = __xa_store(&ent->mkeys, ent->stored, mr, 0);
> +        WARN_ON(old != NULL);

This allocates memory; errors now have to be handled by falling back to
dereg_mr. Shouldn't this adjust reserved too?
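
For pop, roughly (again untested, just reusing the names this patch
already has), which also picks up the single-erase shortcut for the
stored == reserved case mentioned above:

static struct mlx5_ib_mr *pop_stored_mkey(struct mlx5_cache_ent *ent)
{
        struct mlx5_ib_mr *mr;
        void *old;

        lockdep_assert_held(&ent->mkeys.xa_lock);

        ent->stored--;
        ent->reserved--;

        if (ent->stored == ent->reserved) {
                /*
                 * The top stored mkey is also the top reserved slot,
                 * a single erase pops both.
                 */
                mr = __xa_erase(&ent->mkeys, ent->stored);
                WARN_ON(mr == NULL);
                return mr;
        }

        /* Downgrade the top stored mkey back to a reservation.. */
        mr = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY, GFP_KERNEL);
        WARN_ON(mr == NULL || xa_is_err(mr));
        /* ..and drop the top reservation */
        old = __xa_erase(&ent->mkeys, ent->reserved);
        WARN_ON(old != NULL);
        return mr;
}

Jason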