On Tue, Jun 07, 2022 at 02:40:12PM +0300, Leon Romanovsky wrote: > +static int push_reserve_mkey(struct mlx5_cache_ent *ent, bool limit_pendings) > +{ > + unsigned long to_reserve; > + void *old; > + int err; > + > + xa_lock_irq(&ent->mkeys); > + while (true) { > + if (limit_pendings && > + (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) { > + err = -EAGAIN; > + goto err; > + } > + > + to_reserve = ent->reserved; > + old = __xa_cmpxchg(&ent->mkeys, to_reserve, NULL, XA_ZERO_ENTRY, > + GFP_KERNEL); > + > + if (xa_is_err(old)) { > + err = xa_err(old); > + goto err; > + } > + > + /* > + * __xa_cmpxchg() might drop the lock, thus ent->reserved can > + * change. > + */ > + if (to_reserve != ent->reserved) { > + if (to_reserve > ent->reserved) > + __xa_erase(&ent->mkeys, to_reserve); > + continue; > + } > + > + /* > + * If old != NULL to_reserve cannot be equal to ent->reserved. > + */ > + WARN_ON(old); > + > + ent->reserved++; > + break; > + } > + xa_unlock_irq(&ent->mkeys); > + return 0; > + > +err: > + xa_unlock_irq(&ent->mkeys); > + return err; > +} So, I looked at this for a good long time and I think we can replace it with this version: static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings, void *to_store) { XA_STATE(xas, &ent->mkeys, 0); void *curr; xa_lock_irq(&ent->mkeys); if (limit_pendings && (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) { xa_unlock_irq(&ent->mkeys); return -EAGAIN; } while (1) { /* * This is cmpxchg (NULL, XA_ZERO_ENTRY) however this version * doesn't transparently unlock. Instead we set the xas index to * the current value of reserved every iteration. 
*/ xas_set(&xas, ent->reserved); curr = xas_load(&xas); if (!curr) { if (to_store && ent->stored == ent->reserved) xas_store(&xas, to_store); else xas_store(&xas, XA_ZERO_ENTRY); if (xas_valid(&xas)) { ent->reserved++; if (to_store) { if (ent->stored != ent->reserved) __xa_store(&ent->mkeys, ent->stored, to_store, GFP_KERNEL); ent->stored++; } } } xa_unlock_irq(&ent->mkeys); /* * Notice xas_nomem() must always be called as it cleans * up any cached allocation. */ if (!xas_nomem(&xas, GFP_KERNEL)) break; xa_lock_irq(&ent->mkeys); } if (xas_error(&xas)) return xas_error(&xas); if (WARN_ON(curr)) return -EINVAL; return 0; } Which can do either reserve or a store and is a little more direct as to how it works. Which allows this: > if (mr->mmkey.cache_ent) { > xa_lock_irq(&mr->mmkey.cache_ent->mkeys); > mr->mmkey.cache_ent->in_use--; > xa_unlock_irq(&mr->mmkey.cache_ent->mkeys); > > if (mlx5r_umr_revoke_mr(mr) || > push_reserve_mkey(mr->mmkey.cache_ent, false)) > To just call push_mkey(mr->mmkey.cache_ent, false, mr->mmkey.key) And with some locking shuffling avoid a lot of lock/unlock/xarray cycles on the common path of just appending to an xarray with no reservation. But I didn't notice anything wrong with this series, it does look good. Thanks, Jason