Re: [PATCH for-next 4/9] IB/core: Infrastructure to manage peer core context

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Le mercredi 01 octobre 2014 à 18:18 +0300, Yishai Hadas a écrit :
> Adds an infrastructure to manage core context for a given umem,
> it's needed for the invalidation flow.
> 
> Core context is supplied to peer clients as some opaque data for a given
> memory pages represented by a umem.
> 
> If the peer client needs to invalidate memory it provided through the peer memory callbacks,
> it should call the invalidation callback, supplying the relevant core context.
> IB core will use this context to invalidate the relevant memory.
> 
> To prevent cases when there are inflight invalidation calls in parallel
> to releasing this memory (e.g. by dereg_mr) we must ensure that context
> is valid before accessing it, that's why couldn't use the core context
> pointer directly. For that reason we added a lookup table to map between
> a ticket id to a core context.

You could have used the context pointer itself as the key, instead of creating
the "ticket" abstraction. The context pointer can be looked up in a data
structure which tracks the current contexts.

But I'm not sure I understand the purpose of the indirection:
if dereg_mr() can release the context in parallel,
is the context pointer stored in the "ticket" going to point to
something that is no longer valid?

>  Peer client will get/supply the ticket
> id, core will check whether exists before accessing its corresponding
> context.
> 

Could you explain the expected lifetime of the ticket id, and whether it
will be exchanged with "remote" parties (a remote node, hardware,
userspace, etc.)?

> Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxxxx>
> Signed-off-by: Shachar Raindel <raindel@xxxxxxxxxxxx>
> ---
>  drivers/infiniband/core/peer_mem.c |  132 ++++++++++++++++++++++++++++++++++++
>  include/rdma/ib_peer_mem.h         |   19 +++++
>  include/rdma/ib_umem.h             |    6 ++
>  3 files changed, 157 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/infiniband/core/peer_mem.c b/drivers/infiniband/core/peer_mem.c
> index 3936e13..ad10672 100644
> --- a/drivers/infiniband/core/peer_mem.c
> +++ b/drivers/infiniband/core/peer_mem.c
> @@ -44,6 +44,136 @@ static int ib_invalidate_peer_memory(void *reg_handle, void *core_context)
>  	return -ENOSYS;
>  }
>  
> +static int peer_ticket_exists(struct ib_peer_memory_client *ib_peer_client,
> +			      unsigned long ticket)
> +{
> +	struct core_ticket *core_ticket;
> +
> +	list_for_each_entry(core_ticket, &ib_peer_client->core_ticket_list,
> +			    ticket_list) {
> +		if (core_ticket->key == ticket)
> +			return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int peer_get_free_ticket(struct ib_peer_memory_client *ib_peer_client,
> +				unsigned long *new_ticket)
> +{
> +	unsigned long candidate_ticket = ib_peer_client->last_ticket + 1;

Overflow ?

> +	static int max_retries = 1000;

There's no need for it to be static — and why 1000?

> +	int i;
> +
> +	for (i = 0; i < max_retries; i++) {
> +		if (peer_ticket_exists(ib_peer_client, candidate_ticket)) {
> +			candidate_ticket++;

Overflow ?

> +			continue;
> +		}
> +		*new_ticket = candidate_ticket;
> +		return 0;
> +	}
> +

Counting the number of allocated tickets could prevent looping in
the (unlikely) case where every number is in use.

> +	return -EINVAL;
> +}
> +

> +static int ib_peer_insert_context(struct ib_peer_memory_client *ib_peer_client,
> +				  void *context,
> +				  unsigned long *context_ticket)
> +{
> +	struct core_ticket *core_ticket = kzalloc(sizeof(*core_ticket), GFP_KERNEL);
> +	int ret;
> +
> +	if (!core_ticket)
> +		return -ENOMEM;
> +
> +	mutex_lock(&ib_peer_client->lock);
> +	if (ib_peer_client->last_ticket < ib_peer_client->last_ticket + 1 &&
> +	    !ib_peer_client->ticket_wrapped) {
> +		core_ticket->key = ib_peer_client->last_ticket++;
> +	} else {
> +		/* special/rare case when wrap around occurred, not expected on 64 bit machines */

Some people still run 32-bit systems ...

> +		unsigned long new_ticket;
> +
> +		ib_peer_client->ticket_wrapped = 1;

The whole mechanism for handling wrapping seems fragile, at best.
Note that wrapping could also happen multiple times.

Additionally, it would make more sense to have ticket number handling in
peer_get_free_ticket().

> +		ret = peer_get_free_ticket(ib_peer_client, &new_ticket);
> +		if (ret) {
> +			mutex_unlock(&ib_peer_client->lock);
> +			kfree(core_ticket);
> +			return ret;
> +		}
> +		ib_peer_client->last_ticket = new_ticket;
> +		core_ticket->key = ib_peer_client->last_ticket;
> +	}
> +	core_ticket->context = context;
> +	list_add_tail(&core_ticket->ticket_list,
> +		      &ib_peer_client->core_ticket_list);
> +	*context_ticket = core_ticket->key;
> +	mutex_unlock(&ib_peer_client->lock);
> +	return 0;
> +}
> +

Perhaps idr could be used to track the "ticket" number ?

> +/* Caller should be holding the peer client lock, specifically, the caller should hold ib_peer_client->lock */
> +static int ib_peer_remove_context(struct ib_peer_memory_client *ib_peer_client,
> +				  unsigned long key)
> +{
> +	struct core_ticket *core_ticket;
> +
> +	list_for_each_entry(core_ticket, &ib_peer_client->core_ticket_list,
> +			    ticket_list) {
> +		if (core_ticket->key == key) {
> +			list_del(&core_ticket->ticket_list);
> +			kfree(core_ticket);
> +			return 0;
> +		}
> +	}
> +
> +	return 1;
> +}
> +
> +/**
> +** ib_peer_create_invalidation_ctx - creates invalidation context for a given umem
> +** @ib_peer_mem: peer client to be used
> +** @umem: umem struct belongs to that context
> +** @invalidation_ctx: output context
> +**/
> +int ib_peer_create_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem, struct ib_umem *umem,
> +				    struct invalidation_ctx **invalidation_ctx)
> +{
> +	int ret;
> +	struct invalidation_ctx *ctx;
> +
> +	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	ret = ib_peer_insert_context(ib_peer_mem, ctx,
> +				     &ctx->context_ticket);
> +	if (ret) {
> +		kfree(ctx);
> +		return ret;
> +	}
> +
> +	ctx->umem = umem;
> +	umem->invalidation_ctx = ctx;
> +	*invalidation_ctx = ctx;
> +	return 0;
> +}
> +
> +/**
> + * ** ib_peer_destroy_invalidation_ctx - destroy a given invalidation context
> + * ** @ib_peer_mem: peer client to be used
> + * ** @invalidation_ctx: context to be invalidated
> + * **/
> +void ib_peer_destroy_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem,
> +				      struct invalidation_ctx *invalidation_ctx)
> +{
> +	mutex_lock(&ib_peer_mem->lock);
> +	ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
> +	mutex_unlock(&ib_peer_mem->lock);
> +
> +	kfree(invalidation_ctx);
> +}
>  static int ib_memory_peer_check_mandatory(const struct peer_memory_client
>  						     *peer_client)
>  {
> @@ -94,6 +224,8 @@ void *ib_register_peer_memory_client(const struct peer_memory_client *peer_clien
>  	if (!ib_peer_client)
>  		return NULL;
>  
> +	INIT_LIST_HEAD(&ib_peer_client->core_ticket_list);
> +	mutex_init(&ib_peer_client->lock);
>  	init_completion(&ib_peer_client->unload_comp);
>  	kref_init(&ib_peer_client->ref);
>  	ib_peer_client->peer_mem = peer_client;
> diff --git a/include/rdma/ib_peer_mem.h b/include/rdma/ib_peer_mem.h
> index 98056c5..d3fbb50 100644
> --- a/include/rdma/ib_peer_mem.h
> +++ b/include/rdma/ib_peer_mem.h
> @@ -4,6 +4,8 @@
>  #include <rdma/peer_mem.h>
>  
>  struct ib_ucontext;
> +struct ib_umem;
> +struct invalidation_ctx;
>  
>  struct ib_peer_memory_client {
>  	const struct peer_memory_client *peer_mem;
> @@ -11,16 +13,33 @@ struct ib_peer_memory_client {
>  	int invalidation_required;
>  	struct kref ref;
>  	struct completion unload_comp;
> +	/* lock is used via the invalidation flow */
> +	struct mutex lock;
> +	struct list_head   core_ticket_list;
> +	unsigned long       last_ticket;
> +	int ticket_wrapped;
>  };
>  
>  enum ib_peer_mem_flags {
>  	IB_PEER_MEM_ALLOW	= 1,
>  };
>  
> +struct core_ticket {
> +	unsigned long key;
> +	void *context;
> +	struct list_head   ticket_list;
> +};
> +
>  struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
>  						 size_t size, void **peer_client_context);
>  
>  void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client,
>  			void *peer_client_context);
>  
> +int ib_peer_create_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem, struct ib_umem *umem,
> +				    struct invalidation_ctx **invalidation_ctx);
> +
> +void ib_peer_destroy_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem,
> +				      struct invalidation_ctx *invalidation_ctx);
> +
>  #endif
> diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
> index a22dde0..4b8a042 100644
> --- a/include/rdma/ib_umem.h
> +++ b/include/rdma/ib_umem.h
> @@ -40,6 +40,11 @@
>  
>  struct ib_ucontext;
>  
> +struct invalidation_ctx {
> +	struct ib_umem *umem;
> +	unsigned long context_ticket;
> +};
> +
>  struct ib_umem {
>  	struct ib_ucontext     *context;
>  	size_t			length;
> @@ -56,6 +61,7 @@ struct ib_umem {
>  	int             npages;
>  	/* peer memory that manages this umem */
>  	struct ib_peer_memory_client *ib_peer_mem;
> +	struct invalidation_ctx *invalidation_ctx;
>  	/* peer memory private context */
>  	void *peer_mem_client_context;
>  };



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux