RE: [PATCH for-next 5/9] IB/core: Invalidation support for peer memory

> -----Original Message-----
> From: Yann Droneaud [mailto:ydroneaud@xxxxxxxxxx]
> Sent: Wednesday, October 01, 2014 7:25 PM
> To: Yishai Hadas
> Cc: roland@xxxxxxxxxx; linux-rdma@xxxxxxxxxxxxxxx; Shachar Raindel
> Subject: Re: [PATCH for-next 5/9] IB/core: Invalidation support for peer
> memory
> 
> Le mercredi 01 octobre 2014 à 18:18 +0300, Yishai Hadas a écrit :
> > Add the functionality required to invalidate a given range of peer
> > memory, identified by its core context.
> >
> > Each umem built over peer memory with invalidation support has an
> > invalidation context assigned to it, holding the data needed to
> > manage it. When the peer calls the invalidation callback, the
> > following actions are taken:
> >
> > 1) Take the peer client lock, to synchronize with any in-flight
> > dereg_mr on that memory.
> > 2) With the lock held, look up the ticket id to find the matching
> > core context.
> > 3) If a match is found, call the umem invalidation function;
> > otherwise, return.
> >
> > Some notes:
> > 1) The peer invalidate callback is defined to be blocking: it must
> > return only once the pages are no longer going to be accessed. For
> > that reason, ib_invalidate_peer_memory waits for a completion event
> > when another in-flight call is in progress as part of dereg_mr.
> >
> > 2) The peer memory API assumes that a peer client may take a lock to
> > protect its memory operations. Specifically, its invalidate callback
> > might be called under that lock, which could lead to an AB/BA
> > deadlock if the IB core called the get/put pages APIs while holding
> > the IB core peer's lock. For that reason,
> > ib_umem_activate_invalidation_notifier takes that lock and checks
> > for an in-flight invalidation state before activating the notifier.
> >
> > 3) Once a peer client declares, as part of its registration, that it
> > may require invalidation support, it cannot own a memory range which
> > doesn't support it.
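
To make the AB/BA hazard in note 2 concrete, here is a minimal sketch
of a hypothetical peer client (all names below are made up for the
example):

	/* Hypothetical peer client, showing the lock ordering from note 2 */
	static void peer_free_range(struct peer_dev *pdev, struct peer_range *r)
	{
		mutex_lock(&pdev->mem_lock);	/* lock A: the peer's own lock */
		/* The invalidate callback runs with lock A held; inside the
		 * IB core it takes the peer client lock (lock B) for the
		 * ticket lookup. */
		pdev->invalidate_cb(pdev->reg_handle, r->core_context);
		mutex_unlock(&pdev->mem_lock);
	}

If the IB core called the peer's get/put pages entry points (which take
lock A) while holding lock B, the two lock orders A->B and B->A would
deadlock. Hence ib_umem_activate_invalidation_notifier only takes lock B
to arm the callback, and checks the peer_invalidated flag rather than
calling back into the peer.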
> >
> > Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxxxx>
> > Signed-off-by: Shachar Raindel <raindel@xxxxxxxxxxxx>
> > ---
> >  drivers/infiniband/core/peer_mem.c |   86 +++++++++++++++++++++++++++++++++---
> >  drivers/infiniband/core/umem.c     |   51 ++++++++++++++++++---
> >  include/rdma/ib_peer_mem.h         |    4 +-
> >  include/rdma/ib_umem.h             |   17 +++++++
> >  4 files changed, 143 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/infiniband/core/peer_mem.c b/drivers/infiniband/core/peer_mem.c
> > index ad10672..d6bd192 100644
> > --- a/drivers/infiniband/core/peer_mem.c
> > +++ b/drivers/infiniband/core/peer_mem.c
> > @@ -38,10 +38,57 @@ static DEFINE_MUTEX(peer_memory_mutex);
> >  static LIST_HEAD(peer_memory_list);
> >  static int num_registered_peers;
> >
> > -static int ib_invalidate_peer_memory(void *reg_handle, void *core_context)
> > +/* Caller should be holding the peer client lock, ib_peer_client->lock */
> > +static struct core_ticket *ib_peer_search_context(struct ib_peer_memory_client *ib_peer_client,
> > +						  unsigned long key)
> > +{
> > +	struct core_ticket *core_ticket;
> > +
> > +	list_for_each_entry(core_ticket, &ib_peer_client->core_ticket_list,
> > +			    ticket_list) {
> > +		if (core_ticket->key == key)
> > +			return core_ticket;
> > +	}
> >
> > +	return NULL;
> > +}
> > +
> 
> You have now two functions to lookup key in ticket list:
> see peer_ticket_exists().
> 

As discussed in a previous e-mail, peer_ticket_exists will be deleted.
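
For the remaining existence check in the ticket allocation path, the
idea is to reuse the single lookup helper instead of duplicating the
list walk; a minimal sketch (the surrounding allocation loop is
assumed, not shown):

	/* Sketch only: existence test via the one lookup helper */
	if (ib_peer_search_context(ib_peer_client, key)) {
		/* key already in use, try the next candidate */
	}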

> > +static int ib_invalidate_peer_memory(void *reg_handle, void *core_context)
> >  {
> > -	return -ENOSYS;
> > +	struct ib_peer_memory_client *ib_peer_client =
> > +		(struct ib_peer_memory_client *)reg_handle;
> > +	struct invalidation_ctx *invalidation_ctx;
> > +	struct core_ticket *core_ticket;
> > +	int need_unlock = 1;
> > +
> > +	mutex_lock(&ib_peer_client->lock);
> > +	core_ticket = ib_peer_search_context(ib_peer_client,
> > +					     (unsigned long)core_context);
> > +	if (!core_ticket)
> > +		goto out;
> > +
> > +	invalidation_ctx = (struct invalidation_ctx *)core_ticket->context;
> > +	/* If context is not ready yet, mark it to be invalidated */
> > +	if (!invalidation_ctx->func) {
> > +		invalidation_ctx->peer_invalidated = 1;
> > +		goto out;
> > +	}
> > +	invalidation_ctx->func(invalidation_ctx->cookie,
> > +					invalidation_ctx->umem, 0, 0);
> > +	if (invalidation_ctx->inflight_invalidation) {
> > +		/* init the completion to wait on before letting the other thread run */
> > +		init_completion(&invalidation_ctx->comp);
> > +		mutex_unlock(&ib_peer_client->lock);
> > +		need_unlock = 0;
> > +		wait_for_completion(&invalidation_ctx->comp);
> > +	}
> > +
> > +	kfree(invalidation_ctx);
> > +out:
> > +	if (need_unlock)
> > +		mutex_unlock(&ib_peer_client->lock);
> > +
> > +	return 0;
> >  }
> >
> >  static int peer_ticket_exists(struct ib_peer_memory_client *ib_peer_client,
> > @@ -168,11 +215,30 @@ int ib_peer_create_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem, s
> >  void ib_peer_destroy_invalidation_ctx(struct ib_peer_memory_client *ib_peer_mem,
> >  				      struct invalidation_ctx *invalidation_ctx)
> >  {
> > -	mutex_lock(&ib_peer_mem->lock);
> > -	ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
> > -	mutex_unlock(&ib_peer_mem->lock);
> > +	int peer_callback;
> > +	int inflight_invalidation;
> >
> > -	kfree(invalidation_ctx);
> > +	/* If we are under the peer callback, the lock was already taken. */
> > +	if (!invalidation_ctx->peer_callback)
> > +		mutex_lock(&ib_peer_mem->lock);
> > +	ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
> > +	/* Make sure to check the inflight flag after taking the lock and removing from the tree.
> > +	 * In addition, from this point on use local variables for peer_callback and
> > +	 * inflight_invalidation, as after the complete() the invalidation_ctx can't be
> > +	 * accessed any more since it may be freed by the callback.
> > +	 */
> > +	peer_callback = invalidation_ctx->peer_callback;
> > +	inflight_invalidation = invalidation_ctx->inflight_invalidation;
> > +	if (inflight_invalidation)
> > +		complete(&invalidation_ctx->comp);
> > +
> > +	/* Under a peer callback, the lock is handled externally */
> > +	if (!peer_callback)
> > +		mutex_unlock(&ib_peer_mem->lock);
> > +
> > +	/* If under the callback context, or a callback is pending, let it free the invalidation context */
> > +	if (!peer_callback && !inflight_invalidation)
> > +		kfree(invalidation_ctx);
> >  }
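
For readers tracking the handoff above, the intended "who frees
invalidation_ctx" matrix, as we understand the code, is:

  peer_callback   inflight_invalidation   freed by
  -------------   ---------------------   ---------------------------------
        0                   0             ib_peer_destroy_invalidation_ctx
        1                  any            ib_invalidate_peer_memory
        0                   1             ib_invalidate_peer_memory (after
                                          the completion fires)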
> >  static int ib_memory_peer_check_mandatory(const struct peer_memory_client
> >  						     *peer_client)
> > @@ -261,13 +327,19 @@ void ib_unregister_peer_memory_client(void *reg_handle)
> >  EXPORT_SYMBOL(ib_unregister_peer_memory_client);
> >
> >  struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
> > -						 size_t size, void **peer_client_context)
> > +						 size_t size, unsigned long peer_mem_flags,
> > +						 void **peer_client_context)
> >  {
> >  	struct ib_peer_memory_client *ib_peer_client;
> >  	int ret;
> >
> >  	mutex_lock(&peer_memory_mutex);
> >  	list_for_each_entry(ib_peer_client, &peer_memory_list, core_peer_list) {
> > +		/* If the peer requires invalidation, it can't own memory which doesn't support it */
> > +		if (ib_peer_client->invalidation_required &&
> > +		    (!(peer_mem_flags & IB_PEER_MEM_INVAL_SUPP)))
> > +			continue;
> > +
> >  		ret = ib_peer_client->peer_mem->acquire(addr, size,
> >  						   context->peer_mem_private_data,
> >  						   context->peer_mem_name,
> > diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> > index 0de9916..51f32a1 100644
> > --- a/drivers/infiniband/core/umem.c
> > +++ b/drivers/infiniband/core/umem.c
> > @@ -44,12 +44,19 @@
> >
> >  static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem,
> >  				     struct ib_umem *umem, unsigned long addr,
> > -				     int dmasync)
> > +				     int dmasync, unsigned long peer_mem_flags)
> >  {
> >  	int ret;
> >  	const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
> > +	struct invalidation_ctx *invalidation_ctx = NULL;
> >
> >  	umem->ib_peer_mem = ib_peer_mem;
> > +	if (peer_mem_flags & IB_PEER_MEM_INVAL_SUPP) {
> > +		ret = ib_peer_create_invalidation_ctx(ib_peer_mem, umem, &invalidation_ctx);
> > +		if (ret)
> > +			goto end;
> > +	}
> > +
> >  	/*
> >  	 * We always request write permissions to the pages, to force breaking of any CoW
> >  	 * during the registration of the MR. For read-only MRs we use the "force" flag to
> > @@ -60,7 +67,9 @@ static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem,
> >  				  1, !umem->writable,
> >  				  &umem->sg_head,
> >  				  umem->peer_mem_client_context,
> > -				  NULL);
> > +				  invalidation_ctx ?
> > +				  (void *)invalidation_ctx->context_ticket : NULL);
> > +
> 
> NULL may be a valid "ticket" once converted to unsigned long and looked
> up in the ticket list.
> 

We will move to using a uint64 value and will make sure the first
ticket value is 1. A ticket value of 0 will indicate that there is no
invalidation support for this memory.
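
A rough sketch of the planned allocation (the helper below is
hypothetical; the exact name and placement may differ in the next
revision):

	/* 64-bit ticket ids, handed out starting from 1; 0 is reserved
	 * to mean "no invalidation support for this memory". The counter
	 * is protected by the caller's lock, and at 64 bits wrap-around
	 * is not a practical concern. */
	static u64 last_ticket_key;

	static u64 ib_peer_alloc_ticket_key(void)
	{
		return ++last_ticket_key;
	}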

> >  	if (ret)
> >  		goto out;
> >
> > @@ -84,6 +93,9 @@ put_pages:
> >  	peer_mem->put_pages(umem->peer_mem_client_context,
> >  					&umem->sg_head);
> >  out:
> > +	if (invalidation_ctx)
> > +		ib_peer_destroy_invalidation_ctx(ib_peer_mem, invalidation_ctx);
> > +end:
> >  	ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context);
> >  	kfree(umem);
> >  	return ERR_PTR(ret);
> > @@ -91,15 +103,19 @@ out:
> >
> >  static void peer_umem_release(struct ib_umem *umem)
> >  {
> > -	const struct peer_memory_client *peer_mem =
> > -				umem->ib_peer_mem->peer_mem;
> > +	struct ib_peer_memory_client *ib_peer_mem = umem->ib_peer_mem;
> > +	const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
> > +	struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx;
> > +
> > +	if (invalidation_ctx)
> > +		ib_peer_destroy_invalidation_ctx(ib_peer_mem, invalidation_ctx);
> >
> >  	peer_mem->dma_unmap(&umem->sg_head,
> >  			    umem->peer_mem_client_context,
> >  			    umem->context->device->dma_device);
> >  	peer_mem->put_pages(&umem->sg_head,
> >  			    umem->peer_mem_client_context);
> > -	ib_put_peer_client(umem->ib_peer_mem, umem->peer_mem_client_context);
> > +	ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context);
> >  	kfree(umem);
> >  }
> >
> > @@ -127,6 +143,27 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
> >
> >  }
> >
> > +int ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> > +					   umem_invalidate_func_t func,
> > +					   void *cookie)
> > +{
> > +	struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx;
> > +	int ret = 0;
> > +
> > +	mutex_lock(&umem->ib_peer_mem->lock);
> > +	if (invalidation_ctx->peer_invalidated) {
> > +		pr_err("ib_umem_activate_invalidation_notifier: pages were invalidated by peer\n");
> > +		ret = -EINVAL;
> > +		goto end;
> > +	}
> > +	invalidation_ctx->func = func;
> > +	invalidation_ctx->cookie = cookie;
> > +	/* from that point any pending invalidations can be called */
> > +end:
> > +	mutex_unlock(&umem->ib_peer_mem->lock);
> > +	return ret;
> > +}
> > +EXPORT_SYMBOL(ib_umem_activate_invalidation_notifier);
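
For reference, the consumer flow we expect looks roughly like this
(a sketch: my_mr, my_mr_fence_hw and the exact ib_umem_get argument
list are placeholders, not a final API):

	/* Hypothetical driver-side invalidation callback */
	static void my_umem_invalidate(void *cookie, struct ib_umem *umem,
				       unsigned long addr, size_t size)
	{
		struct my_mr *mr = cookie;

		/* Fence HW access to the MR; the pages must not be
		 * touched once this callback returns. */
		my_mr_fence_hw(mr);
	}

	/* In the reg_mr path: */
	umem = ib_umem_get(context, start, length, access_flags, 0,
			   IB_PEER_MEM_ALLOW | IB_PEER_MEM_INVAL_SUPP);
	/* ... HW MR setup ... */

	/* Arm the notifier only once the MR can handle invalidations; a
	 * peer invalidation that raced with registration surfaces here
	 * as -EINVAL. */
	ret = ib_umem_activate_invalidation_notifier(umem, my_umem_invalidate, mr);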
> >  /**
> >   * ib_umem_get - Pin and DMA map userspace memory.
> >   * @context: userspace context to pin memory for
> > @@ -179,11 +216,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
> >  	if (peer_mem_flags & IB_PEER_MEM_ALLOW) {
> >  		struct ib_peer_memory_client *peer_mem_client;
> >
> > -		peer_mem_client =  ib_get_peer_client(context, addr, size,
> > +		peer_mem_client =  ib_get_peer_client(context, addr, size, peer_mem_flags,
> >  						      &umem->peer_mem_client_context);
> >  		if (peer_mem_client)
> >  			return peer_umem_get(peer_mem_client, umem, addr,
> > -					dmasync);
> > +					dmasync, peer_mem_flags);
> >  	}
> >
> >  	/* We assume the memory is from hugetlb until proved otherwise */
> > diff --git a/include/rdma/ib_peer_mem.h b/include/rdma/ib_peer_mem.h
> > index d3fbb50..8f67aaf 100644
> > --- a/include/rdma/ib_peer_mem.h
> > +++ b/include/rdma/ib_peer_mem.h
> > @@ -22,6 +22,7 @@ struct ib_peer_memory_client {
> >
> >  enum ib_peer_mem_flags {
> >  	IB_PEER_MEM_ALLOW	= 1,
> > +	IB_PEER_MEM_INVAL_SUPP = (1<<1),
> >  };
> >
> >  struct core_ticket {
> > @@ -31,7 +32,8 @@ struct core_ticket {
> >  };
> >
> >  struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
> > -						 size_t size, void **peer_client_context);
> > +						 size_t size, unsigned long peer_mem_flags,
> > +						 void **peer_client_context);
> >
> >  void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client,
> >  			void *peer_client_context);
> > diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
> > index 4b8a042..83d6059 100644
> > --- a/include/rdma/ib_umem.h
> > +++ b/include/rdma/ib_umem.h
> > @@ -39,10 +39,21 @@
> >  #include <rdma/ib_peer_mem.h>
> >
> >  struct ib_ucontext;
> > +struct ib_umem;
> > +
> > +typedef void (*umem_invalidate_func_t)(void *invalidation_cookie,
> > +					    struct ib_umem *umem,
> > +					    unsigned long addr, size_t size);
> >
> >  struct invalidation_ctx {
> >  	struct ib_umem *umem;
> >  	unsigned long context_ticket;
> > +	umem_invalidate_func_t func;
> > +	void *cookie;
> > +	int peer_callback;
> > +	int inflight_invalidation;
> > +	int peer_invalidated;
> > +	struct completion comp;
> >  };
> >
> >  struct ib_umem {
> > @@ -73,6 +84,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
> >  			       unsigned long peer_mem_flags);
> >  void ib_umem_release(struct ib_umem *umem);
> >  int ib_umem_page_count(struct ib_umem *umem);
> > +int  ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> > +					    umem_invalidate_func_t func,
> > +					    void *cookie);
> >
> >  #else /* CONFIG_INFINIBAND_USER_MEM */
> >
> > @@ -87,6 +101,9 @@ static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
> >  static inline void ib_umem_release(struct ib_umem *umem) { }
> >  static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
> >
> > +static inline int ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
> > +							 umem_invalidate_func_t func,
> > +							 void *cookie) {return 0; }
> >  #endif /* CONFIG_INFINIBAND_USER_MEM */
> >
> >  #endif /* IB_UMEM_H */
> 
