> -----Original Message----- > From: Yann Droneaud [mailto:ydroneaud@xxxxxxxxxx] > Sent: Wednesday, October 01, 2014 7:25 PM > To: Yishai Hadas > Cc: roland@xxxxxxxxxx; linux-rdma@xxxxxxxxxxxxxxx; Shachar Raindel > Subject: Re: [PATCH for-next 5/9] IB/core: Invalidation support for peer > memory > > Le mercredi 01 octobre 2014 à 18:18 +0300, Yishai Hadas a écrit : > > Adds the required functionality to invalidate a given peer > > memory represented by some core context. > > > > Each umem that was built over peer memory and supports invalidation > has > > some invalidation context assigned to it with the required data to > > manage, once peer will call the invalidation callback below actions > are > > taken: > > > > 1) Taking lock on peer client to sync with inflight dereg_mr on that > > memory. > > 2) Once lock is taken have a lookup for ticket id to find the matching > > core context. > > 3) In case found will call umem invalidation function, otherwise call > is > > returned. > > > > Some notes: > > 1) As peer invalidate callback defined to be blocking it must return > > just after that pages are not going to be accessed any more. For that > > reason ib_invalidate_peer_memory is waiting for a completion event in > > case there is other inflight call coming as part of dereg_mr. > > > > 2) The peer memory API assumes that a lock might be taken by a peer > > client to protect its memory operations. Specifically, its invalidate > > callback might be called under that lock which may lead to an AB/BA > > dead-lock in case IB core will call get/put pages APIs with the IB > core peer's lock taken, > > for that reason as part of ib_umem_activate_invalidation_notifier > lock is taken > > then checking for some inflight invalidation state before activating > it. > > > > 3) Once a peer client admits as part of its registration that it may > > require invalidation support, it can't be an owner of a memory range > > which doesn't support it. 
> > > > Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxxxx> > > Signed-off-by: Shachar Raindel <raindel@xxxxxxxxxxxx> > > --- > > drivers/infiniband/core/peer_mem.c | 86 > +++++++++++++++++++++++++++++++++--- > > drivers/infiniband/core/umem.c | 51 ++++++++++++++++++--- > > include/rdma/ib_peer_mem.h | 4 +- > > include/rdma/ib_umem.h | 17 +++++++ > > 4 files changed, 143 insertions(+), 15 deletions(-) > > > > diff --git a/drivers/infiniband/core/peer_mem.c > b/drivers/infiniband/core/peer_mem.c > > index ad10672..d6bd192 100644 > > --- a/drivers/infiniband/core/peer_mem.c > > +++ b/drivers/infiniband/core/peer_mem.c > > @@ -38,10 +38,57 @@ static DEFINE_MUTEX(peer_memory_mutex); > > static LIST_HEAD(peer_memory_list); > > static int num_registered_peers; > > > > -static int ib_invalidate_peer_memory(void *reg_handle, void > *core_context) > > +/* Caller should be holding the peer client lock, ib_peer_client- > >lock */ > > +static struct core_ticket *ib_peer_search_context(struct > ib_peer_memory_client *ib_peer_client, > > + unsigned long key) > > +{ > > + struct core_ticket *core_ticket; > > + > > + list_for_each_entry(core_ticket, &ib_peer_client->core_ticket_list, > > + ticket_list) { > > + if (core_ticket->key == key) > > + return core_ticket; > > + } > > > > + return NULL; > > +} > > + > > You have now two functions to lookup key in ticket list: > see peer_ticket_exists(). > As discussed in previous e-mail, peer_ticket_exists will be deleted. 
> > +static int ib_invalidate_peer_memory(void *reg_handle, void > *core_context) > > { > > - return -ENOSYS; > > + struct ib_peer_memory_client *ib_peer_client = > > + (struct ib_peer_memory_client *)reg_handle; > > + struct invalidation_ctx *invalidation_ctx; > > + struct core_ticket *core_ticket; > > + int need_unlock = 1; > > + > > + mutex_lock(&ib_peer_client->lock); > > + core_ticket = ib_peer_search_context(ib_peer_client, > > + (unsigned long)core_context); > > + if (!core_ticket) > > + goto out; > > + > > + invalidation_ctx = (struct invalidation_ctx *)core_ticket->context; > > + /* If context is not ready yet, mark it to be invalidated */ > > + if (!invalidation_ctx->func) { > > + invalidation_ctx->peer_invalidated = 1; > > + goto out; > > + } > > + invalidation_ctx->func(invalidation_ctx->cookie, > > + invalidation_ctx->umem, 0, 0); > > + if (invalidation_ctx->inflight_invalidation) { > > + /* init the completion to wait on before letting other thread > to run */ > > + init_completion(&invalidation_ctx->comp); > > + mutex_unlock(&ib_peer_client->lock); > > + need_unlock = 0; > > + wait_for_completion(&invalidation_ctx->comp); > > + } > > + > > + kfree(invalidation_ctx); > > +out: > > + if (need_unlock) > > + mutex_unlock(&ib_peer_client->lock); > > + > > + return 0; > > } > > > > static int peer_ticket_exists(struct ib_peer_memory_client > *ib_peer_client, > > @@ -168,11 +215,30 @@ int ib_peer_create_invalidation_ctx(struct > ib_peer_memory_client *ib_peer_mem, s > > void ib_peer_destroy_invalidation_ctx(struct ib_peer_memory_client > *ib_peer_mem, > > struct invalidation_ctx *invalidation_ctx) > > { > > - mutex_lock(&ib_peer_mem->lock); > > - ib_peer_remove_context(ib_peer_mem, invalidation_ctx- > >context_ticket); > > - mutex_unlock(&ib_peer_mem->lock); > > + int peer_callback; > > + int inflight_invalidation; > > > > - kfree(invalidation_ctx); > > + /* If we are under peer callback lock was already taken.*/ > > + if (!invalidation_ctx->peer_callback) 
> > + mutex_lock(&ib_peer_mem->lock); > > + ib_peer_remove_context(ib_peer_mem, invalidation_ctx- > >context_ticket); > > + /* make sure to check inflight flag after took the lock and remove > from tree. > > + * in addition, from that point using local variables for > peer_callback and > > + * inflight_invalidation as after the complete invalidation_ctx > can't be accessed > > + * any more as it may be freed by the callback. > > + */ > > + peer_callback = invalidation_ctx->peer_callback; > > + inflight_invalidation = invalidation_ctx->inflight_invalidation; > > + if (inflight_invalidation) > > + complete(&invalidation_ctx->comp); > > + > > + /* On peer callback lock is handled externally */ > > + if (!peer_callback) > > + mutex_unlock(&ib_peer_mem->lock); > > + > > + /* in case under callback context or callback is pending let it > free the invalidation context */ > > + if (!peer_callback && !inflight_invalidation) > > + kfree(invalidation_ctx); > > } > > static int ib_memory_peer_check_mandatory(const struct > peer_memory_client > > *peer_client) > > @@ -261,13 +327,19 @@ void ib_unregister_peer_memory_client(void > *reg_handle) > > EXPORT_SYMBOL(ib_unregister_peer_memory_client); > > > > struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext > *context, unsigned long addr, > > - size_t size, void > **peer_client_context) > > + size_t size, unsigned long > peer_mem_flags, > > + void **peer_client_context) > > { > > struct ib_peer_memory_client *ib_peer_client; > > int ret; > > > > mutex_lock(&peer_memory_mutex); > > list_for_each_entry(ib_peer_client, &peer_memory_list, > core_peer_list) { > > + /* In case peer requires invalidation it can't own memory > which doesn't support it */ > > + if (ib_peer_client->invalidation_required && > > + (!(peer_mem_flags & IB_PEER_MEM_INVAL_SUPP))) > > + continue; > > + > > ret = ib_peer_client->peer_mem->acquire(addr, size, > > context->peer_mem_private_data, > > context->peer_mem_name, > > diff --git 
a/drivers/infiniband/core/umem.c > b/drivers/infiniband/core/umem.c > > index 0de9916..51f32a1 100644 > > --- a/drivers/infiniband/core/umem.c > > +++ b/drivers/infiniband/core/umem.c > > @@ -44,12 +44,19 @@ > > > > static struct ib_umem *peer_umem_get(struct ib_peer_memory_client > *ib_peer_mem, > > struct ib_umem *umem, unsigned long addr, > > - int dmasync) > > + int dmasync, unsigned long peer_mem_flags) > > { > > int ret; > > const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem; > > + struct invalidation_ctx *invalidation_ctx = NULL; > > > > umem->ib_peer_mem = ib_peer_mem; > > + if (peer_mem_flags & IB_PEER_MEM_INVAL_SUPP) { > > + ret = ib_peer_create_invalidation_ctx(ib_peer_mem, umem, > &invalidation_ctx); > > + if (ret) > > + goto end; > > + } > > + > > /* > > * We always request write permissions to the pages, to force > breaking of any CoW > > * during the registration of the MR. For read-only MRs we use the > "force" flag to > > @@ -60,7 +67,9 @@ static struct ib_umem *peer_umem_get(struct > ib_peer_memory_client *ib_peer_mem, > > 1, !umem->writable, > > &umem->sg_head, > > umem->peer_mem_client_context, > > - NULL); > > + invalidation_ctx ? > > + (void *)invalidation_ctx->context_ticket : NULL); > > + > > NULL may be a valid "ticket" once converted to unsigned long and looked > up in the ticket list. > We will move to use uint64 value, and make sure the first ticket value is 1. A ticket value of 0 indicates that there is no support for invalidation of this memory. 
> > if (ret) > > goto out; > > > > @@ -84,6 +93,9 @@ put_pages: > > peer_mem->put_pages(umem->peer_mem_client_context, > > &umem->sg_head); > > out: > > + if (invalidation_ctx) > > + ib_peer_destroy_invalidation_ctx(ib_peer_mem, > invalidation_ctx); > > +end: > > ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context); > > kfree(umem); > > return ERR_PTR(ret); > > @@ -91,15 +103,19 @@ out: > > > > static void peer_umem_release(struct ib_umem *umem) > > { > > - const struct peer_memory_client *peer_mem = > > - umem->ib_peer_mem->peer_mem; > > + struct ib_peer_memory_client *ib_peer_mem = umem->ib_peer_mem; > > + const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem; > > + struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx; > > + > > + if (invalidation_ctx) > > + ib_peer_destroy_invalidation_ctx(ib_peer_mem, > invalidation_ctx); > > > > peer_mem->dma_unmap(&umem->sg_head, > > umem->peer_mem_client_context, > > umem->context->device->dma_device); > > peer_mem->put_pages(&umem->sg_head, > > umem->peer_mem_client_context); > > - ib_put_peer_client(umem->ib_peer_mem, umem- > >peer_mem_client_context); > > + ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context); > > kfree(umem); > > } > > > > @@ -127,6 +143,27 @@ static void __ib_umem_release(struct ib_device > *dev, struct ib_umem *umem, int d > > > > } > > > > +int ib_umem_activate_invalidation_notifier(struct ib_umem *umem, > > + umem_invalidate_func_t func, > > + void *cookie) > > +{ > > + struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx; > > + int ret = 0; > > + > > + mutex_lock(&umem->ib_peer_mem->lock); > > + if (invalidation_ctx->peer_invalidated) { > > + pr_err("ib_umem_activate_invalidation_notifier: pages were > invalidated by peer\n"); > > + ret = -EINVAL; > > + goto end; > > + } > > + invalidation_ctx->func = func; > > + invalidation_ctx->cookie = cookie; > > + /* from that point any pending invalidations can be called */ > > +end: > > + 
mutex_unlock(&umem->ib_peer_mem->lock); > > + return ret; > > +} > > +EXPORT_SYMBOL(ib_umem_activate_invalidation_notifier); > > /** > > * ib_umem_get - Pin and DMA map userspace memory. > > * @context: userspace context to pin memory for > > @@ -179,11 +216,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext > *context, unsigned long addr, > > if (peer_mem_flags & IB_PEER_MEM_ALLOW) { > > struct ib_peer_memory_client *peer_mem_client; > > > > - peer_mem_client = ib_get_peer_client(context, addr, size, > > + peer_mem_client = ib_get_peer_client(context, addr, size, > peer_mem_flags, > > &umem->peer_mem_client_context); > > if (peer_mem_client) > > return peer_umem_get(peer_mem_client, umem, addr, > > - dmasync); > > + dmasync, peer_mem_flags); > > } > > > > /* We assume the memory is from hugetlb until proved otherwise */ > > diff --git a/include/rdma/ib_peer_mem.h b/include/rdma/ib_peer_mem.h > > index d3fbb50..8f67aaf 100644 > > --- a/include/rdma/ib_peer_mem.h > > +++ b/include/rdma/ib_peer_mem.h > > @@ -22,6 +22,7 @@ struct ib_peer_memory_client { > > > > enum ib_peer_mem_flags { > > IB_PEER_MEM_ALLOW = 1, > > + IB_PEER_MEM_INVAL_SUPP = (1<<1), > > }; > > > > struct core_ticket { > > @@ -31,7 +32,8 @@ struct core_ticket { > > }; > > > > struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext > *context, unsigned long addr, > > - size_t size, void > **peer_client_context); > > + size_t size, unsigned long > peer_mem_flags, > > + void **peer_client_context); > > > > void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client, > > void *peer_client_context); > > diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h > > index 4b8a042..83d6059 100644 > > --- a/include/rdma/ib_umem.h > > +++ b/include/rdma/ib_umem.h > > @@ -39,10 +39,21 @@ > > #include <rdma/ib_peer_mem.h> > > > > struct ib_ucontext; > > +struct ib_umem; > > + > > +typedef void (*umem_invalidate_func_t)(void *invalidation_cookie, > > + struct ib_umem *umem, > > + unsigned 
long addr, size_t size); > > > > struct invalidation_ctx { > > struct ib_umem *umem; > > unsigned long context_ticket; > > + umem_invalidate_func_t func; > > + void *cookie; > > + int peer_callback; > > + int inflight_invalidation; > > + int peer_invalidated; > > + struct completion comp; > > }; > > > > struct ib_umem { > > @@ -73,6 +84,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext > *context, unsigned long addr, > > unsigned long peer_mem_flags); > > void ib_umem_release(struct ib_umem *umem); > > int ib_umem_page_count(struct ib_umem *umem); > > +int ib_umem_activate_invalidation_notifier(struct ib_umem *umem, > > + umem_invalidate_func_t func, > > + void *cookie); > > > > #else /* CONFIG_INFINIBAND_USER_MEM */ > > > > @@ -87,6 +101,9 @@ static inline struct ib_umem *ib_umem_get(struct > ib_ucontext *context, > > static inline void ib_umem_release(struct ib_umem *umem) { } > > static inline int ib_umem_page_count(struct ib_umem *umem) { return > 0; } > > > > +static inline int ib_umem_activate_invalidation_notifier(struct > ib_umem *umem, > > + umem_invalidate_func_t func, > > + void *cookie) {return 0; } > > #endif /* CONFIG_INFINIBAND_USER_MEM */ > > > > #endif /* IB_UMEM_H */ > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org. More majordomo info at http://vger.kernel.org/majordomo-info.html