Hi Neil, On 04/09/2017 10:22 PM, NeilBrown wrote: > > When passed GFP flags that allow sleeping (such as > GFP_NOIO), mempool_alloc() will never return NULL, it will > wait until memory is available. > > This means that we don't need to handle failure, but that we > do need to ensure one thread doesn't call mempool_alloc() > twice on the one pool without queuing or freeing the first > allocation. If multiple threads did this during times of > high memory pressure, the pool could be exhausted and a > deadlock could result. > > pnfs_generic_alloc_ds_commits() attempts to allocate from > the nfs_commit_mempool while already holding an allocation > from that pool. This is not safe. So change > nfs_commitdata_alloc() to take a flag that indicates whether > failure is acceptable. > > In pnfs_generic_alloc_ds_commits(), accept failure and > handle it as we currently do. Else where, do not accept > failure, and do not handle it. > > Even when failure is acceptable, we want to succeed if > possible. That means both > - using an entry from the pool if there is one > - waiting for direct reclaim is there isn't. > > We call mempool_alloc(GFP_NOWAIT) to achieve the first, then > kmem_cache_alloc(GFP_NOIO|__GFP_NORETRY) to achieve the > second. Each of these can fail, but together they do the > best they can without blocking indefinitely. > > Also, don't test for failure when allocating from > nfs_wdata_mempool. > > Signed-off-by: NeilBrown <neilb@xxxxxxxx> > --- > fs/nfs/pnfs_nfs.c | 16 +++++----------- > fs/nfs/write.c | 35 +++++++++++++++++++++-------------- > include/linux/nfs_fs.h | 2 +- > 3 files changed, 27 insertions(+), 26 deletions(-) > > diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c > index 7250b95549ec..1edf5b84aba5 100644 > --- a/fs/nfs/pnfs_nfs.c > +++ b/fs/nfs/pnfs_nfs.c > @@ -217,7 +217,7 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo, > for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { > if (list_empty(&bucket->committing)) > continue; > - data = nfs_commitdata_alloc(); > + data = nfs_commitdata_alloc(false); > if (!data) > break; > data->ds_commit_index = i; > @@ -283,16 +283,10 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, > unsigned int nreq = 0; > > if (!list_empty(mds_pages)) { > - data = nfs_commitdata_alloc(); > - if (data != NULL) { > - data->ds_commit_index = -1; > - list_add(&data->pages, &list); > - nreq++; > - } else { > - nfs_retry_commit(mds_pages, NULL, cinfo, 0); > - pnfs_generic_retry_commit(cinfo, 0); > - return -ENOMEM; > - } > + data = nfs_commitdata_alloc(true); > + data->ds_commit_index = -1; > + list_add(&data->pages, &list); > + nreq++; > } > > nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); > diff --git a/fs/nfs/write.c b/fs/nfs/write.c > index abb2c8a3be42..bdfe5a7c5874 100644 > --- a/fs/nfs/write.c > +++ b/fs/nfs/write.c > @@ -60,14 +60,28 @@ static mempool_t *nfs_wdata_mempool; > static struct kmem_cache *nfs_cdata_cachep; > static mempool_t *nfs_commit_mempool; > > -struct nfs_commit_data *nfs_commitdata_alloc(void) > +struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail) > { > - struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOIO); > + struct nfs_commit_data *p; > > - if (p) { > - memset(p, 0, sizeof(*p)); > - INIT_LIST_HEAD(&p->pages); > + if (never_fail) > + p = mempool_alloc(nfs_commit_mempool, GFP_NOIO); > + else { > + /* It is OK to do some reclaim, not no safe to wait > + * for anything to be returned to the pool. > + * mempool_alloc() cannot handle that particular combination, > + * so we need two separate attempts. > + */ > + p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT); > + if (!p) > + p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO | > + __GFP_NOWARN | __GFP_NORETRY); Do we need to add something to the nfs_commit_data structure to properly free a kmem_cache_alloc()-ed object? Right now it looks like nfs_commit_free() calls mempool_free() unconditionally. Thanks, Anna > + if (!p) > + return NULL; > } > + > + memset(p, 0, sizeof(*p)); > + INIT_LIST_HEAD(&p->pages); > return p; > } > EXPORT_SYMBOL_GPL(nfs_commitdata_alloc); > @@ -82,8 +96,7 @@ static struct nfs_pgio_header *nfs_writehdr_alloc(void) > { > struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); > > - if (p) > - memset(p, 0, sizeof(*p)); > + memset(p, 0, sizeof(*p)); > return p; > } > > @@ -1705,19 +1718,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, > if (list_empty(head)) > return 0; > > - data = nfs_commitdata_alloc(); > - > - if (!data) > - goto out_bad; > + data = nfs_commitdata_alloc(true); > > /* Set up the argument struct */ > nfs_init_commit(data, head, NULL, cinfo); > atomic_inc(&cinfo->mds->rpcs_out); > return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), > data->mds_ops, how, 0); > - out_bad: > - nfs_retry_commit(head, NULL, cinfo, 0); > - return -ENOMEM; > } > > int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf) > diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h > index 287f34161086..1b29915247b2 100644 > --- a/include/linux/nfs_fs.h > +++ b/include/linux/nfs_fs.h > @@ -502,7 +502,7 @@ extern int nfs_wb_all(struct inode *inode); > extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder); > extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); > extern int nfs_commit_inode(struct inode *, int); > -extern struct nfs_commit_data *nfs_commitdata_alloc(void); > +extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); > extern void nfs_commit_free(struct nfs_commit_data *data); > > static inline int > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html