On Thu, Jul 30, 2020 at 5:48 PM Jeff Layton <jlayton@xxxxxxxxxx> wrote:
>
> When doing some testing recently, I hit some page allocation failures
> on mount, when creating the wb_pagevec_pool for the mount. That
> requires 128k (32 contiguous pages), and after thrashing the memory
> during an xfstests run, sometimes that would fail.
>
> 128k for each mount seems like a lot to hold in reserve for a rainy
> day, so let's change this to a global mempool that gets allocated
> when the module is plugged in.
>
> Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
> ---
>  fs/ceph/addr.c               | 23 +++++++++++------------
>  fs/ceph/super.c              | 22 ++++++++--------------
>  fs/ceph/super.h              |  2 --
>  include/linux/ceph/libceph.h |  1 +
>  4 files changed, 20 insertions(+), 28 deletions(-)
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 01ad09733ac7..6ea761c84494 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -862,8 +862,7 @@ static void writepages_finish(struct ceph_osd_request *req)
>
>          osd_data = osd_req_op_extent_osd_data(req, 0);
>          if (osd_data->pages_from_pool)
> -                mempool_free(osd_data->pages,
> -                             ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
> +                mempool_free(osd_data->pages, ceph_wb_pagevec_pool);
>          else
>                  kfree(osd_data->pages);
>          ceph_osdc_put_request(req);
> @@ -955,10 +954,10 @@ static int ceph_writepages_start(struct address_space *mapping,
>          int num_ops = 0, op_idx;
>          unsigned i, pvec_pages, max_pages, locked_pages = 0;
>          struct page **pages = NULL, **data_pages;
> -        mempool_t *pool = NULL; /* Becomes non-null if mempool used */
>          struct page *page;
>          pgoff_t strip_unit_end = 0;
>          u64 offset = 0, len = 0;
> +        bool from_pool = false;
>
>          max_pages = wsize >> PAGE_SHIFT;
>
> @@ -1057,16 +1056,16 @@ static int ceph_writepages_start(struct address_space *mapping,
>                                                        sizeof(*pages),
>                                                        GFP_NOFS);
>                          if (!pages) {
> -                                pool = fsc->wb_pagevec_pool;
> -                                pages = mempool_alloc(pool, GFP_NOFS);
> +                                from_pool = true;
> +                                pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS);
>                                  BUG_ON(!pages);
>                          }
>
>                          len = 0;
>                  } else if (page->index !=
>                             (offset + len) >> PAGE_SHIFT) {
> -                        if (num_ops >= (pool ? CEPH_OSD_SLAB_OPS :
> -                                        CEPH_OSD_MAX_OPS)) {
> +                        if (num_ops >= (from_pool ? CEPH_OSD_SLAB_OPS :
> +                                        CEPH_OSD_MAX_OPS)) {
>                                  redirty_page_for_writepage(wbc, page);
>                                  unlock_page(page);
>                                  break;
> @@ -1161,7 +1160,7 @@ static int ceph_writepages_start(struct address_space *mapping,
>                                                          offset, len);
>                          osd_req_op_extent_osd_data_pages(req, op_idx,
>                                                           data_pages, len, 0,
> -                                                         !!pool, false);
> +                                                         from_pool, false);
>                          osd_req_op_extent_update(req, op_idx, len);
>
>                          len = 0;
> @@ -1188,12 +1187,12 @@ static int ceph_writepages_start(struct address_space *mapping,
>                  dout("writepages got pages at %llu~%llu\n", offset, len);
>
>                  osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len,
> -                                                 0, !!pool, false);
> +                                                 0, from_pool, false);
>                  osd_req_op_extent_update(req, op_idx, len);
>
>                  BUG_ON(op_idx + 1 != req->r_num_ops);
>
> -                pool = NULL;
> +                from_pool = false;
>                  if (i < locked_pages) {
>                          BUG_ON(num_ops <= req->r_num_ops);
>                          num_ops -= req->r_num_ops;
> @@ -1204,8 +1203,8 @@ static int ceph_writepages_start(struct address_space *mapping,
>                          pages = kmalloc_array(locked_pages, sizeof(*pages),
>                                                GFP_NOFS);
>                          if (!pages) {
> -                                pool = fsc->wb_pagevec_pool;
> -                                pages = mempool_alloc(pool, GFP_NOFS);
> +                                from_pool = true;
> +                                pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS);
>                                  BUG_ON(!pages);
>                          }
>                          memcpy(pages, data_pages + i,
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 585aecea5cad..7ec0e6d03d10 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -637,8 +637,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
>                                          struct ceph_options *opt)
>  {
>          struct ceph_fs_client *fsc;
> -        int page_count;
> -        size_t size;
>          int err;
>
>          fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
> @@ -686,22 +684,12 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
>          if (!fsc->cap_wq)
>                  goto fail_inode_wq;
>
> -        /* set up mempools */
> -        err = -ENOMEM;
> -        page_count = fsc->mount_options->wsize >> PAGE_SHIFT;
> -        size = sizeof (struct page *) * (page_count ? page_count : 1);
> -        fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
> -        if (!fsc->wb_pagevec_pool)
> -                goto fail_cap_wq;
> -
>          spin_lock(&ceph_fsc_lock);
>          list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list);
>          spin_unlock(&ceph_fsc_lock);
>
>          return fsc;
>
> -fail_cap_wq:
> -        destroy_workqueue(fsc->cap_wq);
>  fail_inode_wq:
>          destroy_workqueue(fsc->inode_wq);
>  fail_client:
> @@ -732,8 +720,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
>          destroy_workqueue(fsc->inode_wq);
>          destroy_workqueue(fsc->cap_wq);
>
> -        mempool_destroy(fsc->wb_pagevec_pool);
> -
>          destroy_mount_options(fsc->mount_options);
>
>          ceph_destroy_client(fsc->client);
> @@ -752,6 +738,7 @@ struct kmem_cache *ceph_dentry_cachep;
>  struct kmem_cache *ceph_file_cachep;
>  struct kmem_cache *ceph_dir_file_cachep;
>  struct kmem_cache *ceph_mds_request_cachep;
> +mempool_t *ceph_wb_pagevec_pool;
>
>  static void ceph_inode_init_once(void *foo)
>  {
> @@ -796,6 +783,10 @@ static int __init init_caches(void)
>          if (!ceph_mds_request_cachep)
>                  goto bad_mds_req;
>
> +        ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT);
> +        if (!ceph_wb_pagevec_pool)
> +                goto bad_pagevec_pool;
> +
>          error = ceph_fscache_register();
>          if (error)
>                  goto bad_fscache;
> @@ -804,6 +795,8 @@ static int __init init_caches(void)
>
>  bad_fscache:
>          kmem_cache_destroy(ceph_mds_request_cachep);
> +bad_pagevec_pool:
> +        mempool_destroy(ceph_wb_pagevec_pool);
>  bad_mds_req:
>          kmem_cache_destroy(ceph_dir_file_cachep);
>  bad_dir_file:
> @@ -834,6 +827,7 @@ static void destroy_caches(void)
>          kmem_cache_destroy(ceph_file_cachep);
>          kmem_cache_destroy(ceph_dir_file_cachep);
>          kmem_cache_destroy(ceph_mds_request_cachep);
> +        mempool_destroy(ceph_wb_pagevec_pool);
>
>          ceph_fscache_unregister();
>  }
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 9001a896ae8c..4c3c964b1c54 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -118,8 +118,6 @@ struct ceph_fs_client {
>
>          struct ceph_mds_client *mdsc;
>
> -        /* writeback */
> -        mempool_t *wb_pagevec_pool;
>          atomic_long_t writeback_count;
>
>          struct workqueue_struct *inode_wq;
> diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
> index e5ed1c541e7f..c8645f0b797d 100644
> --- a/include/linux/ceph/libceph.h
> +++ b/include/linux/ceph/libceph.h
> @@ -282,6 +282,7 @@ extern struct kmem_cache *ceph_dentry_cachep;
>  extern struct kmem_cache *ceph_file_cachep;
>  extern struct kmem_cache *ceph_dir_file_cachep;
>  extern struct kmem_cache *ceph_mds_request_cachep;
> +extern mempool_t *ceph_wb_pagevec_pool;
>
>  /* ceph_common.c */
>  extern bool libceph_compatible(void *data);

Looks fine to me.  I think it used to be just a single page per mount
before 95cca2b44e54 ("ceph: limit osd write size") because fsopt->wsize
defaulted to 0 which meant "no limit".  And CEPH_MSG_MAX_DATA_LEN got
increased from 16M to 64M as well...

Thanks,

                Ilya
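
For readers following the thread, here is a minimal sketch of the pattern
the patch adopts: one module-global mempool sized for the largest write,
with callers trying a regular kmalloc_array() first and dipping into the
reserve only when that fails.  The names example_pagevec_pool, alloc_pagevec
and free_pagevec are hypothetical, SZ_64M stands in for CEPH_MAX_WRITE_SIZE,
and the element sizing follows the old per-mount computation (pointer count
times pointer size) assuming 4 KiB pages and 8-byte pointers; this is not
the exact fs/ceph code.

/*
 * Sketch only: a module-wide mempool with a kmalloc_array() fast path.
 * Names are hypothetical; sizing assumes 4 KiB pages and 8-byte pointers.
 */
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/mm.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>

static mempool_t *example_pagevec_pool;  /* one reserve for the whole module */

static int __init example_init(void)
{
        /*
         * One element holds SZ_64M >> PAGE_SHIFT = 16384 page pointers,
         * i.e. 128 KiB -- the "32 contiguous pages" from the changelog.
         * Ten such elements are reserved once, at module load, instead of
         * per mount.
         */
        example_pagevec_pool = mempool_create_kmalloc_pool(10,
                        (SZ_64M >> PAGE_SHIFT) * sizeof(struct page *));
        return example_pagevec_pool ? 0 : -ENOMEM;
}

static void __exit example_exit(void)
{
        mempool_destroy(example_pagevec_pool);
}

/* Try a normal allocation first; fall back to the reserve only on failure. */
static struct page **alloc_pagevec(unsigned int nr_pages, bool *from_pool)
{
        struct page **pages;

        pages = kmalloc_array(nr_pages, sizeof(*pages), GFP_NOFS);
        if (pages) {
                *from_pool = false;
                return pages;
        }
        *from_pool = true;
        return mempool_alloc(example_pagevec_pool, GFP_NOFS);
}

/* Free with the routine that matches how the array was allocated. */
static void free_pagevec(struct page **pages, bool from_pool)
{
        if (from_pool)
                mempool_free(pages, example_pagevec_pool);
        else
                kfree(pages);
}

Reserving the pool once at module load means the high-order (32-page)
allocations happen before memory has been fragmented by a long run, and the
amount held in reserve no longer scales with the number of mounts.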