Re: [PATCH 5/9] cephfs: don't set/clear bdi_congestion

Jeff Layton <jlayton@xxxxxxxxxx> · Thu, 27 Jan 2022 06:12:15 -0500

On Thu, 2022-01-27 at 13:46 +1100, NeilBrown wrote:
> The bdi congestion framework is no longer used - writeback uses other
> mechanisms to manage throughput.
> 
> So remove calls to set_bdi_congested() and clear_bdi_congested(), and
> remove the writeback_count which is used only to guide the setting and
> clearing.
> 
> The congestion_kb mount option is no longer meaningful, but as it is
> visible to user-space, removing it needs more consideration.
> 
> Signed-off-by: NeilBrown <neilb@xxxxxxx>
> ---
>  fs/ceph/addr.c  |   27 ---------------------------
>  fs/ceph/super.c |    2 --
>  fs/ceph/super.h |    2 --
>  3 files changed, 31 deletions(-)
> 
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index c98e5238a1b6..9147667f8cd5 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -57,11 +57,6 @@
>   * accounting is preserved.
>   */
>  
> -#define CONGESTION_ON_THRESH(congestion_kb) (congestion_kb >> (PAGE_SHIFT-10))
> -#define CONGESTION_OFF_THRESH(congestion_kb)				\
> -	(CONGESTION_ON_THRESH(congestion_kb) -				\
> -	 (CONGESTION_ON_THRESH(congestion_kb) >> 2))
> -
>  static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
>  					struct folio *folio, void **_fsdata);
>  
> @@ -561,10 +556,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
>  	dout("writepage %p page %p index %lu on %llu~%llu snapc %p seq %lld\n",
>  	     inode, page, page->index, page_off, len, snapc, snapc->seq);
>  
> -	if (atomic_long_inc_return(&fsc->writeback_count) >
> -	    CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
> -		set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
> -
>  	req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), page_off, &len, 0, 1,
>  				    CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc,
>  				    ceph_wbc.truncate_seq, ceph_wbc.truncate_size,
> @@ -621,10 +612,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
>  	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
>  	ceph_put_snap_context(snapc);  /* page's reference */
>  
> -	if (atomic_long_dec_return(&fsc->writeback_count) <
> -	    CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
> -		clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
> -
>  	return err;
>  }
>  
> @@ -704,12 +691,6 @@ static void writepages_finish(struct ceph_osd_request *req)
>  			BUG_ON(!page);
>  			WARN_ON(!PageUptodate(page));
>  
> -			if (atomic_long_dec_return(&fsc->writeback_count) <
> -			     CONGESTION_OFF_THRESH(
> -					fsc->mount_options->congestion_kb))
> -				clear_bdi_congested(inode_to_bdi(inode),
> -						    BLK_RW_ASYNC);
> -
>  			ceph_put_snap_context(detach_page_private(page));
>  			end_page_writeback(page);
>  			dout("unlocking %p\n", page);
> @@ -952,14 +933,6 @@ static int ceph_writepages_start(struct address_space *mapping,
>  			dout("%p will write page %p idx %lu\n",
>  			     inode, page, page->index);
>  
> -			if (atomic_long_inc_return(&fsc->writeback_count) >
> -			    CONGESTION_ON_THRESH(
> -				    fsc->mount_options->congestion_kb)) {
> -				set_bdi_congested(inode_to_bdi(inode),
> -						  BLK_RW_ASYNC);
> -			}
> -
> -
>  			pages[locked_pages++] = page;
>  			pvec.pages[i] = NULL;
>  
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index bf79f369aec6..b2f38af9fca8 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -801,8 +801,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
>  	fsc->filp_gen = 1;
>  	fsc->have_copy_from2 = true;
>  
> -	atomic_long_set(&fsc->writeback_count, 0);
> -
>  	err = -ENOMEM;
>  	/*
>  	 * The number of concurrent works can be high but they don't need
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 67f145e1ae7a..fc58adf1d36a 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -120,8 +120,6 @@ struct ceph_fs_client {
>  
>  	struct ceph_mds_client *mdsc;
>  
> -	atomic_long_t writeback_count;
> -
>  	struct workqueue_struct *inode_wq;
>  	struct workqueue_struct *cap_wq;
>  
> 
> 

Thanks Neil.

I'll plan to pull this into the ceph testing branch and do some testing
with it, but at a quick glance I don't foresee any issues. This should
make v5.18, but we may be able to get it in sooner.
-- 
Jeff Layton <jlayton@xxxxxxxxxx>