Re: [PATCH] ceph: add halt mount option support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, 2020-02-16 at 01:49 -0500, xiubli@xxxxxxxxxx wrote:
> From: Xiubo Li <xiubli@xxxxxxxxxx>
> 
> This will simulate pulling the power cable situation, which will
> do:
> 
> - abort all the inflight osd/mds requests and fail them with -EIO.
> - reject any new coming osd/mds requests with -EIO.
> - close all the mds connections directly without doing any clean up
>   and disable mds sessions recovery routine.
> - close all the osd connections directly without doing any clean up.
> - set the msgr as stopped.
> 
> URL: https://tracker.ceph.com/issues/44044
> Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx>

There is no explanation of how to actually _use_ this feature. I assume
you have to remount the fs with "-o remount,halt"? Is it possible to
re-enable the mount as well? If not, why keep the mount around? Maybe we
should consider wiring this into a new umount2() flag instead?

This needs much better documentation.

In the past, I've generally done this using iptables. Granted, that
is difficult with a clustered fs like ceph (given that you potentially
have to set rules for a lot of addresses), but I wonder whether a scheme
like that might be more viable in the long run.

Note too that this may have interesting effects when superblocks end up
being shared between vfsmounts.

> ---
>  fs/ceph/mds_client.c            | 12 ++++++++++--
>  fs/ceph/mds_client.h            |  3 ++-
>  fs/ceph/super.c                 | 33 ++++++++++++++++++++++++++++-----
>  fs/ceph/super.h                 |  1 +
>  include/linux/ceph/libceph.h    |  1 +
>  include/linux/ceph/mon_client.h |  2 ++
>  include/linux/ceph/osd_client.h |  1 +
>  net/ceph/ceph_common.c          | 14 ++++++++++++++
>  net/ceph/mon_client.c           | 16 ++++++++++++++--
>  net/ceph/osd_client.c           | 11 +++++++++++
>  10 files changed, 84 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index b0f34251ad28..b6aa357f7c61 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -4110,6 +4110,9 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
>  {
>  	struct ceph_fs_client *fsc = mdsc->fsc;
>  
> +	if (ceph_test_mount_opt(fsc, HALT))
> +		return;
> +
>  	if (!ceph_test_mount_opt(fsc, CLEANRECOVER))
>  		return;
>  
> @@ -4735,7 +4738,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
>  	dout("stopped\n");
>  }
>  
> -void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc)
> +void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc, bool halt)
>  {
>  	struct ceph_mds_session *session;
>  	int mds;
> @@ -4748,7 +4751,12 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc)
>  		if (!session)
>  			continue;
>  
> -		if (session->s_state == CEPH_MDS_SESSION_REJECTED)
> +		/*
> +		 * when halting the superblock, it will simulate pulling
> +		 * the power cable, so here close the connection before
> +		 * doing any cleanup.
> +		 */
> +		if (halt || (session->s_state == CEPH_MDS_SESSION_REJECTED))
>  			__unregister_session(mdsc, session);

Note that this is not exactly like pulling the power cable. The
connection will be closed, which will send a FIN to the peer, whereas
a real power loss would send nothing at all.

>  		__wake_requests(mdsc, &session->s_waiting);
>  		mutex_unlock(&mdsc->mutex);
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index c13910da07c4..b66eea830ae1 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -478,7 +478,8 @@ extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc,
>  
>  extern int ceph_mdsc_init(struct ceph_fs_client *fsc);
>  extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc);
> -extern void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc);
> +extern void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc,
> +				   bool halt);
>  extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc);
>  
>  extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc);
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 8b52bea13273..2a6fd5d2fffa 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -155,6 +155,7 @@ enum {
>  	Opt_acl,
>  	Opt_quotadf,
>  	Opt_copyfrom,
> +	Opt_halt,
>  };
>  
>  enum ceph_recover_session_mode {
> @@ -194,6 +195,7 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = {
>  	fsparam_string	("snapdirname",			Opt_snapdirname),
>  	fsparam_string	("source",			Opt_source),
>  	fsparam_u32	("wsize",			Opt_wsize),
> +	fsparam_flag	("halt",			Opt_halt),
>  	{}
>  };
>  
> @@ -435,6 +437,9 @@ static int ceph_parse_mount_param(struct fs_context *fc,
>  			fc->sb_flags &= ~SB_POSIXACL;
>  		}
>  		break;
> +	case Opt_halt:
> +		fsopt->flags |= CEPH_MOUNT_OPT_HALT;
> +		break;
>  	default:
>  		BUG();
>  	}
> @@ -601,6 +606,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
>  	if (m->count == pos)
>  		m->count--;
>  
> +	if (fsopt->flags & CEPH_MOUNT_OPT_HALT)
> +		seq_puts(m, ",halt");
>  	if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
>  		seq_puts(m, ",dirstat");
>  	if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES))
> @@ -877,22 +884,28 @@ static void destroy_caches(void)
>  }
>  
>  /*
> - * ceph_umount_begin - initiate forced umount.  Tear down down the
> - * mount, skipping steps that may hang while waiting for server(s).
> + * ceph_umount_begin - initiate forced umount.  Tear down the mount,
> + * skipping steps that may hang while waiting for server(s).
>   */
> -static void ceph_umount_begin(struct super_block *sb)
> +static void __ceph_umount_begin(struct super_block *sb, bool halt)
>  {
>  	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
>  
> -	dout("ceph_umount_begin - starting forced umount\n");
>  	if (!fsc)
>  		return;
>  	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
>  	ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
> -	ceph_mdsc_force_umount(fsc->mdsc);
> +	ceph_mdsc_force_umount(fsc->mdsc, halt);
>  	fsc->filp_gen++; // invalidate open files
>  }
>  
> +static void ceph_umount_begin(struct super_block *sb)
> +{
> +	dout("%s - starting forced umount\n", __func__);
> +
> +	__ceph_umount_begin(sb, false);
> +}
> +
>  static const struct super_operations ceph_super_ops = {
>  	.alloc_inode	= ceph_alloc_inode,
>  	.free_inode	= ceph_free_inode,
> @@ -1193,6 +1206,16 @@ static int ceph_reconfigure_fc(struct fs_context *fc)
>  	struct ceph_parse_opts_ctx *pctx = fc->fs_private;
>  	struct ceph_mount_options *new_fsopt = pctx->opts;
>  
> +	/* halt the mount point, will ignore other options */
> +	if (new_fsopt->flags & CEPH_MOUNT_OPT_HALT) {
> +		dout("halt the mount point\n");
> +		fsopt->flags |= CEPH_MOUNT_OPT_HALT;
> +		__ceph_umount_begin(sb, true);
> +		ceph_halt_client(fsc->client);
> +
> +		return 0;
> +	}
> +
>  	sync_filesystem(sb);
>  
>  	if (strcmp_null(new_fsopt->snapdir_name, fsopt->snapdir_name))
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 4c40e86ad016..64f16083b216 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -43,6 +43,7 @@
>  #define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<12) /* mount waits if no mds is up */
>  #define CEPH_MOUNT_OPT_NOQUOTADF       (1<<13) /* no root dir quota in statfs */
>  #define CEPH_MOUNT_OPT_NOCOPYFROM      (1<<14) /* don't use RADOS 'copy-from' op */
> +#define CEPH_MOUNT_OPT_HALT            (1<<15) /* halt the mount point */
>  
>  #define CEPH_MOUNT_OPT_DEFAULT			\
>  	(CEPH_MOUNT_OPT_DCACHE |		\
> diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
> index 8fe9b80e80a5..12e9f0cc8501 100644
> --- a/include/linux/ceph/libceph.h
> +++ b/include/linux/ceph/libceph.h
> @@ -295,6 +295,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private);
>  struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client);
>  u64 ceph_client_gid(struct ceph_client *client);
>  extern void ceph_destroy_client(struct ceph_client *client);
> +void ceph_halt_client(struct ceph_client *client);
>  extern void ceph_reset_client_addr(struct ceph_client *client);
>  extern int __ceph_open_session(struct ceph_client *client,
>  			       unsigned long started);
> diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
> index dbb8a6959a73..7718a2e65d07 100644
> --- a/include/linux/ceph/mon_client.h
> +++ b/include/linux/ceph/mon_client.h
> @@ -78,6 +78,7 @@ struct ceph_mon_client {
>  	struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack;
>  	int pending_auth;
>  
> +	bool halt;
>  	bool hunting;
>  	int cur_mon;                       /* last monitor i contacted */
>  	unsigned long sub_renew_after;
> @@ -109,6 +110,7 @@ extern int ceph_monmap_contains(struct ceph_monmap *m,
>  
>  extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
>  extern void ceph_monc_stop(struct ceph_mon_client *monc);
> +void ceph_monc_halt(struct ceph_mon_client *monc);
>  extern void ceph_monc_reopen_session(struct ceph_mon_client *monc);
>  
>  enum {
> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
> index 02ff3a302d26..4b9143f7d989 100644
> --- a/include/linux/ceph/osd_client.h
> +++ b/include/linux/ceph/osd_client.h
> @@ -382,6 +382,7 @@ extern void ceph_osdc_cleanup(void);
>  extern int ceph_osdc_init(struct ceph_osd_client *osdc,
>  			  struct ceph_client *client);
>  extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
> +extern void ceph_osdc_halt(struct ceph_osd_client *osdc);
>  extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
>  
>  extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
> index a9d6c97b5b0d..c47578ed0546 100644
> --- a/net/ceph/ceph_common.c
> +++ b/net/ceph/ceph_common.c
> @@ -652,6 +652,20 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
>  }
>  EXPORT_SYMBOL(ceph_create_client);
>  
> +void ceph_halt_client(struct ceph_client *client)
> +{
> +	dout("halt_client %p\n", client);
> +
> +	atomic_set(&client->msgr.stopping, 1);
> +
> +	/* unmount */
> +	ceph_osdc_halt(&client->osdc);
> +	ceph_monc_halt(&client->monc);
> +
> +	dout("halt_client %p done\n", client);
> +}
> +EXPORT_SYMBOL(ceph_halt_client);
> +
>  void ceph_destroy_client(struct ceph_client *client)
>  {
>  	dout("destroy_client %p\n", client);
> diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
> index 9d9e4e4ea600..5819a02af7fe 100644
> --- a/net/ceph/mon_client.c
> +++ b/net/ceph/mon_client.c
> @@ -979,14 +979,16 @@ static void delayed_work(struct work_struct *work)
>  	mutex_lock(&monc->mutex);
>  	if (monc->hunting) {
>  		dout("%s continuing hunt\n", __func__);
> -		reopen_session(monc);
> +		if (!monc->halt)
> +			reopen_session(monc);
>  	} else {
>  		int is_auth = ceph_auth_is_authenticated(monc->auth);
>  		if (ceph_con_keepalive_expired(&monc->con,
>  					       CEPH_MONC_PING_TIMEOUT)) {
>  			dout("monc keepalive timeout\n");
>  			is_auth = 0;
> -			reopen_session(monc);
> +			if (!monc->halt)
> +				reopen_session(monc);
>  		}
>  
>  		if (!monc->hunting) {
> @@ -1115,6 +1117,16 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
>  }
>  EXPORT_SYMBOL(ceph_monc_init);
>  
> +void ceph_monc_halt(struct ceph_mon_client *monc)
> +{
> +	dout("monc halt\n");
> +
> +	mutex_lock(&monc->mutex);
> +	monc->halt = true;
> +	ceph_con_close(&monc->con);
> +	mutex_unlock(&monc->mutex);
> +}
> +

The changelog doesn't mention shutting down connections to the mons.

>  void ceph_monc_stop(struct ceph_mon_client *monc)
>  {
>  	dout("stop\n");
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index 108c9457d629..161daf35d7f1 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -5202,6 +5202,17 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
>  	return err;
>  }
>  
> +void ceph_osdc_halt(struct ceph_osd_client *osdc)
> +{
> +	down_write(&osdc->lock);
> +	while (!RB_EMPTY_ROOT(&osdc->osds)) {
> +		struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds),
> +						struct ceph_osd, o_node);
> +		close_osd(osd);
> +	}
> +	up_write(&osdc->lock);
> +}
> +
>  void ceph_osdc_stop(struct ceph_osd_client *osdc)
>  {
>  	destroy_workqueue(osdc->completion_wq);

-- 
Jeff Layton <jlayton@xxxxxxxxxx>




[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux