Re: [PATCH] ceph: new mount option that controls how fscache data are indexed

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 2017-06-27 at 12:23 +0800, Yan, Zheng wrote:
> Currently ceph uses the FSID as the primary index key of fscache data. This
> allows ceph to retain cached data across remounts. But this causes
> problems (kernel oops; fscache does not support sharing data) when
> a filesystem gets mounted (with fscache enabled) several times.
> 
> The fix is to add a new mount option, which makes ceph use the client
> ID as the primary index key. The client ID is unique for each mount. For
> the old fscache mount option, only one fscache instance is allowed for
> each filesystem.
> 
> Signed-off-by: "Yan, Zheng" <zyan@xxxxxxxxxx>

Yuck. I hope this will be well documented. An admin will have no idea
what this does otherwise.

FWIW, the kernel nfs client solves this by unifying the pagecache
between mounts. You have to explicitly disable cache sharing if you want
different cache objects ("nosharecache").

That could be done with ceph too, but it would take some restructuring.

> ---
>  fs/ceph/cache.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
>  fs/ceph/super.c |  32 ++++++++++------
>  fs/ceph/super.h |   5 ++-
>  3 files changed, 131 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
> index 4e7421c..b4956b4 100644
> --- a/fs/ceph/cache.c
> +++ b/fs/ceph/cache.c
> @@ -35,8 +35,17 @@ struct fscache_netfs ceph_cache_netfs = {
>  	.version	= 0,
>  };
>  
> -static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
> -					     void *buffer, uint16_t maxbuf)
> +static DEFINE_MUTEX(ceph_fscache_fsid_lock);
> +static LIST_HEAD(ceph_fscache_fsid_list);
> +
> +struct ceph_fscache_fsid {
> +	struct list_head list;
> +	struct fscache_cookie *fscache;
> +	struct ceph_fsid fsid;
> +};
> +
> +static uint16_t ceph_fscache_fsid_get_key(const void *cookie_netfs_data,
> +					  void *buffer, uint16_t maxbuf)
>  {
>  	const struct ceph_fs_client* fsc = cookie_netfs_data;
>  	uint16_t klen;
> @@ -52,7 +61,32 @@ static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
>  static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
>  	.name		= "CEPH.fsid",
>  	.type		= FSCACHE_COOKIE_TYPE_INDEX,
> -	.get_key	= ceph_fscache_session_get_key,
> +	.get_key	= ceph_fscache_fsid_get_key,
> +};
> +
> +static uint16_t ceph_fscache_client_get_key(const void *cookie_netfs_data,
> +					    void *buffer, uint16_t maxbuf)
> +{
> +	const struct ceph_fs_client* fsc = cookie_netfs_data;
> +	const struct ceph_fsid *fsid = &fsc->client->fsid;
> +	u64 client_id = fsc->client->monc.auth->global_id;
> +	uint16_t fsid_len, key_len;
> +
> +	fsid_len = sizeof(*fsid);
> +	key_len = fsid_len + sizeof(client_id);
> +	if (key_len > maxbuf)
> +		return 0;
> +
> +	memcpy(buffer, fsid, fsid_len);
> +	memcpy(buffer + fsid_len, &client_id, sizeof(client_id));
> +
> +	return key_len;
> +}
> +
> +static const struct fscache_cookie_def ceph_fscache_client_object_def = {
> +	.name		= "CEPH.client",
> +	.type		= FSCACHE_COOKIE_TYPE_INDEX,
> +	.get_key	= ceph_fscache_client_get_key,
>  };
>  
>  int ceph_fscache_register(void)
> @@ -67,13 +101,54 @@ void ceph_fscache_unregister(void)
>  
>  int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
>  {
> -	fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
> -					      &ceph_fscache_fsid_object_def,
> -					      fsc, true);
> -	if (!fsc->fscache)
> -		pr_err("Unable to register fsid: %p fscache cookie\n", fsc);
> +	const struct ceph_fsid *fsid = &fsc->client->fsid;
> +	struct ceph_fscache_fsid *ent;
> +	int err = 0;
> +
> +	if (fsc->mount_options->flags & CEPH_MOUNT_OPT_TMPFSCACHE) {
> +		fsc->fscache = fscache_acquire_cookie(
> +						ceph_cache_netfs.primary_index,
> +						&ceph_fscache_client_object_def,
> +						fsc, true);
> +		if (!fsc->fscache)
> +			pr_err("Unable to register fsid: %p "
> +			       "fscache cookie\n", fsc);
> +	} else {
> +		mutex_lock(&ceph_fscache_fsid_lock);
> +		list_for_each_entry(ent, &ceph_fscache_fsid_list, list) {
> +			if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) {
> +				pr_err("fscache cookie already registered "
> +				       "for fsid %pU\n", fsid);
> +				pr_err("  use tmpfsc mount option instead\n");
> +				err = -EBUSY;
> +				goto out_unlock;
> +			}
> +		}
>  
> -	return 0;
> +		ent = kzalloc(sizeof(*ent), GFP_KERNEL);
> +		if (!ent) {
> +			err = -ENOMEM;
> +			goto out_unlock;
> +		}
> +
> +		fsc->fscache = fscache_acquire_cookie(
> +						ceph_cache_netfs.primary_index,
> +						&ceph_fscache_fsid_object_def,
> +						fsc, true);
> +
> +		if (fsc->fscache) {
> +			memcpy(&ent->fsid, fsid, sizeof(*fsid));
> +			ent->fscache = fsc->fscache;
> +			list_add_tail(&ent->list, &ceph_fscache_fsid_list);
> +		} else {
> +			kfree(ent);
> +			pr_err("Unable to register fsid: %p "
> +			       "fscache cookie\n", fsc);
> +		}
> +out_unlock:
> +		mutex_unlock(&ceph_fscache_fsid_lock);
> +	}
> +	return err;
>  }
>  
>  static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
> @@ -349,7 +424,29 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
>  
>  void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
>  {
> -	fscache_relinquish_cookie(fsc->fscache, 0);
> +	if (fscache_cookie_valid(fsc->fscache)) {
> +		if (fsc->fscache->def == &ceph_fscache_fsid_object_def) {
> +			const struct ceph_fsid *fsid = &fsc->client->fsid;
> +			struct ceph_fscache_fsid *ent, *found = NULL;
> +
> +			mutex_lock(&ceph_fscache_fsid_lock);
> +			list_for_each_entry(ent, &ceph_fscache_fsid_list, list) {
> +				if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) {
> +					found = ent;
> +					break;
> +				}
> +			}
> +			if (found) {
> +				WARN_ON_ONCE(found->fscache != fsc->fscache);
> +				list_del(&found->list);
> +				kfree(found);
> +			} else {
> +				WARN_ON_ONCE(true);
> +			}
> +			mutex_unlock(&ceph_fscache_fsid_lock);
> +		}
> +		__fscache_relinquish_cookie(fsc->fscache, 0);
> +	}
>  	fsc->fscache = NULL;
>  }
>  
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 14e78dd..bb6dd7f 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -134,6 +134,7 @@ enum {
>  	Opt_ino32,
>  	Opt_noino32,
>  	Opt_fscache,
> +	Opt_tmpfscache,
>  	Opt_nofscache,
>  	Opt_poolperm,
>  	Opt_nopoolperm,
> @@ -170,6 +171,7 @@ static match_table_t fsopt_tokens = {
>  	{Opt_ino32, "ino32"},
>  	{Opt_noino32, "noino32"},
>  	{Opt_fscache, "fsc"},
> +	{Opt_tmpfscache, "tmpfsc"},

Maybe allowing the fsc option to take an optional argument would be
cleaner?

    fsc=tmp

That would also leave open the option to allow other flavors in the
future.


>  	{Opt_nofscache, "nofsc"},
>  	{Opt_poolperm, "poolperm"},
>  	{Opt_nopoolperm, "nopoolperm"},
> @@ -281,6 +283,10 @@ static int parse_fsopt_token(char *c, void *private)
>  	case Opt_fscache:
>  		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
>  		break;
> +	case Opt_tmpfscache:
> +		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE |
> +				CEPH_MOUNT_OPT_TMPFSCACHE;
> +		break;
>  	case Opt_nofscache:
>  		fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
>  		break;
> @@ -475,8 +481,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
>  		seq_puts(m, ",noasyncreaddir");
>  	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
>  		seq_puts(m, ",nodcache");
> -	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
> -		seq_puts(m, ",fsc");
> +	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
> +		if (fsopt->flags & CEPH_MOUNT_OPT_TMPFSCACHE)
> +			seq_puts(m, ",tmpfsc");
> +		else
> +			seq_puts(m, ",fsc");
> +	}
>  	if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
>  		seq_puts(m, ",nopoolperm");
>  
> @@ -597,18 +607,11 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
>  	if (!fsc->wb_pagevec_pool)
>  		goto fail_trunc_wq;
>  
> -	/* setup fscache */
> -	if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
> -	    (ceph_fscache_register_fs(fsc) != 0))
> -		goto fail_fscache;
> -
>  	/* caps */
>  	fsc->min_caps = fsopt->max_readdir;
>  
>  	return fsc;
>  
> -fail_fscache:
> -	ceph_fscache_unregister_fs(fsc);
>  fail_trunc_wq:
>  	destroy_workqueue(fsc->trunc_wq);
>  fail_pg_inv_wq:
> @@ -626,8 +629,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
>  {
>  	dout("destroy_fs_client %p\n", fsc);
>  
> -	ceph_fscache_unregister_fs(fsc);
> -
>  	destroy_workqueue(fsc->wb_wq);
>  	destroy_workqueue(fsc->pg_inv_wq);
>  	destroy_workqueue(fsc->trunc_wq);
> @@ -820,6 +821,13 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
>  		if (err < 0)
>  			goto out;
>  
> +		/* setup fscache */
> +		if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
> +			err = ceph_fscache_register_fs(fsc);
> +			if (err < 0)
> +				goto out;
> +		}
> +
>  		if (!fsc->mount_options->server_path) {
>  			path = "";
>  			dout("mount opening path \\t\n");
> @@ -1042,6 +1050,8 @@ static void ceph_kill_sb(struct super_block *s)
>  	fsc->client->extra_mon_dispatch = NULL;
>  	ceph_fs_debugfs_cleanup(fsc);
>  
> +	ceph_fscache_unregister_fs(fsc);
> +
>  	ceph_mdsc_destroy(fsc);
>  
>  	destroy_fs_client(fsc);
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index f8a0aba..21e5562 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -36,8 +36,9 @@
>  #define CEPH_MOUNT_OPT_INO32           (1<<8) /* 32 bit inos */
>  #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
>  #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
> -#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
> -#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<12) /* mount waits if no mds is up */
> +#define CEPH_MOUNT_OPT_TMPFSCACHE      (1<<11) /* use temp fscache */
> +#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<12) /* no pool permission check */
> +#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<13) /* mount waits if no mds is up */
>  
>  #define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE
>  

-- 
Jeff Layton <jlayton@xxxxxxxxxx>
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux