Re: [PATCH] ceph: new mount option that controls how fscache data is indexed

> On 27 Jun 2017, at 23:08, Jeff Layton <jlayton@xxxxxxxxxx> wrote:
> 
> On Tue, 2017-06-27 at 12:23 +0800, Yan, Zheng wrote:
>> Current ceph uses the FSID as the primary index key of fscache data. This
>> allows ceph to retain cached data across remounts. But this causes
>> problems (kernel oops; fscache does not support sharing data) when
>> a filesystem gets mounted (with fscache enabled) several times.
>> 
>> The fix adds a new mount option, which makes ceph use the client
>> ID as the primary index key. The client ID is unique for each mount. For
>> the old fscache mount option, only one fscache instance is allowed for
>> each filesystem.
>> 
>> Signed-off-by: "Yan, Zheng" <zyan@xxxxxxxxxx>
> 
> Yuck. I hope this will be well documented. An admin will have no idea
> what this does otherwise.
> 
> FWIW, the kernel nfs client solves this by unifying the pagecache
> between mounts. You have to explicitly disable cache sharing if you want
> different cache objects ("nosharecache").

Ceph tries to unify the pagecache between mounts too. It creates a separate mds_client
when the mount options do not match an existing one or when the "noshare" mount option
is given, roughly as in the sketch below. I checked the nfs code; it seems nfs does
similar things.
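
(Very roughly, the sharing check looks like the sketch below; the struct and
field names are simplified for illustration and are not the actual fs/ceph
code.)

/*
 * Simplified sketch: a new mount may reuse an existing client (and so
 * share the pagecache) only when "noshare" was not given and its options
 * match the existing client's options.
 */
struct example_mount_opts {
	bool noshare;		/* "noshare": always create a private client */
	bool fscache;		/* "fsc": fscache enabled */
	char mon_addrs[64];	/* monitor addresses from the device string */
};

static bool can_share_client(const struct example_mount_opts *existing,
			     const struct example_mount_opts *wanted)
{
	if (wanted->noshare)
		return false;		/* caller asked for its own client */
	if (existing->fscache != wanted->fscache)
		return false;		/* caching setup differs */
	if (strcmp(existing->mon_addrs, wanted->mon_addrs) != 0)
		return false;		/* different monitors/cluster */
	return true;			/* reuse client, share pagecache */
}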

Regards
Yan, Zheng

> 
> That could be done with ceph too, but it would take some restructuring.
> 
>> ---
>> fs/ceph/cache.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
>> fs/ceph/super.c |  32 ++++++++++------
>> fs/ceph/super.h |   5 ++-
>> 3 files changed, 131 insertions(+), 23 deletions(-)
>> 
>> diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
>> index 4e7421c..b4956b4 100644
>> --- a/fs/ceph/cache.c
>> +++ b/fs/ceph/cache.c
>> @@ -35,8 +35,17 @@ struct fscache_netfs ceph_cache_netfs = {
>> 	.version	= 0,
>> };
>> 
>> -static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
>> -					     void *buffer, uint16_t maxbuf)
>> +static DEFINE_MUTEX(ceph_fscache_fsid_lock);
>> +static LIST_HEAD(ceph_fscache_fsid_list);
>> +
>> +struct ceph_fscache_fsid {
>> +	struct list_head list;
>> +	struct fscache_cookie *fscache;
>> +	struct ceph_fsid fsid;
>> +};
>> +
>> +static uint16_t ceph_fscache_fsid_get_key(const void *cookie_netfs_data,
>> +					  void *buffer, uint16_t maxbuf)
>> {
>> 	const struct ceph_fs_client* fsc = cookie_netfs_data;
>> 	uint16_t klen;
>> @@ -52,7 +61,32 @@ static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
>> static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
>> 	.name		= "CEPH.fsid",
>> 	.type		= FSCACHE_COOKIE_TYPE_INDEX,
>> -	.get_key	= ceph_fscache_session_get_key,
>> +	.get_key	= ceph_fscache_fsid_get_key,
>> +};
>> +
>> +static uint16_t ceph_fscache_client_get_key(const void *cookie_netfs_data,
>> +					    void *buffer, uint16_t maxbuf)
>> +{
>> +	const struct ceph_fs_client* fsc = cookie_netfs_data;
>> +	const struct ceph_fsid *fsid = &fsc->client->fsid;
>> +	u64 client_id = fsc->client->monc.auth->global_id;
>> +	uint16_t fsid_len, key_len;
>> +
>> +	fsid_len = sizeof(*fsid);
>> +	key_len = fsid_len + sizeof(client_id);
>> +	if (key_len > maxbuf)
>> +		return 0;
>> +
>> +	memcpy(buffer, fsid, fsid_len);
>> +	memcpy(buffer + fsid_len, &client_id, sizeof(client_id));
>> +
>> +	return key_len;
>> +}
>> +
>> +static const struct fscache_cookie_def ceph_fscache_client_object_def = {
>> +	.name		= "CEPH.client",
>> +	.type		= FSCACHE_COOKIE_TYPE_INDEX,
>> +	.get_key	= ceph_fscache_client_get_key,
>> };
>> 
>> int ceph_fscache_register(void)
>> @@ -67,13 +101,54 @@ void ceph_fscache_unregister(void)
>> 
>> int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
>> {
>> -	fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
>> -					      &ceph_fscache_fsid_object_def,
>> -					      fsc, true);
>> -	if (!fsc->fscache)
>> -		pr_err("Unable to register fsid: %p fscache cookie\n", fsc);
>> +	const struct ceph_fsid *fsid = &fsc->client->fsid;
>> +	struct ceph_fscache_fsid *ent;
>> +	int err = 0;
>> +
>> +	if (fsc->mount_options->flags & CEPH_MOUNT_OPT_TMPFSCACHE) {
>> +		fsc->fscache = fscache_acquire_cookie(
>> +						ceph_cache_netfs.primary_index,
>> +						&ceph_fscache_client_object_def,
>> +						fsc, true);
>> +		if (!fsc->fscache)
>> +			pr_err("Unable to register fsid: %p "
>> +			       "fscache cookie\n", fsc);
>> +	} else {
>> +		mutex_lock(&ceph_fscache_fsid_lock);
>> +		list_for_each_entry(ent, &ceph_fscache_fsid_list, list) {
>> +			if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) {
>> +				pr_err("fscache cookie already registered "
>> +				       "for fsid %pU\n", fsid);
>> +				pr_err("  use tmpfsc mount option instead\n");
>> +				err = -EBUSY;
>> +				goto out_unlock;
>> +			}
>> +		}
>> 
>> -	return 0;
>> +		ent = kzalloc(sizeof(*ent), GFP_KERNEL);
>> +		if (!ent) {
>> +			err = -ENOMEM;
>> +			goto out_unlock;
>> +		}
>> +
>> +		fsc->fscache = fscache_acquire_cookie(
>> +						ceph_cache_netfs.primary_index,
>> +						&ceph_fscache_fsid_object_def,
>> +						fsc, true);
>> +
>> +		if (fsc->fscache) {
>> +			memcpy(&ent->fsid, fsid, sizeof(*fsid));
>> +			ent->fscache = fsc->fscache;
>> +			list_add_tail(&ent->list, &ceph_fscache_fsid_list);
>> +		} else {
>> +			kfree(ent);
>> +			pr_err("Unable to register fsid: %p "
>> +			       "fscache cookie\n", fsc);
>> +		}
>> +out_unlock:
>> +		mutex_unlock(&ceph_fscache_fsid_lock);
>> +	}
>> +	return err;
>> }
>> 
>> static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
>> @@ -349,7 +424,29 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
>> 
>> void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
>> {
>> -	fscache_relinquish_cookie(fsc->fscache, 0);
>> +	if (fscache_cookie_valid(fsc->fscache)) {
>> +		if (fsc->fscache->def == &ceph_fscache_fsid_object_def) {
>> +			const struct ceph_fsid *fsid = &fsc->client->fsid;
>> +			struct ceph_fscache_fsid *ent, *found = NULL;
>> +
>> +			mutex_lock(&ceph_fscache_fsid_lock);
>> +			list_for_each_entry(ent, &ceph_fscache_fsid_list, list) {
>> +				if (!memcmp(&ent->fsid, fsid, sizeof(*fsid))) {
>> +					found = ent;
>> +					break;
>> +				}
>> +			}
>> +			if (found) {
>> +				WARN_ON_ONCE(found->fscache != fsc->fscache);
>> +				list_del(&found->list);
>> +				kfree(found);
>> +			} else {
>> +				WARN_ON_ONCE(true);
>> +			}
>> +			mutex_unlock(&ceph_fscache_fsid_lock);
>> +		}
>> +		__fscache_relinquish_cookie(fsc->fscache, 0);
>> +	}
>> 	fsc->fscache = NULL;
>> }
>> 
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index 14e78dd..bb6dd7f 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -134,6 +134,7 @@ enum {
>> 	Opt_ino32,
>> 	Opt_noino32,
>> 	Opt_fscache,
>> +	Opt_tmpfscache,
>> 	Opt_nofscache,
>> 	Opt_poolperm,
>> 	Opt_nopoolperm,
>> @@ -170,6 +171,7 @@ static match_table_t fsopt_tokens = {
>> 	{Opt_ino32, "ino32"},
>> 	{Opt_noino32, "noino32"},
>> 	{Opt_fscache, "fsc"},
>> +	{Opt_tmpfscache, "tmpfsc"},
> 
> Maybe allowing the fsc option to take an optional argument would be
> cleaner?
> 
>    fsc=tmp
> 
> That would also leave open the option to allow other flavors in the
> future.
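
Something like that could reuse the existing match_token() parser. An
untested sketch (the "fsc=%s" token, enum values and helper below are just
an example, not part of this patch):

/* Sketch only: let "fsc" optionally take an argument, e.g. "fsc=tmp",
 * while plain "fsc" keeps its current meaning.  Assumes <linux/parser.h>
 * and the CEPH_MOUNT_OPT_* flags from fs/ceph/super.h. */
enum {
	Opt_fsc_plain,
	Opt_fsc_arg,
	Opt_fsc_err,
};

static match_table_t fsc_arg_tokens = {
	{Opt_fsc_arg, "fsc=%s"},
	{Opt_fsc_plain, "fsc"},
	{Opt_fsc_err, NULL}
};

static int parse_fsc_option(char *p, unsigned long *flags)
{
	substring_t args[MAX_OPT_ARGS];
	char *arg;
	int err;

	switch (match_token(p, fsc_arg_tokens, args)) {
	case Opt_fsc_plain:
		*flags |= CEPH_MOUNT_OPT_FSCACHE;
		return 0;
	case Opt_fsc_arg:
		arg = match_strdup(&args[0]);
		if (!arg)
			return -ENOMEM;
		if (strcmp(arg, "tmp") == 0) {
			*flags |= CEPH_MOUNT_OPT_FSCACHE |
				  CEPH_MOUNT_OPT_TMPFSCACHE;
			err = 0;
		} else {
			err = -EINVAL;	/* unknown fscache flavor */
		}
		kfree(arg);
		return err;
	default:
		return -EINVAL;
	}
}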
> 
> 
>> 	{Opt_nofscache, "nofsc"},
>> 	{Opt_poolperm, "poolperm"},
>> 	{Opt_nopoolperm, "nopoolperm"},
>> @@ -281,6 +283,10 @@ static int parse_fsopt_token(char *c, void *private)
>> 	case Opt_fscache:
>> 		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
>> 		break;
>> +	case Opt_tmpfscache:
>> +		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE |
>> +				CEPH_MOUNT_OPT_TMPFSCACHE;
>> +		break;
>> 	case Opt_nofscache:
>> 		fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
>> 		break;
>> @@ -475,8 +481,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
>> 		seq_puts(m, ",noasyncreaddir");
>> 	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
>> 		seq_puts(m, ",nodcache");
>> -	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
>> -		seq_puts(m, ",fsc");
>> +	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
>> +		if (fsopt->flags & CEPH_MOUNT_OPT_TMPFSCACHE)
>> +			seq_puts(m, ",tmpfsc");
>> +		else
>> +			seq_puts(m, ",fsc");
>> +	}
>> 	if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
>> 		seq_puts(m, ",nopoolperm");
>> 
>> @@ -597,18 +607,11 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
>> 	if (!fsc->wb_pagevec_pool)
>> 		goto fail_trunc_wq;
>> 
>> -	/* setup fscache */
>> -	if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
>> -	    (ceph_fscache_register_fs(fsc) != 0))
>> -		goto fail_fscache;
>> -
>> 	/* caps */
>> 	fsc->min_caps = fsopt->max_readdir;
>> 
>> 	return fsc;
>> 
>> -fail_fscache:
>> -	ceph_fscache_unregister_fs(fsc);
>> fail_trunc_wq:
>> 	destroy_workqueue(fsc->trunc_wq);
>> fail_pg_inv_wq:
>> @@ -626,8 +629,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
>> {
>> 	dout("destroy_fs_client %p\n", fsc);
>> 
>> -	ceph_fscache_unregister_fs(fsc);
>> -
>> 	destroy_workqueue(fsc->wb_wq);
>> 	destroy_workqueue(fsc->pg_inv_wq);
>> 	destroy_workqueue(fsc->trunc_wq);
>> @@ -820,6 +821,13 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
>> 		if (err < 0)
>> 			goto out;
>> 
>> +		/* setup fscache */
>> +		if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
>> +			err = ceph_fscache_register_fs(fsc);
>> +			if (err < 0)
>> +				goto out;
>> +		}
>> +
>> 		if (!fsc->mount_options->server_path) {
>> 			path = "";
>> 			dout("mount opening path \\t\n");
>> @@ -1042,6 +1050,8 @@ static void ceph_kill_sb(struct super_block *s)
>> 	fsc->client->extra_mon_dispatch = NULL;
>> 	ceph_fs_debugfs_cleanup(fsc);
>> 
>> +	ceph_fscache_unregister_fs(fsc);
>> +
>> 	ceph_mdsc_destroy(fsc);
>> 
>> 	destroy_fs_client(fsc);
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index f8a0aba..21e5562 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -36,8 +36,9 @@
>> #define CEPH_MOUNT_OPT_INO32           (1<<8) /* 32 bit inos */
>> #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
>> #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
>> -#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
>> -#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<12) /* mount waits if no mds is up */
>> +#define CEPH_MOUNT_OPT_TMPFSCACHE      (1<<11) /* use temp fscache */
>> +#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<12) /* no pool permission check */
>> +#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<13) /* mount waits if no mds is up */
>> 
>> #define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE
>> 
> 
> -- 
> Jeff Layton <jlayton@xxxxxxxxxx>
