Re: [RFC PATCH v2 05/10] ceph: decode interval_sets for delegated inos

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2020-01-16 at 22:32 +0800, Yan, Zheng wrote:
> On 1/16/20 4:59 AM, Jeff Layton wrote:
> > Starting in Octopus, the MDS will hand out caps that allow the client
> > to do asynchronous file creates under certain conditions. As part of
> > that, the MDS will delegate ranges of inode numbers to the client.
> > 
> > Add the infrastructure to decode these ranges, and stuff them into an
> > xarray for later consumption by the async creation code.
> > 
> > Because the xarray code currently only handles unsigned long indexes,
> > and those are 32 bits wide on 32-bit arches, we only enable the decoding
> > when running on a 64-bit arch.
> > 
> > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
> > ---
> >   fs/ceph/mds_client.c | 109 +++++++++++++++++++++++++++++++++++++++----
> >   fs/ceph/mds_client.h |   7 ++-
> >   2 files changed, 106 insertions(+), 10 deletions(-)
> > 
> > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > index 8263f75badfc..19bd71eb5733 100644
> > --- a/fs/ceph/mds_client.c
> > +++ b/fs/ceph/mds_client.c
> > @@ -415,21 +415,110 @@ static int parse_reply_info_filelock(void **p, void *end,
> >   	return -EIO;
> >   }
> >   
> > +
> > +#if BITS_PER_LONG == 64
> > +
> > +#define DELEGATED_INO_AVAILABLE		xa_mk_value(1)
> > +
> > +static int ceph_parse_deleg_inos(void **p, void *end,
> > +				 struct ceph_mds_session *s)
> > +{
> > +	u32 sets;
> > +
> > +	ceph_decode_32_safe(p, end, sets, bad);
> > +	dout("got %u sets of delegated inodes\n", sets);
> > +	while (sets--) {
> > +		u64 start, len, ino;
> > +
> > +		ceph_decode_64_safe(p, end, start, bad);
> > +		ceph_decode_64_safe(p, end, len, bad);
> > +		while (len--) {
> > +			int err = xa_insert(&s->s_delegated_inos, ino = start++,
> > +					    DELEGATED_INO_AVAILABLE,
> > +					    GFP_KERNEL);
> > +			if (!err) {
> > +				dout("added delegated inode 0x%llx\n",
> > +				     start - 1);
> > +			} else if (err == -EBUSY) {
> > +				pr_warn("ceph: MDS delegated inode 0x%llx more than once.\n",
> > +					start - 1);
> > +			} else {
> > +				return err;
> > +			}
> > +		}
> > +	}
> > +	return 0;
> > +bad:
> > +	return -EIO;
> > +}
> > +
> > +unsigned long ceph_get_deleg_ino(struct ceph_mds_session *s)
> > +{
> > +	unsigned long ino;
> > +	void *val;
> > +
> > +	xa_for_each(&s->s_delegated_inos, ino, val) {
> > +		val = xa_erase(&s->s_delegated_inos, ino);
> > +		if (val == DELEGATED_INO_AVAILABLE)
> > +			return ino;
> > +	}
> > +	return 0;
> 
> Do we need to protect s_delegated_inos? ceph_get_deleg_ino() and
> ceph_parse_deleg_inos() can be executed at the same time. Multiple
> threads may call ceph_parse_deleg_inos() at the same time.
> 

No. Xarrays have their own internal locking, and we're using the "simple"
API here (which handles the locking implicitly).

> > +}
> > +#else /* BITS_PER_LONG == 64 */
> > +/*
> > + * FIXME: xarrays can't handle 64-bit indexes on a 32-bit arch. For now, just
> > + * ignore delegated_inos on 32-bit arches. Maybe eventually add xarrays for
> > + * the top and bottom words?
> > + */
> > +static int ceph_parse_deleg_inos(void **p, void *end,
> > +				 struct ceph_mds_session *s)
> > +{
> > +	u32 sets;
> > +
> > +	ceph_decode_32_safe(p, end, sets, bad);
> > +	if (sets)
> > +		ceph_decode_skip_n(p, end, sets * 2 * sizeof(__le64), bad);
> > +	return 0;
> > +bad:
> > +	return -EIO;
> > +}
> > +
> > +unsigned long ceph_get_deleg_ino(struct ceph_mds_session *s)
> > +{
> > +	return 0;
> > +}
> > +#endif /* BITS_PER_LONG == 64 */
> > +
> >   /*
> >    * parse create results
> >    */
> >   static int parse_reply_info_create(void **p, void *end,
> >   				  struct ceph_mds_reply_info_parsed *info,
> > -				  u64 features)
> > +				  u64 features, struct ceph_mds_session *s)
> >   {
> > +	int ret;
> > +
> >   	if (features == (u64)-1 ||
> >   	    (features & CEPH_FEATURE_REPLY_CREATE_INODE)) {
> > -		/* Malformed reply? */
> >   		if (*p == end) {
> > +			/* Malformed reply? */
> >   			info->has_create_ino = false;
> > -		} else {
> > +		} else if (test_bit(CEPHFS_FEATURE_DELEG_INO, &s->s_features)) {
> > +			u8 struct_v, struct_compat;
> > +			u32 len;
> > +
> >   			info->has_create_ino = true;
> > +			ceph_decode_8_safe(p, end, struct_v, bad);
> > +			ceph_decode_8_safe(p, end, struct_compat, bad);
> > +			ceph_decode_32_safe(p, end, len, bad);
> > +			ceph_decode_64_safe(p, end, info->ino, bad);
> > +			ret = ceph_parse_deleg_inos(p, end, s);
> > +			if (ret)
> > +				return ret;
> > +		} else {
> > +			/* legacy */
> >   			ceph_decode_64_safe(p, end, info->ino, bad);
> > +			info->has_create_ino = true;
> >   		}
> >   	} else {
> >   		if (*p != end)
> > @@ -448,7 +537,7 @@ static int parse_reply_info_create(void **p, void *end,
> >    */
> >   static int parse_reply_info_extra(void **p, void *end,
> >   				  struct ceph_mds_reply_info_parsed *info,
> > -				  u64 features)
> > +				  u64 features, struct ceph_mds_session *s)
> >   {
> >   	u32 op = le32_to_cpu(info->head->op);
> >   
> > @@ -457,7 +546,7 @@ static int parse_reply_info_extra(void **p, void *end,
> >   	else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP)
> >   		return parse_reply_info_readdir(p, end, info, features);
> >   	else if (op == CEPH_MDS_OP_CREATE)
> > -		return parse_reply_info_create(p, end, info, features);
> > +		return parse_reply_info_create(p, end, info, features, s);
> >   	else
> >   		return -EIO;
> >   }
> > @@ -465,7 +554,7 @@ static int parse_reply_info_extra(void **p, void *end,
> >   /*
> >    * parse entire mds reply
> >    */
> > -static int parse_reply_info(struct ceph_msg *msg,
> > +static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg,
> >   			    struct ceph_mds_reply_info_parsed *info,
> >   			    u64 features)
> >   {
> > @@ -490,7 +579,7 @@ static int parse_reply_info(struct ceph_msg *msg,
> >   	ceph_decode_32_safe(&p, end, len, bad);
> >   	if (len > 0) {
> >   		ceph_decode_need(&p, end, len, bad);
> > -		err = parse_reply_info_extra(&p, p+len, info, features);
> > +		err = parse_reply_info_extra(&p, p+len, info, features, s);
> >   		if (err < 0)
> >   			goto out_bad;
> >   	}
> > @@ -558,6 +647,7 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
> >   	if (refcount_dec_and_test(&s->s_ref)) {
> >   		if (s->s_auth.authorizer)
> >   			ceph_auth_destroy_authorizer(s->s_auth.authorizer);
> > +		xa_destroy(&s->s_delegated_inos);
> >   		kfree(s);
> >   	}
> >   }
> > @@ -645,6 +735,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
> >   	refcount_set(&s->s_ref, 1);
> >   	INIT_LIST_HEAD(&s->s_waiting);
> >   	INIT_LIST_HEAD(&s->s_unsafe);
> > +	xa_init(&s->s_delegated_inos);
> >   	s->s_num_cap_releases = 0;
> >   	s->s_cap_reconnect = 0;
> >   	s->s_cap_iterator = NULL;
> > @@ -2947,9 +3038,9 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
> >   	dout("handle_reply tid %lld result %d\n", tid, result);
> >   	rinfo = &req->r_reply_info;
> >   	if (test_bit(CEPHFS_FEATURE_REPLY_ENCODING, &session->s_features))
> > -		err = parse_reply_info(msg, rinfo, (u64)-1);
> > +		err = parse_reply_info(session, msg, rinfo, (u64)-1);
> >   	else
> > -		err = parse_reply_info(msg, rinfo, session->s_con.peer_features);
> > +		err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features);
> >   	mutex_unlock(&mdsc->mutex);
> >   
> >   	mutex_lock(&session->s_mutex);
> > diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> > index 27a7446e10d3..30fb60ba2580 100644
> > --- a/fs/ceph/mds_client.h
> > +++ b/fs/ceph/mds_client.h
> > @@ -23,8 +23,9 @@ enum ceph_feature_type {
> >   	CEPHFS_FEATURE_RECLAIM_CLIENT,
> >   	CEPHFS_FEATURE_LAZY_CAP_WANTED,
> >   	CEPHFS_FEATURE_MULTI_RECONNECT,
> > +	CEPHFS_FEATURE_DELEG_INO,
> >   
> > -	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MULTI_RECONNECT,
> > +	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_DELEG_INO,
> >   };
> >   
> >   /*
> > @@ -37,6 +38,7 @@ enum ceph_feature_type {
> >   	CEPHFS_FEATURE_REPLY_ENCODING,		\
> >   	CEPHFS_FEATURE_LAZY_CAP_WANTED,		\
> >   	CEPHFS_FEATURE_MULTI_RECONNECT,		\
> > +	CEPHFS_FEATURE_DELEG_INO,		\
> >   						\
> >   	CEPHFS_FEATURE_MAX,			\
> >   }
> > @@ -201,6 +203,7 @@ struct ceph_mds_session {
> >   
> >   	struct list_head  s_waiting;  /* waiting requests */
> >   	struct list_head  s_unsafe;   /* unsafe requests */
> > +	struct xarray	  s_delegated_inos;
> >   };
> >   
> >   /*
> > @@ -537,4 +540,6 @@ extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
> >   extern int ceph_trim_caps(struct ceph_mds_client *mdsc,
> >   			  struct ceph_mds_session *session,
> >   			  int max_caps);
> > +
> > +extern unsigned long ceph_get_deleg_ino(struct ceph_mds_session *session);
> >   #endif
> > 

-- 
Jeff Layton <jlayton@xxxxxxxxxx>




[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux