+ /* check if the auth mds supports the getvxattr feature */
+ if (session &&
+ test_bit(CEPHFS_FEATURE_GETVXATTR, &session->s_features)) {
+ return session->s_mds;
+ } else {
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ if (mdsc->sessions[i] &&
+ test_bit(CEPHFS_FEATURE_GETVXATTR,
+ &mdsc->sessions[i]->s_features))
+ mds_with_getvxattr_support++;
+ }
+ if (!mds_with_getvxattr_support)
+ goto out;
+
+ /* choose a random mds with getvxattr support */
+ rnd = prandom_u32() % mds_with_getvxattr_support;
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ if (mdsc->sessions[i] &&
+ test_bit(CEPHFS_FEATURE_GETVXATTR,
+ &mdsc->sessions[i]->s_features)) {
+ if (rnd)
+ rnd--;
+ if (!rnd) {
+ idx = mdsc->sessions[i]->s_mds;
+ break;
+ }
+ }
+ }
+ if (idx != MDS_RANK_UNAVAILABLE && random)
+ *random = true;
+ }
+ }
+out:
+ return idx;
+}
+
+int ceph_do_getvxattr(struct inode *inode, const char *name, void *value,
+ size_t size)
+{
+ struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct ceph_mds_request *req;
+ int mode = USE_VETTED_MDS;
+ int err;
+ char *xattr_value;
+ size_t xattr_value_len;
+
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETVXATTR, mode);
+ if (IS_ERR(req)) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ req->r_path2 = kstrdup(name, GFP_NOFS);
+ if (!req->r_path2) {
+ err = -ENOMEM;
+ goto put;
+ }
+
+ ihold(inode);
+ req->r_inode = inode;
+ req->r_vet_session = ceph_vet_session;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ if (err < 0)
+ goto put;
+
+ xattr_value = req->r_reply_info.xattr_info.xattr_value;
+ xattr_value_len = req->r_reply_info.xattr_info.xattr_value_len;
+
+ dout("do_getvxattr xattr_value_len:%zu, size:%zu\n", xattr_value_len, size);
+
+ err = (int)xattr_value_len;
+ if (size == 0)
+ goto put;
+
+ if (xattr_value_len > size) {
+ err = -ERANGE;
+ goto put;
+ }
+
+ memcpy(value, xattr_value, xattr_value_len);
+put:
+ ceph_mdsc_put_request(req);
+out:
+ dout("do_getvxattr result=%d\n", err);
+ return err;
+}
+
/*
* Check inode permissions. We verify we have a valid value for
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index c30eefc0ac19..87df4ef6880b 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -555,6 +555,29 @@ static int parse_reply_info_create(void **p, void *end,
return -EIO;
}
+static int parse_reply_info_getvxattr(void **p, void *end,
+ struct ceph_mds_reply_info_parsed *info,
+ u64 features)
+{
+ u8 struct_v, struct_compat;
+ u32 struct_len;
+ u32 value_len;
+
+ ceph_decode_8_safe(p, end, struct_v, bad);
+ ceph_decode_8_safe(p, end, struct_compat, bad);
+ ceph_decode_32_safe(p, end, struct_len, bad);
+ ceph_decode_32_safe(p, end, value_len, bad);
+
+ if (value_len == end - *p) {
+ info->xattr_info.xattr_value = *p;
+ info->xattr_info.xattr_value_len = value_len;
+ *p = end;
+ return value_len;
+ }
+bad:
+ return -EIO;
+}
+
/*
* parse extra results
*/
@@ -570,6 +593,8 @@ static int parse_reply_info_extra(void **p, void *end,
return parse_reply_info_readdir(p, end, info, features);
else if (op == CEPH_MDS_OP_CREATE)
return parse_reply_info_create(p, end, info, features, s);
+ else if (op == CEPH_MDS_OP_GETVXATTR)
+ return parse_reply_info_getvxattr(p, end, info, features);
else
return -EIO;
}
@@ -1041,6 +1066,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
if (mode == USE_RANDOM_MDS)
goto random;
+ if (mode == USE_VETTED_MDS)
+ return req->r_vet_session(mdsc, req, random);
+
inode = NULL;
if (req->r_inode) {
if (ceph_snap(req->r_inode) != CEPH_SNAPDIR) {
@@ -2770,6 +2798,10 @@ static void __do_request(struct ceph_mds_client *mdsc,
put_request_session(req);
mds = __choose_mds(mdsc, req, &random);
+ if (mds == MDS_RANK_UNAVAILABLE) {
+ err = -EOPNOTSUPP;
+ goto finish;
+ }
if (mds < 0 ||
ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags)) {
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 97c7f7bfa55f..94230fae9e71 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -29,8 +29,10 @@ enum ceph_feature_type {
CEPHFS_FEATURE_MULTI_RECONNECT,
CEPHFS_FEATURE_DELEG_INO,
CEPHFS_FEATURE_METRIC_COLLECT,
+ CEPHFS_FEATURE_ALTERNATE_NAME,
+ CEPHFS_FEATURE_GETVXATTR,
- CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
+ CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_GETVXATTR,
};
/*
@@ -45,6 +47,8 @@ enum ceph_feature_type {
CEPHFS_FEATURE_MULTI_RECONNECT, \
CEPHFS_FEATURE_DELEG_INO, \
CEPHFS_FEATURE_METRIC_COLLECT, \
+ CEPHFS_FEATURE_ALTERNATE_NAME, \
+ CEPHFS_FEATURE_GETVXATTR, \
\
CEPHFS_FEATURE_MAX, \
}
@@ -100,6 +104,11 @@ struct ceph_mds_reply_dir_entry {
loff_t offset;
};
+struct ceph_mds_reply_xattr {
+ char *xattr_value;
+ size_t xattr_value_len;
+};
+
/*
* parsed info about an mds reply, including information about
* either: 1) the target inode and/or its parent directory and dentry,
@@ -115,6 +124,7 @@ struct ceph_mds_reply_info_parsed {
char *dname;
u32 dname_len;
struct ceph_mds_reply_lease *dlease;
+ struct ceph_mds_reply_xattr xattr_info;
/* extra */
union {
@@ -222,6 +232,14 @@ enum {
USE_ANY_MDS,
USE_RANDOM_MDS,
USE_AUTH_MDS, /* prefer authoritative mds for this metadata item */
+ USE_VETTED_MDS /* eg. use an mds supporting particular feature */
+};
+
+/*
+ * special mds rank
+ */
+enum {
+ MDS_RANK_UNAVAILABLE = (int)0x80000000 /* INT_MIN */
};
struct ceph_mds_request;
@@ -337,6 +355,10 @@ struct ceph_mds_request {
int r_readdir_cache_idx;
struct ceph_cap_reservation r_caps_reservation;
+ /* return mds rank to send request to */
+ int (*r_vet_session)(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req,
+ bool *random);
};
struct ceph_pool_perm {
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index 573bb9556fb5..e36e8948e728 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -60,6 +60,7 @@ const char *ceph_mds_op_name(int op)
case CEPH_MDS_OP_LOOKUPINO: return "lookupino";
case CEPH_MDS_OP_LOOKUPNAME: return "lookupname";
case CEPH_MDS_OP_GETATTR: return "getattr";
+ case CEPH_MDS_OP_GETVXATTR: return "getvxattr";
case CEPH_MDS_OP_SETXATTR: return "setxattr";
case CEPH_MDS_OP_SETATTR: return "setattr";
case CEPH_MDS_OP_RMXATTR: return "rmxattr";
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ac331aa07cfa..a627fa69668e 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1043,6 +1043,7 @@ static inline bool ceph_inode_is_shutdown(struct inode *inode)
/* xattr.c */
int __ceph_setxattr(struct inode *, const char *, const void *, size_t, int);
+int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, size_t size);
ssize_t __ceph_getxattr(struct inode *, const char *, void *, size_t);
extern ssize_t ceph_listxattr(struct dentry *, char *, size_t);
extern struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index fcf7dfdecf96..557749882aa2 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -923,9 +923,12 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_inode_xattr *xattr;
- struct ceph_vxattr *vxattr = NULL;
+ struct ceph_vxattr *vxattr;
int req_mask;
- ssize_t err;
+ ssize_t err = -ENODATA;
+
+ if (strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
+ goto out_nounlock;
/* let's see if a virtual xattr was requested */
vxattr = ceph_match_vxattr(inode, name);
@@ -945,6 +948,13 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
err = -ERANGE;
}
return err;
+ } else {
+ err = -ENODATA;
+ spin_unlock(&ci->i_ceph_lock);
+ err = ceph_do_getvxattr(inode, name, value, size);
+ spin_lock(&ci->i_ceph_lock);
+
+ goto out;
}
req_mask = __get_request_mask(inode);
@@ -997,6 +1007,7 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
ci->i_ceph_flags |= CEPH_I_SEC_INITED;
out:
spin_unlock(&ci->i_ceph_lock);
+out_nounlock:
return err;
}
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 7ad6c3d0db7d..66db21ac5f0c 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -328,6 +328,7 @@ enum {
CEPH_MDS_OP_LOOKUPPARENT = 0x00103,
CEPH_MDS_OP_LOOKUPINO = 0x00104,
CEPH_MDS_OP_LOOKUPNAME = 0x00105,
+ CEPH_MDS_OP_GETVXATTR = 0x00106,
CEPH_MDS_OP_SETXATTR = 0x01105,
CEPH_MDS_OP_RMXATTR = 0x01106,