From: Christian Brauner <brauner@xxxxxxxxxx>
Inode operations that create a new filesystem object such as ->mknod,
->create, ->mkdir() and others don't take a {g,u}id argument explicitly.
Instead the caller's fs{g,u}id is used for the {g,u}id of the new
filesystem object.
In order to ensure that the correct {g,u}id is used map the caller's
fs{g,u}id for creation requests. This doesn't require complex changes.
It suffices to pass in the relevant idmapping recorded in the request
message. If this request message was triggered from an inode operation
that creates filesystem objects it will have passed down the relevant
idmaping. If this is a request message that was triggered from an inode
operation that doens't need to take idmappings into account the initial
idmapping is passed down which is an identity mapping.
This change uses a new cephfs protocol extension
CEPHFS_FEATURE_HAS_OWNER_UIDGID
which adds two new fields (owner_{u,g}id) to the request head structure.
So, we need to ensure that MDS supports it otherwise we need to fail
any IO that comes through an idmapped mount because we can't process it
in a proper way. MDS server without such an extension will use
caller_{u,g}id
fields to set a new inode owner UID/GID which is incorrect because
caller_{u,g}id
values are unmapped. At the same time we can't map these fields with an
idmapping as it can break UID/GID-based permission checks logic on the
MDS side. This problem was described with a lot of details at [1], [2].
[1]
https://lore.kernel.org/lkml/CAEivzxfw1fHO2TFA4dx3u23ZKK6Q+EThfzuibrhA3RKM=ZOYLg@xxxxxxxxxxxxxx/
[2]
https://lore.kernel.org/all/20220104140414.155198-3-brauner@xxxxxxxxxx/
https://github.com/ceph/ceph/pull/52575
https://tracker.ceph.com/issues/62217
Cc: Xiubo Li <xiubli@xxxxxxxxxx>
Cc: Jeff Layton <jlayton@xxxxxxxxxx>
Cc: Ilya Dryomov <idryomov@xxxxxxxxx>
Cc: ceph-devel@xxxxxxxxxxxxxxx
Co-Developed-by: Alexander Mikhalitsyn
<aleksandr.mikhalitsyn@xxxxxxxxxxxxx>
Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>
Signed-off-by: Alexander Mikhalitsyn
<aleksandr.mikhalitsyn@xxxxxxxxxxxxx>
---
v7:
- reworked to use two new fields for owner UID/GID
(https://github.com/ceph/ceph/pull/52575)
v8:
- properly handled case when old MDS used with new kernel client
---
fs/ceph/mds_client.c | 46 +++++++++++++++++++++++++++++++++---
fs/ceph/mds_client.h | 5 +++-
include/linux/ceph/ceph_fs.h | 4 +++-
3 files changed, 50 insertions(+), 5 deletions(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 8829f55103da..7d3106d3b726 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2902,6 +2902,17 @@ static void encode_mclientrequest_tail(void
**p, const struct ceph_mds_request *
}
}
+static inline u16 mds_supported_head_version(struct
ceph_mds_session *session)
+{
+ if (!test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD,
&session->s_features))
+ return 1;
+
+ if (!test_bit(CEPHFS_FEATURE_HAS_OWNER_UIDGID,
&session->s_features))
+ return 2;
+
+ return CEPH_MDS_REQUEST_HEAD_VERSION;
+}
+
static struct ceph_mds_request_head_legacy *
find_legacy_request_head(void *p, u64 features)
{
@@ -2923,6 +2934,7 @@ static struct ceph_msg
*create_request_message(struct ceph_mds_session *session,
{
int mds = session->s_mds;
struct ceph_mds_client *mdsc = session->s_mdsc;
+ struct ceph_client *cl = mdsc->fsc->client;
struct ceph_msg *msg;
struct ceph_mds_request_head_legacy *lhead;
const char *path1 = NULL;
@@ -2936,7 +2948,7 @@ static struct ceph_msg
*create_request_message(struct ceph_mds_session *session,
void *p, *end;
int ret;
bool legacy = !(session->s_con.peer_features &
CEPH_FEATURE_FS_BTIME);
- bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD,
&session->s_features);
+ u16 request_head_version = mds_supported_head_version(session);
ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry,
req->r_parent, req->r_path1, req->r_ino1.ino,
@@ -2977,8 +2989,10 @@ static struct ceph_msg
*create_request_message(struct ceph_mds_session *session,
*/
if (legacy)
len = sizeof(struct ceph_mds_request_head_legacy);
- else if (old_version)
+ else if (request_head_version == 1)
len = sizeof(struct ceph_mds_request_head_old);
+ else if (request_head_version == 2)
+ len = offsetofend(struct ceph_mds_request_head, ext_num_fwd);
else
len = sizeof(struct ceph_mds_request_head);