On 5/26/22 1:24 AM, Luís Henriques wrote:
The MDS tries to enforce a limit on the total key/values in extended
attributes. However, this limit is enforced only if doing a synchronous
operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
doesn't have a chance to enforce these limits.
This patch adds support for an extra feature bit that will allow the
client to get the MDS max_xattr_pairs_size setting in the session message.
Then, when setting an xattr, the kernel will revert to do a synchronous
operation if that maximum size is exceeded.
While there, fix a dout() that would trigger a printk warning:
[ 98.718078] ------------[ cut here ]------------
[ 98.719012] precision 65536 too large
[ 98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
...
URL: https://tracker.ceph.com/issues/55725
Signed-off-by: Luís Henriques <lhenriques@xxxxxxx>
---
fs/ceph/mds_client.c | 12 ++++++++++++
fs/ceph/mds_client.h | 15 ++++++++++++++-
fs/ceph/xattr.c | 12 ++++++++----
3 files changed, 34 insertions(+), 5 deletions(-)
* Changes since v1
Added support for new feature bit to get the MDS max_xattr_pairs_size
setting.
Also note that this patch relies on a patch that hasn't been merged yet
("ceph: use correct index when encoding client supported features"),
otherwise the new feature bit won't be correctly encoded.
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 35597fafb48c..87a25b7cf496 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3500,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
struct ceph_mds_session_head *h;
u32 op;
u64 seq, features = 0;
+ u64 max_xattr_pairs_size = 0;
int wake = 0;
bool blocklisted = false;
@@ -3545,6 +3546,9 @@ static void handle_session(struct ceph_mds_session *session,
}
}
+ if (msg_version >= 6)
+ ceph_decode_64_safe(&p, end, max_xattr_pairs_size, bad);
+
mutex_lock(&mdsc->mutex);
if (op == CEPH_SESSION_CLOSE) {
ceph_get_mds_session(session);
@@ -3552,6 +3556,12 @@ static void handle_session(struct ceph_mds_session *session,
}
/* FIXME: this ttl calculation is generous */
session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
+
+ if (max_xattr_pairs_size && (op == CEPH_SESSION_OPEN)) {
+ dout("Changing MDS max xattrs pairs size: %llu => %llu\n",
+ mdsc->max_xattr_pairs_size, max_xattr_pairs_size);
+ mdsc->max_xattr_pairs_size = max_xattr_pairs_size;
+ }
Is there any case that in the ceph cluster some MDSes are still using
the default size, while some have changed the size ?
In that case IMO we should make sure the mdsc->max_xattr_pairs_size is
always the smallest size.
mutex_unlock(&mdsc->mutex);
mutex_lock(&session->s_mutex);
@@ -4761,6 +4771,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
strscpy(mdsc->nodename, utsname()->nodename,
sizeof(mdsc->nodename));
+ mdsc->max_xattr_pairs_size = MDS_MAX_XATTR_PAIRS_SIZE;
+
fsc->mdsc = mdsc;
return 0;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ca32f26f5eed..3db777df6d88 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -29,8 +29,11 @@ enum ceph_feature_type {
CEPHFS_FEATURE_MULTI_RECONNECT,
CEPHFS_FEATURE_DELEG_INO,
CEPHFS_FEATURE_METRIC_COLLECT,
+ CEPHFS_FEATURE_ALTERNATE_NAME,
+ CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
+ CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
- CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
+ CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
};
/*
@@ -45,9 +48,16 @@ enum ceph_feature_type {
CEPHFS_FEATURE_MULTI_RECONNECT, \
CEPHFS_FEATURE_DELEG_INO, \
CEPHFS_FEATURE_METRIC_COLLECT, \
+ CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE, \
}
#define CEPHFS_FEATURES_CLIENT_REQUIRED {}
+/*
+ * Maximum size of xattrs the MDS can handle per inode by default. This
+ * includes the attribute name and 4+4 bytes for the key/value sizes.
+ */
+#define MDS_MAX_XATTR_PAIRS_SIZE (1<<16) /* 64K */
+
/*
* Some lock dependencies:
*
@@ -404,6 +414,9 @@ struct ceph_mds_client {
struct rb_root quotarealms_inodes;
struct mutex quotarealms_inodes_mutex;
+ /* maximum aggregate size of extended attributes on a file */
+ u64 max_xattr_pairs_size;
+
/*
* snap_rwsem will cover cap linkage into snaprealms, and
* realm snap contexts. (later, we can do per-realm snap
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 8c2dc2c762a4..175a8c1449aa 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
flags |= CEPH_XATTR_REMOVE;
}
- dout("setxattr value=%.*s\n", (int)size, value);
+ dout("setxattr value size: %ld\n", size);
/* do request */
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
@@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
spin_lock(&ci->i_ceph_lock);
retry:
issued = __ceph_caps_issued(ci, NULL);
- if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
+ required_blob_size = __get_required_blob_size(ci, name_len, val_len);
+ if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
+ (required_blob_size >= mdsc->max_xattr_pairs_size)) {
required_blob_size > mdsc->max_xattr_pairs_size ?
Thanks,
-- Xiubo
+ dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
+ __func__, ci->i_xattrs.version, required_blob_size,
+ mdsc->max_xattr_pairs_size);
goto do_sync;
+ }
if (!lock_snap_rwsem && !ci->i_head_snapc) {
lock_snap_rwsem = true;
@@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name,
ceph_cap_string(issued));
__build_xattrs(inode);
- required_blob_size = __get_required_blob_size(ci, name_len, val_len);
-
if (!ci->i_xattrs.prealloc_blob ||
required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
struct ceph_buffer *blob;