On 5/27/22 2:39 AM, Jeff Layton wrote:
On Wed, 2022-05-25 at 18:24 +0100, Luís Henriques wrote:The MDS tries to enforce a limit on the total key/values in extended attributes. However, this limit is enforced only if doing a synchronous operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS doesn't have a chance to enforce these limits. This patch adds support for an extra feature bit that will allow the client to get the MDS max_xattr_pairs_size setting in the session message. Then, when setting an xattr, the kernel will revert to do a synchronous operation if that maximum size is exceeded. While there, fix a dout() that would trigger a printk warning: [ 98.718078] ------------[ cut here ]------------ [ 98.719012] precision 65536 too large [ 98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600 ... URL: https://tracker.ceph.com/issues/55725 Signed-off-by: Luís Henriques <lhenriques@xxxxxxx> --- fs/ceph/mds_client.c | 12 ++++++++++++ fs/ceph/mds_client.h | 15 ++++++++++++++- fs/ceph/xattr.c | 12 ++++++++---- 3 files changed, 34 insertions(+), 5 deletions(-) * Changes since v1 Added support for new feature bit to get the MDS max_xattr_pairs_size setting. Also note that this patch relies on a patch that hasn't been merged yet ("ceph: use correct index when encoding client supported features"), otherwise the new feature bit won't be correctly encoded. 
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 35597fafb48c..87a25b7cf496 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3500,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session, struct ceph_mds_session_head *h; u32 op; u64 seq, features = 0; + u64 max_xattr_pairs_size = 0; int wake = 0; bool blocklisted = false;@@ -3545,6 +3546,9 @@ static void handle_session(struct ceph_mds_session *session,} }+ if (msg_version >= 6)+ ceph_decode_64_safe(&p, end, max_xattr_pairs_size, bad); + mutex_lock(&mdsc->mutex); if (op == CEPH_SESSION_CLOSE) { ceph_get_mds_session(session); @@ -3552,6 +3556,12 @@ static void handle_session(struct ceph_mds_session *session, } /* FIXME: this ttl calculation is generous */ session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose; + + if (max_xattr_pairs_size && (op == CEPH_SESSION_OPEN)) { + dout("Changing MDS max xattrs pairs size: %llu => %llu\n", + mdsc->max_xattr_pairs_size, max_xattr_pairs_size); + mdsc->max_xattr_pairs_size = max_xattr_pairs_size; + } mutex_unlock(&mdsc->mutex);mutex_lock(&session->s_mutex);@@ -4761,6 +4771,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) strscpy(mdsc->nodename, utsname()->nodename, sizeof(mdsc->nodename));+ mdsc->max_xattr_pairs_size = MDS_MAX_XATTR_PAIRS_SIZE;+ fsc->mdsc = mdsc; return 0;diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.hindex ca32f26f5eed..3db777df6d88 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -29,8 +29,11 @@ enum ceph_feature_type { CEPHFS_FEATURE_MULTI_RECONNECT, CEPHFS_FEATURE_DELEG_INO, CEPHFS_FEATURE_METRIC_COLLECT, + CEPHFS_FEATURE_ALTERNATE_NAME, + CEPHFS_FEATURE_NOTIFY_SESSION_STATE, + CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,Having to make this feature-bit-dependent kind of sucks. I wonder if it could be avoided... A question: How do the MDS's discover this setting? Do they get it from the mons? 
If so, I wonder if there is a way for the clients to query the mon for this instead of having to extend the MDS protocol?
That sounds like how "max_file_size" is handled: it is recorded in the 'mdsmap'.
Currently, however, "max_xattr_pairs_size" is a per-daemon MDS option, so each MDS could be configured with a different value.
-- Xiubo
- CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,+ CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE, };/*@@ -45,9 +48,16 @@ enum ceph_feature_type { CEPHFS_FEATURE_MULTI_RECONNECT, \ CEPHFS_FEATURE_DELEG_INO, \ CEPHFS_FEATURE_METRIC_COLLECT, \ + CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE, \ } #define CEPHFS_FEATURES_CLIENT_REQUIRED {}+/*+ * Maximum size of xattrs the MDS can handle per inode by default. This + * includes the attribute name and 4+4 bytes for the key/value sizes. + */ +#define MDS_MAX_XATTR_PAIRS_SIZE (1<<16) /* 64K */ + /* * Some lock dependencies: * @@ -404,6 +414,9 @@ struct ceph_mds_client { struct rb_root quotarealms_inodes; struct mutex quotarealms_inodes_mutex;+ /* maximum aggregate size of extended attributes on a file */+ u64 max_xattr_pairs_size; + /* * snap_rwsem will cover cap linkage into snaprealms, and * realm snap contexts. (later, we can do per-realm snap diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 8c2dc2c762a4..175a8c1449aa 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, flags |= CEPH_XATTR_REMOVE; }- dout("setxattr value=%.*s\n", (int)size, value);+ dout("setxattr value size: %ld\n", size);/* do request */req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); @@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name, spin_lock(&ci->i_ceph_lock); retry: issued = __ceph_caps_issued(ci, NULL); - if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) + required_blob_size = __get_required_blob_size(ci, name_len, val_len); + if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) || + (required_blob_size >= mdsc->max_xattr_pairs_size)) { + dout("%s do sync setxattr: version: %llu size: %d max: %llu\n", + __func__, ci->i_xattrs.version, required_blob_size, + mdsc->max_xattr_pairs_size); goto do_sync; + }if (!lock_snap_rwsem && !ci->i_head_snapc) {lock_snap_rwsem = true; @@ -1201,8 
+1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name, ceph_cap_string(issued)); __build_xattrs(inode);- required_blob_size = __get_required_blob_size(ci, name_len, val_len);- if (!ci->i_xattrs.prealloc_blob || required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { struct ceph_buffer *blob;