On Thu, 2022-06-02 at 15:36 +0100, Luís Henriques wrote: > The MDS tries to enforce a limit on the total key/values in extended > attributes. However, this limit is enforced only if doing a synchronous > operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS > doesn't have a chance to enforce these limits. > > This patch adds support for decoding the xattrs maximum size setting that is > distributed in the mdsmap. Then, when setting an xattr, the kernel client > will revert to doing a synchronous operation if that maximum size is exceeded. > > While there, fix a dout() that would trigger a printk warning: > > [ 98.718078] ------------[ cut here ]------------ > [ 98.719012] precision 65536 too large > [ 98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600 > ... > > URL: https://tracker.ceph.com/issues/55725 > Signed-off-by: Luís Henriques <lhenriques@xxxxxxx> > --- > fs/ceph/mdsmap.c | 28 ++++++++++++++++++++++++---- > fs/ceph/xattr.c | 12 ++++++++---- > include/linux/ceph/mdsmap.h | 1 + > 3 files changed, 33 insertions(+), 8 deletions(-) > > * Changes since v3 > > As per Xiubo's review: > - Always force a (sync) SETXATTR Op when connecting to an old cluster > - use '>' instead of '>=' > Also fixed the warning detected by 0day. > > * Changes since v2 > > Well, a lot has changed since v2! Now the xattr max value setting is > obtained through the mdsmap, which needs to be decoded, and the feature > that was used in the previous revision was dropped. The drawback is that > the MDS isn't able to know in advance if a client is aware of this xattr > max value. > > * Changes since v1 > > Added support for new feature bit to get the MDS max_xattr_pairs_size > setting. > > Also note that this patch relies on a patch that hasn't been merged yet > ("ceph: use correct index when encoding client supported features"), > otherwise the new feature bit won't be correctly encoded. 
> > diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c > index 30387733765d..c6ce83a48175 100644 > --- a/fs/ceph/mdsmap.c > +++ b/fs/ceph/mdsmap.c > @@ -13,6 +13,12 @@ > > #include "super.h" > > +/* > + * Maximum size of xattrs the MDS can handle per inode by default. This > + * includes the attribute name and 4+4 bytes for the key/value sizes. > + */ > +#define MDS_MAX_XATTR_SIZE (1<<16) /* 64K */ > + > #define CEPH_MDS_IS_READY(i, ignore_laggy) \ > (m->m_info[i].state > 0 && ignore_laggy ? true : !m->m_info[i].laggy) > > @@ -352,12 +358,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) > __decode_and_drop_type(p, end, u8, bad_ext); > } > if (mdsmap_ev >= 8) { > - u32 name_len; > /* enabled */ > ceph_decode_8_safe(p, end, m->m_enabled, bad_ext); > - ceph_decode_32_safe(p, end, name_len, bad_ext); > - ceph_decode_need(p, end, name_len, bad_ext); > - *p += name_len; > + /* fs_name */ > + ceph_decode_skip_string(p, end, bad_ext); > } > /* damaged */ > if (mdsmap_ev >= 9) { > @@ -370,6 +374,22 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) > } else { > m->m_damaged = false; > } > + if (mdsmap_ev >= 17) { > + /* balancer */ > + ceph_decode_skip_string(p, end, bad_ext); > + /* standby_count_wanted */ > + ceph_decode_skip_32(p, end, bad_ext); > + /* old_max_mds */ > + ceph_decode_skip_32(p, end, bad_ext); > + /* min_compat_client */ > + ceph_decode_skip_8(p, end, bad_ext); > + /* required_client_features */ > + ceph_decode_skip_set(p, end, 64, bad_ext); > + ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext); > + } else { > + /* This forces the usage of the (sync) SETXATTR Op */ > + m->m_max_xattr_size = 0; > + } > bad_ext: > dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", > !!m->m_enabled, !!m->m_damaged, m->m_num_laggy); > diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c > index 8c2dc2c762a4..1be415e9220b 100644 > --- a/fs/ceph/xattr.c > +++ b/fs/ceph/xattr.c > @@ -1086,7 +1086,7 @@ static 
int ceph_sync_setxattr(struct inode *inode, const char *name, > flags |= CEPH_XATTR_REMOVE; > } > > - dout("setxattr value=%.*s\n", (int)size, value); > + dout("setxattr value size: %lu\n", size); > > /* do request */ > req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); > @@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name, > spin_lock(&ci->i_ceph_lock); > retry: > issued = __ceph_caps_issued(ci, NULL); > - if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) > + required_blob_size = __get_required_blob_size(ci, name_len, val_len); > + if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) || > + (required_blob_size > mdsc->mdsmap->m_max_xattr_size)) { > + dout("%s do sync setxattr: version: %llu size: %d max: %llu\n", > + __func__, ci->i_xattrs.version, required_blob_size, > + mdsc->mdsmap->m_max_xattr_size); > goto do_sync; > + } > > if (!lock_snap_rwsem && !ci->i_head_snapc) { > lock_snap_rwsem = true; > @@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name, > ceph_cap_string(issued)); > __build_xattrs(inode); > > - required_blob_size = __get_required_blob_size(ci, name_len, val_len); > - > if (!ci->i_xattrs.prealloc_blob || > required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { > struct ceph_buffer *blob; > diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h > index 523fd0452856..4c3e0648dc27 100644 > --- a/include/linux/ceph/mdsmap.h > +++ b/include/linux/ceph/mdsmap.h > @@ -25,6 +25,7 @@ struct ceph_mdsmap { > u32 m_session_timeout; /* seconds */ > u32 m_session_autoclose; /* seconds */ > u64 m_max_file_size; > + u64 m_max_xattr_size; /* maximum size for xattrs blob */ > u32 m_max_mds; /* expected up:active mds number */ > u32 m_num_active_mds; /* actual up:active mds number */ > u32 possible_max_rank; /* possible max rank index */ Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx>