Re: [RFC PATCH v2] ceph: prevent a client from exceeding the MDS maximum xattr size

On 5/26/22 1:24 AM, Luís Henriques wrote:
The MDS tries to enforce a limit on the total size of the key/value
pairs in an inode's extended attributes.  However, this limit is only
enforced for synchronous operations (MDS_OP_SETXATTR) -- if the client
is buffering the xattrs, the MDS never gets a chance to enforce it.

This patch adds support for an extra feature bit that allows the client
to get the MDS max_xattr_pairs_size setting from the session message.
Then, when setting an xattr, the kernel falls back to a synchronous
operation if that maximum size would be exceeded.

While there, fix a dout() that would trigger a printk warning:

[   98.718078] ------------[ cut here ]------------
[   98.719012] precision 65536 too large
[   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
...

URL: https://tracker.ceph.com/issues/55725
Signed-off-by: Luís Henriques <lhenriques@xxxxxxx>
---
  fs/ceph/mds_client.c | 12 ++++++++++++
  fs/ceph/mds_client.h | 15 ++++++++++++++-
  fs/ceph/xattr.c      | 12 ++++++++----
  3 files changed, 34 insertions(+), 5 deletions(-)

* Changes since v1

Added support for a new feature bit to get the MDS max_xattr_pairs_size
setting.

Also note that this patch relies on a patch that hasn't been merged yet
("ceph: use correct index when encoding client supported features"),
otherwise the new feature bit won't be correctly encoded.
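
As an aside (not part of the patch), one way to end up in the buffered
case this is about -- assuming the client holds CEPH_CAP_XATTR_EXCL and
buffers the updates -- is to grow an inode's xattr blob past the default
64K limit with several setxattr() calls that each stay under the
kernel's per-xattr size limit.  The mount point, names and sizes in this
sketch are made up, just for illustration:

	/* illustrative reproducer sketch only -- not from the patch */
	#include <stdio.h>
	#include <string.h>
	#include <sys/xattr.h>

	int main(void)
	{
		static char val[16000];	/* each value well under 64K */
		char name[32];
		int i;

		memset(val, 'a', sizeof(val));
		/* 5 x 16000 bytes pushes the aggregate blob past 64K */
		for (i = 0; i < 5; i++) {
			snprintf(name, sizeof(name), "user.big%d", i);
			if (setxattr("/mnt/cephfs/file", name, val,
				     sizeof(val), 0)) {
				perror(name);
				return 1;
			}
		}
		return 0;
	}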

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 35597fafb48c..87a25b7cf496 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3500,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
  	struct ceph_mds_session_head *h;
  	u32 op;
  	u64 seq, features = 0;
+	u64 max_xattr_pairs_size = 0;
  	int wake = 0;
  	bool blocklisted = false;
@@ -3545,6 +3546,9 @@ static void handle_session(struct ceph_mds_session *session,
  		}
  	}
+	if (msg_version >= 6)
+		ceph_decode_64_safe(&p, end, max_xattr_pairs_size, bad);
+
  	mutex_lock(&mdsc->mutex);
  	if (op == CEPH_SESSION_CLOSE) {
  		ceph_get_mds_session(session);
@@ -3552,6 +3556,12 @@ static void handle_session(struct ceph_mds_session *session,
  	}
  	/* FIXME: this ttl calculation is generous */
  	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
+
+	if (max_xattr_pairs_size && (op == CEPH_SESSION_OPEN)) {
+		dout("Changing MDS max xattrs pairs size: %llu => %llu\n",
+		     mdsc->max_xattr_pairs_size, max_xattr_pairs_size);
+		mdsc->max_xattr_pairs_size = max_xattr_pairs_size;
+	}

Is there any case where some MDSes in the ceph cluster are still using the default size while others have changed it?

In that case, IMO we should make sure mdsc->max_xattr_pairs_size always ends up being the smallest of the advertised sizes.
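
Something like this, maybe (untested sketch, reusing the names from the
hunk above; it simply never lets a later, larger advertisement override
a smaller value already seen):

	if (max_xattr_pairs_size && (op == CEPH_SESSION_OPEN) &&
	    max_xattr_pairs_size < mdsc->max_xattr_pairs_size) {
		/* only ever shrink the cached limit */
		dout("Lowering MDS max xattrs pairs size: %llu => %llu\n",
		     mdsc->max_xattr_pairs_size, max_xattr_pairs_size);
		mdsc->max_xattr_pairs_size = max_xattr_pairs_size;
	}

Handling an MDS that later raises its limit again would need the
per-session values to be tracked and the minimum recomputed, though.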


  	mutex_unlock(&mdsc->mutex);
  	mutex_lock(&session->s_mutex);
@@ -4761,6 +4771,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
  	strscpy(mdsc->nodename, utsname()->nodename,
  		sizeof(mdsc->nodename));
+	mdsc->max_xattr_pairs_size = MDS_MAX_XATTR_PAIRS_SIZE;
+
  	fsc->mdsc = mdsc;
  	return 0;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ca32f26f5eed..3db777df6d88 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -29,8 +29,11 @@ enum ceph_feature_type {
  	CEPHFS_FEATURE_MULTI_RECONNECT,
  	CEPHFS_FEATURE_DELEG_INO,
  	CEPHFS_FEATURE_METRIC_COLLECT,
+	CEPHFS_FEATURE_ALTERNATE_NAME,
+	CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
+	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
-	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
+	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
  };
  /*
@@ -45,9 +48,16 @@ enum ceph_feature_type {
  	CEPHFS_FEATURE_MULTI_RECONNECT,		\
  	CEPHFS_FEATURE_DELEG_INO,		\
  	CEPHFS_FEATURE_METRIC_COLLECT,		\
+	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,	\
  }
  #define CEPHFS_FEATURES_CLIENT_REQUIRED {}
+/*
+ * Maximum size of xattrs the MDS can handle per inode by default.  This
+ * includes the attribute name and 4+4 bytes for the key/value sizes.
+ */
+#define MDS_MAX_XATTR_PAIRS_SIZE (1<<16) /* 64K */
+
  /*
   * Some lock dependencies:
   *
@@ -404,6 +414,9 @@ struct ceph_mds_client {
  	struct rb_root		quotarealms_inodes;
  	struct mutex		quotarealms_inodes_mutex;
+	/* maximum aggregate size of extended attributes on a file */
+	u64			max_xattr_pairs_size;
+
  	/*
  	 * snap_rwsem will cover cap linkage into snaprealms, and
  	 * realm snap contexts.  (later, we can do per-realm snap
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 8c2dc2c762a4..175a8c1449aa 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
  			flags |= CEPH_XATTR_REMOVE;
  	}
-	dout("setxattr value=%.*s\n", (int)size, value);
+	dout("setxattr value size: %ld\n", size);
  	/* do request */
  	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
@@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
  	spin_lock(&ci->i_ceph_lock);
  retry:
  	issued = __ceph_caps_issued(ci, NULL);
-	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
+	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
+	if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
+	    (required_blob_size >= mdsc->max_xattr_pairs_size)) {

Should this be "required_blob_size > mdsc->max_xattr_pairs_size" instead?

Thanks,

-- Xiubo
+		dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
+		     __func__, ci->i_xattrs.version, required_blob_size,
+		     mdsc->max_xattr_pairs_size);
  		goto do_sync;
+	}
  	if (!lock_snap_rwsem && !ci->i_head_snapc) {
  		lock_snap_rwsem = true;
@@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name,
  	     ceph_cap_string(issued));
  	__build_xattrs(inode);
-	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
-
  	if (!ci->i_xattrs.prealloc_blob ||
  	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
  		struct ceph_buffer *blob;




