[PATCH 01/17] libxfs: unmap xmbuf pages to avoid disaster

From: Darrick J. Wong <djwong@xxxxxxxxxx>

It turns out that there's a limit on how many memory mappings a process
can have (vm.max_map_count), so we need to be smartish about not
overflowing it with too many mapped xmbuf buffers.  The counter needs
to be global because a high-agcount filesystem will create a large
number of xmbuf caches, but the mapping limit applies to the whole
process.

Cc: <linux-xfs@xxxxxxxxxxxxxxx> # v6.9.0
Fixes: 124b388dac17f5 ("libxfs: support in-memory buffer cache targets")
Signed-off-by: "Darrick J. Wong" <djwong@xxxxxxxxxx>
---
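
For reference, vm.max_map_count caps the number of distinct memory
mappings (VMAs) a single process may hold; once the cap is reached,
every further mmap() fails with ENOMEM.  A minimal standalone sketch
(illustrative only, not part of this patch) that reproduces the
failure mode:

/*
 * Map one page at a time until the kernel refuses.  Alternating the
 * page protection keeps adjacent anonymous VMAs from being merged
 * into a single mapping, so each mmap() really consumes a map entry.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	FILE	*f = fopen("/proc/sys/vm/max_map_count", "r");
	long	max_count = 65530;	/* kernel default */
	long	i;

	if (f) {
		if (fscanf(f, "%ld", &max_count) != 1)
			max_count = 65530;
		fclose(f);
	}
	printf("vm.max_map_count = %ld\n", max_count);

	for (i = 0; ; i++) {
		int	prot = (i & 1) ? PROT_READ :
					 PROT_READ | PROT_WRITE;
		void	*p = mmap(NULL, 4096, prot,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (p == MAP_FAILED) {
			printf("mmap %ld failed: %s\n", i,
					strerror(errno));
			return 0;
		}
	}
}

The loop stops a little short of the sysctl value because the stack,
the program text, and shared libraries already occupy map entries.
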
 include/cache.h  |    6 +++
 libxfs/buf_mem.c |  102 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 libxfs/cache.c   |   11 ++++++
 3 files changed, 115 insertions(+), 4 deletions(-)


diff --git a/include/cache.h b/include/cache.h
index 334ad26309e26d..279bf717ba335f 100644
--- a/include/cache.h
+++ b/include/cache.h
@@ -64,6 +64,8 @@ typedef unsigned int (*cache_node_hash_t)(cache_key_t, unsigned int,
 					  unsigned int);
 typedef int (*cache_node_compare_t)(struct cache_node *, cache_key_t);
 typedef unsigned int (*cache_bulk_relse_t)(struct cache *, struct list_head *);
+typedef int (*cache_node_get_t)(struct cache_node *);
+typedef void (*cache_node_put_t)(struct cache_node *);
 
 struct cache_operations {
 	cache_node_hash_t	hash;
@@ -72,6 +74,8 @@ struct cache_operations {
 	cache_node_relse_t	relse;
 	cache_node_compare_t	compare;
 	cache_bulk_relse_t	bulkrelse;	/* optional */
+	cache_node_get_t	get;		/* optional */
+	cache_node_put_t	put;		/* optional */
 };
 
 struct cache_hash {
@@ -107,6 +111,8 @@ struct cache {
 	cache_node_relse_t	relse;		/* memory free function */
 	cache_node_compare_t	compare;	/* comparison routine */
 	cache_bulk_relse_t	bulkrelse;	/* bulk release routine */
+	cache_node_get_t	get;		/* prepare cache node after get */
+	cache_node_put_t	put;		/* prepare to put cache node */
 	unsigned int		c_hashsize;	/* hash bucket count */
 	unsigned int		c_hashshift;	/* hash key shift */
 	struct cache_hash	*c_hash;	/* hash table buckets */
diff --git a/libxfs/buf_mem.c b/libxfs/buf_mem.c
index e5b91d3cfe0486..16cb038ba10e2a 100644
--- a/libxfs/buf_mem.c
+++ b/libxfs/buf_mem.c
@@ -34,6 +34,36 @@
 unsigned int	XMBUF_BLOCKSIZE;
 unsigned int	XMBUF_BLOCKSHIFT;
 
+long		xmbuf_max_mappings;
+static atomic_t	xmbuf_mappings;
+bool		xmbuf_unmap_early = false;
+
+static long
+get_max_mmap_count(void)
+{
+	char	buffer[64];
+	char	*p = NULL;
+	long	ret = -1;
+	FILE	*file;
+
+	file = fopen("/proc/sys/vm/max_map_count", "r");
+	if (!file)
+		return -1;
+
+	while (fgets(buffer, sizeof(buffer), file)) {
+		errno = 0;
+		ret = strtol(buffer, &p, 0);
+		if (errno || p == buffer)
+			continue;
+
+		/* only take half the maximum mmap count so others can use it */
+		ret /= 2;
+		break;
+	}
+	fclose(file);
+	return ret;
+}
+
 void
 xmbuf_libinit(void)
 {
@@ -45,6 +75,14 @@ xmbuf_libinit(void)
 
 	XMBUF_BLOCKSIZE = ret;
 	XMBUF_BLOCKSHIFT = libxfs_highbit32(XMBUF_BLOCKSIZE);
+
+	/*
+	 * Figure out how many mmaps we will use simultaneously.  Pick a low
+	 * default if we can't query procfs.
+	 */
+	xmbuf_max_mappings = get_max_mmap_count();
+	if (xmbuf_max_mappings < 0)
+		xmbuf_max_mappings = 1024;
 }
 
 /* Allocate a new cache node (aka a xfs_buf) */
@@ -105,7 +143,8 @@ xmbuf_cache_relse(
 	struct xfs_buf		*bp;
 
 	bp = container_of(node, struct xfs_buf, b_node);
-	xmbuf_unmap_page(bp);
+	if (bp->b_addr)
+		xmbuf_unmap_page(bp);
 	kmem_cache_free(xfs_buf_cache, bp);
 }
 
@@ -129,13 +168,50 @@ xmbuf_cache_bulkrelse(
 	return count;
 }
 
+static int
+xmbuf_cache_node_get(
+	struct cache_node	*node)
+{
+	struct xfs_buf		*bp =
+		container_of(node, struct xfs_buf, b_node);
+	int			error;
+
+	if (bp->b_addr != NULL)
+		return 0;
+
+	error = xmbuf_map_page(bp);
+	if (error) {
+		fprintf(stderr,
+ _("%s: %s can't mmap %u bytes at xfile offset %llu: %s\n"),
+				progname, __FUNCTION__, BBTOB(bp->b_length),
+				(unsigned long long)xfs_buf_daddr(bp),
+				strerror(-error));
+		return error;
+	}
+
+	return 0;
+}
+
+static void
+xmbuf_cache_node_put(
+	struct cache_node	*node)
+{
+	struct xfs_buf		*bp =
+		container_of(node, struct xfs_buf, b_node);
+
+	if (xmbuf_unmap_early)
+		xmbuf_unmap_page(bp);
+}
+
 static struct cache_operations xmbuf_bcache_operations = {
 	.hash		= libxfs_bhash,
 	.alloc		= xmbuf_cache_alloc,
 	.flush		= xmbuf_cache_flush,
 	.relse		= xmbuf_cache_relse,
 	.compare	= libxfs_bcompare,
-	.bulkrelse	= xmbuf_cache_bulkrelse
+	.bulkrelse	= xmbuf_cache_bulkrelse,
+	.get		= xmbuf_cache_node_get,
+	.put		= xmbuf_cache_node_put,
 };
 
 /*
@@ -216,8 +292,24 @@ xmbuf_map_page(
 	pos = xfile->partition_pos + BBTOB(xfs_buf_daddr(bp));
 	p = mmap(NULL, BBTOB(bp->b_length), PROT_READ | PROT_WRITE, MAP_SHARED,
 			xfile->fcb->fd, pos);
-	if (p == MAP_FAILED)
-		return -errno;
+	if (p == MAP_FAILED) {
+		if (errno == ENOMEM && !xmbuf_unmap_early) {
+#ifdef DEBUG
+			fprintf(stderr, "xmbuf could not make mappings!\n");
+#endif
+			xmbuf_unmap_early = true;
+		}
+		return -errno;
+	}
+
+	if (!xmbuf_unmap_early &&
+	    atomic_inc_return(&xmbuf_mappings) > xmbuf_max_mappings) {
+#ifdef DEBUG
+		fprintf(stderr, _("xmbuf hit too many mappings (%ld)!\n"),
+				xmbuf_max_mappings);
+#endif
+		xmbuf_unmap_early = true;
+	}
 
 	bp->b_addr = p;
 	bp->b_flags |= LIBXFS_B_UPTODATE | LIBXFS_B_UNCHECKED;
@@ -230,6 +322,8 @@ void
 xmbuf_unmap_page(
 	struct xfs_buf		*bp)
 {
+	if (!xmbuf_unmap_early)
+		atomic_dec(&xmbuf_mappings);
 	munmap(bp->b_addr, BBTOB(bp->b_length));
 	bp->b_addr = NULL;
 }
diff --git a/libxfs/cache.c b/libxfs/cache.c
index 139c7c1b9e715e..af20f3854df93e 100644
--- a/libxfs/cache.c
+++ b/libxfs/cache.c
@@ -61,6 +61,8 @@ cache_init(
 	cache->compare = cache_operations->compare;
 	cache->bulkrelse = cache_operations->bulkrelse ?
 		cache_operations->bulkrelse : cache_generic_bulkrelse;
+	cache->get = cache_operations->get;
+	cache->put = cache_operations->put;
 	pthread_mutex_init(&cache->c_mutex, NULL);
 
 	for (i = 0; i < hashsize; i++) {
@@ -415,6 +417,13 @@ cache_node_get(
 			 */
 			pthread_mutex_lock(&node->cn_mutex);
 
+			if (node->cn_count == 0 && cache->get) {
+				int err = cache->get(node);
+				if (err) {
+					pthread_mutex_unlock(&node->cn_mutex);
+					goto next_object;
+				}
+			}
 			if (node->cn_count == 0) {
 				ASSERT(node->cn_priority >= 0);
 				ASSERT(!list_empty(&node->cn_mru));
@@ -503,6 +512,8 @@ cache_node_put(
 #endif
 	node->cn_count--;
 
+	if (node->cn_count == 0 && cache->put)
+		cache->put(node);
 	if (node->cn_count == 0) {
 		/* add unreferenced node to appropriate MRU for shaker */
 		mru = &cache->c_mrus[node->cn_priority];

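To see how the two halves fit together: the cache.c hunks invoke the
new optional ->get hook when a node's reference count rises from zero
and the ->put hook when it drops back to zero, and buf_mem.c uses
those hooks to tear mappings down on the last release and rebuild
them on the next lookup once the process-wide budget trips.  A toy
model of that lifecycle (hypothetical toy_* names, single-threaded,
not libxfs code):

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for xmbuf_max_mappings/xmbuf_mappings/xmbuf_unmap_early. */
static long	budget = 2;
static long	nr_mappings;
static bool	unmap_early;

struct toy_node {
	int	refcount;
	bool	mapped;
};

static void toy_map(struct toy_node *n)
{
	n->mapped = true;
	if (!unmap_early && ++nr_mappings > budget) {
		/* Budget exhausted; switch to map-on-get/unmap-on-put. */
		unmap_early = true;
		printf("budget exceeded, unmapping early from now on\n");
	}
}

static void toy_unmap(struct toy_node *n)
{
	if (!unmap_early)
		nr_mappings--;
	n->mapped = false;
}

/* Like the ->get hook: remap a node whose mapping was dropped early. */
static void toy_get(struct toy_node *n)
{
	if (n->refcount++ == 0 && !n->mapped)
		toy_map(n);
}

/* Like the ->put hook: in degraded mode, unmap on the last release. */
static void toy_put(struct toy_node *n)
{
	if (--n->refcount == 0 && unmap_early)
		toy_unmap(n);
}

int main(void)
{
	struct toy_node	nodes[4] = {{ 0 }};
	int		i;

	for (i = 0; i < 4; i++) {
		toy_map(&nodes[i]);	/* node creation maps the buffer */
		toy_get(&nodes[i]);
		toy_put(&nodes[i]);
		printf("node %d mapped while unreferenced: %d\n",
				i, nodes[i].mapped);
	}
	return 0;
}

Nodes mapped before the budget trips keep their mappings while they
sit unreferenced on the MRU, so the common case pays no remap cost;
only after the counter (or an actual ENOMEM from mmap) flips
xmbuf_unmap_early do buffers cycle their mappings on every get/put.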