From: Darrick J. Wong <djwong@xxxxxxxxxx> It turns out that there's a maximum mappings count, so we need to be smartish about not overflowing that with too many xmbuf buffers. This needs to be a global value because high-agcount filesystems will create a large number of xmbuf caches but this is a process-global limit. Cc: <linux-xfs@xxxxxxxxxxxxxxx> # v6.9.0 Fixes: 124b388dac17f5 ("libxfs: support in-memory buffer cache targets") Signed-off-by: "Darrick J. Wong" <djwong@xxxxxxxxxx> --- include/cache.h | 6 +++ libxfs/buf_mem.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- libxfs/cache.c | 11 ++++++ 3 files changed, 115 insertions(+), 4 deletions(-) diff --git a/include/cache.h b/include/cache.h index 334ad26309e26d..279bf717ba335f 100644 --- a/include/cache.h +++ b/include/cache.h @@ -64,6 +64,8 @@ typedef unsigned int (*cache_node_hash_t)(cache_key_t, unsigned int, unsigned int); typedef int (*cache_node_compare_t)(struct cache_node *, cache_key_t); typedef unsigned int (*cache_bulk_relse_t)(struct cache *, struct list_head *); +typedef int (*cache_node_get_t)(struct cache_node *); +typedef void (*cache_node_put_t)(struct cache_node *); struct cache_operations { cache_node_hash_t hash; @@ -72,6 +74,8 @@ struct cache_operations { cache_node_relse_t relse; cache_node_compare_t compare; cache_bulk_relse_t bulkrelse; /* optional */ + cache_node_get_t get; /* optional */ + cache_node_put_t put; /* optional */ }; struct cache_hash { @@ -107,6 +111,8 @@ struct cache { cache_node_relse_t relse; /* memory free function */ cache_node_compare_t compare; /* comparison routine */ cache_bulk_relse_t bulkrelse; /* bulk release routine */ + cache_node_get_t get; /* prepare cache node after get */ + cache_node_put_t put; /* prepare to put cache node */ unsigned int c_hashsize; /* hash bucket count */ unsigned int c_hashshift; /* hash key shift */ struct cache_hash *c_hash; /* hash table buckets */ diff --git a/libxfs/buf_mem.c b/libxfs/buf_mem.c index 
e5b91d3cfe0486..16cb038ba10e2a 100644 --- a/libxfs/buf_mem.c +++ b/libxfs/buf_mem.c @@ -34,6 +34,36 @@ unsigned int XMBUF_BLOCKSIZE; unsigned int XMBUF_BLOCKSHIFT; +long xmbuf_max_mappings; +static atomic_t xmbuf_mappings; +bool xmbuf_unmap_early = false; + +static long +get_max_mmap_count(void) +{ + char buffer[64]; + char *p = NULL; + long ret = -1; + FILE *file; + + file = fopen("/proc/sys/vm/max_map_count", "r"); + if (!file) + return -1; + + while (fgets(buffer, sizeof(buffer), file)) { + errno = 0; + ret = strtol(buffer, &p, 0); + if (errno || p == buffer) + continue; + + /* only take half the maximum mmap count so others can use it */ + ret /= 2; + break; + } + fclose(file); + return ret; +} + void xmbuf_libinit(void) { @@ -45,6 +75,14 @@ xmbuf_libinit(void) XMBUF_BLOCKSIZE = ret; XMBUF_BLOCKSHIFT = libxfs_highbit32(XMBUF_BLOCKSIZE); + + /* + * Figure out how many mmaps we will use simultaneously. Pick a low + * default if we can't query procfs. + */ + xmbuf_max_mappings = get_max_mmap_count(); + if (xmbuf_max_mappings < 0) + xmbuf_max_mappings = 1024; } /* Allocate a new cache node (aka a xfs_buf) */ @@ -105,7 +143,8 @@ xmbuf_cache_relse( struct xfs_buf *bp; bp = container_of(node, struct xfs_buf, b_node); - xmbuf_unmap_page(bp); + if (bp->b_addr) + xmbuf_unmap_page(bp); kmem_cache_free(xfs_buf_cache, bp); } @@ -129,13 +168,50 @@ xmbuf_cache_bulkrelse( return count; } +static int +xmbuf_cache_node_get( + struct cache_node *node) +{ + struct xfs_buf *bp = + container_of(node, struct xfs_buf, b_node); + int error; + + if (bp->b_addr != NULL) + return 0; + + error = xmbuf_map_page(bp); + if (error) { + fprintf(stderr, + _("%s: %s can't mmap %u bytes at xfile offset %llu: %s\n"), + progname, __FUNCTION__, BBTOB(bp->b_length), + (unsigned long long)xfs_buf_daddr(bp), + strerror(error)); + return error; + } + + return 0; +} + +static void +xmbuf_cache_node_put( + struct cache_node *node) +{ + struct xfs_buf *bp = + container_of(node, struct xfs_buf, b_node); + + 
if (xmbuf_unmap_early) + xmbuf_unmap_page(bp); +} + static struct cache_operations xmbuf_bcache_operations = { .hash = libxfs_bhash, .alloc = xmbuf_cache_alloc, .flush = xmbuf_cache_flush, .relse = xmbuf_cache_relse, .compare = libxfs_bcompare, - .bulkrelse = xmbuf_cache_bulkrelse + .bulkrelse = xmbuf_cache_bulkrelse, + .get = xmbuf_cache_node_get, + .put = xmbuf_cache_node_put, }; /* @@ -216,8 +292,24 @@ xmbuf_map_page( pos = xfile->partition_pos + BBTOB(xfs_buf_daddr(bp)); p = mmap(NULL, BBTOB(bp->b_length), PROT_READ | PROT_WRITE, MAP_SHARED, xfile->fcb->fd, pos); - if (p == MAP_FAILED) - return -errno; + if (p == MAP_FAILED) { + if (errno == ENOMEM && !xmbuf_unmap_early) { +#ifdef DEBUG + fprintf(stderr, "xmbuf could not make mappings!\n"); +#endif + xmbuf_unmap_early = true; + } + return errno; + } + + if (!xmbuf_unmap_early && + atomic_inc_return(&xmbuf_mappings) > xmbuf_max_mappings) { +#ifdef DEBUG + fprintf(stderr, _("xmbuf hit too many mappings (%ld)!\n"), + xmbuf_max_mappings); +#endif + xmbuf_unmap_early = true; + } bp->b_addr = p; bp->b_flags |= LIBXFS_B_UPTODATE | LIBXFS_B_UNCHECKED; @@ -230,6 +322,8 @@ void xmbuf_unmap_page( struct xfs_buf *bp) { + if (!xmbuf_unmap_early) + atomic_dec(&xmbuf_mappings); munmap(bp->b_addr, BBTOB(bp->b_length)); bp->b_addr = NULL; } diff --git a/libxfs/cache.c b/libxfs/cache.c index 139c7c1b9e715e..af20f3854df93e 100644 --- a/libxfs/cache.c +++ b/libxfs/cache.c @@ -61,6 +61,8 @@ cache_init( cache->compare = cache_operations->compare; cache->bulkrelse = cache_operations->bulkrelse ?
cache_operations->bulkrelse : cache_generic_bulkrelse; + cache->get = cache_operations->get; + cache->put = cache_operations->put; pthread_mutex_init(&cache->c_mutex, NULL); for (i = 0; i < hashsize; i++) { @@ -415,6 +417,13 @@ cache_node_get( */ pthread_mutex_lock(&node->cn_mutex); + if (node->cn_count == 0 && cache->get) { + int err = cache->get(node); + if (err) { + pthread_mutex_unlock(&node->cn_mutex); + goto next_object; + } + } if (node->cn_count == 0) { ASSERT(node->cn_priority >= 0); ASSERT(!list_empty(&node->cn_mru)); @@ -503,6 +512,8 @@ cache_node_put( #endif node->cn_count--; + if (node->cn_count == 0 && cache->put) + cache->put(node); if (node->cn_count == 0) { /* add unreferenced node to appropriate MRU for shaker */ mru = &cache->c_mrus[node->cn_priority];