ping? On Wed, Oct 09, 2013 at 06:02:41AM -0700, Christoph Hellwig wrote: > Currently libxfs has a cache for xfs_inode structures. Unlike in kernelspace > where the inode cache, and the associated page cache for file data is used > for all filesystem operations the libxfs inode cache is only used in few > places: > > - the libxfs init code reads the root and realtime inodes when called from > xfs_db using a special flag, but these inode structure are never referenced > again > - mkfs uses namespace and bmap routines that take the xfs_inode structure > to create the root and realtime inodes, as well as any additional files > specified in the proto file > - the xfs_db attr code uses xfs_inode-based attr routines in the attrset > and attrget commands > - phase6 of xfs_repair uses xfs_inode-based routines for rebuilding > directories and moving files to the lost+found directory. > - phase7 of xfs_repair uses struct xfs_inode to modify the nlink count > of inodes. > > So except in repair we never ever reuse a cached inode, and even in repair > the logical inode caching doesn't help: > > - in phase 6a we iterate over each inode in the incore inode tree, > and if it's a directory check/rebuild it > - phase6b then updates the "." and ".." entries for directories > that need, which means we require the backing buffers. > - phase6c moves disconnected inodes to lost_found, which again needs > the backing buffer to actually do anything. > - phase7 then only touches inodes for which we need to reset i_nlink, > which always involves reading, modifying and writing the physical > inode. > which always involves modifying the . and .. entries. > > Given these facts stop caching the inodes to reduce memory usage > especially in xfs_repair, where this makes a different for large inode > count inodes. On the upper end this allows repair to complete for > filesystem / amount of memory combinations that previously wouldn't. > > With this we probably could increase the memory available to the buffer > cache in xfs_repair, but trying to do so I got a bit lost - the current > formula seems to magic to me to make any sense, and simply doubling the > buffer cache size causes us to run out of memory given that the data cached > in the buffer cache (typically lots of 8k inode buffers and few 4k other > metadata buffers) are much bigger than the inodes cached in the inode > cache. We probably need a sizing scheme that takes the actual amount > of memory allocated to the buffer cache into account to solve this better. > > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > > --- > include/libxfs.h | 5 -- > libxfs/init.c | 9 ----- > libxfs/rdwr.c | 87 +++++++++++--------------------------------------- > man/man8/xfs_repair.8 | 6 --- > mkfs/xfs_mkfs.c | 1 > repair/xfs_repair.c | 14 +------- > 6 files changed, 23 insertions(+), 99 deletions(-) > > Index: xfsprogs/include/libxfs.h > =================================================================== > --- xfsprogs.orig/include/libxfs.h 2013-10-09 12:36:31.000000000 +0000 > +++ xfsprogs/include/libxfs.h 2013-10-09 12:40:20.000000000 +0000 > @@ -257,7 +257,6 @@ > #define LIBXFS_MOUNT_COMPAT_ATTR 0x0010 > #define LIBXFS_MOUNT_ATTR2 0x0020 > > -#define LIBXFS_IHASHSIZE(sbp) (1<<10) > #define LIBXFS_BHASHSIZE(sbp) (1<<10) > > extern xfs_mount_t *libxfs_mount (xfs_mount_t *, xfs_sb_t *, > @@ -440,7 +439,6 @@ > extern int libxfs_readbufr(struct xfs_buftarg *, xfs_daddr_t, xfs_buf_t *, int, int); > > extern int libxfs_bhash_size; > -extern int libxfs_ihash_size; > > #define LIBXFS_BREAD 0x1 > #define LIBXFS_BWRITE 0x2 > @@ -640,9 +638,6 @@ > extern int libxfs_iflush_int (xfs_inode_t *, xfs_buf_t *); > > /* Inode Cache Interfaces */ > -extern struct cache *libxfs_icache; > -extern struct cache_operations libxfs_icache_operations; > -extern void libxfs_icache_purge (void); > extern int libxfs_iget (xfs_mount_t *, xfs_trans_t *, xfs_ino_t, > uint, xfs_inode_t **, xfs_daddr_t); > extern void libxfs_iput (xfs_inode_t *, uint); > Index: xfsprogs/libxfs/init.c > =================================================================== > --- xfsprogs.orig/libxfs/init.c 2013-10-09 12:36:31.000000000 +0000 > +++ xfsprogs/libxfs/init.c 2013-10-09 12:40:20.000000000 +0000 > @@ -22,9 +22,6 @@ > > char *progname = "libxfs"; /* default, changed by each tool */ > > -struct cache *libxfs_icache; /* global inode cache */ > -int libxfs_ihash_size; /* #buckets in icache */ > - > struct cache *libxfs_bcache; /* global buffer cache */ > int libxfs_bhash_size; /* #buckets in bcache */ > > @@ -335,9 +332,6 @@ > } > if (needcd) > chdir(curdir); > - if (!libxfs_ihash_size) > - libxfs_ihash_size = LIBXFS_IHASHSIZE(sbp); > - libxfs_icache = cache_init(libxfs_ihash_size, &libxfs_icache_operations); > if (!libxfs_bhash_size) > libxfs_bhash_size = LIBXFS_BHASHSIZE(sbp); > libxfs_bcache = cache_init(libxfs_bhash_size, &libxfs_bcache_operations); > @@ -866,7 +860,6 @@ > int agno; > > libxfs_rtmount_destroy(mp); > - libxfs_icache_purge(); > libxfs_bcache_purge(); > > for (agno = 0; agno < mp->m_maxagi; agno++) { > @@ -882,7 +875,6 @@ > libxfs_destroy(void) > { > manage_zones(1); > - cache_destroy(libxfs_icache); > cache_destroy(libxfs_bcache); > } > > @@ -898,7 +890,6 @@ > time_t t; > char *c; > > - cache_report(fp, "libxfs_icache", libxfs_icache); > cache_report(fp, "libxfs_bcache", libxfs_bcache); > > t = time(NULL); > Index: xfsprogs/libxfs/rdwr.c > =================================================================== > --- xfsprogs.orig/libxfs/rdwr.c 2013-10-09 12:36:31.000000000 +0000 > +++ xfsprogs/libxfs/rdwr.c 2013-10-09 12:46:09.000000000 +0000 > @@ -993,26 +993,12 @@ > > > /* > - * Inode cache interfaces > + * Inode cache stubs. > */ > > extern kmem_zone_t *xfs_ili_zone; > extern kmem_zone_t *xfs_inode_zone; > > -static unsigned int > -libxfs_ihash(cache_key_t key, unsigned int hashsize) > -{ > - return ((unsigned int)*(xfs_ino_t *)key) % hashsize; > -} > - > -static int > -libxfs_icompare(struct cache_node *node, cache_key_t key) > -{ > - xfs_inode_t *ip = (xfs_inode_t *)node; > - > - return (ip->i_ino == *(xfs_ino_t *)key); > -} > - > int > libxfs_iget(xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, uint lock_flags, > xfs_inode_t **ipp, xfs_daddr_t bno) > @@ -1020,34 +1006,21 @@ > xfs_inode_t *ip; > int error = 0; > > - if (cache_node_get(libxfs_icache, &ino, (struct cache_node **)&ip)) { > -#ifdef INO_DEBUG > - fprintf(stderr, "%s: allocated inode, ino=%llu(%llu), %p\n", > - __FUNCTION__, (unsigned long long)ino, bno, ip); > -#endif > - ip->i_ino = ino; > - ip->i_mount = mp; > - error = xfs_iread(mp, tp, ip, bno); > - if (error) { > - cache_node_purge(libxfs_icache, &ino, > - (struct cache_node *)ip); > - ip = NULL; > - } > + ip = kmem_zone_zalloc(xfs_inode_zone, 0); > + if (!ip) > + return ENOMEM; > + > + ip->i_ino = ino; > + ip->i_mount = mp; > + error = xfs_iread(mp, tp, ip, bno); > + if (error) { > + kmem_zone_free(xfs_inode_zone, ip); > + *ipp = NULL; > + return error; > } > - *ipp = ip; > - return error; > -} > - > -void > -libxfs_iput(xfs_inode_t *ip, uint lock_flags) > -{ > - cache_node_put(libxfs_icache, (struct cache_node *)ip); > -} > > -static struct cache_node * > -libxfs_ialloc(cache_key_t key) > -{ > - return kmem_zone_zalloc(xfs_inode_zone, 0); > + *ipp = ip; > + return 0; > } > > static void > @@ -1064,32 +1037,12 @@ > libxfs_idestroy_fork(ip, XFS_ATTR_FORK); > } > > -static void > -libxfs_irelse(struct cache_node *node) > -{ > - xfs_inode_t *ip = (xfs_inode_t *)node; > - > - if (ip != NULL) { > - if (ip->i_itemp) > - kmem_zone_free(xfs_ili_zone, ip->i_itemp); > - ip->i_itemp = NULL; > - libxfs_idestroy(ip); > - kmem_zone_free(xfs_inode_zone, ip); > - ip = NULL; > - } > -} > - > void > -libxfs_icache_purge(void) > +libxfs_iput(xfs_inode_t *ip, uint lock_flags) > { > - cache_purge(libxfs_icache); > + if (ip->i_itemp) > + kmem_zone_free(xfs_ili_zone, ip->i_itemp); > + ip->i_itemp = NULL; > + libxfs_idestroy(ip); > + kmem_zone_free(xfs_inode_zone, ip); > } > - > -struct cache_operations libxfs_icache_operations = { > - /* .hash */ libxfs_ihash, > - /* .alloc */ libxfs_ialloc, > - /* .flush */ NULL, > - /* .relse */ libxfs_irelse, > - /* .compare */ libxfs_icompare, > - /* .bulkrelse */ NULL > -}; > Index: xfsprogs/mkfs/xfs_mkfs.c > =================================================================== > --- xfsprogs.orig/mkfs/xfs_mkfs.c 2013-10-09 12:36:31.000000000 +0000 > +++ xfsprogs/mkfs/xfs_mkfs.c 2013-10-09 12:40:20.000000000 +0000 > @@ -2909,7 +2909,6 @@ > * Need to drop references to inodes we still hold, first. > */ > libxfs_rtmount_destroy(mp); > - libxfs_icache_purge(); > libxfs_bcache_purge(); > > /* > Index: xfsprogs/repair/xfs_repair.c > =================================================================== > --- xfsprogs.orig/repair/xfs_repair.c 2013-09-09 14:34:49.000000000 +0000 > +++ xfsprogs/repair/xfs_repair.c 2013-10-09 12:40:20.000000000 +0000 > @@ -69,7 +69,6 @@ > }; > > > -static int ihash_option_used; > static int bhash_option_used; > static long max_mem_specified; /* in megabytes */ > static int phase2_threads = 32; > @@ -239,13 +238,13 @@ > pre_65_beta = 1; > break; > case IHASH_SIZE: > - libxfs_ihash_size = (int)strtol(val, NULL, 0); > - ihash_option_used = 1; > + do_warn( > + _("-o ihash option has been removed and will be ignored\n")); > break; > case BHASH_SIZE: > if (max_mem_specified) > do_abort( > - _("-o bhash option cannot be used with -m option\n")); > + _("-o bhash option cannot be used with -m option\n")); > libxfs_bhash_size = (int)strtol(val, NULL, 0); > bhash_option_used = 1; > break; > @@ -648,9 +647,7 @@ > unsigned long max_mem; > struct rlimit rlim; > > - libxfs_icache_purge(); > libxfs_bcache_purge(); > - cache_destroy(libxfs_icache); > cache_destroy(libxfs_bcache); > > mem_used = (mp->m_sb.sb_icount >> (10 - 2)) + > @@ -709,11 +706,6 @@ > do_log(_(" - block cache size set to %d entries\n"), > libxfs_bhash_size * HASH_CACHE_RATIO); > > - if (!ihash_option_used) > - libxfs_ihash_size = libxfs_bhash_size; > - > - libxfs_icache = cache_init(libxfs_ihash_size, > - &libxfs_icache_operations); > libxfs_bcache = cache_init(libxfs_bhash_size, > &libxfs_bcache_operations); > } > Index: xfsprogs/man/man8/xfs_repair.8 > =================================================================== > --- xfsprogs.orig/man/man8/xfs_repair.8 2013-09-09 14:34:49.000000000 +0000 > +++ xfsprogs/man/man8/xfs_repair.8 2013-10-09 12:40:20.000000000 +0000 > @@ -130,12 +130,6 @@ > supported are: > .RS 1.0i > .TP > -.BI ihash= ihashsize > -overrides the default inode cache hash size. The total number of > -inode cache entries are limited to 8 times this amount. The default > -.I ihashsize > -is 1024 (for a total of 8192 entries). > -.TP > .BI bhash= bhashsize > overrides the default buffer cache hash size. The total number of > buffer cache entries are limited to 8 times this amount. The default > > _______________________________________________ > xfs mailing list > xfs@xxxxxxxxxxx > http://oss.sgi.com/mailman/listinfo/xfs ---end quoted text--- _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs