From: Darrick J. Wong <djwong@xxxxxxxxxx> Reduce the memory requirements of the string blob structure by deduplicating the strings stored within. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> --- repair/pptr.c | 5 ++ repair/strblobs.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++++- repair/strblobs.h | 4 +- 3 files changed, 145 insertions(+), 5 deletions(-) diff --git a/repair/pptr.c b/repair/pptr.c index 20d66884..c1cd9060 100644 --- a/repair/pptr.c +++ b/repair/pptr.c @@ -269,13 +269,16 @@ void parent_ptr_init( struct xfs_mount *mp) { + uint64_t iused; xfs_agnumber_t agno; int error; if (!xfs_has_parent(mp)) return; - error = strblobs_init(mp, "parent pointer names", &nameblobs); + /* One hash bucket per inode, up to about 8M of memory on 64-bit. */ + iused = min(mp->m_sb.sb_icount - mp->m_sb.sb_ifree, 1048573); + error = strblobs_init(mp, "parent pointer names", iused, &nameblobs); if (error) do_error(_("init parent pointer names failed: %s\n"), strerror(error)); diff --git a/repair/strblobs.c b/repair/strblobs.c index 2b7a7a5e..fb5929e9 100644 --- a/repair/strblobs.c +++ b/repair/strblobs.c @@ -13,23 +13,43 @@ * ===================== * * This data structure wraps the storage of strings with explicit length in an - * xfblob structure. + * xfblob structure. It stores a hashtable of string checksums to provide + * fast(ish) lookups of existing strings to enable deduplication of the strings + * contained within. */ +struct strblob_hashent { + struct strblob_hashent *next; + + xfblob_cookie str_cookie; + unsigned int str_len; + xfs_dahash_t str_hash; +}; + struct strblobs { struct xfblob *strings; + unsigned int nr_buckets; + + struct strblob_hashent *buckets[]; }; +static inline size_t strblobs_sizeof(unsigned int nr_buckets) +{ + return sizeof(struct strblobs) + + (nr_buckets * sizeof(struct strblobs_hashent *)); +} + /* Initialize a string blob structure. */ int strblobs_init( struct xfs_mount *mp, const char *descr, + unsigned int hash_buckets, struct strblobs **sblobs) { struct strblobs *sb; int error; - sb = malloc(sizeof(struct strblobs)); + sb = calloc(strblobs_sizeof(hash_buckets), 1); if (!sb) return ENOMEM; @@ -37,6 +57,7 @@ strblobs_init( if (error) goto out_free; + sb->nr_buckets = hash_buckets; *sblobs = sb; return 0; @@ -51,12 +72,114 @@ strblobs_destroy( struct strblobs **sblobs) { struct strblobs *sb = *sblobs; + struct strblob_hashent *ent, *ent_next; + unsigned int bucket; + + for (bucket = 0; bucket < sb->nr_buckets; bucket++) { + ent = sb->buckets[bucket]; + while (ent != NULL) { + ent_next = ent->next; + free(ent); + ent = ent_next; + } + } xfblob_destroy(sb->strings); free(sb); *sblobs = NULL; } +/* + * Search the string hashtable for a matching entry. Sets sets the cookie and + * returns 0 if one is found; ENOENT if there is no match; or a positive errno. + */ +static int +__strblobs_lookup( + struct strblobs *sblobs, + xfblob_cookie *str_cookie, + const unsigned char *str, + unsigned int str_len, + xfs_dahash_t str_hash) +{ + struct strblob_hashent *ent; + char *buf = NULL; + unsigned int bucket; + int error; + + bucket = str_hash % sblobs->nr_buckets; + ent = sblobs->buckets[bucket]; + + for (ent = sblobs->buckets[bucket]; ent != NULL; ent = ent->next) { + if (ent->str_len != str_len || ent->str_hash != str_hash) + continue; + + if (!buf) { + buf = malloc(str_len); + if (!buf) + return ENOMEM; + } + + error = strblobs_load(sblobs, ent->str_cookie, buf, str_len); + if (error) + goto out; + + if (memcmp(str, buf, str_len)) + continue; + + *str_cookie = ent->str_cookie; + goto out; + } + error = ENOENT; + +out: + free(buf); + return error; +} + +/* + * Search the string hashtable for a matching entry. Sets sets the cookie and + * returns 0 if one is found; ENOENT if there is no match; or a positive errno. + */ +int +strblobs_lookup( + struct strblobs *sblobs, + xfblob_cookie *str_cookie, + const unsigned char *str, + unsigned int str_len) +{ + xfs_dahash_t str_hash; + + str_hash = libxfs_da_hashname(str, str_len); + return __strblobs_lookup(sblobs, str_cookie, str, str_len, str_hash); +} + +/* Remember a string in the hashtable. */ +static int +strblobs_hash( + struct strblobs *sblobs, + xfblob_cookie str_cookie, + const unsigned char *str, + unsigned int str_len, + xfs_dahash_t str_hash) +{ + struct strblob_hashent *ent; + unsigned int bucket; + + bucket = str_hash % sblobs->nr_buckets; + + ent = malloc(sizeof(struct strblob_hashent)); + if (!ent) + return ENOMEM; + + ent->str_cookie = str_cookie; + ent->str_len = str_len; + ent->str_hash = str_hash; + ent->next = sblobs->buckets[bucket]; + + sblobs->buckets[bucket] = ent; + return 0; +} + /* Store a string and return a cookie for its retrieval. */ int strblobs_store( @@ -65,7 +188,19 @@ strblobs_store( const unsigned char *str, unsigned int str_len) { - return -xfblob_store(sblobs->strings, str_cookie, str, str_len); + int error; + xfs_dahash_t str_hash; + + str_hash = libxfs_da_hashname(str, str_len); + error = __strblobs_lookup(sblobs, str_cookie, str, str_len, str_hash); + if (error != ENOENT) + return error; + + error = -xfblob_store(sblobs->strings, str_cookie, str, str_len); + if (error) + return error; + + return strblobs_hash(sblobs, *str_cookie, str, str_len, str_hash); } /* Retrieve a previously stored string. */ diff --git a/repair/strblobs.h b/repair/strblobs.h index f5680175..b8b059e2 100644 --- a/repair/strblobs.h +++ b/repair/strblobs.h @@ -9,12 +9,14 @@ struct strblobs; int strblobs_init(struct xfs_mount *mp, const char *descr, - struct strblobs **sblobs); + unsigned int hash_buckets, struct strblobs **sblobs); void strblobs_destroy(struct strblobs **sblobs); int strblobs_store(struct strblobs *sblobs, xfblob_cookie *str_cookie, const unsigned char *str, unsigned int str_len); int strblobs_load(struct strblobs *sblobs, xfblob_cookie str_cookie, unsigned char *str, unsigned int str_len); +int strblobs_lookup(struct strblobs *sblobs, xfblob_cookie *str_cookie, + const unsigned char *str, unsigned int str_len); #endif /* __REPAIR_STRBLOBS_H__ */