The patch titled Manage jbd allocations from its own slabs has been added to the -mm tree. Its filename is manage-jbd-allocations-from-its-own-slabs.patch See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this ------------------------------------------------------ Subject: Manage jbd allocations from its own slabs From: Badari Pulavarty <pbadari@xxxxxxxxxx> JBD currently allocates commit and frozen buffers from slabs. With CONFIG_SLAB_DEBUG, its possible for an allocation to cross the page boundary causing IO problems. https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=200127 So, instead of allocating these from regular slabs - manage allocation from its own slabs and disable slab debug for these slabs. Signed-off-by: Badari Pulavarty <pbadari@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- fs/jbd/commit.c | 6 +- fs/jbd/journal.c | 87 +++++++++++++++++++++++++++++++++++++++-- fs/jbd/transaction.c | 9 ++-- include/linux/jbd.h | 3 + 4 files changed, 94 insertions(+), 11 deletions(-) diff -puN fs/jbd/commit.c~manage-jbd-allocations-from-its-own-slabs fs/jbd/commit.c --- a/fs/jbd/commit.c~manage-jbd-allocations-from-its-own-slabs +++ a/fs/jbd/commit.c @@ -261,7 +261,7 @@ void journal_commit_transaction(journal_ struct buffer_head *bh = jh2bh(jh); jbd_lock_bh_state(bh); - kfree(jh->b_committed_data); + jbd_slab_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; jbd_unlock_bh_state(bh); } @@ -745,14 +745,14 @@ restart_loop: * Otherwise, we can just throw away the frozen data now. */ if (jh->b_committed_data) { - kfree(jh->b_committed_data); + jbd_slab_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; if (jh->b_frozen_data) { jh->b_committed_data = jh->b_frozen_data; jh->b_frozen_data = NULL; } } else if (jh->b_frozen_data) { - kfree(jh->b_frozen_data); + jbd_slab_free(jh->b_frozen_data, bh->b_size); jh->b_frozen_data = NULL; } diff -puN fs/jbd/journal.c~manage-jbd-allocations-from-its-own-slabs fs/jbd/journal.c --- a/fs/jbd/journal.c~manage-jbd-allocations-from-its-own-slabs +++ a/fs/jbd/journal.c @@ -84,6 +84,7 @@ EXPORT_SYMBOL(journal_force_commit); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); static void __journal_abort_soft (journal_t *journal, int errno); +static int journal_create_jbd_slab(size_t slab_size); /* * Helper function used to manage commit timeouts @@ -328,10 +329,10 @@ repeat: char *tmp; jbd_unlock_bh_state(bh_in); - tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS); + tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS); jbd_lock_bh_state(bh_in); if (jh_in->b_frozen_data) { - kfree(tmp); + jbd_slab_free(tmp, bh_in->b_size); goto repeat; } @@ -1090,6 +1091,13 @@ int journal_load(journal_t *journal) } } + /* + * Make sure to create a slab for this blocksize + */ + err = journal_create_jbd_slab(cpu_to_be32(journal->j_superblock->s_blocksize)); + if (err) + return err; + /* Let the recovery code check whether it needs to recover any * data from the journal. */ if (journal_recover(journal)) @@ -1612,6 +1620,76 @@ void * __jbd_kmalloc (const char *where, } /* + * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed + * and allocate frozen and commit buffers from these slabs. + * + * Reason for doing this is to avoid, SLAB_DEBUG - since it could + * cause bh to cross page boundary. + */ + +#define JBD_MAX_SLABS 5 +#define JBD_SLAB_INDEX(size) (size >> 11) + +static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; +static const char *jbd_slab_names[JBD_MAX_SLABS] = { + "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" +}; + +static void journal_destroy_jbd_slabs(void) +{ + int i; + + for (i=0; i<JBD_MAX_SLABS; i++) { + if (jbd_slab[i]) + kmem_cache_destroy(jbd_slab[i]); + jbd_slab[i] = NULL; + } +} + +static int journal_create_jbd_slab(size_t slab_size) +{ + int i = JBD_SLAB_INDEX(slab_size); + + BUG_ON(i >= JBD_MAX_SLABS); + /* + * Check if we already have a slab created for this size + */ + if (jbd_slab[i]) + return 0; + + /* + * Create a slab and force alignment to be same as slabsize - + * this will make sure that allocations won't cross the page + * boundary. + */ + jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], + slab_size, slab_size, 0, NULL, NULL); + if (!jbd_slab[i]) { + printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); + return -ENOMEM; + } + return 0; +} + +void * jbd_slab_alloc(size_t size, gfp_t flags) +{ + int idx; + + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); +} + +void jbd_slab_free(void *ptr, size_t size) +{ + int idx; + + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + kmem_cache_free(jbd_slab[idx], ptr); +} + +/* * Journal_head storage management */ static kmem_cache_t *journal_head_cache; @@ -1799,13 +1877,13 @@ static void __journal_remove_journal_hea printk(KERN_WARNING "%s: freeing " "b_frozen_data\n", __FUNCTION__); - kfree(jh->b_frozen_data); + jbd_slab_free(jh->b_frozen_data, bh->b_size); } if (jh->b_committed_data) { printk(KERN_WARNING "%s: freeing " "b_committed_data\n", __FUNCTION__); - kfree(jh->b_committed_data); + jbd_slab_free(jh->b_committed_data, bh->b_size); } bh->b_private = NULL; jh->b_bh = NULL; /* debug, really */ @@ -1961,6 +2039,7 @@ static void journal_destroy_caches(void) journal_destroy_revoke_caches(); journal_destroy_journal_head_cache(); journal_destroy_handle_cache(); + journal_destroy_jbd_slabs(); } static int __init journal_init(void) diff -puN fs/jbd/transaction.c~manage-jbd-allocations-from-its-own-slabs fs/jbd/transaction.c --- a/fs/jbd/transaction.c~manage-jbd-allocations-from-its-own-slabs +++ a/fs/jbd/transaction.c @@ -666,8 +666,9 @@ repeat: if (!frozen_buffer) { JBUFFER_TRACE(jh, "allocate memory for buffer"); jbd_unlock_bh_state(bh); - frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size, - GFP_NOFS); + frozen_buffer = + jbd_slab_alloc(jh2bh(jh)->b_size, + GFP_NOFS); if (!frozen_buffer) { printk(KERN_EMERG "%s: OOM for frozen_buffer\n", @@ -879,7 +880,7 @@ int journal_get_undo_access(handle_t *ha repeat: if (!jh->b_committed_data) { - committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS); + committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!committed_data) { printk(KERN_EMERG "%s: No memory for committed data\n", __FUNCTION__); @@ -906,7 +907,7 @@ repeat: out: journal_put_journal_head(jh); if (unlikely(committed_data)) - kfree(committed_data); + jbd_slab_free(committed_data, bh->b_size); return err; } diff -puN include/linux/jbd.h~manage-jbd-allocations-from-its-own-slabs include/linux/jbd.h --- a/include/linux/jbd.h~manage-jbd-allocations-from-its-own-slabs +++ a/include/linux/jbd.h @@ -72,6 +72,9 @@ extern int journal_enable_debug; #endif extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry); +extern void * jbd_slab_alloc(size_t size, gfp_t flags); +extern void jbd_slab_free(void *ptr, size_t size); + #define jbd_kmalloc(size, flags) \ __jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry) #define jbd_rep_kmalloc(size, flags) \ _ Patches currently in -mm which might be from pbadari@xxxxxxxxxx are manage-jbd-allocations-from-its-own-slabs.patch pass-io-size-to-batch_write-address-space-operation.patch vectorize-aio_read-aio_write-fileop-methods.patch vectorize-aio_read-aio_write-fileop-methods-xfs-fix.patch vectorize-aio_read-aio_write-fileop-methods-hypfs-fix.patch remove-readv-writev-methods-and-use-aio_read-aio_write.patch streamline-generic_file_-interfaces-and-filemap.patch add-vector-aio-support.patch add-vector-aio-support-fix.patch streamline-generic_file_-interfaces-and-filemap-ecryptfs.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html