Admin could reset zram during I/O operation going on so we have used zram->init_lock as read-side lock in I/O path to prevent sudden zram meta freeing. However, the init_lock is really troublesome. We can't do call zram_meta_alloc under init_lock due to lockdep splat because zram_rw_page is one of the function under reclaim path and hold it as read_lock while other places in process context hold it as write_lock. So, we have used allocation out of the lock to avoid lockdep warn but it's not good for readability and fainally, I met another lockdep splat between init_lock and cpu_hotpulug from kmem_cache_destroy during wokring zsmalloc compaction. :( Yes, the ideal is to remove horrible init_lock of zram in rw path. This patch removes it in rw path and instead, put init_done bool variable to check initialization done with smp_[wmb|rmb] and srcu_[un]read_lock to prevent sudden zram meta freeing during I/O operation. Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx> --- drivers/block/zram/zram_drv.c | 85 +++++++++++++++++++++++++++++++------------ drivers/block/zram/zram_drv.h | 5 +++ 2 files changed, 66 insertions(+), 24 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a598ada817f0..b33add453027 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -32,6 +32,7 @@ #include <linux/string.h> #include <linux/vmalloc.h> #include <linux/err.h> +#include <linux/srcu.h> #include "zram_drv.h" @@ -53,9 +54,31 @@ static ssize_t name##_show(struct device *d, \ } \ static DEVICE_ATTR_RO(name); -static inline int init_done(struct zram *zram) +static inline bool init_done(struct zram *zram) { - return zram->meta != NULL; + /* + * init_done can be used without holding zram->init_lock in + * read/write handler(ie, zram_make_request) but we should make sure + * that zram->init_done should set up after meta initialization is + * done. Look at setup_init_done. + */ + bool ret = zram->init_done; + + if (ret) + smp_rmb(); /* pair with setup_init_done */ + + return ret; +} + +static inline void setup_init_done(struct zram *zram, bool flag) +{ + /* + * Store operation of struct zram fields should complete + * before zram->init_done set up because zram_bvec_rw + * doesn't hold an zram->init_lock. + */ + smp_wmb(); + zram->init_done = flag; } static inline struct zram *dev_to_zram(struct device *dev) @@ -326,6 +349,10 @@ static void zram_meta_free(struct zram_meta *meta) kfree(meta); } +static void rcu_zram_do_nothing(struct rcu_head *unused) +{ +} + static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize) { char pool_name[8]; @@ -726,11 +753,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) return; } - zcomp_destroy(zram->comp); zram->max_comp_streams = 1; - zram_meta_free(zram->meta); - zram->meta = NULL; /* Reset stats */ memset(&zram->stats, 0, sizeof(zram->stats)); @@ -738,8 +762,12 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) if (reset_capacity) set_capacity(zram->disk, 0); + setup_init_done(zram, false); + call_srcu(&zram->srcu, &zram->rcu, rcu_zram_do_nothing); + synchronize_srcu(&zram->srcu); + zram_meta_free(zram->meta); + zcomp_destroy(zram->comp); up_write(&zram->init_lock); - /* * Revalidate disk out of the init_lock to avoid lockdep splat. * It's okay because disk's capacity is protected by init_lock @@ -762,10 +790,19 @@ static ssize_t disksize_store(struct device *dev, if (!disksize) return -EINVAL; + down_write(&zram->init_lock); + if (init_done(zram)) { + pr_info("Cannot change disksize for initialized device\n"); + up_write(&zram->init_lock); + return -EBUSY; + } + disksize = PAGE_ALIGN(disksize); meta = zram_meta_alloc(zram->disk->first_minor, disksize); - if (!meta) + if (!meta) { + up_write(&zram->init_lock); return -ENOMEM; + } comp = zcomp_create(zram->compressor, zram->max_comp_streams); if (IS_ERR(comp)) { @@ -775,17 +812,11 @@ static ssize_t disksize_store(struct device *dev, goto out_free_meta; } - down_write(&zram->init_lock); - if (init_done(zram)) { - pr_info("Cannot change disksize for initialized device\n"); - err = -EBUSY; - goto out_destroy_comp; - } - zram->meta = meta; zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); + setup_init_done(zram, true); up_write(&zram->init_lock); /* @@ -797,10 +828,8 @@ static ssize_t disksize_store(struct device *dev, return len; -out_destroy_comp: - up_write(&zram->init_lock); - zcomp_destroy(comp); out_free_meta: + up_write(&zram->init_lock); zram_meta_free(meta); return err; } @@ -905,9 +934,10 @@ out: */ static void zram_make_request(struct request_queue *queue, struct bio *bio) { + int idx; struct zram *zram = queue->queuedata; - down_read(&zram->init_lock); + idx = srcu_read_lock(&zram->srcu); if (unlikely(!init_done(zram))) goto error; @@ -918,12 +948,12 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) } __zram_make_request(zram, bio); - up_read(&zram->init_lock); + srcu_read_unlock(&zram->srcu, idx); return; error: - up_read(&zram->init_lock); + srcu_read_unlock(&zram->srcu, idx); bio_io_error(bio); } @@ -945,18 +975,20 @@ static void zram_slot_free_notify(struct block_device *bdev, static int zram_rw_page(struct block_device *bdev, sector_t sector, struct page *page, int rw) { - int offset, err; + int offset, err, idx; u32 index; struct zram *zram; struct bio_vec bv; zram = bdev->bd_disk->private_data; + idx = srcu_read_lock(&zram->srcu); + if (!valid_io_request(zram, sector, PAGE_SIZE)) { atomic64_inc(&zram->stats.invalid_io); + srcu_read_unlock(&zram->srcu, idx); return -EINVAL; } - down_read(&zram->init_lock); if (unlikely(!init_done(zram))) { err = -EIO; goto out_unlock; @@ -971,7 +1003,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, err = zram_bvec_rw(zram, &bv, index, offset, rw); out_unlock: - up_read(&zram->init_lock); + srcu_read_unlock(&zram->srcu, idx); /* * If I/O fails, just return error(ie, non-zero) without * calling page_endio. @@ -1041,6 +1073,11 @@ static int create_device(struct zram *zram, int device_id) init_rwsem(&zram->init_lock); + if (init_srcu_struct(&zram->srcu)) { + pr_err("Error initialize srcu for device %d\n", device_id); + goto out; + } + zram->queue = blk_alloc_queue(GFP_KERNEL); if (!zram->queue) { pr_err("Error allocating disk queue for device %d\n", @@ -1125,8 +1162,8 @@ static void destroy_device(struct zram *zram) del_gendisk(zram->disk); put_disk(zram->disk); - blk_cleanup_queue(zram->queue); + cleanup_srcu_struct(&zram->srcu); } static int __init zram_init(void) diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index e492f6bf11f1..2042c310aea8 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -105,8 +105,13 @@ struct zram { struct gendisk *disk; struct zcomp *comp; + struct srcu_struct srcu; + struct rcu_head rcu; + /* Prevent concurrent execution of device init, reset and R/W request */ struct rw_semaphore init_lock; + bool init_done; + /* * This is the limit on amount of *uncompressed* worth of data * we can store in a disk. -- 1.9.1 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>