On Feb 15, 2007 20:44 -0800, Andrew Morton wrote: > I have a report from a google person who just did some basic > power-it-off-during-a-write testing on 2.6.20's ext3. ordered-data is OK, > but data=journal came back with crap in the file data. Ouch. > I suspect we should resurrect and formalise my old > make-the-disk-stop-accepting-writes-when-a-timer-goes-off thing. It was > very useful for stress-testing recovery. We have a patch that we use for Lustre testing which allows you to set a block device readonly (silently discarding all writes), without the filesystem immediately keeling over dead like set_disk_ro. The readonly state persists until the last reference on the block device is dropped, so there are no races w.r.t. VFS cleanup of inodes and flushing buffers after the filesystem is unmounted. We call this conditionally inside Lustre to simulate a crash of the node at critical points without actually having to do lengthy reboots or have power control. ================== dev_set_rdonly-2.6.18-vanilla.patch ================== Index: linux-2.6/fs/block_dev.c =================================================================== --- linux-2.6.orig/fs/block_dev.c 2006-07-06 23:41:48.000000000 +0800 +++ linux-2.6/fs/block_dev.c 2006-07-15 16:20:25.000000000 +0800 @@ -1118,6 +1118,7 @@ static int __blkdev_put(struct block_dev } unlock_kernel(); mutex_unlock(&bdev->bd_mutex); + dev_clear_rdonly(bdev); bdput(bdev); return ret; } Index: linux-2.6/block/ll_rw_blk.c =================================================================== --- linux-2.6.orig/block/ll_rw_blk.c 2006-07-10 22:30:08.000000000 +0800 +++ linux-2.6/block/ll_rw_blk.c 2006-07-15 16:15:14.000000000 +0800 @@ -2993,6 +2993,8 @@ static void handle_bad_sector(struct bio set_bit(BIO_EOF, &bio->bi_flags); } +int dev_check_rdonly(struct block_device *bdev); + /** * generic_make_request: hand a buffer to its device driver for I/O * @bio: The bio describing the location in memory and on the device. 
@@ -3076,6 +3078,11 @@ end_io: if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) goto end_io; + /* this is cfs's dev_rdonly check */ + if (bio->bi_rw == WRITE && dev_check_rdonly(bio->bi_bdev)) { + bio_endio(bio, bio->bi_size, 0); + break; + } /* * If this device has partitions, remap block n @@ -3673,6 +3681,98 @@ void swap_io_context(struct io_context * *ioc2 = temp; } EXPORT_SYMBOL(swap_io_context); + + /* + * Debug code for turning block devices "read-only" (will discard writes + * silently). This is for filesystem crash/recovery testing. + */ +struct deventry { + dev_t dev; + struct deventry *next; +}; + +static struct deventry *devlist = NULL; +static spinlock_t devlock = SPIN_LOCK_UNLOCKED; + +int dev_check_rdonly(struct block_device *bdev) +{ + struct deventry *cur; + + if (!bdev) + return 0; + + spin_lock(&devlock); + cur = devlist; + while(cur) { + if (bdev->bd_dev == cur->dev) { + spin_unlock(&devlock); + return 1; + } + cur = cur->next; + } + spin_unlock(&devlock); + return 0; +} + +void dev_set_rdonly(struct block_device *bdev) +{ + struct deventry *newdev, *cur; + + if (!bdev) + return; + + newdev = kmalloc(sizeof(struct deventry), GFP_KERNEL); + if (!newdev) + return; + + spin_lock(&devlock); + cur = devlist; + while(cur) { + if (bdev->bd_dev == cur->dev) { + spin_unlock(&devlock); + kfree(newdev); + return; + } + cur = cur->next; + } + newdev->dev = bdev->bd_dev; + newdev->next = devlist; + devlist = newdev; + spin_unlock(&devlock); + printk(KERN_WARNING "Turning device %s (%#x) read-only\n", + bdev->bd_disk ? 
bdev->bd_disk->disk_name : "", bdev->bd_dev); +} + +void dev_clear_rdonly(struct block_device *bdev) +{ + struct deventry *cur, *last = NULL; + + if (!bdev) + return; + + spin_lock(&devlock); + cur = devlist; + while (cur) { + if (bdev->bd_dev == cur->dev) { + if (last) + last->next = cur->next; + else + devlist = cur->next; + spin_unlock(&devlock); + kfree(cur); + printk(KERN_WARNING "Removing read-only on %s (%#x)\n", + bdev->bd_disk ? bdev->bd_disk->disk_name : + "unknown block", bdev->bd_dev); + return; + } + last = cur; + cur = cur->next; + } + spin_unlock(&devlock); +} + +EXPORT_SYMBOL(dev_set_rdonly); +EXPORT_SYMBOL(dev_check_rdonly); /* * sysfs parts below Index: linux-2.6/include/linux/fs.h =================================================================== --- linux-2.6.orig/include/linux/fs.h 2006-07-15 16:14:58.000000000 +0800 +++ linux-2.6/include/linux/fs.h 2006-07-15 16:15:14.000000000 +0800 @@ -1648,6 +1648,9 @@ extern void file_kill(struct file *f); struct bio; extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); +#define HAVE_CLEAR_RDONLY_ON_PUT +void dev_set_rdonly(struct block_device *bdev); +int dev_check_rdonly(struct block_device *bdev); +void dev_clear_rdonly(struct block_device *bdev); extern int set_blocksize(struct block_device *, int); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); Cheers, Andreas -- Andreas Dilger Principal Software Engineer Cluster File Systems, Inc. - To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html