Hi, We (Intel software raid devs) are seeing a problem with the detection of partitions on md devices. The trace below shows that the block device inode is dropped in-between when the array comes up and when it is first opened (timestamp 655.498875). When this happens bdev->bd_invalidated is cleared by bdget() and we never detect partitions. (Seen on a 2.6.32 based kernel, but I presume it is still present) # tracer: nop # # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | <...>-1114 [000] 655.488730: check_disk_size_change: ffff8800374d3780: (md1) check_disk_size_change:1103 open: 1 invalidated: 0 <...>-1114 [000] 655.497906: check_disk_size_change: ffff8800374d3780: (md1) check_disk_size_change:1112 open: 1 invalidated: 0 <...>-1114 [000] 655.497908: flush_disk: ffff8800374d3780: (md1) flush_disk:1084 open: 1 invalidated: 1 <...>-1114 [000] 655.497909: flush_disk: ffff8800374d3780: (md1) flush_disk:1086 open: 1 invalidated: 1 <...>-1117 [003] 655.498875: bdget: ffff8800375aec40: () bdget:595 open: 0 invalidated: 0 <...>-1117 [003] 655.498878: __blkdev_get: ffff8800375aec40: (md1) __blkdev_get:1229 open: 0 invalidated: 0 <...>-1117 [003] 655.498879: __blkdev_get: ffff8800375aec40: (md1) __blkdev_get:1233 open: 0 invalidated: 0 <...>-1117 [003] 655.498880: __blkdev_get: ffff8800375aec40: (md1) __blkdev_get:1239 open: 0 invalidated: 0 <...>-1117 [003] 655.498882: __blkdev_get: ffff8800375aec40: (md1) __blkdev_get:1307 open: 1 invalidated: 0 These traces generated by: #define dbg(bdev) ({ if (debug_partitions) {\ char name[BDEVNAME_SIZE] = "";\ if (bdev->bd_disk)\ disk_name(bdev->bd_disk, 0, name);\ trace_printk("%p: (%s) %s:%d open: %d invalidated: %d\n", bdev, name, __func__, __LINE__, bdev->bd_openers, bdev->bd_invalidated); \ } 0; }) The patch below (2.6.32 based) moves the block_device bd_invalidated field to a gendisk flag, as it seems this info wants a longer lifetime. Thoughts on this fix? Maybe it wants to be a standalone integer flag so we don't need to add locking to nbd. Thanks, Dan diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index cc923a5..de8a4a4 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -607,8 +607,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, if (S_ISSOCK(inode->i_mode)) { lo->file = file; lo->sock = SOCKET_I(inode); + mutex_lock(&bdev->bd_mutex); if (max_part > 0) - bdev->bd_invalidated = 1; + bdev->bd_disk->flags |= GENHD_FL_INVALIDATED; + mutex_unlock(&bdev->bd_mutex); return 0; } else { fput(file); diff --git a/fs/block_dev.c b/fs/block_dev.c index 6dcee88..147f449 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -571,7 +571,6 @@ struct block_device *bdget(dev_t dev) bdev->bd_inode = inode; bdev->bd_block_size = (1 << inode->i_blkbits); bdev->bd_part_count = 0; - bdev->bd_invalidated = 0; inode->i_mode = S_IFBLK; inode->i_rdev = dev; inode->i_bdev = bdev; @@ -1069,7 +1068,7 @@ static void flush_disk(struct block_device *bdev) if (!bdev->bd_disk) return; if (disk_partitionable(bdev->bd_disk)) - bdev->bd_invalidated = 1; + bdev->bd_disk->flags |= GENHD_FL_INVALIDATED; } /** @@ -1243,7 +1242,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdi = &default_backing_dev_info; bdev->bd_inode->i_data.backing_dev_info = bdi; } - if (bdev->bd_invalidated) + if (bdev->bd_disk->flags & GENHD_FL_INVALIDATED) rescan_partitions(disk, bdev); } else { struct block_device *whole; @@ -1276,7 +1275,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (ret) goto out_unlock_bdev; } - if (bdev->bd_invalidated) + if (bdev->bd_disk->flags & GENHD_FL_INVALIDATED) rescan_partitions(bdev->bd_disk, bdev); } } diff --git a/fs/partitions/check.c b/fs/partitions/check.c index e8865c1..7872269 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -519,7 +519,7 @@ void register_disk(struct gendisk *disk) if (!bdev) goto exit; - bdev->bd_invalidated = 1; + disk->flags |= GENHD_FL_INVALIDATED; err = blkdev_get(bdev, FMODE_READ); if (err < 0) goto exit; @@ -558,7 +558,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (disk->fops->revalidate_disk) disk->fops->revalidate_disk(disk); check_disk_size_change(disk, bdev); - bdev->bd_invalidated = 0; + bdev->bd_disk->flags &= ~GENHD_FL_INVALIDATED; if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) return 0; if (IS_ERR(state)) /* I/O error reading the partition table */ @@ -609,7 +609,7 @@ try_scan: if (capacity > get_capacity(disk)) { set_capacity(disk, capacity); check_disk_size_change(disk, bdev); - bdev->bd_invalidated = 0; + bdev->bd_disk->flags &= ~GENHD_FL_INVALIDATED; } goto try_scan; } else { diff --git a/include/linux/fs.h b/include/linux/fs.h index 1e58fc8..367875a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -661,7 +661,6 @@ struct block_device { struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; - int bd_invalidated; struct gendisk * bd_disk; struct list_head bd_list; /* diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c6c0c41..d97cdec 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -116,6 +116,7 @@ struct hd_struct { #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 #define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ #define GENHD_FL_NATIVE_CAPACITY 128 +#define GENHD_FL_INVALIDATED 256 #define BLK_SCSI_MAX_CMDS (256) #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) -- To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html