md versus partition scanning (bd_invalidated)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

We (Intel software raid devs) are seeing a problem with the detection of
partitions on md devices.  The trace below shows that the block device
inode is dropped between the time the array comes up and the time it is
first opened (timestamp 655.498875).  When this happens, the block_device
handed back by bdget() is a new one (note the different pointer in the
trace) whose bd_invalidated has been reset to 0, so we never detect
partitions.  (Seen on a 2.6.32-based kernel, but I presume it is still
present.)

# tracer: nop
#
#           TASK-PID    CPU#    TIMESTAMP  FUNCTION
#              | |       |          |         |
           <...>-1114  [000]   655.488730: check_disk_size_change: ffff8800374d3780: (md1) check_disk_size_change:1103 open: 1 invalidated: 0
           <...>-1114  [000]   655.497906: check_disk_size_change: ffff8800374d3780: (md1) check_disk_size_change:1112 open: 1 invalidated: 0
           <...>-1114  [000]   655.497908: flush_disk: ffff8800374d3780: (md1) flush_disk:1084 open: 1 invalidated: 1
           <...>-1114  [000]   655.497909: flush_disk: ffff8800374d3780: (md1) flush_disk:1086 open: 1 invalidated: 1
           <...>-1117  [003]   655.498875: bdget: ffff8800375aec40: () bdget:595 open: 0 invalidated: 0
           <...>-1117  [003]   655.498878: __blkdev_get: ffff8800375aec40: (md1) __blkdev_get:1229 open: 0 invalidated: 0
           <...>-1117  [003]   655.498879: __blkdev_get: ffff8800375aec40: (md1) __blkdev_get:1233 open: 0 invalidated: 0
           <...>-1117  [003]   655.498880: __blkdev_get: ffff8800375aec40: (md1) __blkdev_get:1239 open: 0 invalidated: 0
           <...>-1117  [003]   655.498882: __blkdev_get: ffff8800375aec40: (md1) __blkdev_get:1307 open: 1 invalidated: 0

These traces were generated by:

/*
 * dbg(bdev) - debug instrumentation that produced the trace shown above.
 *
 * When debug_partitions is non-zero, emits one trace_printk() record with:
 * the bdev pointer, the name filled in by disk_name(bdev->bd_disk, 0, name)
 * (left as "" when bdev->bd_disk is NULL), the calling function and line
 * (__func__/__LINE__ expand at the call site because this is a macro), and
 * the current bd_openers and bd_invalidated values.
 *
 * The statement expression always evaluates to 0.  The argument is expanded
 * several times and is not parenthesized, so pass a plain pointer variable
 * with no side effects.
 */
#define dbg(bdev) ({ if (debug_partitions) {\
        char name[BDEVNAME_SIZE] = "";\
        if (bdev->bd_disk)\
                disk_name(bdev->bd_disk, 0, name);\
        trace_printk("%p: (%s) %s:%d open: %d invalidated: %d\n", bdev, name, __func__, __LINE__, bdev->bd_openers, bdev->bd_invalidated); \
        } 0; })

The patch below (2.6.32 based) moves the block_device bd_invalidated
field to a gendisk flag, as it seems this info wants a longer lifetime.

Thoughts on this fix?  Maybe it wants to be a standalone integer flag so
we don't need to add locking to nbd.

Thanks,
Dan

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index cc923a5..de8a4a4 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -607,8 +607,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
 			if (S_ISSOCK(inode->i_mode)) {
 				lo->file = file;
 				lo->sock = SOCKET_I(inode);
+				mutex_lock(&bdev->bd_mutex);
 				if (max_part > 0)
-					bdev->bd_invalidated = 1;
+					bdev->bd_disk->flags |= GENHD_FL_INVALIDATED;
+				mutex_unlock(&bdev->bd_mutex);
 				return 0;
 			} else {
 				fput(file);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6dcee88..147f449 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -571,7 +571,6 @@ struct block_device *bdget(dev_t dev)
 		bdev->bd_inode = inode;
 		bdev->bd_block_size = (1 << inode->i_blkbits);
 		bdev->bd_part_count = 0;
-		bdev->bd_invalidated = 0;
 		inode->i_mode = S_IFBLK;
 		inode->i_rdev = dev;
 		inode->i_bdev = bdev;
@@ -1069,7 +1068,7 @@ static void flush_disk(struct block_device *bdev)
 	if (!bdev->bd_disk)
 		return;
 	if (disk_partitionable(bdev->bd_disk))
-		bdev->bd_invalidated = 1;
+		bdev->bd_disk->flags |= GENHD_FL_INVALIDATED;
 }
 
 /**
@@ -1243,7 +1242,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 					bdi = &default_backing_dev_info;
 				bdev->bd_inode->i_data.backing_dev_info = bdi;
 			}
-			if (bdev->bd_invalidated)
+			if (bdev->bd_disk->flags & GENHD_FL_INVALIDATED)
 				rescan_partitions(disk, bdev);
 		} else {
 			struct block_device *whole;
@@ -1276,7 +1275,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 				if (ret)
 					goto out_unlock_bdev;
 			}
-			if (bdev->bd_invalidated)
+			if (bdev->bd_disk->flags & GENHD_FL_INVALIDATED)
 				rescan_partitions(bdev->bd_disk, bdev);
 		}
 	}
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index e8865c1..7872269 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -519,7 +519,7 @@ void register_disk(struct gendisk *disk)
 	if (!bdev)
 		goto exit;
 
-	bdev->bd_invalidated = 1;
+	disk->flags |= GENHD_FL_INVALIDATED;
 	err = blkdev_get(bdev, FMODE_READ);
 	if (err < 0)
 		goto exit;
@@ -558,7 +558,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 	if (disk->fops->revalidate_disk)
 		disk->fops->revalidate_disk(disk);
 	check_disk_size_change(disk, bdev);
-	bdev->bd_invalidated = 0;
+	bdev->bd_disk->flags &= ~GENHD_FL_INVALIDATED;
 	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
 		return 0;
 	if (IS_ERR(state))	/* I/O error reading the partition table */
@@ -609,7 +609,7 @@ try_scan:
 				if (capacity > get_capacity(disk)) {
 					set_capacity(disk, capacity);
 					check_disk_size_change(disk, bdev);
-					bdev->bd_invalidated = 0;
+					bdev->bd_disk->flags &= ~GENHD_FL_INVALIDATED;
 				}
 				goto try_scan;
 			} else {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1e58fc8..367875a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -661,7 +661,6 @@ struct block_device {
 	struct hd_struct *	bd_part;
 	/* number of times partitions within this device have been opened. */
 	unsigned		bd_part_count;
-	int			bd_invalidated;
 	struct gendisk *	bd_disk;
 	struct list_head	bd_list;
 	/*
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c6c0c41..d97cdec 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -116,6 +116,7 @@ struct hd_struct {
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 #define GENHD_FL_EXT_DEVT			64 /* allow extended devt */
 #define GENHD_FL_NATIVE_CAPACITY		128
+#define GENHD_FL_INVALIDATED			256
 
 #define BLK_SCSI_MAX_CMDS	(256)
 #define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))


--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux