The original rationale for exporting the topology information via sysfs
was that we intended to support multiple heterogeneous regions within a
block device.  And that fit poorly with an ioctl approach.

However, with a single region per device it is trivial to provide the
topology.  And while mkfs.* will continue to use the libblkid interface,
there are users that would like to get access to this information
without having to traverse sysfs and stitch things together manually.

Example:

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/ioctl.h>

#define BLKSSZGET _IO(0x12,104)
#define BLKIOMIN _IO(0x12,120)
#define BLKIOOPT _IO(0x12,121)
#define BLKALIGNOFF _IO(0x12,122)
#define BLKPBSZGET _IO(0x12,123)

static int die(const char *fmt, ...)
{
	int err = errno;
	va_list val;

	va_start(val, fmt);
	vfprintf(stderr, fmt, val);
	if (err != 0)
		fprintf(stderr, ": %s", strerror(err));
	fprintf(stderr, "\n");
	va_end(val);

	exit(EXIT_FAILURE);
}

int main(int argc, char *argv[])
{
	int fd, lbs, pbs, min, opt, align;

	if (argc != 2)
		die("Usage: %s <dev>", argv[0]);

	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		die("Can't open %s", argv[1]);

	if (ioctl(fd, BLKSSZGET, &lbs) < 0)
		die("Can't get logical block size");

	if (ioctl(fd, BLKPBSZGET, &pbs) < 0)
		die("Can't get physical block size");

	if (ioctl(fd, BLKIOMIN, &min) < 0)
		die("Can't get preferred random I/O size");

	if (ioctl(fd, BLKIOOPT, &opt) < 0)
		die("Can't get preferred sustained I/O size");

	if (ioctl(fd, BLKALIGNOFF, &align) < 0)
		die("Can't get alignment offset");

	printf("%s:\n", argv[1]);
	printf("\tlogical block size: %u\n", lbs);
	printf("\tphysical block size: %u\n", pbs);
	printf("\trandom I/O size: %u\n", min);
	printf("\tsustained I/O size: %u\n", opt);

	if (align == -1)
		printf("\talignment offset: inconsistent\n");
	else
		printf("\talignment offset: %u\n", align);

	exit(EXIT_SUCCESS);
}

Patch:

block: Topology ioctls

Not all users of the topology information want to use libblkid.
Provide the topology information through bdev ioctls.

Also clarify sector size comments for existing BLK ioctls.

Signed-off-by: Martin K. Petersen <martin.petersen@xxxxxxxxxx>

---

diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 7865a34..bcc8bec 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -734,6 +734,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	switch (cmd) {
 	case HDIO_GETGEO:
 		return compat_hdio_getgeo(disk, bdev, compat_ptr(arg));
+	case BLKPBSZGET:
+		return compat_put_int(arg, bdev_physical_block_size(bdev));
+	case BLKIOMIN:
+		return compat_put_int(arg, bdev_io_min(bdev));
+	case BLKIOOPT:
+		return compat_put_int(arg, bdev_io_opt(bdev));
+	case BLKALIGNOFF:
+		return compat_put_int(arg, bdev_alignment_offset(bdev));
 	case BLKFLSBUF:
 	case BLKROSET:
 	case BLKDISCARD:
diff --git a/block/ioctl.c b/block/ioctl.c
index d3e6b58..fea6f2c 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -263,10 +263,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 		return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
 	case BLKROGET:
 		return put_int(arg, bdev_read_only(bdev) != 0);
-	case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */
+	case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
 		return put_int(arg, block_size(bdev));
-	case BLKSSZGET: /* get block device hardware sector size */
+	case BLKSSZGET: /* get block device logical block size */
 		return put_int(arg, bdev_logical_block_size(bdev));
+	case BLKPBSZGET: /* get block device physical block size */
+		return put_int(arg, bdev_physical_block_size(bdev));
 	case BLKSECTGET:
 		return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
 	case BLKRASET:
@@ -309,6 +311,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 		return put_ulong(arg, size >> 9);
 	case BLKGETSIZE64:
 		return put_u64(arg, bdev->bd_inode->i_size);
+	case BLKIOMIN:
+		return put_int(arg, bdev_io_min(bdev));
+	case BLKIOOPT:
+		return put_int(arg, bdev_io_opt(bdev));
+	case BLKALIGNOFF:
+		return put_int(arg, bdev_alignment_offset(bdev));
 	case BLKTRACESTART:
 	case BLKTRACESTOP:
 	case BLKTRACESETUP:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e23a86c..935bcb0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1080,16 +1080,31 @@ static inline unsigned int queue_physical_block_size(struct request_queue *q)
 	return q->limits.physical_block_size;
 }
 
+static inline unsigned int bdev_physical_block_size(struct block_device *bdev)
+{
+	return queue_physical_block_size(bdev_get_queue(bdev));
+}
+
 static inline unsigned int queue_io_min(struct request_queue *q)
 {
 	return q->limits.io_min;
 }
 
+static inline unsigned int bdev_io_min(struct block_device *bdev)
+{
+	return queue_io_min(bdev_get_queue(bdev));
+}
+
 static inline unsigned int queue_io_opt(struct request_queue *q)
 {
 	return q->limits.io_opt;
 }
 
+static inline unsigned int bdev_io_opt(struct block_device *bdev)
+{
+	return queue_io_opt(bdev_get_queue(bdev));
+}
+
 static inline int queue_alignment_offset(struct request_queue *q)
 {
 	if (q && q->limits.misaligned)
@@ -1108,6 +1123,19 @@ static inline int queue_sector_alignment_offset(struct request_queue *q,
 		& (q->limits.io_min - 1);
 }
 
+static inline int bdev_alignment_offset(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (!q || q->limits.misaligned)
+		return -1;
+
+	if (bdev != bdev->bd_contains)
+		return bdev->bd_part->alignment_offset;
+
+	return q->limits.alignment_offset;
+}
+
 static inline int queue_dma_alignment(struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 90162fb..3f401fc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -300,6 +300,10 @@ struct inodes_stat_t {
 #define BLKTRACESTOP _IO(0x12,117)
 #define BLKTRACETEARDOWN _IO(0x12,118)
 #define BLKDISCARD _IO(0x12,119)
+#define BLKIOMIN _IO(0x12,120)
+#define BLKIOOPT _IO(0x12,121)
+#define BLKALIGNOFF _IO(0x12,122)
+#define BLKPBSZGET _IO(0x12,123)
 
 #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
 #define FIBMAP _IO(0x00,1) /* bmap access */
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html