If a zoned block device is found, get its zone information (number of zones
and zone size) using the new helper function btrfs_get_dev_zone_info(). To
avoid costly run-time zone report commands to test the device zone types
during block allocation, attach the seq_zones bitmap to the device
structure to indicate whether a zone is sequential or accepts random
writes. Also attach the empty_zones bitmap to indicate whether a zone is
empty or not.
This patch also introduces the helper function btrfs_dev_is_sequential() to
test if the zone storing a block is a sequential write required zone and
btrfs_dev_is_empty_zone() to test if the zone is an empty zone.
Signed-off-by: Damien Le Moal <damien.lemoal@xxxxxxx>
Signed-off-by: Naohiro Aota <naohiro.aota@xxxxxxx>
---
fs/btrfs/Makefile | 1 +
fs/btrfs/hmzoned.c | 168 +++++++++++++++++++++++++++++++++++++++++++++
fs/btrfs/hmzoned.h | 92 +++++++++++++++++++++++++
fs/btrfs/volumes.c | 18 ++++-
fs/btrfs/volumes.h | 4 ++
5 files changed, 281 insertions(+), 2 deletions(-)
create mode 100644 fs/btrfs/hmzoned.c
create mode 100644 fs/btrfs/hmzoned.h
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 82200dbca5ac..64aaeed397a4 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -16,6 +16,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
+btrfs-$(CONFIG_BLK_DEV_ZONED) += hmzoned.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
diff --git a/fs/btrfs/hmzoned.c b/fs/btrfs/hmzoned.c
new file mode 100644
index 000000000000..6a13763d2916
--- /dev/null
+++ b/fs/btrfs/hmzoned.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ * Authors:
+ * Naohiro Aota <naohiro.aota@xxxxxxx>
+ * Damien Le Moal <damien.lemoal@xxxxxxx>
+ */
+
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include "ctree.h"
+#include "volumes.h"
+#include "hmzoned.h"
+#include "rcu-string.h"
+
+/* Maximum number of zones to report per blkdev_report_zones() call */
+#define BTRFS_REPORT_NR_ZONES 4096
+
+/* Wrap blkdev_report_zones() for @device at byte offset @pos, logging failures. */
+static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
+			       struct blk_zone *zones, unsigned int *nr_zones)
+{
+	const sector_t first_sector = pos >> SECTOR_SHIFT;
+	int err;
+
+	err = blkdev_report_zones(device->bdev, first_sector, zones, nr_zones);
+	if (err) {
+		btrfs_err_in_rcu(device->fs_info,
+				 "get zone at %llu on %s failed %d", pos,
+				 rcu_str_deref(device->name), err);
+		return err;
+	}
+
+	/* The report call succeeded but filled in no zones: return -EIO. */
+	return *nr_zones ? 0 : -EIO;
+}
+
+int btrfs_get_dev_zone_info(struct btrfs_device *device)
+{
+ struct btrfs_zoned_device_info *zone_info = NULL;
+ struct block_device *bdev = device->bdev;
+ sector_t nr_sectors = bdev->bd_part->nr_sects;
+ sector_t sector = 0;
+ struct blk_zone *zones = NULL;
+ unsigned int i, nreported = 0, nr_zones;
+ unsigned int zone_sectors;
+ int ret;
+ char devstr[sizeof(device->fs_info->sb->s_id) +
+ sizeof(" (device )") - 1];
+
+ if (!bdev_is_zoned(bdev))
+ return 0;
+
+ zone_info = kzalloc(sizeof(*zone_info), GFP_KERNEL);
+ if (!zone_info)
+ return -ENOMEM;
+
+ zone_sectors = bdev_zone_sectors(bdev);
+ ASSERT(is_power_of_2(zone_sectors));
+ zone_info->zone_size = (u64)zone_sectors << SECTOR_SHIFT;
+ zone_info->zone_size_shift = ilog2(zone_info->zone_size);
+ zone_info->nr_zones = nr_sectors >> ilog2(bdev_zone_sectors(bdev));
+ if (!IS_ALIGNED(nr_sectors, zone_sectors))
+ zone_info->nr_zones++;
+
+ zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+ if (!zone_info->seq_zones) {
+ ret = -ENOMEM;
+ goto free_zone_info;
+ }
+
+ zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+ if (!zone_info->empty_zones) {
+ ret = -ENOMEM;
+ goto free_seq_zones;
+ }
+
+ zones = kcalloc(BTRFS_REPORT_NR_ZONES,
+ sizeof(struct blk_zone), GFP_KERNEL);
+ if (!zones) {
+ ret = -ENOMEM;
+ goto free_empty_zones;
+ }
+
+ /* Get zones type */
+ while (sector < nr_sectors) {
+ nr_zones = BTRFS_REPORT_NR_ZONES;
+ ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones,
+ &nr_zones);
+ if (ret)
+ goto free_zones;
+
+ for (i = 0; i < nr_zones; i++) {
+ if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ)
+ set_bit(nreported, zone_info->seq_zones);
+ if (zones[i].cond == BLK_ZONE_COND_EMPTY)
+ set_bit(nreported, zone_info->empty_zones);
+ nreported++;
+ }
+ sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len;
+ }
+
+ if (nreported != zone_info->nr_zones) {
+ btrfs_err_in_rcu(device->fs_info,
+ "inconsistent number of zones on %s (%u / %u)",
+ rcu_str_deref(device->name), nreported,
+ zone_info->nr_zones);
+ ret = -EIO;
+ goto free_zones;
+ }
+
+ kfree(zones);
+
+ device->zone_info = zone_info;
+
+ devstr[0] = 0;
+ if (device->fs_info)
+ snprintf(devstr, sizeof(devstr), " (device %s)",
+ device->fs_info->sb->s_id);
+
+ rcu_read_lock();
+ pr_info(
+"BTRFS info%s: host-%s zoned block device %s, %u zones of %llu sectors",
+ devstr,
+ bdev_zoned_model(bdev) == BLK_ZONED_HM ? "managed" : "aware",
+ rcu_str_deref(device->name), zone_info->nr_zones,
+ zone_info->zone_size >> SECTOR_SHIFT);
+ rcu_read_unlock();
+
+ return 0;
+
+free_zones:
+ kfree(zones);
+free_empty_zones:
+ bitmap_free(zone_info->empty_zones);
+free_seq_zones:
+ bitmap_free(zone_info->seq_zones);
+free_zone_info: