Re: [PATCH] Add blkzonecmd and blkreport ZAC/ZBC drives

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Shaun,

On 11/22/16 06:51, Shaun Tancheff wrote:
> This patch adds:
>  - blkreset to issue Reset (Write Pointer) zone commands
>  - blkreport to retrieve drive zone information
> 
> Signed-off-by: Shaun Tancheff <shaun.tancheff@xxxxxxxxxxx>
> Signed-off-by: Shaun Tancheff <shaun@xxxxxxxxxxxx>
> ---
>  .gitignore              |   2 +
>  configure.ac            |  10 ++
>  include/strutils.h      |   1 +
>  lib/strutils.c          |   8 +-
>  sys-utils/Makemodule.am |  17 ++
>  sys-utils/blkreport.8   |  69 +++++++
>  sys-utils/blkreport.c   | 470 ++++++++++++++++++++++++++++++++++++++++++++++++
>  sys-utils/blkreset.8    |  63 +++++++
>  sys-utils/blkreset.c    | 295 ++++++++++++++++++++++++++++++
>  9 files changed, 933 insertions(+), 2 deletions(-)
>  create mode 100644 sys-utils/blkreport.8
>  create mode 100644 sys-utils/blkreport.c
>  create mode 100644 sys-utils/blkreset.8
>  create mode 100644 sys-utils/blkreset.c
> 
> diff --git a/.gitignore b/.gitignore
> index 064010b..4c87bac 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -62,6 +62,8 @@ update.log
>  /addpart
>  /agetty
>  /blkdiscard
> +/blkreport
> +/blkreset
>  /blkid
>  /blockdev
>  /cal
> diff --git a/configure.ac b/configure.ac
> index 1bd7d2e..aac597f 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -275,6 +275,8 @@ AC_CHECK_HEADERS([security/pam_misc.h],
>  #endif
>  ])
>  
> +AC_CHECK_HEADERS([linux/blkzoned.h])
> +
>  AC_CHECK_HEADERS([security/openpam.h], [], [], [
>  #ifdef HAVE_SECURITY_PAM_APPL_H
>  #include <security/pam_appl.h>
> @@ -1533,6 +1535,14 @@ UL_BUILD_INIT([blkdiscard], [check])
>  UL_REQUIRES_LINUX([blkdiscard])
>  AM_CONDITIONAL([BUILD_BLKDISCARD], [test "x$build_blkdiscard" = xyes])
>  
> +UL_BUILD_INIT([blkreport], [check])
> +UL_REQUIRES_LINUX([blkreport])
> +AM_CONDITIONAL([BUILD_BLKREPORT], [test "x$build_blkreport" = xyes])
> +
> +UL_BUILD_INIT([blkreset], [check])
> +UL_REQUIRES_LINUX([blkreset])
> +AM_CONDITIONAL([BUILD_BLKZONECMD], [test "x$build_blkreset" = xyes])
> +
>  UL_BUILD_INIT([ldattach], [check])
>  UL_REQUIRES_LINUX([ldattach])
>  AM_CONDITIONAL([BUILD_LDATTACH], [test "x$build_ldattach" = xyes])
> diff --git a/include/strutils.h b/include/strutils.h
> index 51d9c9f..7da0cd4 100644
> --- a/include/strutils.h
> +++ b/include/strutils.h
> @@ -27,6 +27,7 @@ extern uint32_t strtou32_or_err(const char *str, const char *errmesg);
>  
>  extern int64_t strtos64_or_err(const char *str, const char *errmesg);
>  extern uint64_t strtou64_or_err(const char *str, const char *errmesg);
> +extern uint64_t strtou64_base_or_err(int base, const char *str, const char *errmesg);
>  
>  extern double strtod_or_err(const char *str, const char *errmesg);
>  
> diff --git a/lib/strutils.c b/lib/strutils.c
> index d3b998f..861df77 100644
> --- a/lib/strutils.c
> +++ b/lib/strutils.c
> @@ -329,7 +329,7 @@ err:
>  	errx(STRTOXX_EXIT_CODE, "%s: '%s'", errmesg, str);
>  }
>  
> -uint64_t strtou64_or_err(const char *str, const char *errmesg)
> +uint64_t strtou64_base_or_err(int base, const char *str, const char *errmesg)
>  {
>  	uintmax_t num;
>  	char *end = NULL;
> @@ -337,7 +337,7 @@ uint64_t strtou64_or_err(const char *str, const char *errmesg)
>  	errno = 0;
>  	if (str == NULL || *str == '\0')
>  		goto err;
> -	num = strtoumax(str, &end, 10);
> +	num = strtoumax(str, &end, base);
>  
>  	if (errno || str == end || (end && *end))
>  		goto err;
> @@ -350,6 +350,10 @@ err:
>  	errx(STRTOXX_EXIT_CODE, "%s: '%s'", errmesg, str);
>  }
>  
> +uint64_t strtou64_or_err(const char *str, const char *errmesg)
> +{
> +	return strtou64_base_or_err(10, str, errmesg);
> +}
>  
>  double strtod_or_err(const char *str, const char *errmesg)
>  {
> diff --git a/sys-utils/Makemodule.am b/sys-utils/Makemodule.am
> index 9400303..4fa7243 100644
> --- a/sys-utils/Makemodule.am
> +++ b/sys-utils/Makemodule.am
> @@ -111,6 +111,23 @@ blkdiscard_SOURCES = sys-utils/blkdiscard.c lib/monotonic.c
>  blkdiscard_LDADD = $(LDADD) libcommon.la $(REALTIME_LIBS)
>  endif
>  
> +if BUILD_BLKREPORT
> +sbin_PROGRAMS += blkreport
> +dist_man_MANS += sys-utils/blkreport.8
> +blkreport_SOURCES = sys-utils/blkreport.c
> +blkreport_LDADD = $(LDADD) libcommon.la $(REALTIME_LIBS)
> +endif
> +
> +if BUILD_BLKZONECMD
> +sbin_PROGRAMS += blkreset
> +dist_man_MANS += sys-utils/blkreset.8
> +blkreset_SOURCES = sys-utils/blkreset.c
> +blkreset_LDADD = $(LDADD) libcommon.la $(REALTIME_LIBS)
> +if HAVE_UDEV
> +blkreset_LDADD += -ludev
> +endif
> +endif
> +
>  if BUILD_LDATTACH
>  usrsbin_exec_PROGRAMS += ldattach
>  dist_man_MANS += sys-utils/ldattach.8
> diff --git a/sys-utils/blkreport.8 b/sys-utils/blkreport.8
> new file mode 100644
> index 0000000..9225958
> --- /dev/null
> +++ b/sys-utils/blkreport.8
> @@ -0,0 +1,69 @@
> +.TH BLKREPORT 5 "March 2016" "util-linux" "System Administration"
> +.SH NAME
> +blkreport \- report zones on a device
> +.SH SYNOPSIS
> +.B blkreport
> +[options]
> +.RB [ \-o
> +.IR offset ]
> +.RB [ \-l
> +.IR report length ]
> +.I device
> +.SH DESCRIPTION
> +.B blkreport
> +is used to report device zone information.  This is useful for
> +zoned devices that support the ZAC or ZBC command set.
> +.PP
> +By default,
> +.B blkreport
> +will report from the start of the block device upto 512k bytes of the

"upto" -> "up to" (space missing).

> +zone report (~8160 zones].  Options may be used to modify
> +this behavior based on the starting zone or size of the report,
> +as explained below.
> +.PP
> +The
> +.I device
> +argument is the pathname of the block device.
> +.PP
> +.SH OPTIONS
> +The
> +.I zone
> +and
> +.I length
> +arguments may be followed by the multiplicative suffixes KiB (=1024),
> +MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is
> +optional, e.g., "K" has the same meaning as "KiB") or the suffixes
> +KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB.
> +.TP
> +.BR \-z , " \-\-zone \fIoffset"
> +The starting zone to be reported on on specified as a sector offset.

One "on" too many here, I think.

> +The provided offset in sector units should match the start of a zone.
> +The default value is zero.
> +.TP
> +.BR \-l , " \-\-length \fIlength"
> +The number of bytes to allocate for the report from the block device.
> +The provided value will be aligned to the device sector size.
> +If the specified value will be limited to the range of 512 bytes to 512 k-bytes.

I think it may be better to have length specify the number of sectors that
the report should cover, rather than the buffer size of a single report
zones command. This would make the command easier to use for getting
information on all zones, as the user may not know, without calculating it,
the buffer size needed to do so.

This also would be consistent with the offset argument which specifies
the start sector for the report, and with other commands such as
blkdiscard which can specify a range of LBAs on which to operate with
offset+length.

This would change the default behavior of blkreport to reporting all
zones of the device (offset = 0 and length = capacity).

> +.TP
> +.BR \-v , " \-\-verbose"
> +Display the aligned values of
> +.I offset
> +and
> +.IR length .
> +.TP
> +.BR \-V , " \-\-version"
> +Display version information and exit.
> +.TP
> +.BR \-h , " \-\-help"
> +Display help text and exit.
> +.SH AUTHOR
> +.MT shaun@xxxxxxxxxxxx
> +Shaun Tancheff
> +.ME
> +.SH SEE ALSO
> +.BR sg_rep_zones (8)
> +.SH AVAILABILITY
> +The blkreport command is part of the util-linux package and is available
> +.UR ftp://\:ftp.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
> +Linux Kernel Archive
> +.UE .
> diff --git a/sys-utils/blkreport.c b/sys-utils/blkreport.c
> new file mode 100644
> index 0000000..a386bb3
> --- /dev/null
> +++ b/sys-utils/blkreport.c
> @@ -0,0 +1,470 @@
> +/*
> + * blkreport.c -- request a zone report on part (or all) of the block device.
> + *
> + * Copyright (C) 2015,2016 Seagate Technology PLC
> + * Written by Shaun Tancheff <shaun.tancheff@xxxxxxxxxxx>
> + *
> + * This program is free software: you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + *
> + * This program uses BLKREPORT ioctl to query zone information about part of
> + * or a whole block device, if the device supports it.
> + * You can specify range (start and length) to be queried.
> + */
> +
> +#include <string.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <fcntl.h>
> +#include <limits.h>
> +#include <getopt.h>
> +#include <time.h>
> +
> +#include <sys/ioctl.h>
> +#include <sys/stat.h>
> +#include <sys/time.h>
> +#include <linux/fs.h>
> +
> +#ifdef HAVE_LINUX_BLKZONED_H
> +#include <linux/blkzoned.h>
> +#endif
> +
> +#define ENABLE_REPORTING_OPTION 0 /* future */
> +
> +#include "nls.h"
> +#include "strutils.h"
> +#include "c.h"
> +#include "closestream.h"
> +#include "monotonic.h"
> +
> +#ifndef HAVE_LINUX_BLKZONED_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +/**
> + * enum blk_zone_type - Types of zones allowed in a zoned device.
> + *
> + * @BLK_ZONE_TYPE_CONVENTIONAL: The zone has no write pointer and can be writen
> + *                              randomly. Zone reset has no effect on the zone.
> + * @BLK_ZONE_TYPE_SEQWRITE_REQ: The zone must be written sequentially
> + * @BLK_ZONE_TYPE_SEQWRITE_PREF: The zone can be written non-sequentially
> + *
> + * Any other value not defined is reserved and must be considered as invalid.
> + */
> +enum blk_zone_type {
> +	BLK_ZONE_TYPE_CONVENTIONAL	= 0x1,
> +	BLK_ZONE_TYPE_SEQWRITE_REQ	= 0x2,
> +	BLK_ZONE_TYPE_SEQWRITE_PREF	= 0x3,
> +};
> +
> +/**
> + * enum blk_zone_cond - Condition [state] of a zone in a zoned device.
> + *
> + * @BLK_ZONE_COND_NOT_WP: The zone has no write pointer, it is conventional.
> + * @BLK_ZONE_COND_EMPTY: The zone is empty.
> + * @BLK_ZONE_COND_IMP_OPEN: The zone is open, but not explicitly opened.
> + * @BLK_ZONE_COND_EXP_OPEN: The zones was explicitly opened by an
> + *                          OPEN ZONE command.
> + * @BLK_ZONE_COND_CLOSED: The zone was [explicitly] closed after writing.
> + * @BLK_ZONE_COND_FULL: The zone is marked as full, possibly by a zone
> + *                      FINISH ZONE command.
> + * @BLK_ZONE_COND_READONLY: The zone is read-only.
> + * @BLK_ZONE_COND_OFFLINE: The zone is offline (sectors cannot be read/written).
> + *
> + * The Zone Condition state machine in the ZBC/ZAC standards maps the above
> + * deinitions as:
> + *   - ZC1: Empty         | BLK_ZONE_EMPTY
> + *   - ZC2: Implicit Open | BLK_ZONE_COND_IMP_OPEN
> + *   - ZC3: Explicit Open | BLK_ZONE_COND_EXP_OPEN
> + *   - ZC4: Closed        | BLK_ZONE_CLOSED
> + *   - ZC5: Full          | BLK_ZONE_FULL
> + *   - ZC6: Read Only     | BLK_ZONE_READONLY
> + *   - ZC7: Offline       | BLK_ZONE_OFFLINE
> + *
> + * Conditions 0x5 to 0xC are reserved by the current ZBC/ZAC spec and should
> + * be considered invalid.
> + */
> +enum blk_zone_cond {
> +	BLK_ZONE_COND_NOT_WP	= 0x0,
> +	BLK_ZONE_COND_EMPTY	= 0x1,
> +	BLK_ZONE_COND_IMP_OPEN	= 0x2,
> +	BLK_ZONE_COND_EXP_OPEN	= 0x3,
> +	BLK_ZONE_COND_CLOSED	= 0x4,
> +	BLK_ZONE_COND_READONLY	= 0xD,
> +	BLK_ZONE_COND_FULL	= 0xE,
> +	BLK_ZONE_COND_OFFLINE	= 0xF,
> +};
> +
> +/**
> + * struct blk_zone - Zone descriptor for BLKREPORTZONE ioctl.
> + *
> + * @start: Zone start in 512 B sector units
> + * @len: Zone length in 512 B sector units
> + * @wp: Zone write pointer location in 512 B sector units
> + * @type: see enum blk_zone_type for possible values
> + * @cond: see enum blk_zone_cond for possible values
> + * @non_seq: Flag indicating that the zone is using non-sequential resources
> + *           (for host-aware zoned block devices only).
> + * @reset: Flag indicating that a zone reset is recommended.
> + * @reserved: Padding to 64 B to match the ZBC/ZAC defined zone descriptor size.
> + *
> + * start, len and wp use the regular 512 B sector unit, regardless of the
> + * device logical block size. The overall structure size is 64 B to match the
> + * ZBC/ZAC defined zone descriptor and allow support for future additional
> + * zone information.
> + */
> +struct blk_zone {
> +	__u64	start;		/* Zone start sector */
> +	__u64	len;		/* Zone length in number of sectors */
> +	__u64	wp;		/* Zone write pointer position */
> +	__u8	type;		/* Zone type */
> +	__u8	cond;		/* Zone condition */
> +	__u8	non_seq;	/* Non-sequential write resources active */
> +	__u8	reset;		/* Reset write pointer recommended */
> +	__u8	reserved[36];
> +};
> +
> +/**
> + * struct blk_zone_report - BLKREPORTZONE ioctl request/reply
> + *
> + * @sector: starting sector of report
> + * @nr_zones: IN maximum / OUT actual
> + * @reserved: padding to 16 byte alignment
> + * @zones: Space to hold @nr_zones @zones entries on reply.
> + *
> + * The array of at most @nr_zones must follow this structure in memory.
> + */
> +struct blk_zone_report {
> +	__u64		sector;
> +	__u32		nr_zones;
> +	__u8		reserved[4];
> +	struct blk_zone zones[0];
> +} __attribute__((packed));
> +
> +/**
> + * Zoned block device ioctl's:
> + *
> + * @BLKREPORTZONE: Get zone information. Takes a zone report as argument.
> + *                 The zone report will start from the zone containing the
> + *                 sector specified in the report request structure.
> + */
> +#define BLKREPORTZONE	_IOWR(0x12, 130, struct blk_zone_report)
> +
> +#endif /* ! HAVE_BLKZONED_H */
> +
> +#if ENABLE_REPORTING_OPTION
> +
> +#define ZBC_REPORT_OPTION_MASK  0x3f
> +#define ZBC_REPORT_ZONE_PARTIAL 0x80
> +
> +/**
> + * enum zone_report_option - Report Zones types to be included.
> + *
> + * @ZOPT_NON_SEQ_AND_RESET: Default (all zones).
> + * @ZOPT_ZC1_EMPTY: Zones which are empty.
> + * @ZOPT_ZC2_OPEN_IMPLICIT: Zones open but not explicitly opened
> + * @ZOPT_ZC3_OPEN_EXPLICIT: Zones opened explicitly
> + * @ZOPT_ZC4_CLOSED: Zones closed for writing.
> + * @ZOPT_ZC5_FULL: Zones that are full.
> + * @ZOPT_ZC6_READ_ONLY: Zones that are read-only
> + * @ZOPT_ZC7_OFFLINE: Zones that are offline
> + * @ZOPT_RESET: Zones with Reset WP Recommended
> + * @ZOPT_NON_SEQ: Zones that with Non-Sequential Write Resources Active
> + * @ZOPT_NON_WP_ZONES: Zones that do not have Write Pointers (conventional)
> + * @ZOPT_PARTIAL_FLAG: Modifies the definition of the Zone List Length field.
> + *
> + * Used by Report Zones in bdev_zone_get_report: report_option
> + */
> +enum zbc_zone_reporting_options {
> +	ZBC_ZONE_REPORTING_OPTION_ALL = 0,
> +	ZBC_ZONE_REPORTING_OPTION_EMPTY,
> +	ZBC_ZONE_REPORTING_OPTION_IMPLICIT_OPEN,
> +	ZBC_ZONE_REPORTING_OPTION_EXPLICIT_OPEN,
> +	ZBC_ZONE_REPORTING_OPTION_CLOSED,
> +	ZBC_ZONE_REPORTING_OPTION_FULL,
> +	ZBC_ZONE_REPORTING_OPTION_READONLY,
> +	ZBC_ZONE_REPORTING_OPTION_OFFLINE,
> +	ZBC_ZONE_REPORTING_OPTION_NEED_RESET_WP = 0x10,
> +	ZBC_ZONE_REPORTING_OPTION_NON_SEQWRITE,
> +	ZBC_ZONE_REPORTING_OPTION_NON_WP = 0x3f,
> +	ZBC_ZONE_REPORTING_OPTION_RESERVED = 0x40,
> +	ZBC_ZONE_REPORTING_OPTION_PARTIAL = ZBC_REPORT_ZONE_PARTIAL
> +};
> +
> +static inline int is_report_option_valid(uint64_t ropt)
> +{
> +	uint8_t _opt = ropt & ZBC_REPORT_OPTION_MASK;
> +
> +	if (ropt & ZBC_ZONE_REPORTING_OPTION_RESERVED) {
> +		fprintf(stderr, "Illegal report option %x is reserved.\n",
> +			ZBC_ZONE_REPORTING_OPTION_RESERVED);
> +		return 0;
> +	}
> +
> +	if (_opt <= ZBC_ZONE_REPORTING_OPTION_OFFLINE)
> +		return 1;
> +	
> +	switch (_opt) {
> +	case ZBC_ZONE_REPORTING_OPTION_NEED_RESET_WP:
> +	case ZBC_ZONE_REPORTING_OPTION_NON_SEQWRITE:
> +	case ZBC_ZONE_REPORTING_OPTION_NON_WP:
> +		return 1;
> +	default:
> +		fprintf(stderr, "Illegal report option %x is unknown.\n",
> +			ZBC_ZONE_REPORTING_OPTION_RESERVED);
> +		return 0;
> +	}
> +}
> +
> +# define ZBC_REPORT_OPT "r:"
> +
> +#else
> +
> +# define ZBC_ZONE_REPORTING_OPTION_ALL		0
> +# define ZBC_REPORT_OPT				""
> +static inline int is_report_option_valid(uint64_t ropt)
> +{
> +	return (ropt == ZBC_ZONE_REPORTING_OPTION_ALL);
> +}
> +
> +#endif /* ENABLE_REPORTING_OPTION */
> +
> +static const char * type_text[] = {
> +	"RESERVED",
> +	"CONVENTIONAL",
> +	"SEQ_WRITE_REQUIRED",
> +	"SEQ_WRITE_PREFERRED",
> +};
> +
> +#define ARRAY_COUNT(x) (sizeof((x))/sizeof((*x)))
> +
> +const char * condition_str[] = {
> +	"cv", /* conventional zone */
> +	"e0", /* empty */
> +	"Oi", /* open implicit */
> +	"Oe", /* open explicit */
> +	"Cl", /* closed */
> +	"x5", "x6", "x7", "x8", "x9", "xA", "xB", /* xN: reserved */
> +	"ro", /* read only */
> +	"fu", /* full */
> +	"OL"  /* offline */
> +	};
> +
> +static const char * zone_condition_str(uint8_t cond)
> +{
> +	return condition_str[cond & 0x0f];
> +}
> +
> +static void print_zones(struct blk_zone *info, uint32_t count)
> +{
> +	uint32_t iter;
> +	const char *fmtx = "  start: %9lx, len %6lx, wptr %6lx"
> +		           " reset:%u non-seq:%u, zcond:%2u(%s) [type: %u(%s)]\n";
> +
> +	fprintf(stdout, "Zones returned: %u\n", count);
> +
> +	for (iter = 0; iter < count; iter++ ) {
> +		struct blk_zone * entry = &info[iter];
> +		unsigned int type  = entry->type;
> +		uint64_t start = entry->start;
> +		uint64_t wp = entry->wp;
> +		uint8_t cond = entry->cond;
> +		uint64_t len = entry->len;
> +		const char *fmt = fmtx;
> +
> +		if (!len) {
> +			break;
> +		}
> +
> +		fprintf(stdout, fmt, start, len, wp - start,
> +			entry->reset, entry->non_seq,
> +			cond, zone_condition_str(cond),
> +			type, type_text[type]);
> +	}
> +}
> +
> +static int do_report(int fd, uint64_t lba, uint64_t len, uint8_t ropt, int verbose)
> +{
> +	int rc = -4;
> +	struct blk_zone_report *zi;
> +
> +	zi = malloc(len + sizeof(struct blk_zone_report));
> +	if (zi) {
> +		zi->nr_zones = len / sizeof(struct blk_zone);
> +		zi->sector = lba; /* maybe shift 4Kn -> 512e */
> +		zi->reserved[0] = ropt;
> +		rc = ioctl(fd, BLKREPORTZONE, zi);
> +		if (rc != -1) {
> +			if (verbose)
> +				printf("Found %d zones\n", zi->nr_zones);
> +			print_zones(zi->zones, zi->nr_zones);
> +		} else {
> +			fprintf(stderr, "ERR: %d -> %s\n\n", errno, strerror(errno));
> +		}
> +		free(zi);
> +	}
> +	return rc;
> +}
> +
> +static void __attribute__((__noreturn__)) usage(FILE *out)
> +{
> +	fputs(USAGE_HEADER, out);
> +	fprintf(out,
> +	      _(" %s [options] <device>\n"), program_invocation_short_name);
> +
> +	fputs(USAGE_SEPARATOR, out);
> +	fputs(_("Discard the content of sectors on a device.\n"), out);
> +
> +	fputs(USAGE_OPTIONS, out);
> +	fputs(_(" -z, --zone <num>  zone lba in 512 byte sectors\n"
> +		" -l, --length <num>  length of report (512 bytes to 512k bytes)\n"
> +#if ENABLE_REPORTING_OPTION
> +		" -r, --option <report> report option\n"
> +		"    report is the numeric value from \"enum zone_report_option\".\n"
> +		"             0 - non seq. and reset (default)\n"
> +		"             1 - empty\n"
> +		"             2 - open implicit\n"
> +		"             3 - open explicit\n"
> +		"             4 - closed\n"
> +		"             5 - full\n"
> +		"             6 - read only\n"
> +		"             7 - offline\n"
> +		"          0x10 - reset\n"
> +		"          0x11 - non sequential\n"
> +		"          0x3f - non write pointer zones\n"
> +#endif
> +		" -v, --verbose       print aligned length and offset"),
> +		out);
> +	fputs(USAGE_SEPARATOR, out);
> +	fputs(USAGE_HELP, out);
> +	fputs(USAGE_VERSION, out);
> +	fprintf(out, USAGE_MAN_TAIL("blkreport(8)"));
> +	exit(out == stderr ? EXIT_FAILURE : EXIT_SUCCESS);
> +}
> +
> +
> +#define MAX_REPORT_LEN		(1 << 19) /* 512k */
> +#define MAX_REPORT_LEN_SAT	(1 << 18) /* 512k */
> +
> +int main(int argc, char **argv)
> +{
> +	char *path;
> +	int c;
> +	int fd;
> +	int secsize;
> +	uint64_t blksize;
> +	struct stat sb;
> +	int verbose = 0;
> +	uint64_t ropt = ZBC_ZONE_REPORTING_OPTION_ALL;
> +	uint64_t offset = 0ul;
> +	uint32_t length = MAX_REPORT_LEN;
> +	static const struct option longopts[] = {
> +	    { "help",      0, 0, 'h' },
> +	    { "version",   0, 0, 'V' },
> +	    { "zone",      1, 0, 'z' }, /* starting LBA */
> +	    { "length",    1, 0, 'l' }, /* max #of bytes for result */
> +#if ENABLE_REPORTING_OPTION
> +	    { "option",    1, 0, 'r' }, /* report option */
> +#endif
> +	    { "verbose",   0, 0, 'v' },
> +	    { NULL,        0, 0, 0 }
> +	};
> +	const char *options = "hVl:z:v" ZBC_REPORT_OPT;
> +
> +	setlocale(LC_ALL, "");
> +	bindtextdomain(PACKAGE, LOCALEDIR);
> +	textdomain(PACKAGE);
> +	atexit(close_stdout);
> +
> +	while ((c = getopt_long(argc, argv, options, longopts, NULL)) != -1) {
> +		switch(c) {
> +		case 'h':
> +			usage(stdout);
> +			break;
> +		case 'V':
> +			printf(UTIL_LINUX_VERSION);
> +			return EXIT_SUCCESS;
> +		case 'l':
> +			length = strtou64_base_or_err(0, optarg,
> +					_("failed to parse length"));
> +			break;
> +		case 'z':
> +			offset = strtou64_base_or_err(0, optarg,
> +					_("failed to parse offset"));
> +			break;
> +#if ENABLE_REPORTING_OPTION
> +		case 'r':
> +			ropt = strtou64_base_or_err(0, optarg,
> +					_("failed to parse report option"));
> +			break;
> +#endif
> +		case 'v':
> +			verbose = 1;
> +			break;
> +		default:
> +			usage(stderr);
> +			break;
> +		}
> +	}
> +
> +	if (optind == argc)
> +		errx(EXIT_FAILURE, _("no device specified"));
> +
> +	path = argv[optind++];
> +
> +	if (optind != argc) {
> +		warnx(_("unexpected number of arguments"));
> +		usage(stderr);
> +	}
> +
> +	fd = open(path, O_RDWR);
> +	if (fd < 0)
> +		err(EXIT_FAILURE, _("cannot open %s"), path);

Wouldn't O_RDONLY be enough here?

> +
> +	if (fstat(fd, &sb) == -1)
> +		err(EXIT_FAILURE, _("stat of %s failed"), path);
> +	if (!S_ISBLK(sb.st_mode))
> +		errx(EXIT_FAILURE, _("%s: not a block device"), path);
> +
> +	if (ioctl(fd, BLKGETSIZE64, &blksize))
> +		err(EXIT_FAILURE, _("%s: BLKGETSIZE64 ioctl failed"), path);
> +	if (ioctl(fd, BLKSSZGET, &secsize))
> +		err(EXIT_FAILURE, _("%s: BLKSSZGET ioctl failed"), path);
> +
> +	/* check offset alignment to the sector size */
> +	if (offset % secsize)
> +		errx(EXIT_FAILURE, _("%s: offset %" PRIu64 " is not aligned "
> +			 "to sector size %i"), path, offset, secsize);
> +
> +	/* is the range end behind the end of the device ?*/
> +	if (offset > blksize)
> +		errx(EXIT_FAILURE, _("%s: offset is greater than device size"), path);
> +
> +	length = (length / 512) * 512;
> +	if (length < 512)
> +		length = 512;
> +	if (length > MAX_REPORT_LEN)
> +		length = MAX_REPORT_LEN;
> +
> +	if (!is_report_option_valid(ropt))
> +		errx(EXIT_FAILURE, _("%s: invalid report option for device"), path);
> +
> +	if (do_report(fd, offset, length, ropt & 0xFF, verbose))
> +		 err(EXIT_FAILURE, _("%s: BLKREPORTZONE ioctl failed"), path);

ropt & 0xBF I think is the correct mask if the partial bit is included.
But since it does not look like it is, ropt & 0x3F should be used.

> +
> +	close(fd);
> +	return EXIT_SUCCESS;
> +}
> diff --git a/sys-utils/blkreset.8 b/sys-utils/blkreset.8
> new file mode 100644
> index 0000000..7658d58
> --- /dev/null
> +++ b/sys-utils/blkreset.8
> @@ -0,0 +1,63 @@
> +.TH BLKRESET 5 "October 2016" "util-linux" "System Administration"
> +.SH NAME
> +blkreset \- Reset a range of zones
> +.SH SYNOPSIS
> +.B blkreset
> +[options]
> +.RB [ \-z
> +.IR zone ]
> +.RB [ \-c
> +.IR number of zones ]
> +.SH DESCRIPTION
> +.B blkreset
> +is used to reset a zone.  This is useful for
> +zoned devices that support the ZAC or ZBC command set.

Since the operation is potentially on a range of zones, I think that it may
be better to rephrase "used to reset a zone" as "used to reset one or
more zones of a zoned block device", no?

> +Unlike
> +.BR sg_reset_wp (8) ,
> +this command operates from the block layer and can reset a range of zones.
> +.PP
> +By default,
> +.B blkreset
> +will operate on the zone at device logical sector 0. Options may be used to
> +modify this behavior as well as specify the operation to be performed on
> +the zone, as explained below.
> +.PP
> +The
> +.I device
> +argument is the pathname of the block device.
> +.PP
> +.SH OPTIONS
> +The
> +.I zone
> +argument may be followed by the multiplicative suffixes KiB (=1024),
> +MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is
> +optional, e.g., "K" has the same meaning as "KiB") or the suffixes
> +KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB.
> +As zones were originally required to be sized as a power of 2 logical
> +sectors this still is the most common layout. The 0x prefix can also be
> +used to specify the zone to be operated on.
> +.TP
> +.BR \-z , " \-\-zone \fIoffset"
> +The zone to be operated on specified as a sector offset.
> +The provided offset in sector units should match the start of a zone.

"in 512 bytes sector units", to be clear ? (4K drives)

> +The default value is zero.
> +.TP
> +.BR \-c , " \-\-count \fIzones"
> +The number of zones to be reset starting from offset.

Again, for consistency with existing tools (blkdiscard), it may be better
to have a "--length" option here, rather than a count. As with blkreport,
the range of operation would then be defined as offset+length, all in
512B sector units.

> +.TP
> +.BR \-V , " \-\-version"
> +Display version information and exit.
> +.TP
> +.BR \-h , " \-\-help"
> +Display help text and exit.
> +.SH AUTHOR
> +.MT shaun@xxxxxxxxxxxx
> +Shaun Tancheff
> +.ME
> +.SH SEE ALSO
> +.BR sg_reset_wp (8)
> +.SH AVAILABILITY
> +The blkreset command is part of the util-linux package and is available
> +.UR ftp://\:ftp.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
> +Linux Kernel Archive
> +.UE .
> diff --git a/sys-utils/blkreset.c b/sys-utils/blkreset.c
> new file mode 100644
> index 0000000..1a6ed78
> --- /dev/null
> +++ b/sys-utils/blkreset.c
> @@ -0,0 +1,295 @@
> +/*
> + * blkreset.c -- Reset the WP on a range of zones.
> + *
> + * Copyright (C) 2015,2016 Seagate Technology PLC
> + * Written by Shaun Tancheff <shaun.tancheff@xxxxxxxxxxx>
> + *
> + * This program is free software: you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + *
> + * This program uses BLKREPORT ioctl to query zone information about part of
> + * or a whole block device, if the device supports it.
> + * You can specify range (start and length) to be queried.
> + */
> +
> +#include <string.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <fcntl.h>
> +#include <limits.h>
> +#include <getopt.h>
> +#include <time.h>
> +#include <ctype.h>
> +
> +#include <sys/ioctl.h>
> +#include <sys/stat.h>
> +#include <sys/time.h>
> +#include <linux/fs.h>
> +#include <linux/major.h>
> +
> +#ifdef HAVE_LINUX_BLKZONED_H
> +#include <linux/blkzoned.h>
> +#endif
> +
> +#ifdef HAVE_LIBUDEV
> +#include <libudev.h> // udev to find sysfs entries
> +#endif
> +
> +#include "nls.h"
> +#include "strutils.h"
> +#include "c.h"
> +#include "closestream.h"
> +#include "monotonic.h"
> +
> +#ifndef HAVE_LINUX_BLKZONED_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +/**
> + * struct blk_zone_range - BLKRESETZONE ioctl request
> + * @sector: starting sector of the first zone to issue reset write pointer
> + * @nr_sectors: Total number of sectors of 1 or more zones to reset
> + */
> +struct blk_zone_range {
> +	__u64		sector;
> +	__u64		nr_sectors;
> +};
> +
> +#define BLKRESETZONE	_IOW(0x12, 131, struct blk_zone_range)
> +#endif /* HAVE_BLKZONED_H */
> +
> +static unsigned long read_chunk_size(const char * syspath)
> +{
> +	FILE *fp;
> +	char pathbuf[1024];
> +	char fbuf[1024];
> +	unsigned long zoned = 0;
> +
> +	snprintf(pathbuf, sizeof(pathbuf), "%s/queue/chunk_sectors", syspath);
> +	fp = fopen(pathbuf, "r");
> +	if (fp) {
> +		if (fread(fbuf, 1, sizeof(fbuf), fp) > 0) {
> +			zoned = strtoul(fbuf, NULL, 10);
> +		} else {
> +			perror("read failure.");
> +		}

Braces are not needed here?
Also, even though it would likely never happen, checking for an ERANGE
error from strtoul would be nice, no?

> +		fclose(fp);
> +	} else {
> +		perror(pathbuf);
> +	}
> +	return zoned;
> +}
> +
> +#ifdef HAVE_LIBUDEV
> +
> +#define DT_BLOCK 0x62 /* pfm? */
> +
> +/*
> + * Mapping /dev/sdXn -> /sys/block/sdX to read the
> + *    zoned, and chunk_size files
> + *
> + *  fstat() -> S_ISBLK()
> + *    -> st_dev -> 12 bits major, 20 bits minor
> + *
> + *  int major_no = major(stat.st_dev);
> + *  int minor_no = minor(stat.st_dev);
> + *  int block_no = minor_no & ~0x0f
> + *
> + *  dev_t dev_no makedev(major_no, block_no);
> + *
> + *  udev_device_new_from_devnum(udev,
> + *
> + */
> +static unsigned long get_zone_size(const char *dname)
> +{
> +	unsigned long chunk_size = 0;
> +	struct stat st_buf;
> +
> +	if (stat(dname, &st_buf) == 0) {
> +		if (S_ISBLK(st_buf.st_mode)) {
> +			int major_no = major(st_buf.st_rdev);
> +			int minor_no = minor(st_buf.st_rdev);
> +			int block_no = minor_no & ~0x0f;
> +			dev_t dev_no = makedev(major_no, block_no);
> +			struct udev *udev;
> +			struct udev_device *dev;
> +			const char *syspath;
> +
> +			/* Create the udev object */
> +			udev = udev_new();
> +			if (!udev) {
> +				printf("Can't create udev\n");
> +				return 0;
> +			}
> +
> +			dev = udev_device_new_from_devnum(udev, DT_BLOCK, dev_no);
> +			if (dev) {
> +				syspath = udev_device_get_syspath(dev);
> +				chunk_size = read_chunk_size(syspath);
> +
> +				udev_device_unref(dev);
> +			}
> +			udev_unref(udev);
> +		}
> +	}
> +	return chunk_size;
> +}
> +
> +#else
> +#warning "No libudev. Guessing sysfs mounted at /sys"
> +
> +static unsigned long get_zone_size(const char *dname)
> +{
> +	unsigned long zsize = 0;
> +	char *zname;
> +	char *part_no;
> +	char sysfs[1024];
> +	
> +	zname = strrchr(dname, '/');
> +	if (zname) {
> +		if (*zname == '/')
> +			zname++;
> +		part_no = zname;
> +		while (*part_no && !isdigit(*part_no))
> +			part_no++;
> +
> +		snprintf(sysfs, sizeof(sysfs), "/sys/block/%*.*s",
> +			part_no - zname, part_no - zname, zname);
> +		zsize = read_chunk_size(sysfs);
> +	}
> +	return zsize;
> +}
> +#endif
> +
> +static void __attribute__((__noreturn__)) usage(FILE *out)
> +{
> +	fputs(USAGE_HEADER, out);
> +	fprintf(out,
> +	      _(" %s [options] <device>\n"), program_invocation_short_name);
> +
> +	fputs(USAGE_SEPARATOR, out);
> +	fputs(_("Discard the content of sectors on a device.\n"), out);
> +
> +	fputs(USAGE_OPTIONS, out);
> +	fputs(_(" -z, --zone <num>  lba of start of zone to act upon\n"
> +		" -c, --count       number of zones to reset (default = 1)"),
> +		out);
> +	fputs(USAGE_SEPARATOR, out);
> +	fputs(USAGE_HELP, out);
> +	fputs(USAGE_VERSION, out);
> +	fprintf(out, USAGE_MAN_TAIL("blkreset(8)"));
> +	exit(out == stderr ? EXIT_FAILURE : EXIT_SUCCESS);
> +}
> +
> +
> +int main(int argc, char **argv)
> +{
> +	char *path;
> +	int c, fd;
> +	uint64_t blksize;
> +	uint64_t blksectors;
> +	struct stat sb;
> +	struct blk_zone_range za;
> +	uint64_t zsector = 0ul;
> +	uint64_t zlen = 0;
> +	uint64_t zcount = 1;
> +	unsigned long zsize;
> +	int rc = 0;
> +
> +	static const struct option longopts[] = {
> +	    { "help",      0, 0, 'h' },
> +	    { "version",   0, 0, 'V' },
> +	    { "zone",      1, 0, 'z' },
> +	    { "count",     1, 0, 'c' },
> +	    { NULL,        0, 0, 0 }
> +	};
> +
> +	setlocale(LC_ALL, "");
> +	bindtextdomain(PACKAGE, LOCALEDIR);
> +	textdomain(PACKAGE);
> +	atexit(close_stdout);
> +
> +	while ((c = getopt_long(argc, argv, "hVz:c:v", longopts, NULL)) != -1) {
> +		switch(c) {
> +		case 'h':
> +			usage(stdout);
> +			break;
> +		case 'V':
> +			printf(UTIL_LINUX_VERSION);
> +			return EXIT_SUCCESS;
> +		case 'z':
> +			zsector = strtou64_base_or_err(0, optarg,
> +					_("failed to parse zone"));
> +			break;
> +		case 'c':
> +			zcount = strtou64_base_or_err(0, optarg,
> +					_("failed to parse number of zones"));
> +			break;
> +		default:
> +			usage(stderr);
> +			break;
> +		}
> +	}
> +
> +	if (optind == argc)
> +		errx(EXIT_FAILURE, _("no device specified"));
> +
> +	path = argv[optind++];
> +
> +	if (optind != argc) {
> +		warnx(_("unexpected number of arguments"));
> +		usage(stderr);
> +	}
> +
> +	zsize = get_zone_size(path);
> +	if (zsize == 0)
> +		err(EXIT_FAILURE, _("%s: Unable to determine zone size"), path);
> +
> +	fd = open(path, O_WRONLY);
> +	if (fd < 0)
> +		err(EXIT_FAILURE, _("cannot open %s"), path);
> +
> +	if (fstat(fd, &sb) == -1)
> +		err(EXIT_FAILURE, _("stat of %s failed"), path);
> +	if (!S_ISBLK(sb.st_mode))
> +		errx(EXIT_FAILURE, _("%s: not a block device"), path);
> +
> +	if (ioctl(fd, BLKGETSIZE64, &blksize))
> +		err(EXIT_FAILURE, _("%s: BLKGETSIZE64 ioctl failed"), path);
> +
> +	blksectors = blksize << 9;
> +
> +	/* check offset alignment to the chunk size */
> +	if (zsector & (zsize - 1))
> +		errx(EXIT_FAILURE, _("%s: zone %" PRIu64 " is not aligned "
> +			 "to zone size %" PRIu64), path, zsector, zsize);
> +	if (zsector > blksectors)
> +		errx(EXIT_FAILURE, _("%s: zone %" PRIu64 " is too large "
> +			 "for device %" PRIu64), path, zsector, blksectors);
> +
> +	zlen = zcount * zsize;
> +	if (zsector + zlen > blksectors)
> +		zlen = blksectors - zsector;
> +
> +	za.sector = zsector;
> +	za.nr_sectors = zlen;
> +	rc = ioctl(fd, BLKRESETZONE, &za);
> +	if (rc == -1)
> +		err(EXIT_FAILURE, _("%s: BLKRESETZONE ioctl failed"), path);
> +
> +	close(fd);
> +	return EXIT_SUCCESS;
> +}
> 

-- 
Damien Le Moal, Ph.D.
Sr. Manager, System Software Research Group,
Western Digital Corporation
Damien.LeMoal@xxxxxxx
(+81) 0466-98-3593 (ext. 513593)
1 kirihara-cho, Fujisawa,
Kanagawa, 252-0888 Japan
www.wdc.com, www.hgst.com
--
To unsubscribe from this list: send the line "unsubscribe util-linux" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux