I had a request from someone who cared about mkfs speed(!) over a slower
network block device to look into using faster zeroing methods, particularly
for the log, during mkfs.xfs.

e2fsprogs already does this, thanks to some guy named Darrick:

/*
 * If we know about ZERO_RANGE, try that before we try PUNCH_HOLE because
 * ZERO_RANGE doesn't unmap preallocated blocks. We prefer fallocate because
 * it always invalidates page cache, and libext2fs requires that reads after
 * ZERO_RANGE return zeroes.
 */
static int __unix_zeroout(int fd, off_t offset, off_t len)
{
	int ret = -1;

#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_ZERO_RANGE)
	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, offset, len);
	if (ret == 0)
		return 0;
#endif
#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
	ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			offset, len);
	if (ret == 0)
		return 0;
#endif
	errno = EOPNOTSUPP;
	return ret;
}

and nobody has exploded so far, AFAIK. :)

So, floating this idea for xfsprogs. I'm a little scared of the second
#ifdef block above, but if that's really ok/consistent/safe we could add
it too.

The patch moves some defines around too; I could split that up and resend
if this isn't laughed out of the room.

Thanks,
-Eric

=====

libxfs: use FALLOC_FL_ZERO_RANGE in libxfs_device_zero

I had a request from someone who cared about mkfs speed(!) over a slower
network block device to look into using faster zeroing methods, particularly
for the log, during mkfs.

Using FALLOC_FL_ZERO_RANGE is faster in this case than writing a bunch of
zeros across a wire.

Signed-off-by: Eric Sandeen <sandeen@xxxxxxxxxx>
---

diff --git a/include/linux.h b/include/linux.h
index 8f3c32b0..425badb5 100644
--- a/include/linux.h
+++ b/include/linux.h
@@ -113,6 +113,26 @@ static __inline__ void platform_uuid_copy(uuid_t *dst, uuid_t *src)
 	uuid_copy(*dst, *src);
 }
 
+#ifndef FALLOC_FL_PUNCH_HOLE
+#define FALLOC_FL_PUNCH_HOLE 0x02
+#endif
+
+#ifndef FALLOC_FL_COLLAPSE_RANGE
+#define FALLOC_FL_COLLAPSE_RANGE 0x08
+#endif
+
+#ifndef FALLOC_FL_ZERO_RANGE
+#define FALLOC_FL_ZERO_RANGE 0x10
+#endif
+
+#ifndef FALLOC_FL_INSERT_RANGE
+#define FALLOC_FL_INSERT_RANGE 0x20
+#endif
+
+#ifndef FALLOC_FL_UNSHARE_RANGE
+#define FALLOC_FL_UNSHARE_RANGE 0x40
+#endif
+
 #ifndef BLKDISCARD
 #define BLKDISCARD _IO(0x12,119)
 #endif
diff --git a/io/prealloc.c b/io/prealloc.c
index 6d452354..0b4efc45 100644
--- a/io/prealloc.c
+++ b/io/prealloc.c
@@ -12,26 +12,6 @@
 #include "init.h"
 #include "io.h"
 
-#ifndef FALLOC_FL_PUNCH_HOLE
-#define FALLOC_FL_PUNCH_HOLE 0x02
-#endif
-
-#ifndef FALLOC_FL_COLLAPSE_RANGE
-#define FALLOC_FL_COLLAPSE_RANGE 0x08
-#endif
-
-#ifndef FALLOC_FL_ZERO_RANGE
-#define FALLOC_FL_ZERO_RANGE 0x10
-#endif
-
-#ifndef FALLOC_FL_INSERT_RANGE
-#define FALLOC_FL_INSERT_RANGE 0x20
-#endif
-
-#ifndef FALLOC_FL_UNSHARE_RANGE
-#define FALLOC_FL_UNSHARE_RANGE 0x40
-#endif
-
 static cmdinfo_t allocsp_cmd;
 static cmdinfo_t freesp_cmd;
 static cmdinfo_t resvsp_cmd;
diff --git a/libxfs/Makefile b/libxfs/Makefile
index fbcc963a..b4e8864b 100644
--- a/libxfs/Makefile
+++ b/libxfs/Makefile
@@ -105,6 +105,10 @@ CFILES = cache.c \
 #
 #LCFLAGS +=
 
+ifeq ($(HAVE_FALLOCATE),yes)
+LCFLAGS += -DHAVE_FALLOCATE
+endif
+
 FCFLAGS = -I.
 
 LTLIBS = $(LIBPTHREAD) $(LIBRT)
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index 0d9d7202..94f63bbf 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -4,6 +4,9 @@
  * All Rights Reserved.
  */
 
+#if defined(HAVE_FALLOCATE)
+#include <linux/falloc.h>
+#endif
 
 #include "libxfs_priv.h"
 #include "init.h"
@@ -60,9 +63,21 @@ int
 libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
 {
 	xfs_off_t start_offset, end_offset, offset;
-	ssize_t zsize, bytes;
+	ssize_t zsize, bytes, len_bytes;
 	char *z;
-	int fd;
+	int ret, fd;
+
+	fd = libxfs_device_to_fd(btp->dev);
+	start_offset = LIBXFS_BBTOOFF64(start);
+	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
+
+#if defined(HAVE_FALLOCATE)
+	/* try to use special zeroing methods, fall back to writes if needed */
+	len_bytes = LIBXFS_BBTOOFF64(len);
+	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, start_offset, len_bytes);
+	if (ret == 0)
+		return 0;
+#endif
 
 	zsize = min(BDSTRAT_SIZE, BBTOB(len));
 	if ((z = memalign(libxfs_device_alignment(), zsize)) == NULL) {
@@ -73,9 +88,6 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
 	}
 	memset(z, 0, zsize);
 
-	fd = libxfs_device_to_fd(btp->dev);
-	start_offset = LIBXFS_BBTOOFF64(start);
-
 	if ((lseek(fd, start_offset, SEEK_SET)) < 0) {
 		fprintf(stderr, _("%s: %s seek to offset %llu failed: %s\n"),
 			progname, __FUNCTION__,
@@ -83,7 +95,6 @@ libxfs_device_zero(struct xfs_buftarg *btp, xfs_daddr_t start, uint len)
 		exit(1);
 	}
 
-	end_offset = LIBXFS_BBTOOFF64(start + len) - start_offset;
 	for (offset = 0; offset < end_offset; ) {
 		bytes = min((ssize_t)(end_offset - offset), zsize);
 		if ((bytes = write(fd, z, bytes)) < 0) {