[PATCH v2] Add ability to invoke fallocate() FALLOC_FL_KEEP_SIZE.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Linux offers fallocate() and the FALLOC_FL_KEEP_SIZE option as
an alternative to posix_fallocate(). When FALLOC_FL_KEEP_SIZE is
specified for an falloc request going beyond the end of the file,
the requested blocks get preallocated without changing the apparent
size of the file. This is a commonly recommended use of fallocate()
for workloads performing append writes.

This patch modifies the fallocate option from a boolean option
to a string option accepting none/posix/keep/0/1. 'keep' is only
made available on systems where FALLOC_FL_KEEP_SIZE is available
(i.e., Linux at this time). If specified, fallocate() is used
with FALLOC_FL_KEEP_SIZE set. 'none' disables pre-allocation while
'posix' uses posix_fallocate(). The default behavior remains unchanged,
i.e., invoking posix_fallocate. The settings '0'/'1' are there to
provide backward compatibility for users who had explicitly set the
boolean option.
---
Hi Jens, all,

This is version 2 of the patch submitted earlier this week. Compared
to the initial version I removed the "fallocate_keep_size" option and,
as suggested, changed "fallocate" to be a string option. I elected
to provide both descriptive and numeric compatibility values for the
old boolean values. Let me know if you disagree with that choice.

I removed the weak version of fallocate(). It is not required at this
point, and not having it removes the problem of figuring out what
semantics it should have if invoked.

As always, feedback is appreciated.

 Regards - Eric
---
 HOWTO         |   18 +++++++++++++-----
 file.h        |   10 ++++++++++
 filesetup.c   |   44 ++++++++++++++++++++++++++++++++++++--------
 fio.1         |   30 ++++++++++++++++++++++++++----
 fio.h         |    2 +-
 options.c     |   35 ++++++++++++++++++++++++++++++-----
 os/os-linux.h |    1 +
 7 files changed, 117 insertions(+), 23 deletions(-)

diff --git a/HOWTO b/HOWTO
index 69b8cc6..ee899b8 100644
--- a/HOWTO
+++ b/HOWTO
@@ -354,11 +354,19 @@ use_os_rand=bool Fio can either use the random generator supplied by the OS
 		internal generator, which is often of better quality and
 		faster.
 
-fallocate=bool	By default, fio will use fallocate() to advise the system
-		of the size of the file we are going to write. This can be
-		turned off with fallocate=0. May not be available on all
-		supported platforms.  If using ZFS on Solaris this must be
-		set to 0 because ZFS doesn't support it.
+fallocate=str	Whether pre-allocation is performed when laying down files.
+		Accepted values are:
+
+			none		Do not pre-allocate space
+			posix		Pre-allocate via posix_fallocate()
+			keep		Pre-allocate via fallocate() with
+					FALLOC_FL_KEEP_SIZE set
+			0		Backward-compatible alias for 'none'
+			1		Backward-compatible alias for 'posix'
+
+		May not be available on all supported platforms. 'keep' is only
+		available on Linux. If using ZFS on Solaris this must be set to
+		'none' because ZFS doesn't support it. Default: 'posix'.
 
 fadvise_hint=bool By default, fio will use fadvise() to advise the kernel
 		on what IO patterns it is likely to issue. Sometimes you
diff --git a/file.h b/file.h
index 04c0d45..b3ff051 100644
--- a/file.h
+++ b/file.h
@@ -43,6 +43,16 @@ enum {
 };
 
 /*
+ * No pre-allocation when laying down files, or call posix_fallocate(), or
+ * call fallocate() with FALLOC_FL_KEEP_SIZE set.
+ */
+enum fio_fallocate_mode {
+	FIO_FALLOCATE_NONE	= 1,
+	FIO_FALLOCATE_POSIX	= 2,
+	FIO_FALLOCATE_KEEP_SIZE	= 3,
+};
+
+/*
  * Each thread_data structure has a number of files associated with it,
  * this structure holds state information for a single file.
  */
diff --git a/filesetup.c b/filesetup.c
index 799202f..6d8aa7a 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -13,6 +13,10 @@
 #include "filehash.h"
 #include "os/os.h"
 
+#ifdef FIO_HAVE_LINUX_FALLOCATE
+#include <linux/falloc.h>
+#endif
+
 static int root_warn;
 
 static inline void clear_error(struct thread_data *td)
@@ -67,17 +71,41 @@ static int extend_file(struct thread_data *td, struct fio_file *f)
 	}
 
 #ifdef FIO_HAVE_FALLOCATE
-	if (td->o.fallocate && !td->o.fill_device) {
-		dprint(FD_FILE, "fallocate file %s size %llu\n", f->file_name,
-							f->real_file_size);
-
-		r = posix_fallocate(f->fd, 0, f->real_file_size);
-		if (r > 0) {
-			log_err("fio: posix_fallocate fails: %s\n",
-					strerror(r));
+	if (!td->o.fill_device) {
+		switch (td->o.fallocate_mode) {
+		case FIO_FALLOCATE_NONE:
+			break;
+		case FIO_FALLOCATE_POSIX:
+			dprint(FD_FILE, "posix_fallocate file %s size %llu\n",
+				 f->file_name, f->real_file_size);
+
+			r = posix_fallocate(f->fd, 0, f->real_file_size);
+			if (r > 0) {
+				log_err("fio: posix_fallocate fails: %s\n",
+						strerror(r));
+			}
+			break;
+#ifdef FIO_HAVE_LINUX_FALLOCATE
+		case FIO_FALLOCATE_KEEP_SIZE:
+			dprint(FD_FILE,
+				"fallocate(FALLOC_FL_KEEP_SIZE) "
+				"file %s size %llu\n",
+				f->file_name, f->real_file_size);
+
+			r = fallocate(f->fd, FALLOC_FL_KEEP_SIZE, 0,
+					f->real_file_size);
+			if (r != 0) {
+				td_verror(td, errno, "fallocate");
+			}
+			break;
+#endif /* FIO_HAVE_LINUX_FALLOCATE */
+		default:
+			log_err("fio: unknown fallocate mode: %d\n",
+				td->o.fallocate_mode);
+			assert(0);
 		}
 	}
-#endif
+#endif /* FIO_HAVE_FALLOCATE */
 
 	if (!new_layout)
 		goto done;
diff --git a/fio.1 b/fio.1
index 0ced604..ad5040b 100644
--- a/fio.1
+++ b/fio.1
@@ -220,10 +220,32 @@ offsets, or it can use it's own internal generator (based on Tausworthe).
 Default is to use the internal generator, which is often of better quality and
 faster. Default: false.
 .TP
-.BI fallocate \fR=\fPbool
-By default, fio will use fallocate() to advise the system of the size of the
-file we are going to write. This can be turned off with fallocate=0. May not
-be available on all supported platforms.
+.BI fallocate \fR=\fPstr
+Whether pre-allocation is performed when laying down files. Accepted values
+are:
+.RS
+.RS
+.TP
+.B none
+Do not pre-allocate space.
+.TP
+.B posix
+Pre-allocate via posix_fallocate().
+.TP
+.B keep
+Pre-allocate via fallocate() with FALLOC_FL_KEEP_SIZE set.
+.TP
+.B 0
+Backward-compatible alias for 'none'.
+.TP
+.B 1
+Backward-compatible alias for 'posix'.
+.RE
+.P
+May not be available on all supported platforms. 'keep' is only
+available on Linux. If using ZFS on Solaris this must be set to 'none'
+because ZFS doesn't support it. Default: 'posix'.
+.RE
 .TP
 .BI fadvise_hint \fR=\fPbool
 Disable use of \fIposix_fadvise\fR\|(2) to advise the kernel what I/O patterns
diff --git a/fio.h b/fio.h
index 6ad186f..16866dd 100644
--- a/fio.h
+++ b/fio.h
@@ -248,7 +248,7 @@ struct thread_options {
 	unsigned int file_service_type;
 	unsigned int group_reporting;
 	unsigned int fadvise_hint;
-	unsigned int fallocate;
+	enum fio_fallocate_mode fallocate_mode;
 	unsigned int zero_buffers;
 	unsigned int refill_buffers;
 	unsigned int time_based;
diff --git a/options.c b/options.c
index a9b0534..bd7dc99 100644
--- a/options.c
+++ b/options.c
@@ -1178,12 +1178,37 @@ static struct fio_option options[FIO_MAX_OPTS] = {
 #ifdef FIO_HAVE_FALLOCATE
 	{
 		.name	= "fallocate",
-		.type	= FIO_OPT_BOOL,
-		.off1	= td_var_offset(fallocate),
-		.help	= "Use fallocate() when laying out files",
-		.def	= "1",
-	},
+		.type	= FIO_OPT_STR,
+		.off1	= td_var_offset(fallocate_mode),
+		.help	= "Whether pre-allocation is performed when laying out files",
+		.def	= "posix",
+		.posval	= {
+			  { .ival = "none",
+			    .oval = FIO_FALLOCATE_NONE,
+			    .help = "Do not pre-allocate space",
+			  },
+			  { .ival = "posix",
+			    .oval = FIO_FALLOCATE_POSIX,
+			    .help = "Use posix_fallocate()",
+			  },
+#ifdef FIO_HAVE_LINUX_FALLOCATE
+			  { .ival = "keep",
+			    .oval = FIO_FALLOCATE_KEEP_SIZE,
+			    .help = "Use fallocate(..., FALLOC_FL_KEEP_SIZE, ...)",
+			  },
 #endif
+			  /* Compatibility with former boolean values */
+			  { .ival = "0",
+			    .oval = FIO_FALLOCATE_NONE,
+			    .help = "Alias for 'none'",
+			  },
+			  { .ival = "1",
+			    .oval = FIO_FALLOCATE_POSIX,
+			    .help = "Alias for 'posix'",
+			  },
+		},
+	},
+#endif	/* FIO_HAVE_FALLOCATE */
 	{
 		.name	= "fadvise_hint",
 		.type	= FIO_OPT_BOOL,
diff --git a/os/os-linux.h b/os/os-linux.h
index 70c993b..024ef89 100644
--- a/os/os-linux.h
+++ b/os/os-linux.h
@@ -32,6 +32,7 @@
 #define FIO_HAVE_BLKTRACE
 #define FIO_HAVE_STRSEP
 #define FIO_HAVE_FALLOCATE
+#define FIO_HAVE_LINUX_FALLOCATE
 #define FIO_HAVE_POSIXAIO_FSYNC
 #define FIO_HAVE_PSHARED_MUTEX
 #define FIO_HAVE_CL_SIZE
-- 
1.7.3.1

--
To unsubscribe from this list: send the line "unsubscribe fio" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux