The Linux kernel sets the default read ahead to 128KiB, which makes
reads perform poorly. To mitigate this, add readahead as a mount
option that is handled by userspace, with some refactoring included.
Signed-off-by: Thiago Rafael Becker <trbecker@xxxxxxxxx>
---
utils/mount/mount.c | 114 ++++++++++++++++++++++++++++++++++++++++----
utils/mount/nfs.man | 3 ++
2 files changed, 108 insertions(+), 9 deletions(-)
diff --git a/utils/mount/mount.c b/utils/mount/mount.c
index b98f9e00..15076ca8 100644
--- a/utils/mount/mount.c
+++ b/utils/mount/mount.c
@@ -56,9 +56,11 @@ int nomtab;
int verbose;
int sloppy;
int string;
+static int readahead_kb = 0;
#define FOREGROUND (0)
#define BACKGROUND (1)
+#define READAHEAD_VALUE_LEN 24
static struct option longopts[] = {
{ "fake", 0, 0, 'f' },
@@ -292,6 +294,16 @@ static int add_mtab(char *spec, char
*mount_point, char *fstype,
return result;
}
+static void append_extra_opt(const char *opt, char *extra_opts,
size_t len) {
+ len -= strlen(extra_opts);
+
+ if (*extra_opts && --len > 0)
+ strcat(extra_opts, ",");
+
+ if ((len -= strlen(opt)) > 0)
+ strcat(extra_opts, opt);
+}
+
static void parse_opt(const char *opt, int *mask, char *extra_opts,
size_t len)
{
const struct opt_map *om;
@@ -306,13 +318,37 @@ static void parse_opt(const char *opt, int
*mask, char *extra_opts, size_t len)
}
}
- len -= strlen(extra_opts);
+ append_extra_opt(opt, extra_opts, len);
+}
- if (*extra_opts && --len > 0)
- strcat(extra_opts, ",");
+static void parse_opt_val(const char *opt, const char *val, char
*extra_opts, size_t len)
+{
+ size_t ov_len;
+ char *opt_val;
- if ((len -= strlen(opt)) > 0)
- strcat(extra_opts, opt);
+ /* readahead is a special value that is handled by userspace
*/
+ if (!strcmp(opt, "readahead")) {
+ char *endptr = NULL;
+ const char *expected_endptr = val + strlen(val);
+
+ readahead_kb = strtol(val, &endptr, 10);
+
+ if (endptr != expected_endptr) {
+ nfs_error(_("%s: invalid readahead value
%s"),
+ progname, val);
+ readahead_kb = 0;
+ }
+ return;
+ }
+
+ /* Send the option to the kernel. */
+ ov_len = strlen(opt) + strlen(val) + 3;
+ opt_val = malloc(sizeof(char) * ov_len);
+ snprintf(opt_val, ov_len, ",%s=%s", opt, val);
+
+ append_extra_opt(opt_val, extra_opts, len);
+
+ free(opt_val);
}
/*
@@ -325,7 +361,7 @@ static void parse_opts(const char *options, int
*flags, char **extra_opts)
{
if (options != NULL) {
char *opts = xstrdup(options);
- char *opt, *p;
+ char *opt, *p, *val = NULL;
size_t len = strlen(opts) + 1; /* include room for a
null */
int open_quote = 0;
@@ -341,17 +377,75 @@ static void parse_opts(const char *options, int
*flags, char **extra_opts)
continue; /* still in a quoted
block */
if (*p == ',')
*p = '\0'; /* terminate the
option item */
+ if (*p == '=') { /* key=val option */
+ if (!val) {
+ *p = '\0'; /* terminate
key */
+ val = ++p; /* start the
value */
+ }
+ }
/* end of option item or last item */
if (*p == '\0' || *(p + 1) == '\0') {
- parse_opt(opt, flags, *extra_opts,
len);
- opt = NULL;
+ if (val) {
+ parse_opt_val(opt, val,
*extra_opts, len);
+ } else
+ parse_opt(opt, flags,
*extra_opts, len);
+ opt = val = NULL;
}
}
free(opts);
}
}
+/*
+ * Set the read ahead value for the mount point. On failure, uses
the default kernel
+ * read ahead value (for new mounts) or the current value (for
remounts).
+ */
+static void set_readahead(const char *mount_point) {
+ int error;
+ struct statx mp_stat;
+ char *mount_point_readahead_file, value[READAHEAD_VALUE_LEN];
+ size_t len;
+ int fp;
+
+ /* If readahead_kb is unset, or set to 0, do not change the
value */
+ if (!readahead_kb)
+ return;
+
+ if ((error = statx(0, mount_point, 0, 0, &mp_stat)) != 0) {
+ goto out_error;
+ }
+
+ if (!(mount_point_readahead_file = malloc(PATH_MAX))) {
+ error = -ENOMEM;
+ goto out_error;
+ }
+
+ snprintf(mount_point_readahead_file, PATH_MAX,
"/sys/class/bdi/%d:%d/read_ahead_kb",
+ mp_stat.stx_dev_major,
mp_stat.stx_dev_minor);
+
+ len = snprintf(value, READAHEAD_VALUE_LEN, "%d",
readahead_kb);
+
+ if ((fp = open(mount_point_readahead_file, O_WRONLY)) < 0) {
+ error = errno;
+ goto out_free;
+ }
+
+ if ((error = write(fp, value, len)) < 0)
+ goto out_close;
+
+ close(fp);
+ return;
+
+out_close:
+ close(fp);
+out_free:
+ free(mount_point_readahead_file);
+out_error:
+ nfs_error(_("%s: unable to set readahead value, using default
kernel value (error = %d)\n"),
+ progname, error);
+}
+
static int try_mount(char *spec, char *mount_point, int flags,
char *fs_type, char **extra_opts, char
*mount_opts,
int fake, int bg)
@@ -373,8 +467,10 @@ static int try_mount(char *spec, char
*mount_point, int flags,
if (ret)
return ret;
- if (!fake)
+ if (!fake) {
+ set_readahead(mount_point);
print_one(spec, mount_point, fs_type, mount_opts);
+ }
return add_mtab(spec, mount_point, fs_type, flags,
*extra_opts);
}
diff --git a/utils/mount/nfs.man b/utils/mount/nfs.man
index f1b76936..9832a377 100644
--- a/utils/mount/nfs.man
+++ b/utils/mount/nfs.man
@@ -561,6 +561,9 @@ The
.B sloppy
option is an alternative to specifying
.BR mount.nfs " -s " option.
+.TP 1.5i
+.B readahead=n
+Set the read ahead of the mount to n KiB. This is handled entirely
in userspace and will not appear in mountinfo. If unset or set to 0,
no value is set, leaving the current value (for a remount)
or the kernel default (for a new mount).
.SS "Options for NFS versions 2 and 3 only"
Use these options, along with the options in the above subsection,