[PATCH] Let user choose larger buffers for IO reading

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Eduard Bloch <blade@xxxxxxxxxx>

Simultaneous reading of multiple files through a small one-page buffer
is slow with classic HDDs. Let the user improve it by chosing buffers of
several mebibytes if needed.

Signed-off-by: Eduard Bloch <blade@xxxxxxxxxx>
---
 misc-utils/hardlink.1.adoc |  6 +++++
 misc-utils/hardlink.c      | 50 ++++++++++++++++++++++++++++++++------
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/misc-utils/hardlink.1.adoc b/misc-utils/hardlink.1.adoc
index 1fab77d71..107cd6997 100644
--- a/misc-utils/hardlink.1.adoc
+++ b/misc-utils/hardlink.1.adoc
@@ -71,6 +71,12 @@ A regular expression to include files. If the option *--exclude* has been given,
 *-s*, *--minimum-size* _size_::
 The minimum size to consider. By default this is 1, so empty files will not be linked. The _size_ argument may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB").
 
+*-S*, *--buffer-size* _size_::
+The size of read buffer used when comparing file contents (default: 8KiB). This
+costs some additional memory but potentially reduces the amount of seek
+operations and therefore improve performance, especially with mechanic disk
+drives. Optional factor suffixes are supported, like with the *-s* option. This is mostly efficient with other filters (i.e. with *-f* or *-X*) and can be less efficient with *-top* options.
+
 == ARGUMENTS
 
 *hardlink* takes one or more directories which will be searched for files to be linked.
diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c
index 29c8509e3..aeb486ce5 100644
--- a/misc-utils/hardlink.c
+++ b/misc-utils/hardlink.c
@@ -91,6 +91,10 @@ enum log_level {
 	JLOG_VERBOSE2
 };
 
+#ifndef DEF_SCAN_BUF_SIZE
+#define DEF_SCAN_BUF_SIZE 8192
+#endif
+
 /**
  * struct statistic - Statistics about the file
  * @started: Whether we are post command-line processing
@@ -144,6 +148,7 @@ static struct options {
 	unsigned int keep_oldest:1;
 	unsigned int dry_run:1;
 	uintmax_t min_size;
+	size_t buf_size;
 } opts = {
 	/* default setting */
 	.respect_mode = TRUE,
@@ -151,7 +156,8 @@ static struct options {
 	.respect_time = TRUE,
 	.respect_xattrs = FALSE,
 	.keep_oldest = FALSE,
-	.min_size = 1
+	.min_size = 1,
+  .buf_size = DEF_SCAN_BUF_SIZE
 };
 
 /*
@@ -163,6 +169,12 @@ static struct options {
 static void *files;
 static void *files_by_ino;
 
+/*
+ * Temporary buffers for reading file contents
+ */
+char *buf_a = NULL;
+char *buf_b = NULL;
+
 /*
  * last_signal
  *
@@ -523,8 +535,6 @@ static int file_contents_equal(const struct file *a, const struct file *b)
 {
 	FILE *fa = NULL;
 	FILE *fb = NULL;
-	char buf_a[8192];
-	char buf_b[8192];
 	int cmp = 0;		/* zero => equal */
 	off_t off = 0;		/* current offset */
 
@@ -550,11 +560,11 @@ static int file_contents_equal(const struct file *a, const struct file *b)
 		size_t ca;
 		size_t cb;
 
-		ca = fread(buf_a, 1, sizeof(buf_a), fa);
+		ca = fread(buf_a, 1, opts.buf_size, fa);
 		if (ca < sizeof(buf_a) && ferror(fa))
 			goto err;
 
-		cb = fread(buf_b, 1, sizeof(buf_b), fb);
+		cb = fread(buf_b, 1, opts.buf_size, fb);
 		if (cb < sizeof(buf_b) && ferror(fb))
 			goto err;
 
@@ -889,7 +899,8 @@ static void __attribute__((__noreturn__)) usage(void)
 		"                              (lower precedence than minimize/maximize)\n"), out);
 	fputs(_(" -x, --exclude <regex>      regular expression to exclude files\n"), out);
 	fputs(_(" -i, --include <regex>      regular expression to include files/dirs\n"), out);
-	fputs(_(" -s, --minimum-size <size>  minimum size for files.\n"), out);
+	fputs(_(" -s, --minimum-size=<size>  minimum size for files.\n"), out);
+        fputs(_(" -S, --buffer-size=<size>   Buffer size for file reading (speedup, using more RAM)\n"), out);
 	fputs(_(" -c, --content              compare only file contents, same as -pot\n"), out);
 
 	fputs(USAGE_SEPARATOR, out);
@@ -923,6 +934,20 @@ static void register_regex(struct regex_link **pregs, const char *regex)
 	link->next = *pregs; *pregs = link;
 }
 
+static int init_buffers(size_t bufsize)
+{
+	buf_a = malloc(bufsize);
+	buf_b = malloc(bufsize);
+	// report non-zero if any failed
+	return (NULL == buf_a || NULL == buf_b);
+}
+
+static void deinit_buffers(void)
+{
+	free(buf_a);
+	free(buf_b);
+}
+
 /**
  * parse_options - Parse the command line options
  * @argc: Number of options
@@ -930,7 +955,7 @@ static void register_regex(struct regex_link **pregs, const char *regex)
  */
 static int parse_options(int argc, char *argv[])
 {
-	static const char optstr[] = "VhvnfpotXcmMOx:i:s:q";
+	static const char optstr[] = "VhvnfpotXcmMOx:i:s:S:q";
 	static const struct option long_options[] = {
 		{"version", no_argument, NULL, 'V'},
 		{"help", no_argument, NULL, 'h'},
@@ -947,6 +972,7 @@ static int parse_options(int argc, char *argv[])
 		{"exclude", required_argument, NULL, 'x'},
 		{"include", required_argument, NULL, 'i'},
 		{"minimum-size", required_argument, NULL, 's'},
+		{"buffer-size", required_argument, NULL, 'S'},
 		{"content", no_argument, NULL, 'c'},
 		{"quiet", no_argument, NULL, 'q'},
 		{NULL, 0, NULL, 0}
@@ -1012,6 +1038,9 @@ static int parse_options(int argc, char *argv[])
 		case 's':
 			opts.min_size = strtosize_or_err(optarg, _("failed to parse size"));
 			break;
+		case 'S':
+			opts.buf_size = strtosize_or_err(optarg, _("failed to parse size"));
+			break;
 		case 'h':
 			usage();
 		case 'V':
@@ -1068,6 +1097,10 @@ int main(int argc, char *argv[])
 		errx(EXIT_FAILURE, _("no directory or file specified"));
 
 	gettime_monotonic(&stats.start_time);
+
+	if (init_buffers(opts.buf_size) != 0)
+		err(EXIT_FAILURE, _("failed to allocate buffer memory"));
+
 	stats.started = TRUE;
 
 	for (; optind < argc; optind++) {
@@ -1076,5 +1109,8 @@ int main(int argc, char *argv[])
 	}
 
 	twalk(files, visitor);
+
+	deinit_buffers();
+
 	return 0;
 }
-- 
2.33.0




[Index of Archives]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux