[PATCH 2/4] hardlink: add --list-duplicates and --zero

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



--list-duplicates codifies what everyone keeps re-implementing with
find -exec b2sum or src:perforate's finddup or whatever.

hardlink already knows this, so make the data available thusly,
in a format well-suited for pipeline processing
(fixed-width key for uniq/cut/&c.,
 tab delimiter for cut &c.,
 -z for correct filename handling).
---
 misc-utils/hardlink.1.adoc |  6 ++++++
 misc-utils/hardlink.c      | 21 ++++++++++++++++++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/misc-utils/hardlink.1.adoc b/misc-utils/hardlink.1.adoc
index b6f07ba70..b85ba09d4 100644
--- a/misc-utils/hardlink.1.adoc
+++ b/misc-utils/hardlink.1.adoc
@@ -76,6 +76,12 @@
 *-n*, *--dry-run*::
 Do not act, just print what would happen.
 
+*-l*, *--list-duplicates*::
+Don't link anything, but list the absolute path of every duplicate file, one per line, each preceded by a zero-padded 16-digit discriminator unique to its group of duplicates, and a tab.
+
+*-z*, *--zero*::
+Terminate each line with a NUL instead of a newline in *-l* mode.
+
 *-o*, *--ignore-owner*::
 Link and compare files even if their owner information (user and group) differs. Results may be unpredictable.
 
diff --git a/misc-utils/hardlink.c b/misc-utils/hardlink.c
index e4b44c6c6..643df7cf2 100644
--- a/misc-utils/hardlink.c
+++ b/misc-utils/hardlink.c
@@ -189,6 +189,8 @@ static struct options {
 	unsigned int keep_oldest:1;
 	unsigned int prio_trees:1;
 	unsigned int dry_run:1;
+	unsigned int list_duplicates:1;
+	char line_delim;
 	uintmax_t min_size;
 	uintmax_t max_size;
 	size_t io_size;
@@ -206,6 +208,7 @@ static struct options {
 	.respect_xattrs = FALSE,
 	.keep_oldest = FALSE,
 	.prio_trees = FALSE,
+	.line_delim = '\n',
 	.min_size = 1,
 	.cache_size = 10*1024*1024
 };
@@ -1152,6 +1155,10 @@ static void visitor(const void *nodep, const VISIT which, const int depth)
 
 	/* final cleanup */
 	for (other = begin; other != NULL; other = other->next) {
+		if (opts.list_duplicates && other->st.st_nlink > 1)
+			for (struct link *l = other->links; l; l = l->next)
+				printf("%016zu\t%s%c", (size_t)other, l->path, opts.line_delim);
+
 		if (ul_fileeq_data_associated(&other->data))
 			ul_fileeq_data_deinit(&other->data);
 	}
@@ -1182,6 +1189,8 @@ static void __attribute__((__noreturn__)) usage(void)
 	        "                              lowest hardlink count\n"), out);
 	fputs(_(" -M, --minimize             reverse the meaning of -m\n"), out);
 	fputs(_(" -n, --dry-run              don't actually link anything\n"), out);
+	fputs(_(" -l, --list-duplicates      print every group of duplicate files\n"), out);
+	fputs(_(" -z, --zero                 delimit output with NULs instead of newlines\n"), out);
 	fputs(_(" -o, --ignore-owner         ignore owner changes\n"), out);
 	fputs(_(" -F, --prioritize-trees     files found in the earliest specified top-level\n"
                 "                              directory have higher priority (lower precedence\n"
@@ -1223,7 +1232,7 @@ static int parse_options(int argc, char *argv[])
 		OPT_REFLINK = CHAR_MAX + 1,
 		OPT_SKIP_RELINKS
 	};
-	static const char optstr[] = "VhvndfpotXcmMFOx:y:i:r:S:s:b:q";
+	static const char optstr[] = "VhvndfpotXcmMFOlzx:y:i:r:S:s:b:q";
 	static const struct option long_options[] = {
 		{"version", no_argument, NULL, 'V'},
 		{"help", no_argument, NULL, 'h'},
@@ -1252,6 +1261,8 @@ static int parse_options(int argc, char *argv[])
 		{"content", no_argument, NULL, 'c'},
 		{"quiet", no_argument, NULL, 'q'},
 		{"cache-size", required_argument, NULL, 'r'},
+		{"list-duplicates", no_argument, NULL, 'l'},
+		{"zero", no_argument, NULL, 'z'},
 		{NULL, 0, NULL, 0}
 	};
 	static const ul_excl_t excl[] = {
@@ -1329,6 +1340,14 @@ static int parse_options(int argc, char *argv[])
 		case 'b':
 			opts.io_size = strtosize_or_err(optarg, _("failed to parse I/O size"));
 			break;
+		case 'l':
+			opts.list_duplicates = TRUE;
+			opts.dry_run = TRUE;
+			quiet = TRUE;
+			break;
+		case 'z':
+			opts.line_delim = '\0';
+			break;
 #ifdef USE_REFLINK
 		case OPT_REFLINK:
 			reflink_mode = REFLINK_AUTO;
-- 
2.39.2

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux