Hi Arvind, This looks great, but I have some suggestion to improve. One thing is that should be a patch series with a cover letter and not a single patch, because this way it is much harder to review and bisect problems later on. On Thu, Apr 30, 2020 at 8:34 PM Arvind Raghavan <raghavan.arvind@xxxxxxxxx> wrote: > > Requires patch: Fix duplicate CLI arguments in fssum This lines is irrelevant in git history so should be in notes after --- line > > Adds flag '-R' for non-recursive mode. Useful for checking a directory > after an fsync, as POSIX standards only guarantee that the immediate > files in the directory are synced. > Full stop. that should be a patch on its own. > Added support for files as input, instead of just directories. fssum > computes the checksum of the single file. > That one as well. > Added a flag for including file size in metadata. Previously it was > always included. It is useful to ignore file sizes when testing an > fsync of a directory, as the data of its children is not guaranteed to > be persisted. And that one. > > Signed-off-by: Arvind Raghavan <raghavan.arvind@xxxxxxxxx> > Signed-off-by: Jayashree Mohan <jaya@xxxxxxxxxxxxx> > Signed-off-by: Vijay Chidambaram <vijay@xxxxxxxxxxxxx> > --- > src/fssum.c | 162 +++++++++++++++++++++++++++++++++++++++------------- > 1 file changed, 122 insertions(+), 40 deletions(-) > > diff --git a/src/fssum.c b/src/fssum.c > index 3d97a70b..9b32e8cb 100644 > --- a/src/fssum.c > +++ b/src/fssum.c > @@ -29,6 +29,7 @@ > #include <inttypes.h> > #include <assert.h> > #include <endian.h> > +#include <libgen.h> > > #define CS_SIZE 16 > #define CHUNKS 128 > @@ -56,6 +57,7 @@ typedef int (*sum_file_data_t)(int fd, sum_t *dst); > > int gen_manifest = 0; > int in_manifest = 0; > +int recursive = 1; > char *checksum = NULL; > struct excludes *excludes; > int n_excludes = 0; > @@ -74,13 +76,14 @@ enum _flags { > FLAG_XATTRS, > FLAG_OPEN_ERROR, > FLAG_STRUCTURE, > + FLAG_SIZE, > NUM_FLAGS > }; > > -const char flchar[] = "ugoamcdtes"; > +const char flchar[] = "ugoamcdtesz"; > char line[65536]; > > -int flags[NUM_FLAGS] = {1, 1, 1, 1, 1, 0, 1, 1, 0, 0}; > +int flags[NUM_FLAGS] = {1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1}; > > char * > getln(char *buf, int size, FILE *fp) > @@ -146,12 +149,14 @@ usage(void) > fprintf(stderr, " t : include xattrs\n"); > fprintf(stderr, " e : include open errors (aborts otherwise)\n"); > fprintf(stderr, " s : include block structure (holes)\n"); > + fprintf(stderr, " z : include file size\n"); > fprintf(stderr, " -[UGOAMCDTES]: exclude respective field from calculation\n"); missing Z > fprintf(stderr, " -n : reset all flags\n"); > fprintf(stderr, " -N : set all flags\n"); > fprintf(stderr, " -x path : exclude path when building checksum (multiple ok)\n"); > + fprintf(stderr, " -R : traverse dirs non-recursively (recursive is default)\n"); > fprintf(stderr, " -h : this help\n\n"); > - fprintf(stderr, "The default field mask is ugoamCdtES. If the checksum/manifest is read from a\n"); > + fprintf(stderr, "The default field mask is ugoamCdtESz. If the checksum/manifest is read from a\n"); > fprintf(stderr, "file, the mask is taken from there and the values given on the command line\n"); > fprintf(stderr, "are ignored.\n"); > exit(-1); > @@ -502,6 +507,92 @@ malformed: > excess_file(fn); > } > > +void > +sum_meta(sum_t *meta, struct stat64 *st) { > + if (!S_ISDIR(st->st_mode)) > + sum_add_u64(meta, st->st_nlink); > + if (flags[FLAG_UID]) > + sum_add_u64(meta, st->st_uid); > + if (flags[FLAG_GID]) > + sum_add_u64(meta, st->st_gid); > + if (flags[FLAG_MODE]) > + sum_add_u64(meta, st->st_mode); > + if (flags[FLAG_ATIME]) > + sum_add_time(meta, st->st_atime); > + if (flags[FLAG_MTIME]) > + sum_add_time(meta, st->st_mtime); > + if (flags[FLAG_CTIME]) > + sum_add_time(meta, st->st_ctime); > +} > +void > +print_manifest(char* path, sum_t *meta, sum_t *cs, struct stat64 *st) { > + if (gen_manifest || in_manifest) { > + char *fn; > + char *m; > + char *c; > + > + if (S_ISDIR(st->st_mode)) > + strcat(path, "/"); > + fn = escape(path); > + m = sum_to_string(meta); > + c = sum_to_string(cs); > + > + if (gen_manifest) > + fprintf(out_fp, "%s %s %s\n", fn, m, c); > + if (in_manifest) > + check_manifest(fn, m, c, 0); > + free(c); > + free(m); > + free(fn); > + } > +} > + Those helpers are great, but re-factoring should be a separate prep patch. It is easy to review non-logic-changing-re-factoring and it is hard to review logic changing and re-factoring together - to human ;-). > +void > +sum_file(int fd, sum_t *finalcs, struct stat64 *st, char *path) { > + int ret; > + sum_file_data_t sum_file_data = flags[FLAG_STRUCTURE] ? > + sum_file_data_strict : sum_file_data_permissive; > + > + sum_t cs; > + sum_t meta; > + > + sum_init(&cs); > + sum_init(&meta); > + > + char* name = basename(path); > + sum_add(&meta, name, strlen(name)); > + sum_meta(&meta, st); > + > + if (flags[FLAG_XATTRS]) { > + ret = sum_xattrs(fd, &meta); > + if (ret < 0) { > + fprintf(stderr, "failed to read xattrs from " > + "%s: %s\n", path, strerror(-ret)); > + exit(-1); > + } > + } > + > + if (flags[FLAG_SIZE]) > + sum_add_u64(&meta, st->st_size); > + > + if (flags[FLAG_DATA]) { > + ret = sum_file_data(fd, &cs); > + if (ret < 0) { > + fprintf(stderr, "read failed for %s: %s\n", path, > + strerror(errno)); > + exit(-1); > + } > + } > + > + sum_fini(&cs); > + sum_fini(&meta); > + > + print_manifest(path, &meta, &cs, st); > + > + sum_add_sum(finalcs, &cs); > + sum_add_sum(finalcs, &meta); > +} > + > void > sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in) > { > @@ -582,20 +673,7 @@ sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in) > > sum_add_u64(&meta, level); > sum_add(&meta, namelist[i], strlen(namelist[i])); > - if (!S_ISDIR(st.st_mode)) > - sum_add_u64(&meta, st.st_nlink); > - if (flags[FLAG_UID]) > - sum_add_u64(&meta, st.st_uid); > - if (flags[FLAG_GID]) > - sum_add_u64(&meta, st.st_gid); > - if (flags[FLAG_MODE]) > - sum_add_u64(&meta, st.st_mode); > - if (flags[FLAG_ATIME]) > - sum_add_time(&meta, st.st_atime); > - if (flags[FLAG_MTIME]) > - sum_add_time(&meta, st.st_mtime); > - if (flags[FLAG_CTIME]) > - sum_add_time(&meta, st.st_ctime); > + sum_meta(&meta, &st); > if (flags[FLAG_XATTRS] && > (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))) { > fd = openat(dirfd, namelist[i], 0); > @@ -618,7 +696,7 @@ sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in) > } > } > } > - if (S_ISDIR(st.st_mode)) { > + if (S_ISDIR(st.st_mode) && recursive) { > fd = openat(dirfd, namelist[i], 0); > if (fd == -1 && flags[FLAG_OPEN_ERROR]) { > sum_add_u64(&meta, errno); > @@ -631,7 +709,8 @@ sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in) > close(fd); > } > } else if (S_ISREG(st.st_mode)) { > - sum_add_u64(&meta, st.st_size); > + if (flags[FLAG_SIZE]) > + sum_add_u64(&meta, st.st_size); > if (flags[FLAG_DATA]) { > if (verbose) > fprintf(stderr, "file %s\n", > @@ -672,25 +751,9 @@ sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in) > } > sum_fini(&cs); > sum_fini(&meta); > - if (gen_manifest || in_manifest) { > - char *fn; > - char *m; > - char *c; > - > - if (S_ISDIR(st.st_mode)) > - strcat(path, "/"); > - fn = escape(path); > - m = sum_to_string(&meta); > - c = sum_to_string(&cs); > - > - if (gen_manifest) > - fprintf(out_fp, "%s %s %s\n", fn, m, c); > - if (in_manifest) > - check_manifest(fn, m, c, 0); > - free(c); > - free(m); > - free(fn); > - } > + > + print_manifest(path, &meta, &cs, &st); > + > sum_add_sum(dircs, &cs); > sum_add_sum(dircs, &meta); > next: > @@ -712,7 +775,7 @@ main(int argc, char *argv[]) > int plen; > int elen; > int n_flags = 0; > - const char *allopts = "heEfuUgGoOaAmMcCdDtTsSnNw:r:vx:"; > + const char *allopts = "heEfuUgGoOaAmMcCdDtTsSzZnNRw:r:vx:"; > > out_fp = stdout; > while ((c = getopt(argc, argv, allopts)) != EOF) { > @@ -720,6 +783,9 @@ main(int argc, char *argv[]) > case 'f': > gen_manifest = 1; > break; > + case 'R': > + recursive = 0; > + break; > case 'u': > case 'U': > case 'g': > @@ -740,6 +806,8 @@ main(int argc, char *argv[]) > case 'E': > case 's': > case 'S': > + case 'z': > + case 'Z': > ++n_flags; > parse_flag(c); > break; > @@ -855,8 +923,22 @@ main(int argc, char *argv[]) > if (gen_manifest) > fprintf(out_fp, "Flags: %s\n", flagstring); > > + struct stat64 path_st; > + if (fstat64(fd, &path_st)) { > + perror("fstat"); > + exit(-1); > + } > + > sum_init(&cs); > - sum(fd, 1, &cs, path, ""); > + if (S_ISDIR(path_st.st_mode)) { > + sum(fd, 1, &cs, path, ""); > + } else if (S_ISREG(path_st.st_mode)) { > + sum_file(fd, &cs, &path_st, path); > + } else { > + fprintf(stderr, "path must be file or dir: %s", path); > + exit(-1); > + } > + I would try something different - only after you try to write it we will know if i was a good idea or not... factor out sum_one() from the dir entries iteration in the re-factoring patch then call sum_one() from main() for non-dir in the patch to support single file sum. And there is not really a need to limit single file sum to regular file. It is tempting to just call sum_one() to let it handle file or dir for either root or node dir, but that change may affect all existing test and will be harder to verify. So I would rather that sum_one() will be a stupified factor out of some code chunk to a helper. Hope this is clear enough - if not, let me know. Thanks, Amir.