shejialuo <shejialuo@xxxxxxxxx> writes: > Enhance the git-fsck(1) command by adding a check for reference content > in the files backend. The new functionality ensures that symrefs, real > symbolic link and regular refs are validated correctly. > > In order to check the trailing content of the regular refs, add a new > parameter `trailing` to `parse_loose_ref_contents`. > > For symrefs, `parse_loose_ref_contents` will set the "referent". > However, symbolic link could be either absolute or relative. Use > "strbuf_add_real_path" to read the symbolic link and convert the > relative path to absolute path. Then use "skip_prefix" to make it align > with symref "referent". > > Thus, the symrefs and symbolic links could share the same interface. Add > a new function "files_fsck_symref_target" which aims at checking the > following things: > > 1. whether the pointee is under the `refs/` directory. > 2. whether the pointee name is correct. > 3. whether the pointee path is a wrong type in filesystem. > > Last, add the following FSCK MESSAGEs: > > 1. "badRefContent(ERROR)": A ref has a bad content > 2. "badSymrefPointee(ERROR)": The pointee of a symref is bad. > 3. "trailingRefContent(WARN)": A ref content has trailing contents. > > Mentored-by: Patrick Steinhardt <ps@xxxxxx> > Mentored-by: Karthik Nayak <karthik.188@xxxxxxxxx> > Signed-off-by: shejialuo <shejialuo@xxxxxxxxx> > --- > Documentation/fsck-msgids.txt | 9 +++ > fsck.h | 3 + > refs.c | 2 +- > refs/files-backend.c | 145 +++++++++++++++++++++++++++++++++- > refs/refs-internal.h | 5 +- > t/t0602-reffiles-fsck.sh | 110 ++++++++++++++++++++++++++ > 6 files changed, 269 insertions(+), 5 deletions(-) > > diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt > index dab4012246..b1630a478b 100644 > --- a/Documentation/fsck-msgids.txt > +++ b/Documentation/fsck-msgids.txt > @@ -19,9 +19,15 @@ > `badParentSha1`:: > (ERROR) A commit object has a bad parent sha1. > > +`badRefContent`:: > + (ERROR) A ref has a bad content. > + > `badRefName`:: > (ERROR) A ref has a bad name. > > +`badSymrefPointee`:: > + (ERROR) The pointee of a symref is bad. > + > `badTagName`:: > (INFO) A tag has an invalid format. > > @@ -167,6 +173,9 @@ > `nullSha1`:: > (WARN) Tree contains entries pointing to a null sha1. > > +`trailingRefContent`:: > + (WARN) A ref content has trailing contents. > + > `treeNotSorted`:: > (ERROR) A tree is not properly sorted. > > diff --git a/fsck.h b/fsck.h > index 2a2441e147..e92a5844ae 100644 > --- a/fsck.h > +++ b/fsck.h > @@ -32,6 +32,8 @@ enum fsck_msg_type { > FUNC(BAD_OBJECT_SHA1, ERROR) \ > FUNC(BAD_PARENT_SHA1, ERROR) \ > FUNC(BAD_REF_NAME, ERROR) \ > + FUNC(BAD_REF_CONTENT, ERROR) \ > + FUNC(BAD_SYMREF_POINTEE, ERROR) \ > FUNC(BAD_TIMEZONE, ERROR) \ > FUNC(BAD_TREE, ERROR) \ > FUNC(BAD_TREE_SHA1, ERROR) \ > @@ -72,6 +74,7 @@ enum fsck_msg_type { > FUNC(HAS_DOTDOT, WARN) \ > FUNC(HAS_DOTGIT, WARN) \ > FUNC(NULL_SHA1, WARN) \ > + FUNC(TRAILING_REF_CONTENT, WARN) \ > FUNC(ZERO_PADDED_FILEMODE, WARN) \ > FUNC(NUL_IN_COMMIT, WARN) \ > FUNC(LARGE_PATHNAME, WARN) \ > diff --git a/refs.c b/refs.c > index 410919246b..eb82fb7d4e 100644 > --- a/refs.c > +++ b/refs.c > @@ -1760,7 +1760,7 @@ static int refs_read_special_head(struct ref_store *ref_store, > } > > result = parse_loose_ref_contents(content.buf, oid, referent, type, > - failure_errno); > + failure_errno, NULL); > > done: > strbuf_release(&full_path); > diff --git a/refs/files-backend.c b/refs/files-backend.c > index d20e149214..42d2f676b9 100644 > --- a/refs/files-backend.c > +++ b/refs/files-backend.c > @@ -1,6 +1,7 @@ > #define USE_THE_REPOSITORY_VARIABLE > > #include "../git-compat-util.h" > +#include "../abspath.h" > #include "../copy.h" > #include "../environment.h" > #include "../gettext.h" > @@ -553,7 +554,7 @@ static int read_ref_internal(struct ref_store *ref_store, const char *refname, > strbuf_rtrim(&sb_contents); > buf = sb_contents.buf; > > - ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr); > + ret = parse_loose_ref_contents(buf, oid, referent, type, &myerr, NULL); > > out: > if (ret && !myerr) > @@ -589,7 +590,7 @@ static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refn > > int parse_loose_ref_contents(const char *buf, struct object_id *oid, > struct strbuf *referent, unsigned int *type, > - int *failure_errno) > + int *failure_errno, const char **trailing) > { > const char *p; > if (skip_prefix(buf, "ref:", &buf)) { > @@ -611,6 +612,10 @@ int parse_loose_ref_contents(const char *buf, struct object_id *oid, > *failure_errno = EINVAL; > return -1; > } > + > + if (trailing) > + *trailing = p; > + > return 0; > } > > @@ -3438,6 +3443,141 @@ static int files_fsck_refs_name(struct fsck_options *o, > return ret; > } > > +/* > + * Check the symref "pointee_name" and "pointee_path". The caller should > + * make sure that "pointee_path" is absolute. For symbolic ref, "pointee_name" > + * would be the content after "refs:". For symblic link, "pointee_name" would > + * be the relative path agaignst "gitdir". > + */ > +static int files_fsck_symref_target(struct fsck_options *o, > + const char *refname, > + const char *pointee_name, > + const char *pointee_path) > +{ > + const char *p = NULL; > + struct stat st; > + int ret = 0; > + > + if (!skip_prefix(pointee_name, "refs/", &p)) { > + > + ret = fsck_refs_report(o, NULL, refname, NULL, > + FSCK_MSG_BAD_SYMREF_POINTEE, > + "point to target out of refs hierarchy"); > + goto out; > + } > + > + if (check_refname_format(pointee_name, 0)) { > + ret = fsck_refs_report(o, NULL, refname, NULL, > + FSCK_MSG_BAD_SYMREF_POINTEE, > + "point to invalid refname"); > + } > + > + if (lstat(pointee_path, &st) < 0) > + goto out; > + > + if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) { > + ret = fsck_refs_report(o, NULL, refname, NULL, > + FSCK_MSG_BAD_SYMREF_POINTEE, > + "point to invalid target"); > + goto out; > + } > +out: > + return ret; > +} > + > +static int files_fsck_refs_content(struct fsck_options *o, > + const char *gitdir, > + const char *refs_check_dir, > + struct dir_iterator *iter) > +{ > + struct strbuf pointee_path = STRBUF_INIT, > + ref_content = STRBUF_INIT, > + abs_gitdir = STRBUF_INIT, > + referent = STRBUF_INIT, > + refname = STRBUF_INIT; > + const char *trailing = NULL; > + int failure_errno = 0; > + unsigned int type = 0; > + struct object_id oid; > + int ret = 0; > + > + strbuf_addf(&refname, "%s/%s", refs_check_dir, iter->relative_path); > + > + /* > + * If the file is a symlink, we need to only check the connectivity > + * of the destination object. > + */ > + if (S_ISLNK(iter->st.st_mode)) { > + const char *pointee_name = NULL; > + > + strbuf_add_real_path(&pointee_path, iter->path.buf); > + > + strbuf_add_absolute_path(&abs_gitdir, gitdir); > + strbuf_normalize_path(&abs_gitdir); > + if (!is_dir_sep(abs_gitdir.buf[abs_gitdir.len - 1])) > + strbuf_addch(&abs_gitdir, '/'); > + > + if (!skip_prefix(pointee_path.buf, > + abs_gitdir.buf, &pointee_name)) { > + ret = fsck_refs_report(o, NULL, refname.buf, NULL, > + FSCK_MSG_BAD_SYMREF_POINTEE, > + "point to target outside gitdir"); > + goto clean; > + } > + > + ret = files_fsck_symref_target(o, refname.buf, pointee_name, > + pointee_path.buf); > + goto clean; > + } > + > + if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) { > + ret = error_errno(_("%s/%s: unable to read the ref"), > + refs_check_dir, iter->relative_path); > + goto clean; > + } > + > + if (parse_loose_ref_contents(ref_content.buf, &oid, > + &referent, &type, > + &failure_errno, &trailing)) { > + ret = fsck_refs_report(o, NULL, refname.buf, NULL, > + FSCK_MSG_BAD_REF_CONTENT, > + "invalid ref content"); > + goto clean; > + } > + > + /* > + * If the ref is a symref, we need to check the destination name and > + * connectivity. > + */ > + if (referent.len && (type & REF_ISSYMREF)) { > + strbuf_addf(&pointee_path, "%s/%s", gitdir, referent.buf); > + strbuf_rtrim(&referent); > + > + ret = files_fsck_symref_target(o, refname.buf, referent.buf, > + pointee_path.buf); > + goto clean; > + } else { > + /* > + * Only regular refs could have a trailing garbage. Should > + * be reported as a warning. > + */ What happens if a symbolic reference has trailing garbage ? > + if (trailing && (*trailing != '\0' && *trailing != '\n')) { > + ret = fsck_refs_report(o, NULL, refname.buf, NULL, > + FSCK_MSG_TRAILING_REF_CONTENT, > + "trailing garbage in ref"); > + goto clean; > + } > + } > + > +clean: > + strbuf_release(&abs_gitdir); > + strbuf_release(&pointee_path); > + strbuf_release(&refname); > + strbuf_release(&ref_content); > + strbuf_release(&referent); > + return ret; > +} > + > static int files_fsck_refs_dir(struct ref_store *ref_store, > struct fsck_options *o, > const char *refs_check_dir, > @@ -3490,6 +3630,7 @@ static int files_fsck_refs(struct ref_store *ref_store, > int ret; > files_fsck_refs_fn fsck_refs_fns[]= { > files_fsck_refs_name, > + files_fsck_refs_content, > NULL > }; > > diff --git a/refs/refs-internal.h b/refs/refs-internal.h > index a905e187cd..2fabf41d14 100644 > --- a/refs/refs-internal.h > +++ b/refs/refs-internal.h > @@ -709,11 +709,12 @@ struct ref_store { > > /* > * Parse contents of a loose ref file. *failure_errno maybe be set to EINVAL for > - * invalid contents. > + * invalid contents. Also *trailing is set to the first character after the > + * refname or NULL if the referent is not empty. > */ > int parse_loose_ref_contents(const char *buf, struct object_id *oid, > struct strbuf *referent, unsigned int *type, > - int *failure_errno); > + int *failure_errno, const char **trailing); > > /* > * Fill in the generic part of refs and add it to our collection of > diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh > index b2db58d2c6..35bf40ee64 100755 > --- a/t/t0602-reffiles-fsck.sh > +++ b/t/t0602-reffiles-fsck.sh > @@ -98,4 +98,114 @@ test_expect_success 'ref name check should be adapted into fsck messages' ' > ) > ' > > +test_expect_success 'regular ref content should be checked' ' > + test_when_finished "rm -rf repo" && > + git init repo && > + branch_dir_prefix=.git/refs/heads && > + tag_dir_prefix=.git/refs/tags && > + ( > + cd repo && > + git commit --allow-empty -m initial && > + git checkout -b branch-1 && > + git tag tag-1 && > + git commit --allow-empty -m second && > + git checkout -b branch-2 && > + git tag tag-2 && > + git checkout -b a/b/tag-2 > + ) && > + ( > + cd repo && > + printf "%s garbage" "$(git rev-parse branch-1)" > $branch_dir_prefix/branch-1-garbage && > + git fsck 2>err && > + cat >expect <<-EOF && > + warning: refs/heads/branch-1-garbage: trailingRefContent: trailing garbage in ref > + EOF > + rm $branch_dir_prefix/branch-1-garbage && > + test_cmp expect err > + ) && > + ( > + cd repo && > + printf "%s garbage" "$(git rev-parse tag-1)" > $tag_dir_prefix/tag-1-garbage && > + test_must_fail git -c fsck.trailingRefContent=error fsck 2>err && > + cat >expect <<-EOF && > + error: refs/tags/tag-1-garbage: trailingRefContent: trailing garbage in ref > + EOF > + rm $tag_dir_prefix/tag-1-garbage && > + test_cmp expect err > + ) && > + ( > + cd repo && > + printf "%s " "$(git rev-parse tag-2)" > $tag_dir_prefix/tag-2-garbage && > + git fsck 2>err && > + cat >expect <<-EOF && > + warning: refs/tags/tag-2-garbage: trailingRefContent: trailing garbage in ref > + EOF > + rm $tag_dir_prefix/tag-2-garbage && > + test_cmp expect err > + ) && > + ( > + cd repo && > + printf "xfsazqfxcadas" > $tag_dir_prefix/tag-2-bad && > + test_must_fail git refs verify 2>err && > + cat >expect <<-EOF && > + error: refs/tags/tag-2-bad: badRefContent: invalid ref content > + EOF > + rm $tag_dir_prefix/tag-2-bad && > + test_cmp expect err > + ) && > + ( > + cd repo && > + printf "xfsazqfxcadas" > $branch_dir_prefix/a/b/branch-2-bad && > + test_must_fail git refs verify 2>err && > + cat >expect <<-EOF && > + error: refs/heads/a/b/branch-2-bad: badRefContent: invalid ref content > + EOF > + rm $branch_dir_prefix/a/b/branch-2-bad && > + test_cmp expect err > + ) > +' > + > +test_expect_success 'symbolic ref content should be checked' ' > + test_when_finished "rm -rf repo" && > + git init repo && > + branch_dir_prefix=.git/refs/heads && > + tag_dir_prefix=.git/refs/tags && > + ( > + cd repo && > + git commit --allow-empty -m initial && > + git checkout -b branch-1 && > + git tag tag-1 > + ) && > + ( > + cd repo && > + printf "ref: refs/heads/.branch" > $branch_dir_prefix/branch-2-bad && > + test_must_fail git refs verify 2>err && > + cat >expect <<-EOF && > + error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid refname > + EOF > + rm $branch_dir_prefix/branch-2-bad && > + test_cmp expect err > + ) && > + ( > + cd repo && > + printf "ref: refs/heads" > $branch_dir_prefix/branch-2-bad && > + test_must_fail git refs verify 2>err && > + cat >expect <<-EOF && > + error: refs/heads/branch-2-bad: badSymrefPointee: point to invalid target > + EOF > + rm $branch_dir_prefix/branch-2-bad && > + test_cmp expect err > + ) && > + ( > + cd repo && > + printf "ref: logs/maint-v2.45" > $branch_dir_prefix/branch-2-bad && > + test_must_fail git refs verify 2>err && > + cat >expect <<-EOF && > + error: refs/heads/branch-2-bad: badSymrefPointee: point to target out of refs hierarchy > + EOF > + rm $branch_dir_prefix/branch-2-bad && > + test_cmp expect err > + ) > +' > + > test_done > -- > 2.45.2
Attachment:
signature.asc
Description: PGP signature