[GSoC][PATCH 2/2] refs: add name and content check for file backend

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The existing git-fsck does not fully check bad format ref name such as
standalone '@' or name ending with ".lock". And also `git-fsck` does not
fully check ref content, the following contents will not be reported by
the original git-fsck command.

1. "c71dba5654404b9b0d378a6cbff1b743b9d31a34 garbage": with trailing
   characters.
2. "c71dba5654404b9b0d378a6cbff1b743b9d31a34    ": with trailing empty
   characters.
3. "C71DBA5654404b9b0d378a6cbff1b743b9d31A34": with uppercase hex.
4. "z71dba5654404b9b0d378a6cbff1b743b9d31a34": with not hex character.

Provide functionality to support such consistency checks for branch and
tag refs and add a new unit test to verify this functionality.

Mentored-by: Patrick Steinhardt <ps@xxxxxx>
Mentored-by: Karthik Nayak <karthik.188@xxxxxxxxx>
Signed-off-by: shejialuo <shejialuo@xxxxxxxxx>
---
 refs/files-backend.c     | 112 ++++++++++++++++++++++++++++++++-
 t/t0602-reffiles-fsck.sh | 132 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 243 insertions(+), 1 deletion(-)
 create mode 100755 t/t0602-reffiles-fsck.sh

diff --git a/refs/files-backend.c b/refs/files-backend.c
index b6147c588b..3c811f5c03 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3299,11 +3299,121 @@ static int files_init_db(struct ref_store *ref_store,
 	return 0;
 }
 
-static int files_fsck(struct ref_store *ref_store)
+typedef int (*files_fsck_refs_fn)(const char *refs_check_dir,
+				struct dir_iterator *iter);
+
+static int files_fsck_refs_name(const char *refs_check_dir,
+				struct dir_iterator *iter)
 {
+	if (check_refname_format(iter->basename, REFNAME_ALLOW_ONELEVEL)) {
+		error(_("%s/%s: invalid refname format"), refs_check_dir, iter->basename);
+		return -1;
+	}
+
 	return 0;
 }
 
+static int files_fsck_refs_content(const char *refs_check_dir,
+				struct dir_iterator *iter)
+{
+	struct strbuf ref_content = STRBUF_INIT;
+
+	if (strbuf_read_file(&ref_content, iter->path.buf, 0) < 0) {
+		error(_("%s/%s: unable to read the ref"), refs_check_dir, iter->basename);
+		goto clean;
+	}
+
+	/*
+	 * Case 1: check if the ref content length is valid and the last
+	 * character is a newline.
+	 */
+	if (ref_content.len != the_hash_algo->hexsz + 1 ||
+			ref_content.buf[ref_content.len - 1] != '\n') {
+		goto failure;
+	}
+
+	/*
+	 * Case 2: the content should be range of [0-9a-f].
+	 */
+	for (size_t i = 0; i < the_hash_algo->hexsz; i++) {
+		if (!isdigit(ref_content.buf[i]) &&
+				(ref_content.buf[i] < 'a' || ref_content.buf[i] > 'f')) {
+			goto failure;
+		}
+	}
+
+	strbuf_release(&ref_content);
+	return 0;
+
+failure:
+	error(_("%s/%s: invalid ref content"), refs_check_dir, iter->basename);
+
+clean:
+	strbuf_release(&ref_content);
+	return -1;
+}
+
+static int files_fsck_refs(struct ref_store *ref_store,
+				const char* refs_check_dir,
+				files_fsck_refs_fn *fsck_refs_fns)
+{
+	struct dir_iterator *iter;
+	struct strbuf sb = STRBUF_INIT;
+	int ret = 0;
+	int iter_status;
+
+	strbuf_addf(&sb, "%s/%s", ref_store->gitdir, refs_check_dir);
+
+	iter = dir_iterator_begin(sb.buf, 0);
+
+	/*
+	 * The current implementation does not care about the worktree, the worktree
+	 * may have no refs/heads or refs/tags directory. Simply return 0 now.
+	*/
+	if (!iter) {
+		return 0;
+	}
+
+	while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
+		if (S_ISDIR(iter->st.st_mode)) {
+			continue;
+		} else if (S_ISREG(iter->st.st_mode)) {
+			for (files_fsck_refs_fn *fsck_refs_fn = fsck_refs_fns;
+					*fsck_refs_fn; fsck_refs_fn++) {
+				ret |= (*fsck_refs_fn)(refs_check_dir, iter);
+			}
+		} else {
+			error(_("unexpected file type for '%s'"), iter->basename);
+			ret = -1;
+		}
+	}
+
+	if (iter_status != ITER_DONE) {
+		ret = -1;
+		error(_("failed to iterate over '%s'"), sb.buf);
+	}
+
+	strbuf_release(&sb);
+
+	return ret;
+}
+
+static int files_fsck(struct ref_store *ref_store)
+{
+	int ret = 0;
+
+	files_fsck_refs_fn fsck_refs_fns[] = {
+		files_fsck_refs_name,
+		files_fsck_refs_content,
+		NULL
+	};
+
+	ret = files_fsck_refs(ref_store, "refs/heads",fsck_refs_fns)
+	    | files_fsck_refs(ref_store, "refs/tags", fsck_refs_fns);
+
+	return ret;
+}
+
 struct ref_storage_be refs_be_files = {
 	.name = "files",
 	.init = files_ref_store_create,
diff --git a/t/t0602-reffiles-fsck.sh b/t/t0602-reffiles-fsck.sh
new file mode 100755
index 0000000000..1c6c3804ff
--- /dev/null
+++ b/t/t0602-reffiles-fsck.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+
+test_description='Test reffiles backend consistency check'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+GIT_TEST_DEFAULT_REF_FORMAT=files
+export GIT_TEST_DEFAULT_REF_FORMAT
+
+. ./test-lib.sh
+
+test_expect_success 'ref name should be checked' '
+	test_when_finished "rm -rf repo" &&
+	git init repo &&
+	branch_dir_prefix=.git/refs/heads &&
+	tag_dir_prefix=.git/refs/tags &&
+	(
+		cd repo &&
+		git commit --allow-empty -m initial &&
+		git checkout -b branch-1 &&
+		git tag tag-1 &&
+		git commit --allow-empty -m second &&
+		git checkout -b branch-2 &&
+		git tag tag-2
+	) &&
+	(
+		cd repo &&
+		cp $branch_dir_prefix/branch-1 $branch_dir_prefix/.branch-1 &&
+		test_must_fail git fsck 2>err &&
+		cat >expect <<-EOF &&
+		error: refs/heads/.branch-1: invalid refname format
+		error: ref database is corrupt
+		EOF
+		rm $branch_dir_prefix/.branch-1 &&
+		test_cmp expect err
+	) &&
+	(
+		cd repo &&
+		cp $tag_dir_prefix/tag-1 $tag_dir_prefix/tag-1.lock &&
+		test_must_fail git fsck 2>err &&
+		cat >expect <<-EOF &&
+		error: refs/tags/tag-1.lock: invalid refname format
+		error: ref database is corrupt
+		EOF
+		rm $tag_dir_prefix/tag-1.lock &&
+		test_cmp expect err
+	) &&
+	(
+		cd repo &&
+		cp $branch_dir_prefix/branch-1 $branch_dir_prefix/@ &&
+		test_must_fail git fsck 2>err &&
+		cat >expect <<-EOF &&
+		error: refs/heads/@: invalid refname format
+		error: ref database is corrupt
+		EOF
+		rm $branch_dir_prefix/@ &&
+		test_cmp expect err
+	)
+'
+
+test_expect_success 'ref content should be checked' '
+	test_when_finished "rm -rf repo" &&
+	git init repo &&
+	branch_dir_prefix=.git/refs/heads &&
+	tag_dir_prefix=.git/refs/tags &&
+	(
+		cd repo &&
+		git commit --allow-empty -m initial &&
+		git checkout -b branch-1 &&
+		git tag tag-1 &&
+		git commit --allow-empty -m second &&
+		git checkout -b branch-2 &&
+		git tag tag-2
+	) &&
+	(
+		cd repo &&
+		printf "%s garbage" "$(git rev-parse branch-1)" > $branch_dir_prefix/branch-1-garbage &&
+		test_must_fail git fsck 2>err &&
+		cat >expect <<-EOF &&
+		error: refs/heads/branch-1-garbage: invalid ref content
+		error: ref database is corrupt
+		EOF
+		rm $branch_dir_prefix/branch-1-garbage &&
+		test_cmp expect err
+	) &&
+	(
+		cd repo &&
+		printf "%s garbage" "$(git rev-parse tag-1)" > $tag_dir_prefix/tag-1-garbage &&
+		test_must_fail git fsck 2>err &&
+		cat >expect <<-EOF &&
+		error: refs/tags/tag-1-garbage: invalid ref content
+		error: ref database is corrupt
+		EOF
+		rm $tag_dir_prefix/tag-1-garbage &&
+		test_cmp expect err
+	) &&
+	(
+		cd repo &&
+		printf "%s    " "$(git rev-parse tag-2)" > $tag_dir_prefix/tag-2-garbage &&
+		test_must_fail git fsck 2>err &&
+		cat >expect <<-EOF &&
+		error: refs/tags/tag-2-garbage: invalid ref content
+		error: ref database is corrupt
+		EOF
+		rm $tag_dir_prefix/tag-2-garbage &&
+		test_cmp expect err
+	) &&
+	(
+		cd repo &&
+		tr -d "\n" < $branch_dir_prefix/branch-1 > $branch_dir_prefix/branch-1-without-newline &&
+		test_must_fail git fsck 2>err &&
+		cat >expect <<-EOF &&
+		error: refs/heads/branch-1-without-newline: invalid ref content
+		error: ref database is corrupt
+		EOF
+		rm $branch_dir_prefix/branch-1-without-newline &&
+		test_cmp expect err
+	) &&
+	(
+		cd repo &&
+		tr "[:lower:]" "[:upper:]" < $branch_dir_prefix/branch-2 > $branch_dir_prefix/branch-2-upper &&
+		test_must_fail git fsck 2>err &&
+		cat >expect <<-EOF &&
+		error: refs/heads/branch-2-upper: invalid ref content
+		error: ref database is corrupt
+		EOF
+		rm $branch_dir_prefix/branch-2-upper &&
+		test_cmp expect err
+	)
+'
+
+test_done
-- 
2.45.1





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux