Re: [PATCH v2 1/3] tree-walk: learn get_tree_entry_follow_symlinks

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



dturner@xxxxxxxxxxxxxxxx writes:

> From: David Turner <dturner@xxxxxxxxxxx>
>
> Add a new function, get_tree_entry_follow_symlinks, to tree-walk.[ch].
> The function is not yet used.  It will be used to implement git
> cat-file --batch --follow-symlinks.
>
> The function locates an object by path, following symlinks in the
> repository.  If the symlinks lead outside the repository, the function
> reports this to the caller.
>
> Signed-off-by: David Turner <dturner@xxxxxxxxxxx>
> ---
>  tree-walk.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  tree-walk.h |   2 +
>  2 files changed, 195 insertions(+)

Overall this looks fairly straight-forward and much cleaner than I
feared it would end up to be.

	Side Note: Earlier I stupidly thought that it may have to
	involve ugly code to avoid penalizing common cases by first
	attempting to resolve the whole thing (e.g. a/b/c) and fall
	back to "is there any symbolic link?" codepath only after
	seeing it fail (i.e. if we can find "a/b/c" in the tree, by
	definition "a/" and "a/b/" cannot be a symbolic link).  But
	of course that is a silly thought. As this implementation
	makes it clear, for us to resolve "a/b/c" in the common case
	without symbolic link, we need to incrementally find "a" in
	the root, then find "b" in that "a", and then find "c" in
	"a/b" _anyway_.



I'll leave others (or maybe tomorrow's myself ;-) to nitpick on
styles, code layout and possibly logic errors (if any), but from
a cursory read, I like the general structure of this patch a lot.

Good job.

Thanks.

> diff --git a/tree-walk.c b/tree-walk.c
> index 5dd9a71..2df31a2 100644
> --- a/tree-walk.c
> +++ b/tree-walk.c
> @@ -415,6 +415,12 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
>  	return error;
>  }
>  
> +struct dir_state {
> +	void *tree;
> +	unsigned long size;
> +	unsigned char sha1[20];
> +};
> +
>  static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode)
>  {
>  	int namelen = strlen(name);
> @@ -478,6 +484,193 @@ int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned ch
>  	return retval;
>  }
>  
> +/* This is Linux's built-in max for the number of symlinks to follow.
> + * That limit, of course, does not affect git, but it's a reasonable
> + * choice.
> + */
> +#define GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS 40
> +
> +/**
> + * Find a tree entry by following symlinks in tree_sha (which is
> + * assumed to be the root of the repository).  In the event that a
> + * symlink points outside the repository (e.g. a link to /foo or a
> + * root-level link to ../foo), the portion of the link which is
> + * outside the repository will be copied into result_path, and *mode
> + * will be set to 0.  Otherwise, result will be filled in with the
> + * sha1 of the found object, and *mode will hold the mode of the
> + * object.
> + */
> +int get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, unsigned char *result_path, unsigned *mode)
> +{
> +	int retval = -1;
> +	void *tree;
> +	struct dir_state *parents = NULL;
> +	size_t parents_alloc = 0;
> +	ssize_t parents_nr = 0;
> +	unsigned long size;
> +	unsigned char root[20];
> +	unsigned char current_tree_sha1[20];
> +	struct strbuf namebuf = STRBUF_INIT;
> +	enum object_type type;
> +	int already_have_tree = 0;
> +	struct tree_desc t = {0};
> +	int follows_remaining = GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS;
> +	int i;
> +
> +	strbuf_addstr(&namebuf, name);
> +	hashcpy(current_tree_sha1, tree_sha1);
> +
> +	while (1) {
> +		char *first_slash;
> +		char *remainder = NULL;
> +		int find_result;
> +
> +		if (!t.buffer) {
> +			tree = read_object_with_reference(current_tree_sha1,
> +							  tree_type, &size,
> +							  root);
> +			if (!tree)
> +				goto done;
> +
> +			ALLOC_GROW(parents, parents_nr + 1, parents_alloc);
> +			parents[parents_nr].tree = tree;
> +			parents[parents_nr].size = size;
> +			hashcpy(parents[parents_nr].sha1, root);
> +
> +			parents_nr++;
> +
> +			if (namebuf.buf[0] == '\0') {
> +				hashcpy(result, root);
> +				retval = 0;
> +				goto done;
> +			}
> +
> +			if (!size)
> +				goto done;
> +
> +			/* descend */
> +			init_tree_desc(&t, tree, size);
> +		}
> +
> +		/* Handle symlinks to e.g. a//b by removing leading slashes */
> +		while (namebuf.buf[0] == '/') {
> +			strbuf_remove(&namebuf, 0, 1);
> +		}
> +
> +		/* Split namebuf into a first component and a
> +		 * remainder */
> +		if ((first_slash = strchr(namebuf.buf, '/'))) {
> +			*first_slash = 0;
> +			remainder = first_slash + 1;
> +		}
> +
> +		if (!strcmp(namebuf.buf, "..")) {
> +			struct dir_state *parent;
> +			/* We could end up with .. in the namebuf if
> +			 * it appears in a symlink. */
> +
> +			if (parents_nr == 1) {
> +				if (remainder)
> +					*first_slash = '/';
> +				if (strlcpy(result_path, namebuf.buf,
> +					    PATH_MAX) < PATH_MAX) {
> +					*mode = 0;
> +					retval = 0;
> +				}
> +				goto done;
> +			}
> +			parent = &parents[parents_nr - 1];
> +			free(parent->tree);
> +			parents_nr--;
> +			parent = &parents[parents_nr - 1];
> +			init_tree_desc(&t, parent->tree, parent->size);
> +			strbuf_remove(&namebuf, 0, remainder ? 3 : 2);
> +			continue;
> +		}
> +
> +		/* We could end up here via a symlink to dir/.. */
> +		if (namebuf.buf[0] == '\0') {
> +			hashcpy(result, parents[parents_nr - 1].sha1);
> +			retval = 0;
> +			goto done;
> +		}
> +
> +		/* Look up the first (or only) path component
> +		 * in the tree. */
> +		find_result = find_tree_entry(&t, namebuf.buf,
> +					      current_tree_sha1, mode);
> +		if (find_result) {
> +			retval = find_result;
> +			goto done;
> +		}
> +
> +		if (S_ISDIR(*mode)) {
> +			if (!remainder) {
> +				hashcpy(result, current_tree_sha1);
> +				retval = 0;
> +				goto done;
> +			}
> +			/* Descend the tree */
> +			t.buffer = NULL;
> +			strbuf_remove(&namebuf, 0,
> +				      1 + first_slash - namebuf.buf);
> +		} else if (S_ISREG(*mode)) {
> +			if (!remainder) {
> +				hashcpy(result, current_tree_sha1);
> +				retval = 0;
> +			}
> +			goto done;
> +		} else if (S_ISLNK(*mode)) {
> +			/* Follow a symlink */
> +			size_t link_len, len;
> +			char *contents, *contents_start;
> +			struct dir_state *parent;
> +
> +			if (follows_remaining-- == 0)
> +				/* Too many symlinks followed */
> +				goto done;
> +
> +			contents = read_sha1_file(current_tree_sha1, &type,
> +						  &link_len);
> +
> +			if (!contents)
> +				goto done;
> +
> +			if (contents[0] == '/') {
> +				if (strlcpy(result_path,
> +					    contents, PATH_MAX) < PATH_MAX) {
> +					*mode = 0;
> +					retval = 0;
> +				}
> +				goto done;
> +			}
> +
> +			if (remainder)
> +				len = first_slash - namebuf.buf;
> +			else
> +				len = namebuf.len;
> +
> +			contents_start = contents;
> +
> +			parent = &parents[parents_nr - 1];
> +			init_tree_desc(&t, parent->tree, parent->size);
> +			strbuf_splice(&namebuf, 0, len,
> +				      contents_start, link_len);
> +			if (remainder)
> +				namebuf.buf[link_len] = '/';
> +			free(contents);
> +		}
> +	}
> +done:
> +	for (i = 0; i < parents_nr; ++i) {
> +		free(parents[i].tree);
> +	}
> +	free(parents);
> +
> +	strbuf_release(&namebuf);
> +	return retval;
> +}
> +
>  static int match_entry(const struct pathspec_item *item,
>  		       const struct name_entry *entry, int pathlen,
>  		       const char *match, int matchlen,
> diff --git a/tree-walk.h b/tree-walk.h
> index ae7fb3a..d9ad768 100644
> --- a/tree-walk.h
> +++ b/tree-walk.h
> @@ -40,6 +40,8 @@ struct traverse_info;
>  typedef int (*traverse_callback_t)(int n, unsigned long mask, unsigned long dirmask, struct name_entry *entry, struct traverse_info *);
>  int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info);
>  
> +int get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, unsigned char *result_path, unsigned *mode);
> +
>  struct traverse_info {
>  	struct traverse_info *prev;
>  	struct name_entry name;
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]