On Wed, 2007-09-19 at 18:27 -0700, Junio C Hamano wrote: > Kristian Høgsberg <krh@xxxxxxxxxx> writes: > > > diff --git a/builtin-commit.c b/builtin-commit.c > > new file mode 100644 > > index 0000000..ee98de9 > > --- /dev/null > > +++ b/builtin-commit.c > > @@ -0,0 +1,740 @@ > > +/* > > + * Builtin "git commit" > > + * > > + * Copyright (c) 2007 Kristian Høgsberg <krh@xxxxxxxxxx> > > + * Based on git-commit.sh by Junio C Hamano and Linus Torvalds > > + */ > > + > > +#include <sys/types.h> > > +#include <sys/stat.h> > > +#include <unistd.h> > > + > > With 85023577a8f4b540aa64aa37f6f44578c0c305a3 (simplify > inclusion of system header files.), we introduced a rule against > these lines. We probably would want a Coding Guideline (aka > "Hacking Git") document somewhere. Ok. I see you removed them in the pu commit, thanks. > > +#include "cache.h" > > +#include "cache-tree.h" > > +#include "builtin.h" > > +#include "diff.h" > > +#include "diffcore.h" > > +#include "commit.h" > > +#include "revision.h" > > +#include "wt-status.h" > > +#include "run-command.h" > > +#include "refs.h" > > +#include "log-tree.h" > > +#include "strbuf.h" > > +#include "utf8.h" > > + > > +static const char builtin_commit_usage[] = > > + "[-a | --interactive] [-s] [-v] [--no-verify] [-m <message> | -F <logfile> | (-C|-c) <commit> | --amend] [-u] [-e] [--author <author>] [--template <file>] [[-i | -o] <path>...]"; > > + > > +static unsigned char head_sha1[20], merge_head_sha1[20]; > > +static char *use_message_buffer; > > +static const char commit_editmsg[] = "COMMIT_EDITMSG"; > > +static struct lock_file lock_file; > > + > > +enum option_type { > > + OPTION_NONE, > > + OPTION_STRING, > > + OPTION_INTEGER, > > + OPTION_LAST, > > +}; > > + > > +struct option { > > + enum option_type type; > > + const char *long_name; > > + char short_name; > > + void *value; > > +}; > > + > > +static int scan_options(const char ***argv, struct option *options) > > +{ > > + const char *value, *eq; > > + int i; > > + > > + if (**argv == NULL) > > + return 0; > > + if ((**argv)[0] != '-') > > + return 0; > > + if (!strcmp(**argv, "--")) > > + return 0; > > + > > + value = NULL; > > + for (i = 0; options[i].type != OPTION_LAST; i++) { > > + if ((**argv)[1] == '-') { > > + if (!prefixcmp(options[i].long_name, **argv + 2)) { > > + if (options[i].type != OPTION_NONE) > > + value = *++(*argv); > > + goto match; > > + } > > + > > + eq = strchr(**argv + 2, '='); > > + if (eq && options[i].type != OPTION_NONE && > > + !strncmp(**argv + 2, > > + options[i].long_name, eq - **argv - 2)) { > > + value = eq + 1; > > + goto match; > > + } > > + } > > + > > + if ((**argv)[1] == options[i].short_name) { > > + if ((**argv)[2] == '\0') { > > + if (options[i].type != OPTION_NONE) > > + value = *++(*argv); > > + goto match; > > + } > > + > > + if (options[i].type != OPTION_NONE) { > > + value = **argv + 2; > > + goto match; > > + } > > + } > > + } > > How do you disambiguate between "--author <me>" and "--amend"? > "The order in *options list matters" is an acceptable answer but > it needs to be documented. Yes, the order matters. I made sure the C version has the same precedence as the shell script. I'll add a comment in parse-options.h. > > + > > + usage(builtin_commit_usage); > > + > > + match: > > + switch (options[i].type) { > > + case OPTION_NONE: > > + *(int *)options[i].value = 1; > > + break; > > + case OPTION_STRING: > > + if (value == NULL) > > + die("option %s requires a value.", (*argv)[-1]); > > + *(const char **)options[i].value = value; > > + break; > > + case OPTION_INTEGER: > > + if (value == NULL) > > + die("option %s requires a value.", (*argv)[-1]); > > + *(int *)options[i].value = atoi(value); > > + break; > > + default: > > + assert(0); > > + } > > + > > + (*argv)++; > > + > > + return 1; > > +} > > I do not particularly like this OPTION_LAST convention, but that > is a minor detail. I also suspect in the longer term we might > be better off using getopt() or popt(), but that would be a > larger project. > > In any case, if you want to use this option parser, you would > need to add a new file, perhaps parse_options.c, and move this > part there, so that other parts of the system can reuse it. I'll split it out in parse-options.[ch]. I still don't understand why using getopt is better; parse-options.c is a 75 line file and it's simple code. It does more work that getopt, since for most options it automatically writes back the option argument into a global. All I have to do then is validate that no illegal combination of options was passed. > And the same comment goes for the launch_editor still in > builtin-tag.c. We should move it to editor.c or something. Yeah. > > + > > +static char *logfile, *force_author, *message, *template_file; > > +static char *edit_message, *use_message; > > +static int all, edit_flag, also, interactive, only, no_verify, amend, signoff; > > +static int quiet, verbose, untracked_files; > > + > > +static int no_edit, initial_commit, in_merge; > > +const char *only_include_assumed; > > + > > +static struct option commit_options[] = { > > + { OPTION_STRING, "file", 'F', (void *) &logfile }, > > + { OPTION_NONE, "all", 'a', &all }, > > + { OPTION_STRING, "author", 0, (void *) &force_author }, > > + { OPTION_NONE, "edit", 'e', &edit_flag }, > > Why do some get casted to (void*) and others don't? It doesn't > seem to have any pattern. I am puzzled... Yes, oops, not sure what that was about. > > + { OPTION_NONE, "include", 'i', &also }, > > + { OPTION_NONE, "interactive", 0, &interactive }, > > + { OPTION_NONE, "only", 'o', &only }, > > + { OPTION_STRING, "message", 'm', &message }, > > + { OPTION_NONE, "no-verify", 'n', &no_verify }, > > + { OPTION_NONE, "amend", 0, &amend }, > > + { OPTION_STRING, "reedit-message", 'c', &edit_message }, > > + { OPTION_STRING, "reuse-message", 'C', &use_message }, > > + { OPTION_NONE, "signoff", 's', &signoff }, > > + { OPTION_NONE, "quiet", 'q', &quiet }, > > + { OPTION_NONE, "verbose", 'v', &verbose }, > > + { OPTION_NONE, "untracked-files", 0, &untracked_files }, > > + { OPTION_STRING, "template", 't', &template_file }, > > + { OPTION_LAST }, > > +}; > > + > > +/* FIXME: Taken from builtin-add, should be shared. */ > > You're darn right. I thought you have some other patch that > touches builtin-add already... Yeah, I'll fix that. > > + > > +static void update_callback(struct diff_queue_struct *q, > > + struct diff_options *opt, void *cbdata) > > +{ > > + int i, verbose; > > + > > + verbose = *((int *)cbdata); > > + for (i = 0; i < q->nr; i++) { > > + struct diff_filepair *p = q->queue[i]; > > + const char *path = p->one->path; > > + switch (p->status) { > > + default: > > + die("unexpacted diff status %c", p->status); > > + case DIFF_STATUS_UNMERGED: > > + case DIFF_STATUS_MODIFIED: > > + case DIFF_STATUS_TYPE_CHANGED: > > + add_file_to_cache(path, verbose); > > + break; > > + case DIFF_STATUS_DELETED: > > + remove_file_from_cache(path); > > + cache_tree_invalidate_path(active_cache_tree, path); > > I've updated remove_file_from_cache() to invalidate the path in > cache-tree so this does not hurt but is no longer necessary. I > removed this line in the version I queued in 'pu'. > > > + if (verbose) > > + printf("remove '%s'\n", path); > > + break; > > + } > > + } > > +} > > + > > +static void > > +add_files_to_cache(int fd, const char **files, const char *prefix) > > +{ > > + struct rev_info rev; > > + > > + init_revisions(&rev, ""); > > + setup_revisions(0, NULL, &rev, NULL); > > + rev.prune_data = get_pathspec(prefix, files); > > + rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK; > > + rev.diffopt.format_callback = update_callback; > > + rev.diffopt.format_callback_data = &verbose; > > + > > + run_diff_files(&rev, 0); > > + refresh_cache(REFRESH_QUIET); > > Your update_callback() does add_file_to_cache() which picks up > the stat information from the filesystem already, so I do not > see much point calling refresh_cache() afterwards. On the other > hand, refreshing before running diff_files() might make sense, > as you can avoid a lot of unnecessary work when you are told to > do "git commit ." I didn't have refresh_cache() in there originally, there was some test case that didn't pass until I added it. The git-commit.sh shell script calls git update-index --refresh after the prepare_index() part, which is where I got it from. Will investigate. > Have you tested this with an unmerged index? Not yet. > > + > > + if (write_cache(fd, active_cache, active_nr) || close(fd)) > > + die("unable to write new index file"); > > +} > > + > > +static char * > > +prepare_index(const char **files, const char *prefix) > > +{ > > + int fd; > > + struct tree *tree; > > + struct lock_file *next_index_lock; > > + > > + fd = hold_locked_index(&lock_file, 1); > > + if (read_cache() < 0) > > + die("index file corrupt"); > > + > > + if (all) { > > + add_files_to_cache(fd, files, NULL); > > + return lock_file.filename; > > Should you be passing files, which is used as pathspec to decide > if your update_callback() should be called, under --all option? Oh... hmm, if 'all' is set there are no paths, but yes, that is confusing. > > + } else if (also) { > > + add_files_to_cache(fd, files, prefix); > > + return lock_file.filename; > > + } > > + > > + if (interactive) > > + interactive_add(); > > Don't you need to > > (1) abort if the user aborts out of interactive add with ^C? > (2) re-read the index after interactive_add() returns? Uhm, yes, good points. > > + if (*files == NULL) { > > + /* Commit index as-is. */ > > + rollback_lock_file(&lock_file); > > + return get_index_file(); > > + } > > + > > + /* > > + * FIXME: Warn on unknown files. Shell script does > > + * > > + * commit_only=`git-ls-files --error-unmatch -- "$@"` > > + */ > > This should be much easier to do than from the shell script, as > you have active_cache[] (aka "the_index.cache[]") in-core. > > > + /* > > + * FIXME: shell script does > > + * > > + * git-read-tree --index-output="$TMP_INDEX" -i -m HEAD > > + * > > + * which warns about unmerged files in the index. > > + */ > > I think "unmerged warning" is an unintended side effect. The > point of the command is to have a copy of index, as there is no > way for shell script to work with more than one index at a time > without using a temporary file. Yes, we talked about this in IRC a few weeks back, and agreed that just read_tree() should be fine. I'll just remove that FIXME. > > + > > + /* update the user index file */ > > + add_files_to_cache(fd, files, prefix); > > + > > + if (!initial_commit) { > > + tree = parse_tree_indirect(head_sha1); > > + if (!tree) > > + die("failed to unpack HEAD tree object"); > > + if (read_tree(tree, 0, NULL)) > > + die("failed to read HEAD tree object"); > > + } > > Huh? Doesn't this read_tree() defeat the add_files_to_cache() > you did earlier? This is the case where we add the files on the command line to .git/index, but commit from a clean index file corresponding to HEAD with the files from the command line added (partial commit?). The first add_files_to_cache() updates .git/index, then we do read_tree() to build a tmp index from HEAD and then we add the files again. The tmp index is written to a tmp index file. > > + > > + /* Uh oh, abusing lock_file to create a garbage collected file */ > > + next_index_lock = xmalloc(sizeof(*next_index_lock)); > > + fd = hold_lock_file_for_update(next_index_lock, > > + git_path("next-index-%d", getpid()), 1); > > That's not an abuse, but I wonder what happened to the fd you > got at the beginning of the function. Ugh, yeah, that's a bit ugly... it gets closed in add_files_to_cache() once we've written the index. The patch I have here uses the add_files_to_cache() from builtin-add.c which doesn't close the fd or write the index. That now happens in prepare_index(), so it's a little clearer what's going on. > > + add_files_to_cache(fd, files, prefix); > > and then this is puzzling to me. > > I am starting to suspect that you might be better off if you do > not follow the use of temporary index file that was in the shell > script version to the letter. You can use more than one index > in the core at the same time, now you are built-in. As described above, we need to add the files to the user index and the temporary index we're committing from. As for just using an in-memory index, I wanted to do it that way originally, but you have to write it to disk after all for the pre-commit hook. Maybe doing it in-memory is better after all, and then only write it to disk if we actually have a pre-commit hook. > > + > > + return next_index_lock->filename; > > +} > > ... and if we were to go that route, wt_status structure would > have a pointer to the "struct index_state" instead of the > filename of the index... oops, wt_status_print() will discard > and re-read the cache from file, so that approach would not work > right now without fixing wt-status first. Hmm. Yep, that was the other reason. I suppose a short term fix for this would be to make run_status() take a struct index_state and write it to a temp file and run against that. We can then clean that up to not use a temp file in a second patch. > > +static int run_status(FILE *fp, const char *index_file) > > +{ > > + struct wt_status s; > > + > > + wt_status_prepare(&s); > > + > > + if (amend) { > > + s.amend = 1; > > + s.reference = "HEAD^1"; > > + } > > + s.verbose = verbose; > > + s.untracked = untracked_files; > > + s.index_file = index_file; > > + s.fp = fp; > > + > > + wt_status_print(&s); > > + > > + return s.commitable; > > +} > > I did not look at the rest of the patch; it should be either > obviously correct or outright incorrect --- anybody would notice > the breakage immediately. > > The prepare_index() part is the most (and only) "interesting" > part of the puzzle. I need to look at this again and think > about it a bit more. It needs to: > > * save the current index and restore in case the whole > "git-commit" is aborted in any way (e.g. ^C, empty log > message from the editor); This is easy to do from C with > hold_locked_index(). This is working in the current version. I use hold_locked_index() in the beginning of prepare_index() and calls commit_locked_index() to write the new index at the end of cmd_commit() once the commit is done. > * when doing a partial commit, > - read HEAD in a temporary index, if not initial commit; > - update the temporary index with the given paths; > - write out the temporary index as a tree to build a commit; > > - update the real index the same way with the given paths; > - if all goes well, commit the updated real index; again, > this is easy in C with commit_locked_index(); prepare_index() does this, that's the case you pointed out above, where I'm calling add_files_to_cache() twice. > * when not doing a partial commit, > - update the real index with given paths (if "--also") or all > what diff-files reports (if "--all"); > - write out the real index as a tree to build a commit; > - if all goes well, commit the updated real index; again, > this is easy in C with commit_locked_index(); This is also in prepare_index() - the 'if (all)' and 'if (also)' cases in the beginning of the function does that, except commiting the the updated real index, that's done at the end of cmd_commit(), share among all cases. > A thing to keep in mind is that you can write out the temporary > index as a tree from the core without writing it out first to a > temporary index file at all. Perhaps the code should be > structured like this. > > - parse options and all the other necessary setup; > > - hold_locked_index() to lock the "real" index; > > - prepare_index() is responsibile for doing two things > > 1. build, write out and return the tree object to be > committed; > > 2. leave the_index[] in the state to become the "real" > index if this commit command is not aborted; > > - take the tree object, put commit log message around it and > make a commit object; > > - commit_locked_index() to write out the "real" index. > > Now, the task #1 for prepare_index() is simpler for "also" and > "all" case. You do the equivalent of "git-add -u" or "git add > <path>" and run cache_tree_update() to write out the tree, and > when you are done, both the_index.cache and the_index.cache_tree > are up-to-date, ready to be written out by the caller at the > very end. Yes, but isn't that what the code is doing now? I write out the tree in cmd_commit(), not in prepare_index(), but prepare_index() is also used in cmd_status(). It seems better to only write the tree if we're going to do a commit. Other than that, the flow of the code follows your description pretty much step by step. > For a partial commit, the task is a bit more convoluted, as > writing out the tree needs to be done from outside the_index. I > would say something like this: > > int prepare_index(unsigned char *sha1) { > if (partial commit) { > struct index_state tmp_index; > > /* "temporary index" only in-core */ > memset(&tmp_index, 0, sizeof(tmp_index)); > read_index(&tmp_index); > add_files_to_index(files, prefix, &tmp_index); > write_index_as_tree(&tmp_index, sha1); > discard_index(&tmp_index); > > /* update the index the same way */ > add_files_to_index(files, prefix, &the_index); > return ok; > } > /* otherwise */ > if (files && *files) > add_files_to_index(files, prefix, &the_index); > else if (all) > add_files_to_index(NULL, prefix, &the_index); > write_index_as_tree(&the_index, sha1); > return ok; > } > > where > > * add_files_to_index() is similar to your add_files_to_cache() > but takes struct index_state; the callback can still diff > with the_index.cache (aka active_cache) as that diff is what > we are interested in updating anyway; > > * write_index_as_tree() would be a new function that takes > struct index_state and writes that as a tree. It would > essentially be the builtin-write-tree.c::write_tree() > function except the opening and reading the index (or > updating the cache-tree) part, something like: > > write_index_as_tree(struct index_state *istate, unsigned char *sha1) > { > if (!cache_tree_fully_valid(istate->cache_tree)) > cache_tree_update(istate->cache_tree, > istate->cache, > istate->cache_nr, 0, 0); > hashcpy(sha1, istate->cache_tree->sha1); > } That looks nicer, the add_files_to_index() function is a pretty clean solution. But again, we can't write the tree in prepare_index(); we need the temporary index for the pre-commit hook, and prepare_index() is used in cmd_status(). But we can just return an struct index_state pointer from prepare_index(). Thanks for reviewing, Kristian - To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html