Apologies for gmail mangling. I will use git send-email for real patches. On Tue, Oct 14, 2008 at 6:44 PM, Stefan Karpinski <stefan.karpinski@xxxxxxxxx> wrote: > I find myself really wanting to enforce standards in the use of spaces > versus tabs. I deal with some unruly programmers who refuse to set > their editors to use soft tabs, but I *hate* tabs in the repo. And of > course others feel equally strongly about keeping only tabs in the > repo (e.g. the git repo). > > This led me to wonder if it wouldn't make sense to have this > conversion ability built into git. The following patch implements this > functionality. It still needs work—it's not meant to be final, just to > give an idea—but I just wanted to see if people on the git list > thought this sort of thing would be worthwhile at all. > > If people think it's worth having in git, then how should it be > configured? I feel like a project should be able to define the > expected tab size for binary file types. Moreover, the project should > be able to define the default canonicalization with respect to > whitespace for different file types. Then, if they so desire, each > git user should be able to override the output format on a > per-repository basis. > > Does this make any sense? Comments? > > --- > diff --git a/convert.c b/convert.c > index 1816e97..280f45b 100644 > --- a/convert.c > +++ b/convert.c > @@ -18,7 +18,7 @@ > > struct text_stat { > /* NUL, CR, LF and CRLF counts */ > - unsigned nul, cr, lf, crlf; > + unsigned nul, cr, lf, crlf, tab; > > /* These are just approximations! 
*/ > unsigned printable, nonprintable; > @@ -48,7 +48,10 @@ > static void gather_stats(const char *buf, unsigned long size, struct > text_stat * > else if (c < 32) { > switch (c) { > /* BS, HT, ESC and FF */ > - case '\b': case '\t': case '\033': case '\014': > + case '\t': > + stats->tab++; > + /* fall through */ > + case '\b': case '\033': case '\014': > stats->printable++; > break; > case 0: > @@ -235,6 +238,105 @@ > static int crlf_to_worktree(const char *path, const char *src, size_t len, > return 1; > } > > +static int tabs_to_spaces(const char *path, const char *src, size_t len, > + > struct strbuf *buf, int untabify) > +{ > + char *to_free = NULL; > + struct text_stat stats; > + static const unsigned tab_size = 4; > + char *spaces; > + > + if (!untabify) > + return 0; > + > + /* instead of calling twice, should cache these stats across calls */ > + gather_stats(src, len, &stats); > + > + if (!stats.tab) > + return 0; > + > + /* are we "faking" in place editing ? */ > + if (src == buf->buf) > + to_free = strbuf_detach(buf, NULL); > + > + /* this growth may be excessive: not all tabs get tab_size spaces */ > + strbuf_grow(buf, len + tab_size * stats.tab); > + spaces = (char *) xmalloc(tab_size); > + memset(spaces, ' ', tab_size); > + for (;;) { > + const char *line = src; > + const char *nl = memchr(src, '\n', len); > + char *tab; > + if (!nl) > + nl = src + len; > + while (src < nl && (tab = memchr(src, '\t', nl - src))) { > + strbuf_add(buf, src, tab - src); > + strbuf_add(buf, spaces, tab_size - ((tab - line) % tab_size)); > + src = tab + 1; > + } > + if (src < nl) > + strbuf_add(buf, src, nl - src); > + if (nl < src + len) > + strbuf_addch(buf, '\n'); > + else > + break; > + src = nl + 1; > + len -= src - line; > + } > + > + free(to_free); > + free(spaces); > + return 1; > +} > + > +static int spaces_to_tabs(const char *path, const char *src, size_t len, > + > struct strbuf *buf, int tabify) > +{ > + static const unsigned tab_size = 4; > + > + if 
(!tabify) > + return 0; > + > + /* only grow if not in place */ > + if (strbuf_avail(buf) + buf->len < len) > + strbuf_grow(buf, len - buf->len); > + > + for (;;) { > + int tabs = 0, spaces = 0; > + const char *line = src; > + const char *nl = memchr(src, '\n', len); > + if (!nl) > + nl = src + len; > + for (;; src++) { > + if (*src == ' ') { > + spaces++; > + if (spaces == tab_size) { > + tabs++; > + spaces = 0; > + } > + } else if (*src == '\t') { > + tabs++; > + spaces = 0; > + } else break; > + } > + if (line < src) { > + memset(buf->buf + buf->len, '\t', tabs); > + memset(buf->buf + buf->len + tabs, ' ', spaces); > + strbuf_setlen(buf, buf->len + tabs + spaces); > + } > + if (src < nl) > + strbuf_add(buf, src, nl - src); > + if (nl < src + len) > + strbuf_addch(buf, '\n'); > + else > + break; > + src = nl + 1; > + len -= src - line; > + } > + > + return 1; > +} > + > struct filter_params { > const char *src; > unsigned long size; > @@ -370,22 +472,29 @@ > static int read_convert_config(const char *var, const char *value, void *cb) > return 0; > } > > -static void setup_convert_check(struct git_attr_check *check) > +struct convert_checks { > + struct git_attr_check crlf, tabs, ident, filter; > +}; > + > +static void setup_convert_check(struct convert_checks *checks) > { > static struct git_attr *attr_crlf; > + static struct git_attr *attr_tabs; > static struct git_attr *attr_ident; > static struct git_attr *attr_filter; > > if (!attr_crlf) { > attr_crlf = git_attr("crlf", 4); > + attr_tabs = git_attr("tabs", 4); > attr_ident = git_attr("ident", 5); > attr_filter = git_attr("filter", 6); > user_convert_tail = &user_convert; > git_config(read_convert_config, NULL); > } > - check[0].attr = attr_crlf; > - check[1].attr = attr_ident; > - check[2].attr = attr_filter; > + checks->crlf.attr = attr_crlf; > + checks->tabs.attr = attr_tabs; > + checks->ident.attr = attr_ident; > + checks->filter.attr = attr_filter; > } > > static int count_ident(const char *cp, unsigned 
long size) > @@ -566,20 +675,22 @@ > static int git_path_check_ident(const char *path, struct git_attr_check *check) > return !!ATTR_TRUE(value); > } > > +#define CHECK_ARRAY_SIZE (sizeof(struct convert_checks)/sizeof(struct > git_attr_check)) > + > int convert_to_git(const char *path, const char *src, size_t len, > struct strbuf *dst, enum safe_crlf checksafe) > { > - struct git_attr_check check[3]; > + struct convert_checks checks; > int crlf = CRLF_GUESS; > int ident = 0, ret = 0; > const char *filter = NULL; > > - setup_convert_check(check); > - if (!git_checkattr(path, ARRAY_SIZE(check), check)) { > + setup_convert_check(&checks); > + if (!git_checkattr(path, CHECK_ARRAY_SIZE, (struct git_attr_check *) > &checks)) { > struct convert_driver *drv; > - crlf = git_path_check_crlf(path, check + 0); > - ident = git_path_check_ident(path, check + 1); > - drv = git_path_check_convert(path, check + 2); > + crlf = git_path_check_crlf(path, &(checks.crlf)); > + ident = git_path_check_ident(path, &(checks.ident)); > + drv = git_path_check_convert(path, &(checks.filter)); > if (drv && drv->clean) > filter = drv->clean; > } > @@ -589,6 +700,11 @@ > int convert_to_git(const char *path, const char *src, size_t len, > src = dst->buf; > len = dst->len; > } > + ret |= tabs_to_spaces(path, src, len, dst, 1); // get real variable > + if (ret) { > + src = dst->buf; > + len = dst->len; > + } > ret |= crlf_to_git(path, src, len, dst, crlf, checksafe); > if (ret) { > src = dst->buf; > @@ -599,17 +715,17 @@ > int convert_to_git(const char *path, const char *src, size_t len, > > int convert_to_working_tree(const char *path, const char *src, size_t > len, struct strbuf *dst) > { > - struct git_attr_check check[3]; > + struct convert_checks checks; > int crlf = CRLF_GUESS; > int ident = 0, ret = 0; > const char *filter = NULL; > > - setup_convert_check(check); > - if (!git_checkattr(path, ARRAY_SIZE(check), check)) { > + setup_convert_check(&checks); > + if (!git_checkattr(path, 
CHECK_ARRAY_SIZE, (struct git_attr_check *) > &checks)) { > struct convert_driver *drv; > - crlf = git_path_check_crlf(path, check + 0); > - ident = git_path_check_ident(path, check + 1); > - drv = git_path_check_convert(path, check + 2); > + crlf = git_path_check_crlf(path, &(checks.crlf)); > + ident = git_path_check_ident(path, &(checks.ident)); > + drv = git_path_check_convert(path, &(checks.filter)); > if (drv && drv->smudge) > filter = drv->smudge; > } > @@ -624,5 +740,10 @@ > int convert_to_working_tree(const char *path, const char *src, size_t > len, struc > src = dst->buf; > len = dst->len; > } > + ret |= tabs_to_spaces(path, src, len, dst, 1); // get real variable > + if (ret) { > + src = dst->buf; > + len = dst->len; > + } > return ret | apply_filter(path, src, len, dst, filter); > } > -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html