Re: builtin conversion between tabs and spaces

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Apologies for the Gmail mangling. I will use git send-email for real patches.

On Tue, Oct 14, 2008 at 6:44 PM, Stefan Karpinski
<stefan.karpinski@xxxxxxxxx> wrote:
> I find myself really wanting to enforce standards in the use of spaces
> versus tabs. I deal with some unruly programmers who refuse to set
> their editors to use soft tabs, but I *hate* tabs in the repo. And of
> course others feel equally strongly about keeping only tabs in the
> repo (e.g. the git repo).
>
> This led me to wonder if it wouldn't make sense to have this
> conversion ability built into git. The following patch implements this
> functionality. It still needs work—it's not meant to be final, just to
> give an idea—but I just wanted to see if people on the git list
> thought this sort of thing would be worthwhile at all.
>
> If people think it's worth having in git, then how should it be
> configured? I feel like a project should be able to define the
> expected tab size for various file types. Moreover, the project should
> be able to define the default canonicalization with respect to
> whitespace for different file types. Then, if they so desire, each
> git user should be able to override the output format on a
> per-repository basis.
>
> Does this make any sense? Comments?
>
> ---
> diff --git a/convert.c b/convert.c
> index 1816e97..280f45b 100644
> --- a/convert.c
> +++ b/convert.c
> @@ -18,7 +18,7 @@
>
>  struct text_stat {
>       /* NUL, CR, LF and CRLF counts */
> -       unsigned nul, cr, lf, crlf;
> +       unsigned nul, cr, lf, crlf, tab;
>
>       /* These are just approximations! */
>       unsigned printable, nonprintable;
> @@ -48,7 +48,10 @@
>  static void gather_stats(const char *buf, unsigned long size, struct
> text_stat *
>               else if (c < 32) {
>                       switch (c) {
>                               /* BS, HT, ESC and FF */
> -                       case '\b': case '\t': case '\033': case '\014':
> +                       case '\t':
> +                               stats->tab++;
> +                               /* fall through */
> +                       case '\b': case '\033': case '\014':
>                               stats->printable++;
>                               break;
>                       case 0:
> @@ -235,6 +238,105 @@
>  static int crlf_to_worktree(const char *path, const char *src, size_t len,
>       return 1;
>  }
>
> +static int tabs_to_spaces(const char *path, const char *src, size_t len,
> +
>                  struct strbuf *buf, int untabify)
> +{
> +       char *to_free = NULL;
> +       struct text_stat stats;
> +  static const unsigned tab_size = 4;
> +       char *spaces;
> +
> +       if (!untabify)
> +               return 0;
> +
> +  /* instead of calling twice, should cache these stats across calls */
> +       gather_stats(src, len, &stats);
> +
> +       if (!stats.tab)
> +    return 0;
> +
> +       /* are we "faking" in place editing ? */
> +       if (src == buf->buf)
> +               to_free = strbuf_detach(buf, NULL);
> +
> +  /* this growth may be excessive: not all tabs get tab_size spaces */
> +       strbuf_grow(buf, len + tab_size * stats.tab);
> +  spaces = (char *) xmalloc(tab_size);
> +  memset(spaces, ' ', tab_size);
> +       for (;;) {
> +               const char *line = src;
> +               const char *nl = memchr(src, '\n', len);
> +               char *tab;
> +    if (!nl)
> +      nl = src + len;
> +    while (src < nl && (tab = memchr(src, '\t', nl - src))) {
> +      strbuf_add(buf, src, tab - src);
> +      strbuf_add(buf, spaces, tab_size - ((tab - line) % tab_size));
> +      src = tab + 1;
> +    }
> +    if (src < nl)
> +      strbuf_add(buf, src, nl - src);
> +               if (nl < src + len)
> +                       strbuf_addch(buf, '\n');
> +               else
> +                       break;
> +               src = nl + 1;
> +               len -= src - line;
> +       }
> +
> +       free(to_free);
> +       free(spaces);
> +       return 1;
> +}
> +
> +static int spaces_to_tabs(const char *path, const char *src, size_t len,
> +
>                  struct strbuf *buf, int tabify)
> +{
> +  static const unsigned tab_size = 4;
> +
> +       if (!tabify)
> +               return 0;
> +
> +       /* only grow if not in place */
> +       if (strbuf_avail(buf) + buf->len < len)
> +               strbuf_grow(buf, len - buf->len);
> +
> +       for (;;) {
> +               int tabs = 0, spaces = 0;
> +               const char *line = src;
> +               const char *nl = memchr(src, '\n', len);
> +    if (!nl)
> +      nl = src + len;
> +               for (;; src++) {
> +                       if (*src == ' ') {
> +                               spaces++;
> +                               if (spaces == tab_size) {
> +                                       tabs++;
> +                                       spaces = 0;
> +                               }
> +                       } else if (*src == '\t') {
> +                               tabs++;
> +                               spaces = 0;
> +                       } else break;
> +               }
> +               if (line < src) {
> +                       memset(buf->buf + buf->len, '\t', tabs);
> +                       memset(buf->buf + buf->len + tabs, ' ', spaces);
> +                       strbuf_setlen(buf, buf->len + tabs + spaces);
> +               }
> +    if (src < nl)
> +      strbuf_add(buf, src, nl - src);
> +               if (nl < src + len)
> +                       strbuf_addch(buf, '\n');
> +               else
> +                       break;
> +               src = nl + 1;
> +               len -= src - line;
> +       }
> +
> +       return 1;
> +}
> +
>  struct filter_params {
>       const char *src;
>       unsigned long size;
> @@ -370,22 +472,29 @@
>  static int read_convert_config(const char *var, const char *value, void *cb)
>       return 0;
>  }
>
> -static void setup_convert_check(struct git_attr_check *check)
> +struct convert_checks {
> +  struct git_attr_check crlf, tabs, ident, filter;
> +};
> +
> +static void setup_convert_check(struct convert_checks *checks)
>  {
>       static struct git_attr *attr_crlf;
> +       static struct git_attr *attr_tabs;
>       static struct git_attr *attr_ident;
>       static struct git_attr *attr_filter;
>
>       if (!attr_crlf) {
>               attr_crlf = git_attr("crlf", 4);
> +               attr_tabs = git_attr("tabs", 4);
>               attr_ident = git_attr("ident", 5);
>               attr_filter = git_attr("filter", 6);
>               user_convert_tail = &user_convert;
>               git_config(read_convert_config, NULL);
>       }
> -       check[0].attr = attr_crlf;
> -       check[1].attr = attr_ident;
> -       check[2].attr = attr_filter;
> +       checks->crlf.attr = attr_crlf;
> +       checks->tabs.attr = attr_tabs;
> +       checks->ident.attr = attr_ident;
> +       checks->filter.attr = attr_filter;
>  }
>
>  static int count_ident(const char *cp, unsigned long size)
> @@ -566,20 +675,22 @@
>  static int git_path_check_ident(const char *path, struct git_attr_check *check)
>       return !!ATTR_TRUE(value);
>  }
>
> +#define CHECK_ARRAY_SIZE (sizeof(struct convert_checks)/sizeof(struct
> git_attr_check))
> +
>  int convert_to_git(const char *path, const char *src, size_t len,
>                   struct strbuf *dst, enum safe_crlf checksafe)
>  {
> -       struct git_attr_check check[3];
> +       struct convert_checks checks;
>       int crlf = CRLF_GUESS;
>       int ident = 0, ret = 0;
>       const char *filter = NULL;
>
> -       setup_convert_check(check);
> -       if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
> +       setup_convert_check(&checks);
> +       if (!git_checkattr(path, CHECK_ARRAY_SIZE, (struct git_attr_check *)
> &checks)) {
>               struct convert_driver *drv;
> -               crlf = git_path_check_crlf(path, check + 0);
> -               ident = git_path_check_ident(path, check + 1);
> -               drv = git_path_check_convert(path, check + 2);
> +               crlf = git_path_check_crlf(path, &(checks.crlf));
> +               ident = git_path_check_ident(path, &(checks.ident));
> +               drv = git_path_check_convert(path, &(checks.filter));
>               if (drv && drv->clean)
>                       filter = drv->clean;
>       }
> @@ -589,6 +700,11 @@
>  int convert_to_git(const char *path, const char *src, size_t len,
>               src = dst->buf;
>               len = dst->len;
>       }
> +       ret |= tabs_to_spaces(path, src, len, dst, 1); // get real variable
> +       if (ret) {
> +               src = dst->buf;
> +               len = dst->len;
> +       }
>       ret |= crlf_to_git(path, src, len, dst, crlf, checksafe);
>       if (ret) {
>               src = dst->buf;
> @@ -599,17 +715,17 @@
>  int convert_to_git(const char *path, const char *src, size_t len,
>
>  int convert_to_working_tree(const char *path, const char *src, size_t
> len, struct strbuf *dst)
>  {
> -       struct git_attr_check check[3];
> +       struct convert_checks checks;
>       int crlf = CRLF_GUESS;
>       int ident = 0, ret = 0;
>       const char *filter = NULL;
>
> -       setup_convert_check(check);
> -       if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
> +       setup_convert_check(&checks);
> +       if (!git_checkattr(path, CHECK_ARRAY_SIZE, (struct git_attr_check *)
> &checks)) {
>               struct convert_driver *drv;
> -               crlf = git_path_check_crlf(path, check + 0);
> -               ident = git_path_check_ident(path, check + 1);
> -               drv = git_path_check_convert(path, check + 2);
> +               crlf = git_path_check_crlf(path, &(checks.crlf));
> +               ident = git_path_check_ident(path, &(checks.ident));
> +               drv = git_path_check_convert(path, &(checks.filter));
>               if (drv && drv->smudge)
>                       filter = drv->smudge;
>       }
> @@ -624,5 +740,10 @@
>  int convert_to_working_tree(const char *path, const char *src, size_t
> len, struc
>               src = dst->buf;
>               len = dst->len;
>       }
> +       ret |= tabs_to_spaces(path, src, len, dst, 1); // get real variable
> +       if (ret) {
> +               src = dst->buf;
> +               len = dst->len;
> +       }
>       return ret | apply_filter(path, src, len, dst, filter);
>  }
>
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux