builtin conversion between tabs and spaces

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I find myself really wanting to enforce standards in the use of spaces
versus tabs. I deal with some unruly programmers who refuse to set
their editors to use soft tabs, but I *hate* tabs in the repo. And of
course others feel equally strongly about keeping only tabs in the
repo (e.g. the git repo).

This led me to wonder if it wouldn't make sense to have this
conversion ability built into git. The following patch implements this
functionality. It still needs work—it's not meant to be final, just to
give an idea—but I just wanted to see if people on the git list
thought this sort of thing would be worthwhile at all.

If people think it's worth having in git, then how should it be
configured? I feel like a project should be able to define the
expected tab size for different file types. Moreover, the project should
be able to define the default canonicalization with respect to
whitespace for different file types. Then, if they so desire, each
git user should be able to override the output format on a
per-repository basis.

Does this make any sense? Comments?

---
diff --git a/convert.c b/convert.c
index 1816e97..280f45b 100644
--- a/convert.c
+++ b/convert.c
@@ -18,7 +18,7 @@

 struct text_stat {
       /* NUL, CR, LF and CRLF counts */
-       unsigned nul, cr, lf, crlf;
+       unsigned nul, cr, lf, crlf, tab;

       /* These are just approximations! */
       unsigned printable, nonprintable;
@@ -48,7 +48,10 @@
 static void gather_stats(const char *buf, unsigned long size, struct
text_stat *
               else if (c < 32) {
                       switch (c) {
                               /* BS, HT, ESC and FF */
-                       case '\b': case '\t': case '\033': case '\014':
+                       case '\t':
+                               stats->tab++;
+                               /* fall through */
+                       case '\b': case '\033': case '\014':
                               stats->printable++;
                               break;
                       case 0:
@@ -235,6 +238,105 @@
 static int crlf_to_worktree(const char *path, const char *src, size_t len,
       return 1;
 }

+static int tabs_to_spaces(const char *path, const char *src, size_t len,
+
                  struct strbuf *buf, int untabify)
+{
+       char *to_free = NULL;
+       struct text_stat stats;
+  static const unsigned tab_size = 4;
+       char *spaces;
+
+       if (!untabify)
+               return 0;
+
+  /* instead of calling twice, should cache these stats across calls */
+       gather_stats(src, len, &stats);
+
+       if (!stats.tab)
+    return 0;
+
+       /* are we "faking" in place editing ? */
+       if (src == buf->buf)
+               to_free = strbuf_detach(buf, NULL);
+
+  /* this growth may be excessive: not all tabs get tab_size spaces */
+       strbuf_grow(buf, len + tab_size * stats.tab);
+  spaces = (char *) xmalloc(tab_size);
+  memset(spaces, ' ', tab_size);
+       for (;;) {
+               const char *line = src;
+               const char *nl = memchr(src, '\n', len);
+               char *tab;
+    if (!nl)
+      nl = src + len;
+    while (src < nl && (tab = memchr(src, '\t', nl - src))) {
+      strbuf_add(buf, src, tab - src);
+      strbuf_add(buf, spaces, tab_size - ((tab - line) % tab_size));
+      src = tab + 1;
+    }
+    if (src < nl)
+      strbuf_add(buf, src, nl - src);
+               if (nl < src + len)
+                       strbuf_addch(buf, '\n');
+               else
+                       break;
+               src = nl + 1;
+               len -= src - line;
+       }
+
+       free(to_free);
+       free(spaces);
+       return 1;
+}
+
+static int spaces_to_tabs(const char *path, const char *src, size_t len,
+
                  struct strbuf *buf, int tabify)
+{
+  static const unsigned tab_size = 4;
+
+       if (!tabify)
+               return 0;
+
+       /* only grow if not in place */
+       if (strbuf_avail(buf) + buf->len < len)
+               strbuf_grow(buf, len - buf->len);
+
+       for (;;) {
+               int tabs = 0, spaces = 0;
+               const char *line = src;
+               const char *nl = memchr(src, '\n', len);
+    if (!nl)
+      nl = src + len;
+               for (;; src++) {
+                       if (*src == ' ') {
+                               spaces++;
+                               if (spaces == tab_size) {
+                                       tabs++;
+                                       spaces = 0;
+                               }
+                       } else if (*src == '\t') {
+                               tabs++;
+                               spaces = 0;
+                       } else break;
+               }
+               if (line < src) {
+                       memset(buf->buf + buf->len, '\t', tabs);
+                       memset(buf->buf + buf->len + tabs, ' ', spaces);
+                       strbuf_setlen(buf, buf->len + tabs + spaces);
+               }
+    if (src < nl)
+      strbuf_add(buf, src, nl - src);
+               if (nl < src + len)
+                       strbuf_addch(buf, '\n');
+               else
+                       break;
+               src = nl + 1;
+               len -= src - line;
+       }
+
+       return 1;
+}
+
 struct filter_params {
       const char *src;
       unsigned long size;
@@ -370,22 +472,29 @@
 static int read_convert_config(const char *var, const char *value, void *cb)
       return 0;
 }

-static void setup_convert_check(struct git_attr_check *check)
+struct convert_checks {
+  struct git_attr_check crlf, tabs, ident, filter;
+};
+
+static void setup_convert_check(struct convert_checks *checks)
 {
       static struct git_attr *attr_crlf;
+       static struct git_attr *attr_tabs;
       static struct git_attr *attr_ident;
       static struct git_attr *attr_filter;

       if (!attr_crlf) {
               attr_crlf = git_attr("crlf", 4);
+               attr_tabs = git_attr("tabs", 4);
               attr_ident = git_attr("ident", 5);
               attr_filter = git_attr("filter", 6);
               user_convert_tail = &user_convert;
               git_config(read_convert_config, NULL);
       }
-       check[0].attr = attr_crlf;
-       check[1].attr = attr_ident;
-       check[2].attr = attr_filter;
+       checks->crlf.attr = attr_crlf;
+       checks->tabs.attr = attr_tabs;
+       checks->ident.attr = attr_ident;
+       checks->filter.attr = attr_filter;
 }

 static int count_ident(const char *cp, unsigned long size)
@@ -566,20 +675,22 @@
 static int git_path_check_ident(const char *path, struct git_attr_check *check)
       return !!ATTR_TRUE(value);
 }

+#define CHECK_ARRAY_SIZE (sizeof(struct convert_checks)/sizeof(struct
git_attr_check))
+
 int convert_to_git(const char *path, const char *src, size_t len,
                   struct strbuf *dst, enum safe_crlf checksafe)
 {
-       struct git_attr_check check[3];
+       struct convert_checks checks;
       int crlf = CRLF_GUESS;
       int ident = 0, ret = 0;
       const char *filter = NULL;

-       setup_convert_check(check);
-       if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
+       setup_convert_check(&checks);
+       if (!git_checkattr(path, CHECK_ARRAY_SIZE, (struct git_attr_check *)
&checks)) {
               struct convert_driver *drv;
-               crlf = git_path_check_crlf(path, check + 0);
-               ident = git_path_check_ident(path, check + 1);
-               drv = git_path_check_convert(path, check + 2);
+               crlf = git_path_check_crlf(path, &(checks.crlf));
+               ident = git_path_check_ident(path, &(checks.ident));
+               drv = git_path_check_convert(path, &(checks.filter));
               if (drv && drv->clean)
                       filter = drv->clean;
       }
@@ -589,6 +700,11 @@
 int convert_to_git(const char *path, const char *src, size_t len,
               src = dst->buf;
               len = dst->len;
       }
+       ret |= tabs_to_spaces(path, src, len, dst, 1); // get real variable
+       if (ret) {
+               src = dst->buf;
+               len = dst->len;
+       }
       ret |= crlf_to_git(path, src, len, dst, crlf, checksafe);
       if (ret) {
               src = dst->buf;
@@ -599,17 +715,17 @@
 int convert_to_git(const char *path, const char *src, size_t len,

 int convert_to_working_tree(const char *path, const char *src, size_t
len, struct strbuf *dst)
 {
-       struct git_attr_check check[3];
+       struct convert_checks checks;
       int crlf = CRLF_GUESS;
       int ident = 0, ret = 0;
       const char *filter = NULL;

-       setup_convert_check(check);
-       if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
+       setup_convert_check(&checks);
+       if (!git_checkattr(path, CHECK_ARRAY_SIZE, (struct git_attr_check *)
&checks)) {
               struct convert_driver *drv;
-               crlf = git_path_check_crlf(path, check + 0);
-               ident = git_path_check_ident(path, check + 1);
-               drv = git_path_check_convert(path, check + 2);
+               crlf = git_path_check_crlf(path, &(checks.crlf));
+               ident = git_path_check_ident(path, &(checks.ident));
+               drv = git_path_check_convert(path, &(checks.filter));
               if (drv && drv->smudge)
                       filter = drv->smudge;
       }
@@ -624,5 +740,10 @@
 int convert_to_working_tree(const char *path, const char *src, size_t
len, struc
               src = dst->buf;
               len = dst->len;
       }
+       ret |= tabs_to_spaces(path, src, len, dst, 1); // get real variable
+       if (ret) {
+               src = dst->buf;
+               len = dst->len;
+       }
       return ret | apply_filter(path, src, len, dst, filter);
 }
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux