[PATCH v5 02/23] build-aux: rewrite whitespace checker in Python

Daniel P. Berrangé <berrange@xxxxxxxxxx> · Mon, 11 Nov 2019 14:38:05 +0000

As part of a goal to eliminate Perl from libvirt build tools,
rewrite the check-spacing.pl tool in Python.

This was a straight conversion, manually going line-by-line to
change the syntax from Perl to Python. Thus the overall structure
of the file and approach is the same.

Signed-off-by: Daniel P. Berrangé <berrange@xxxxxxxxxx>
---
 Makefile.am                |   2 +-
 build-aux/check-spacing.pl | 198 --------------------------------
 build-aux/syntax-check.mk  |   4 +-
 scripts/check-spacing.py   | 229 +++++++++++++++++++++++++++++++++++++
 4 files changed, 232 insertions(+), 201 deletions(-)
 delete mode 100755 build-aux/check-spacing.pl
 create mode 100755 scripts/check-spacing.py

diff --git a/Makefile.am b/Makefile.am
index 9471cf7117..5187ca6cc2 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -46,7 +46,7 @@ EXTRA_DIST = \
   README.md \
   AUTHORS.in \
   scripts/augeas-gentest.py \
-  build-aux/check-spacing.pl \
+  scripts/check-spacing.py \
   build-aux/header-ifdef.pl \
   scripts/minimize-po.py \
   build-aux/mock-noinline.pl \
diff --git a/build-aux/check-spacing.pl b/build-aux/check-spacing.pl
deleted file mode 100755
index 33377f3dd3..0000000000
--- a/build-aux/check-spacing.pl
+++ /dev/null
@@ -1,198 +0,0 @@
-#!/usr/bin/env perl
-#
-# check-spacing.pl: Report any usage of 'function (..args..)'
-# Also check for other syntax issues, such as correct use of ';'
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library.  If not, see
-# <http://www.gnu.org/licenses/>.
-
-use strict;
-use warnings;
-
-my $ret = 0;
-my $incomment = 0;
-
-foreach my $file (@ARGV) {
-    # Per-file variables for multiline Curly Bracket (cb_) check
-    my $cb_linenum = 0;
-    my $cb_code = "";
-    my $cb_scolon = 0;
-
-    open FILE, $file;
-
-    while (defined (my $line = <FILE>)) {
-        my $data = $line;
-        # For temporary modifications
-        my $tmpdata;
-
-        # Kill any quoted , ; = or "
-        $data =~ s/'[";,=]'/'X'/g;
-
-        # Kill any quoted strings
-        $data =~ s,"(?:[^\\\"]|\\.)*","XXX",g;
-
-        next if $data =~ /^#/;
-
-        # Kill contents of multi-line comments
-        # and detect end of multi-line comments
-        if ($incomment) {
-            if ($data =~ m,\*/,) {
-                $incomment = 0;
-                $data =~ s,^.*\*/,*/,;
-            } else {
-                $data = "";
-            }
-        }
-
-        # Kill single line comments, and detect
-        # start of multi-line comments
-        if ($data =~ m,/\*.*\*/,) {
-            $data =~ s,/\*.*\*/,/* */,;
-        } elsif ($data =~ m,/\*,) {
-            $incomment = 1;
-            $data =~ s,/\*.*,/*,;
-        }
-
-        # We need to match things like
-        #
-        #  int foo (int bar, bool wizz);
-        #  foo (bar, wizz);
-        #
-        # but not match things like:
-        #
-        #  typedef int (*foo)(bar wizz)
-        #
-        # we can't do this (efficiently) without
-        # missing things like
-        #
-        #  foo (*bar, wizz);
-        #
-        # We also don't want to spoil the $data so it can be used
-        # later on.
-        $tmpdata = $data;
-        while ($tmpdata =~ /(\w+)\s\((?!\*)/) {
-            my $kw = $1;
-
-            # Allow space after keywords only
-            if ($kw =~ /^(?:if|for|while|switch|return)$/) {
-                $tmpdata =~ s/(?:$kw\s\()/XXX(/;
-            } else {
-                print "Whitespace after non-keyword:\n";
-                print "$file:$.: $line";
-                $ret = 1;
-                last;
-            }
-        }
-
-        # Require whitespace immediately after keywords
-        if ($data =~ /\b(?:if|for|while|switch|return)\(/) {
-            print "No whitespace after keyword:\n";
-            print "$file:$.: $line";
-            $ret = 1;
-        }
-
-        # Forbid whitespace between )( of a function typedef
-        if ($data =~ /\(\*\w+\)\s+\(/) {
-            print "Whitespace between ')' and '(':\n";
-            print "$file:$.: $line";
-            $ret = 1;
-        }
-
-        # Forbid whitespace following ( or prior to )
-        # but allow whitespace before ) on a single line
-        # (optionally followed by a semicolon)
-        if (($data =~ /\s\)/ && not $data =~ /^\s+\);?$/) ||
-            $data =~ /\((?!$)\s/) {
-            print "Whitespace after '(' or before ')':\n";
-            print "$file:$.: $line";
-            $ret = 1;
-        }
-
-        # Forbid whitespace before ";" or ",". Things like below are allowed:
-        #
-        # 1) The expression is empty for "for" loop. E.g.
-        #   for (i = 0; ; i++)
-        #
-        # 2) An empty statement. E.g.
-        #   while (write(statuswrite, &status, 1) == -1 &&
-        #          errno == EINTR)
-        #       ;
-        #
-        if ($data =~ /\s[;,]/) {
-            unless ($data =~ /\S; ; / ||
-                    $data =~ /^\s+;/) {
-                print "Whitespace before semicolon or comma:\n";
-                print "$file:$.: $line";
-                $ret = 1;
-            }
-        }
-
-        # Require EOL, macro line continuation, or whitespace after ";".
-        # Allow "for (;;)" as an exception.
-        if ($data =~ /;[^	 \\\n;)]/) {
-            print "Invalid character after semicolon:\n";
-            print "$file:$.: $line";
-            $ret = 1;
-        }
-
-        # Require EOL, space, or enum/struct end after comma.
-        if ($data =~ /,[^ \\\n)}]/) {
-            print "Invalid character after comma:\n";
-            print "$file:$.: $line";
-            $ret = 1;
-        }
-
-        # Require spaces around assignment '=', compounds and '=='
-        if ($data =~ /[^ ]\b[!<>&|\-+*\/%\^=]?=/ ||
-            $data =~ /=[^= \\\n]/) {
-            print "Spacing around '=' or '==':\n";
-            print "$file:$.: $line";
-            $ret = 1;
-        }
-
-        # One line conditional statements with one line bodies should
-        # not use curly brackets.
-        if ($data =~ /^\s*(if|while|for)\b.*\{$/) {
-            $cb_linenum = $.;
-            $cb_code = $line;
-            $cb_scolon = 0;
-        }
-
-        # We need to check for exactly one semicolon inside the body,
-        # because empty statements (e.g. with comment only) are
-        # allowed
-        if ($cb_linenum == $. - 1 && $data =~ /^[^;]*;[^;]*$/) {
-            $cb_code .= $line;
-            $cb_scolon = 1;
-        }
-
-        if ($data =~ /^\s*}\s*$/ &&
-            $cb_linenum == $. - 2 &&
-            $cb_scolon) {
-
-            print "Curly brackets around single-line body:\n";
-            print "$file:$cb_linenum-$.:\n$cb_code$line";
-            $ret = 1;
-
-            # There _should_ be no need to reset the values; but to
-            # keep my inner peace...
-            $cb_linenum = 0;
-            $cb_scolon = 0;
-            $cb_code = "";
-        }
-    }
-    close FILE;
-}
-
-exit $ret;
diff --git a/build-aux/syntax-check.mk b/build-aux/syntax-check.mk
index 9b6c157029..d308896b26 100644
--- a/build-aux/syntax-check.mk
+++ b/build-aux/syntax-check.mk
@@ -2157,8 +2157,8 @@ prohibit-duplicate-header:
 	$(PYTHON) $(top_srcdir)/scripts/prohibit-duplicate-header.py
 
 spacing-check:
-	$(AM_V_GEN)$(VC_LIST) | $(GREP) '\.c$$' | xargs \
-	$(PERL) $(top_srcdir)/build-aux/check-spacing.pl || \
+	$(AM_V_GEN)$(VC_LIST) | $(GREP) '\.c$$' | $(RUNUTF8) xargs \
+	$(PYTHON) $(top_srcdir)/scripts/check-spacing.py || \
 	  { echo '$(ME): incorrect formatting' 1>&2; exit 1; }
 
 mock-noinline:
diff --git a/scripts/check-spacing.py b/scripts/check-spacing.py
new file mode 100755
index 0000000000..6b9f3ec1ba
--- /dev/null
+++ b/scripts/check-spacing.py
@@ -0,0 +1,229 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2012-2019 Red Hat, Inc.
+#
+# check-spacing.py: Report any usage of 'function (..args..)'
+# Also check for other syntax issues, such as correct use of ';'
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+from __future__ import print_function
+
+import re
+import sys
+
+
+def _report(message, filename, lineno, line):
+    """Print one violation to stderr: the message, then file:line: text."""
+    print(message, file=sys.stderr)
+    print("%s:%d: %s" % (filename, lineno, line), file=sys.stderr)
+
+
+def check_whitespace(filename):
+    """Check the spacing conventions of one C source file.
+
+    Quoted strings, quoted punctuation and comment bodies are blanked
+    out first so they cannot trigger a check; lines starting with '#'
+    are skipped. Every violation is reported on stderr.
+
+    Returns True if at least one violation was found, else False.
+    """
+    errs = False
+    with open(filename, 'r') as fh:
+        quotedmetaprog = re.compile(r"""'[";,=]'""")
+        quotedstringprog = re.compile(r'''"(?:[^\\\"]|\\.)*"''')
+        commentstartprog = re.compile(r'''^(.*)/\*.*$''')
+        commentendprog = re.compile(r'''^.*\*/(.*)$''')
+        commentprog = re.compile(r'''^(.*)/\*.*\*/(.*)''')
+        funcprog = re.compile(r'''(\w+)\s\((?!\*)''')
+        keywordprog = re.compile(
+            r'''^.*\b(?:if|for|while|switch|return)\(.*$''')
+        functypedefprog = re.compile(r'''^.*\(\*\w+\)\s+\(.*$''')
+        whitespaceprog1 = re.compile(r'''^.*\s\).*$''')
+        whitespaceprog2 = re.compile(r'''^\s+\);?$''')
+        whitespaceprog3 = re.compile(r'''^.*\((?!$)\s.*''')
+        commasemiprog1 = re.compile(r'''.*\s[;,].*''')
+        commasemiprog2 = re.compile(r'''.*\S; ; .*''')
+        commasemiprog3 = re.compile(r'''^\s+;''')
+        semicolonprog = re.compile(r'''.*;[^\t \\\n;)].*''')
+        commaprog = re.compile(r'''.*,[^ \\\n)}].*''')
+        assignprog1 = re.compile(r'''[^ ]\b[!<>&|\-+*\/%\^=]?=''')
+        assignprog2 = re.compile(r'''=[^= \\\n]''')
+        condstartprog = re.compile(r'''^\s*(if|while|for)\b.*\{$''')
+        statementprog = re.compile(r'''^[^;]*;[^;]*$''')
+        condendprog = re.compile(r'''^\s*}\s*$''')
+
+        incomment = False
+        # Per-file variables for multiline Curly Bracket (cb_) check
+        cb_lineno = 0
+        cb_code = ""
+        cb_scolon = False
+
+        for lineno, line in enumerate(fh, 1):
+            # "line" is kept verbatim for reporting; "data" is a
+            # sanitized copy that the checks are run against.
+            data = line
+
+            # Kill any quoted , ; = or "
+            data = quotedmetaprog.sub("'X'", data)
+
+            # Kill any quoted strings
+            data = quotedstringprog.sub('"XXX"', data)
+
+            # Lines starting with '#' are not checked
+            if data.startswith('#'):
+                continue
+
+            # Kill contents of multi-line comments and detect their
+            # end, keeping any code that follows the closing '*/'
+            if incomment:
+                if commentendprog.match(data):
+                    data = commentendprog.sub(r'*/\1', data)
+                    incomment = False
+                else:
+                    data = ""
+
+            # Kill single line comments, and detect
+            # start of multi-line comments
+            if commentprog.match(data):
+                data = commentprog.sub(r'''\1/* */\2''', data)
+            elif commentstartprog.match(data):
+                data = commentstartprog.sub(r'''\1/*''', data)
+                incomment = True
+
+            # We need to match things like
+            #
+            #  int foo (int bar, bool wizz);
+            #  foo (bar, wizz);
+            #
+            # but not match things like:
+            #
+            #  typedef int (*foo)(bar wizz)
+            #
+            # we can't do this (efficiently) without
+            # missing things like
+            #
+            #  foo (*bar, wizz);
+            #
+            for match in funcprog.finditer(data):
+                kw = match.group(1)
+
+                # Allow space after keywords only
+                if kw not in ["if", "for", "while", "switch", "return"]:
+                    _report("Whitespace after non-keyword:",
+                            filename, lineno, line)
+                    errs = True
+                    break
+
+            # Require whitespace immediately after keywords
+            if keywordprog.match(data):
+                _report("No whitespace after keyword:",
+                        filename, lineno, line)
+                errs = True
+
+            # Forbid whitespace between )( of a function typedef
+            if functypedefprog.match(data):
+                _report("Whitespace between ')' and '(':",
+                        filename, lineno, line)
+                errs = True
+
+            # Forbid whitespace following ( or prior to )
+            # but allow whitespace before ) on a single line
+            # (optionally followed by a semicolon)
+            if ((whitespaceprog1.match(data) and
+                 not whitespaceprog2.match(data))
+                or whitespaceprog3.match(data)):
+                _report("Whitespace after '(' or before ')':",
+                        filename, lineno, line)
+                errs = True
+
+            # Forbid whitespace before ";" or ",". Things like
+            # below are allowed:
+            #
+            # 1) The expression is empty for "for" loop. E.g.
+            #   for (i = 0; ; i++)
+            #
+            # 2) An empty statement. E.g.
+            #   while (write(statuswrite, &status, 1) == -1 &&
+            #          errno == EINTR)
+            #       ;
+            #
+            if commasemiprog1.match(data) and not (
+                    commasemiprog2.match(data) or
+                    commasemiprog3.match(data)):
+                _report("Whitespace before semicolon or comma:",
+                        filename, lineno, line)
+                errs = True
+
+            # Require EOL, macro line continuation, or whitespace after ";".
+            # Allow "for (;;)" as an exception.
+            if semicolonprog.match(data):
+                _report("Invalid character after semicolon:",
+                        filename, lineno, line)
+                errs = True
+
+            # Require EOL, space, or enum/struct end after comma.
+            if commaprog.match(data):
+                _report("Invalid character after comma:",
+                        filename, lineno, line)
+                errs = True
+
+            # Require spaces around assignment '=', compounds and '=='
+            # (these patterns are unanchored, so they need search();
+            # match() would only ever test the start of the line)
+            if assignprog1.search(data) or assignprog2.search(data):
+                _report("Spacing around '=' or '==':",
+                        filename, lineno, line)
+                errs = True
+
+            # One line conditional statements with one line bodies should
+            # not use curly brackets.
+            if condstartprog.match(data):
+                cb_lineno = lineno
+                cb_code = line
+                cb_scolon = False
+
+            # We need to check for exactly one semicolon inside the body,
+            # because empty statements (e.g. with comment only) are
+            # allowed
+            if (cb_lineno == lineno - 1) and statementprog.match(data):
+                cb_code = cb_code + line
+                cb_scolon = True
+
+            if (condendprog.match(data) and
+                (cb_lineno == lineno - 2) and
+                cb_scolon):
+                print("Curly brackets around single-line body:",
+                      file=sys.stderr)
+                print("%s:%d-%d:\n%s%s" % (filename, cb_lineno, lineno,
+                                           cb_code, line),
+                      file=sys.stderr)
+                errs = True
+
+                # There _should_ be no need to reset the values; but to
+                # keep my inner peace...
+                cb_lineno = 0
+                cb_scolon = False
+                cb_code = ""
+
+    return errs
+
+
+# Check every file named on the command line (no short-circuiting, so
+# all violations get reported), then exit 1 if any file had one.
+results = [check_whitespace(path) for path in sys.argv[1:]]
+status = 1 if any(results) else 0
+
+sys.exit(status)
-- 
2.21.0

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list