2022-01-18 11:56 UTC+0000 ~ Usama Arif <usama.arif@xxxxxxxxxxxxx> > Currently the syscalls rst and subsequently man page are auto-generated > using function documentation present in bpf.h. If the documentation for the > syscall is missing or doesn't follow a specific format, then that syscall > is not dumped in the auto-generated rst. > > This patch checks the number of syscalls documented within the header file > with those present as part of the enum bpf_cmd and raises an Exception if > they don't match. It is not needed with the currently documented upstream > syscalls, but can help in debugging when developing new syscalls when > there might be missing or misformatted documentation. > > The function helper_number_check is moved to the Printer parent > class and renamed to elem_number_check as all the most derived children > classes are using this function now. > > Signed-off-by: Usama Arif <usama.arif@xxxxxxxxxxxxx> Thanks for the follow-up, looks good and seems to work well. Please find just a few nitpicks below. > --- > scripts/bpf_doc.py | 88 ++++++++++++++++++++++++++++++++-------------- > 1 file changed, 61 insertions(+), 27 deletions(-) > > diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py > index 20441e5d2..11304427e 100755 > --- a/scripts/bpf_doc.py > +++ b/scripts/bpf_doc.py > @@ -89,6 +89,8 @@ class HeaderParser(object): > self.commands = [] > self.desc_unique_helpers = set() > self.define_unique_helpers = [] > + self.desc_syscalls = [] > + self.enum_syscalls = [] > > def parse_element(self): > proto = self.parse_symbol() > @@ -103,7 +105,7 @@ class HeaderParser(object): > return Helper(proto=proto, desc=desc, ret=ret) > > def parse_symbol(self): > - p = re.compile(' \* ?(.+)$') > + p = re.compile(' \* ?(BPF\w+)$') > capture = p.match(self.line) > if not capture: > raise NoSyscallCommandFound > @@ -181,26 +183,55 @@ class HeaderParser(object): > raise Exception("No return found for " + proto) > return ret > > - def seek_to(self, target, help_message): > + def seek_to(self, target, help_message, discard_lines = 1): > self.reader.seek(0) > offset = self.reader.read().find(target) > if offset == -1: > raise Exception(help_message) > self.reader.seek(offset) > self.reader.readline() > - self.reader.readline() > + for _ in range(discard_lines): > + self.reader.readline() > self.line = self.reader.readline() > > - def parse_syscall(self): > + def parse_desc_syscall(self): > self.seek_to('* DOC: eBPF Syscall Commands', > 'Could not find start of eBPF syscall descriptions list') > while True: > try: > command = self.parse_element() > self.commands.append(command) > + self.desc_syscalls.append(command.proto) > + > except NoSyscallCommandFound: > break > > + def parse_enum_syscall(self): > + self.seek_to('enum bpf_cmd {', > + 'Could not find start of bpf_cmd enum', 0) > + # Searches for either one or more BPF\w+ enums > + bpf_p = re.compile('\s*(BPF\w+)+') > + # Searches for an enum entry assigned to another entry, > + # for e.g. BPF_PROG_RUN = BPF_PROG_TEST_RUN, which is > + # not documented hence should be skipped in check to > + # determine if the right number of syscalls are documented Sounds good. If you respin, would you mind taking this opportunity to add, at the end of BPF_PROG_TEST_RUN's description in {tools/,}include/uapi/linux/bpf.h, that BPF_PROG_RUN is an alias to this command? It may be useful for users looking for BPF_PROG_RUN in the generated man page. > + assign_p = re.compile('\s*(BPF\w+)\s*=\s*(BPF\w+)') > + bpf_cmd_str = '' > + while True: > + capture = assign_p.match(self.line) > + if capture: > + # Skip line if an enum entry is assigned to another entry > + self.line = self.reader.readline() > + continue > + capture = bpf_p.match(self.line) > + if capture: > + bpf_cmd_str += self.line > + else: > + break > + self.line = self.reader.readline() > + # Find the number of occurences of BPF\w+ > + self.enum_syscalls = re.findall('(BPF\w+)+', bpf_cmd_str) > + > def parse_desc_helpers(self): > self.seek_to('* Start of BPF helper function descriptions:', > 'Could not find start of eBPF helper descriptions list') > @@ -234,7 +265,8 @@ class HeaderParser(object): > self.define_unique_helpers = re.findall('FN\(\w+\)', fn_defines_str) > > def run(self): > - self.parse_syscall() > + self.parse_desc_syscall() > + self.parse_enum_syscall() > self.parse_desc_helpers() > self.parse_define_helpers() > self.reader.close() > @@ -266,6 +298,27 @@ class Printer(object): > self.print_one(elem) > self.print_footer() > > + def elem_number_check(self, desc_unique_elem, define_unique_elem, type, instance): > + """ > + Checks the number of helpers/syscalls documented within the header file > + description with those defined as part of enum/macro and raise an > + Exception if they don't match. > + """ > + nr_desc_unique_elem = len(desc_unique_elem) > + nr_define_unique_elem = len(define_unique_elem) > + if nr_desc_unique_elem != nr_define_unique_elem: > + exception_msg = ''' > + The number of unique %s in description (%d) doesn\'t match the number of unique %s defined in %s (%d) > + ''' % (type, nr_desc_unique_elem, type, instance, nr_define_unique_elem) Nit: I think you didn't mean to indent the two lines above? > + if nr_desc_unique_elem < nr_define_unique_elem: > + # Function description is parsed until no helper is found (which can be due to > + # misformatting). Hence, only print the first missing/misformatted helper/enum. > + exception_msg += ''' > + The description for %s is not present or formatted correctly. > + ''' % (define_unique_elem[nr_desc_unique_elem]) Same thing for the indent > + print(define_unique_elem) > + print(desc_unique_elem) These two objects should accompany the error message from the Exception. Did you consider adding them to exception_msg? If not convenient, should we at least send them to stderr instead of stdout? > + raise Exception(exception_msg)