On Fri, Apr 23, 2021 at 12:48:39AM +0530, Aditya Srivastava wrote:
> +my $pointer_function = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};

Is that a pointer-to-function? Or, as people who write C usually call it,
a function pointer? Wouldn't it be better to call it $function_pointer?

> @@ -1210,8 +1211,14 @@ sub dump_struct($$) {
> my $decl_type;
> my $members;
> my $type = qr{struct|union};
> + my $packed = qr{__packed};
> + my $aligned = qr{__aligned};
> + my $cacheline_aligned_in_smp = qr{____cacheline_aligned_in_smp};
> + my $cacheline_aligned = qr{____cacheline_aligned};

I don't think those four definitions actually simplify anything.

> + my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;

... whereas this one definitely does.

> - $members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi;
> - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
> - $members =~ s/\s*__packed\s*/ /gos;
> + $members =~ s/\s*$attribute/ /gi;
> + $members =~ s/\s*$aligned\s*\([^;]*\)/ /gos;

Maybe put the \s*\([^;]*\) into $aligned? Then it becomes a useful abstraction.

> - } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || > - $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || > - $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { > + } elsif ($prototype =~ m/^()($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+)\s+($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end1/ || > + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end1/ || > + $prototype =~ m/^()($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+)\s+($name)\s*$prototype_end2/ || > + $prototype =~ m/^(\w+\s*\*+)\s*($name)\s*$prototype_end2/ || > + $prototype =~ 
m/^(\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
> + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
> + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
> + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
> + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+($name)\s*$prototype_end2/ ||
> + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*($name)\s*$prototype_end2/ ||
> + $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*($name)\s*$prototype_end2/) {

This is probably the best patch I've seen so far this year. Now, can we
go further? For example:

	$prototype_end = $prototype_end1|$prototype_end2

That would let us cut the number of lines here in half. Can we create
a definition for a variable number of \w and \s and '*' in the return type?
In fact, can we define a regex that matches a type? So this would become:

> + } elsif ($prototype =~ m/^($type)\s*($name)\s*$prototype_end/) {