This adds a parser capable of handling the XDR protocol files. The parsing grammar requirements are detailed in https://www.rfc-editor.org/rfc/rfc4506#section-6.3 Signed-off-by: Daniel P. Berrangé <berrange@xxxxxxxxxx> --- scripts/rpcgen/rpcgen/parser.py | 497 ++++++++++++++++++++++++++++ scripts/rpcgen/tests/meson.build | 1 + scripts/rpcgen/tests/test_parser.py | 91 +++++ 3 files changed, 589 insertions(+) create mode 100644 scripts/rpcgen/rpcgen/parser.py create mode 100644 scripts/rpcgen/tests/test_parser.py diff --git a/scripts/rpcgen/rpcgen/parser.py b/scripts/rpcgen/rpcgen/parser.py new file mode 100644 index 0000000000..7efbe5468e --- /dev/null +++ b/scripts/rpcgen/rpcgen/parser.py @@ -0,0 +1,497 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +from .lexer import ( + XDRLexer, + XDRTokenPunctuation, + XDRTokenIdentifier, + XDRTokenCEscape, + XDRTokenConstant, +) +from .ast import ( + XDRSpecification, + XDRDefinitionConstant, + XDRDefinitionTypedef, + XDRDefinitionEnum, + XDRDefinitionStruct, + XDRDefinitionUnion, + XDRDefinitionCEscape, + XDRDeclarationScalar, + XDRDeclarationPointer, + XDRDeclarationFixedArray, + XDRDeclarationVariableArray, + XDRTypeVoid, + XDRTypeChar, + XDRTypeUnsignedChar, + XDRTypeShort, + XDRTypeUnsignedShort, + XDRTypeInt, + XDRTypeUnsignedInt, + XDRTypeHyper, + XDRTypeUnsignedHyper, + XDRTypeFloat, + XDRTypeDouble, + XDRTypeBool, + XDRTypeOpaque, + XDRTypeString, + XDRTypeCustom, + XDREnumValue, + XDREnumBody, + XDRTypeEnum, + XDRStructBody, + XDRTypeStruct, + XDRUnionCase, + XDRUnionBody, + XDRTypeUnion, +) + + +# We are parsing (approximately the following grammar +# from RFC 4506 #6.3: +# +# declaration: +# type-specifier identifier +# | type-specifier identifier "[" value "]" +# | type-specifier identifier "<" [ value ] ">" +# | "opaque" identifier "[" value "]" +# | "opaque" identifier "<" [ value ] ">" +# | "string" identifier "<" [ value ] ">" +# | type-specifier "*" identifier +# | "void" +# +# value: +# constant +# | 
identifier +# +# constant: +# decimal-constant | hexadecimal-constant | octal-constant +# +# type-specifier: +# [ "unsigned" ] "int" +# | [ "unsigned" ] "hyper" +# | "float" +# | "double" +# | "quadruple" /* We're skipping this one */ +# | "bool" +# | enum-type-spec +# | struct-type-spec +# | union-type-spec +# | identifier +# +# enum-type-spec: +# "enum" enum-body +# +# enum-body: +# "{" +# ( identifier "=" value ) +# ( "," identifier "=" value )* +# "}" +# +# struct-type-spec: +# "struct" struct-body +# +# struct-body: +# "{" +# ( declaration ";" ) +# ( declaration ";" )* +# "}" +# +# union-type-spec: +# "union" union-body +# +# union-body: +# "switch" "(" declaration ")" "{" +# case-spec +# case-spec * +# [ "default" ":" declaration ";" ] +# "}" +# +# case-spec: +# ( "case" value ":") +# ( "case" value ":") * +# declaration ";" +# +# constant-def: +# "const" identifier "=" constant ";" +# +# type-def: +# "typedef" declaration ";" +# | "enum" identifier enum-body ";" +# | "struct" identifier struct-body ";" +# | "union" identifier union-body ";" +# +# definition: +# type-def +# | constant-def +# +# specification: +# definition * +# +# Notable divergences: +# +# - In 'type-specifier' we allow 'char' and 'short' +# in signed and unsigned variants +# +# - In 'definition' we allow '%...' 
as escape C code +# to passthrough to the header output +# +# - In 'enum-type-spec' we allow a bare enum name +# instead of enum body +# +# - In 'struct-type-spec' we allow a bare struct name +# instead of struct body +# +# - In 'union-type-spec' we allow a bare union name +# instead of union body +# +class XDRParser: + def __init__(self, fp): + self.lexer = XDRLexer(fp) + self.typedefs = {} + + def parse(self): + spec = XDRSpecification() + while True: + definition = self.parse_definition() + if definition is None: + break + spec.definitions.append(definition) + return spec + + def parse_definition(self): + token = self.lexer.next() + if token is None: + return None + + if type(token) == XDRTokenCEscape: + return XDRDefinitionCEscape(token.value[1:]) + + if type(token) != XDRTokenIdentifier: + raise Exception("Expected identifier, but got %s" % token) + + defs = { + "const": XDRDefinitionConstant, + "typedef": XDRDefinitionTypedef, + "enum": XDRDefinitionEnum, + "struct": XDRDefinitionStruct, + "union": XDRDefinitionUnion, + } + + if token.value not in defs: + raise Exception("Unexpected identifier %s" % token) + + funcname = "parse_definition_" + token.value + func = getattr(self, funcname) + assert func is not None + + definition = func() + + semi = self.lexer.next() + if type(semi) != XDRTokenPunctuation or semi.value != ";": + raise Exception("Expected ';', but got %s" % semi) + + return definition + + def parse_definition_const(self): + ident = self.lexer.next() + if type(ident) != XDRTokenIdentifier: + raise Exception("Expected identifier, but got %s" % ident) + + assign = self.lexer.next() + if type(assign) != XDRTokenPunctuation or assign.value != "=": + raise Exception("Expected '=', but got %s" % assign) + + const = self.lexer.next() + if type(const) not in [XDRTokenConstant, XDRTokenIdentifier]: + raise Exception("Expected constant, but got %s" % const) + + return XDRDefinitionConstant(ident.value, const.value) + + def parse_definition_typedef(self): + 
decl = self.parse_declaration() + if decl.identifier in self.typedefs: + raise Exception("Type '%s' already defined" % decl.identifier) + + definition = XDRDefinitionTypedef(decl) + self.typedefs[decl.identifier] = definition + return definition + + def parse_definition_enum(self): + name = self.lexer.next() + if type(name) != XDRTokenIdentifier: + raise Exception("Expected identifier, but got %s" % name) + + body = self.parse_enum_body() + + if name.value in self.typedefs: + raise Exception("Type '%s' already defined" % name.value) + + definition = XDRDefinitionEnum(name.value, body) + self.typedefs[name.value] = definition + return definition + + def parse_definition_struct(self): + name = self.lexer.next() + if type(name) != XDRTokenIdentifier: + raise Exception("Expected identifier, but got %s" % name) + + body = self.parse_struct_body() + + if name.value in self.typedefs: + raise Exception("Type '%s' already defined" % name.value) + + definition = XDRDefinitionStruct(name.value, body) + self.typedefs[name.value] = definition + return definition + + def parse_definition_union(self): + name = self.lexer.next() + if type(name) != XDRTokenIdentifier: + raise Exception("Expected identifier, but got %s" % name) + + body = self.parse_union_body() + + if name.value in self.typedefs: + raise Exception("Type '%s' already defined" % name.value) + + definition = XDRDefinitionUnion(name.value, body) + self.typedefs[name.value] = definition + return definition + + def parse_declaration(self): + typ = self.parse_type() + + if type(typ) == XDRTypeVoid: + return XDRDeclarationScalar(typ, None) + + ident = self.lexer.next() + + pointer = False + if type(ident) == XDRTokenPunctuation: + if ident.value != "*": + raise Exception("Expected '*' or identifer, but got %s" % ident) + if type(typ) == XDRTypeString or type(typ) == XDRTypeOpaque: + raise Exception("Pointer invalid for 'string' and 'opaque' types") + + pointer = True + ident = self.lexer.next() + + bracket = 
self.lexer.peek() + if type(bracket) == XDRTokenPunctuation: + if bracket.value == "[": + _ = self.lexer.next() + value = self.lexer.next() + if type(value) not in [XDRTokenConstant, XDRTokenIdentifier]: + raise Exception("Expected constant, but got %s" % value) + + close = self.lexer.next() + if type(close) != XDRTokenPunctuation or close.value != "]": + raise Exception("Expected ']', but got %s" % value) + + if type(typ) == XDRTypeString: + raise Exception("Fixed array invalid for 'string' type") + return XDRDeclarationFixedArray(typ, ident.value, value.value) + elif bracket.value == "<": + _ = self.lexer.next() + maybeValue = self.lexer.peek() + value = None + if type(maybeValue) in [XDRTokenConstant, XDRTokenIdentifier]: + value = self.lexer.next().value + + close = self.lexer.next() + if type(close) != XDRTokenPunctuation or close.value != ">": + raise Exception("Expected '>', but got %s" % close) + + return XDRDeclarationVariableArray(typ, ident.value, value) + + if pointer: + return XDRDeclarationPointer(typ, ident.value) + else: + return XDRDeclarationScalar(typ, ident.value) + + def parse_type(self): + typ = self.lexer.next() + if type(typ) != XDRTokenIdentifier: + raise Exception("Expected identifier, but got %s" % typ) + + if typ.value == "unsigned": + typ = self.lexer.peek() + if type(typ) != XDRTokenIdentifier: + raise Exception("Expected identifier, but got %s" % typ) + + if typ.value == "char": + _ = self.lexer.next() + return XDRTypeUnsignedChar() + elif typ.value == "short": + _ = self.lexer.next() + return XDRTypeUnsignedShort() + elif typ.value == "int": + _ = self.lexer.next() + return XDRTypeUnsignedInt() + elif typ.value == "hyper": + _ = self.lexer.next() + return XDRTypeUnsignedHyper() + else: + # Bare 'unsigned' isn't allowed by 'type-specifier' + # grammer in RFC 1014, but rpcgen allows it + return XDRTypeUnsignedInt() + + if typ.value == "void": + return XDRTypeVoid() + elif typ.value == "char": + return XDRTypeChar() + elif typ.value == 
"short": + return XDRTypeShort() + elif typ.value == "int": + return XDRTypeInt() + elif typ.value == "hyper": + return XDRTypeHyper() + elif typ.value == "float": + return XDRTypeFloat() + elif typ.value == "double": + return XDRTypeDouble() + elif typ.value == "bool": + return XDRTypeBool() + elif typ.value == "enum": + return self.parse_type_enum() + elif typ.value == "struct": + return self.parse_type_struct() + elif typ.value == "union": + return self.parse_type_union() + elif typ.value == "opaque": + return XDRTypeOpaque() + elif typ.value == "string": + return XDRTypeString() + else: + return XDRTypeCustom(typ.value, self.typedefs.get(typ.value, None)) + + def parse_enum_body(self): + body = self.lexer.next() + if type(body) != XDRTokenPunctuation or body.value != "{": + raise Exception("Expected '{', but got %s" % body) + + values = [] + while True: + ident = self.lexer.next() + if type(ident) != XDRTokenIdentifier: + raise Exception("Expected identifier, but got %s" % ident) + + equal = self.lexer.next() + if type(equal) != XDRTokenPunctuation or equal.value != "=": + raise Exception("Expected '=', but got %s" % ident) + + value = self.lexer.next() + if type(value) != XDRTokenConstant: + raise Exception("Expected constant, but got %s" % ident) + + separator = self.lexer.next() + if type(separator) != XDRTokenPunctuation and separator.value not in [ + "}", + ",", + ]: + raise Exception("Expected '}' or ',', but got %s" % separator) + + values.append(XDREnumValue(ident.value, value.value)) + + if separator.value == "}": + break + + return XDREnumBody(values) + + def parse_type_enum(self): + body = self.parse_enum_body() + return XDRTypeEnum(body) + + def parse_struct_body(self): + body = self.lexer.next() + if type(body) != XDRTokenPunctuation or body.value != "{": + raise Exception("Expected '{', but got %s" % body) + + fields = [] + while True: + field = self.parse_declaration() + fields.append(field) + + separator = self.lexer.next() + if type(separator) 
!= XDRTokenPunctuation and separator.value != ";": + raise Exception("Expected ';', but got %s" % separator) + + end = self.lexer.peek() + if type(end) == XDRTokenPunctuation and end.value == "}": + break + + # discard the '}' we peeked at to end the loop + _ = self.lexer.next() + return XDRStructBody(fields) + + def parse_type_struct(self): + body = self.parse_struct_body() + return XDRTypeStruct(body) + + def parse_union_body(self): + ident = self.lexer.next() + if type(ident) != XDRTokenIdentifier or ident.value != "switch": + raise Exception("Expected 'switch', but got %s" % ident) + + bracket = self.lexer.next() + if type(bracket) != XDRTokenPunctuation or bracket.value != "(": + raise Exception("Expected '(', but got %s" % bracket) + + discriminator = self.parse_declaration() + + bracket = self.lexer.next() + if type(bracket) != XDRTokenPunctuation or bracket.value != ")": + raise Exception("Expected ')', but got %s" % bracket) + + bracket = self.lexer.next() + if type(bracket) != XDRTokenPunctuation or bracket.value != "{": + raise Exception("Expected '{', but got %s" % bracket) + + default = None + cases = [] + while True: + ident = self.lexer.next() + if type(ident) != XDRTokenIdentifier or ident.value not in [ + "default", + "case", + ]: + raise Exception("Expected 'default' or 'case', but got %s" % ident) + + value = None + if ident.value == "case": + value = self.lexer.next() + if type(value) not in [XDRTokenConstant, XDRTokenIdentifier]: + raise Exception("Expected constant, but got %s" % value) + + sep = self.lexer.next() + if type(sep) != XDRTokenPunctuation or sep.value != ":": + raise Exception("Expected ':', but got %s" % value) + + decl = self.parse_declaration() + + case = XDRUnionCase(value.value, decl) + cases.append(case) + else: + if default is not None: + raise Exception("Duplicate 'default' clause") + + sep = self.lexer.next() + if type(sep) != XDRTokenPunctuation or sep.value != ":": + raise Exception("Expected ':', but got %s" % value) + 
+ default = self.parse_declaration() + + separator = self.lexer.next() + if type(separator) != XDRTokenPunctuation and separator.value != ";": + raise Exception("Expected ';', but got %s" % bracket) + + end = self.lexer.peek() + if type(end) == XDRTokenPunctuation and end.value == "}": + break + + # discard the '}' we peeked at to end the loop + _ = self.lexer.next() + return XDRUnionBody(discriminator, cases, default) + + def parse_type_union(self): + body = self.parse_union_body() + return XDRTypeUnion(body) diff --git a/scripts/rpcgen/tests/meson.build b/scripts/rpcgen/tests/meson.build index 9162412d31..4b1ea308ce 100644 --- a/scripts/rpcgen/tests/meson.build +++ b/scripts/rpcgen/tests/meson.build @@ -1,3 +1,4 @@ rpcgen_tests = files([ 'test_lexer.py', + 'test_parser.py', ]) diff --git a/scripts/rpcgen/tests/test_parser.py b/scripts/rpcgen/tests/test_parser.py new file mode 100644 index 0000000000..8527b8d6e2 --- /dev/null +++ b/scripts/rpcgen/tests/test_parser.py @@ -0,0 +1,91 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +from pathlib import Path + +from rpcgen.ast import ( + XDRSpecification, + XDRDefinitionConstant, + XDRDefinitionEnum, + XDRDefinitionUnion, + XDRDefinitionStruct, + XDRDeclarationScalar, + XDRDeclarationVariableArray, + XDREnumValue, + XDREnumBody, + XDRStructBody, + XDRUnionCase, + XDRUnionBody, + XDRTypeCustom, + XDRTypeVoid, + XDRTypeString, + XDRTypeOpaque, +) +from rpcgen.parser import XDRParser + + +def test_parser(): + p = Path(Path(__file__).parent, "simple.x") + with p.open("r") as fp: + parser = XDRParser(fp) + + got = parser.parse() + + enum = XDRDefinitionEnum( + "filekind", + XDREnumBody( + [ + XDREnumValue("TEXT", "0"), + XDREnumValue("DATA", "1"), + XDREnumValue("EXEC", "2"), + ], + ), + ) + + union = XDRDefinitionUnion( + "filetype", + XDRUnionBody( + XDRDeclarationScalar(XDRTypeCustom("filekind", enum), "kind"), + [ + XDRUnionCase("TEXT", XDRDeclarationScalar(XDRTypeVoid(), None)), + XDRUnionCase( + "DATA", + 
XDRDeclarationVariableArray( + XDRTypeString(), "creator", "MAXNAMELEN" + ), + ), + XDRUnionCase( + "EXEC", + XDRDeclarationVariableArray( + XDRTypeString(), "interpretor", "MAXNAMELEN" + ), + ), + ], + None, + ), + ) + + struct = XDRDefinitionStruct( + "file", + XDRStructBody( + [ + XDRDeclarationVariableArray(XDRTypeString(), "filename", "MAXNAMELEN"), + XDRDeclarationScalar(XDRTypeCustom("filetype", union), "type"), + XDRDeclarationVariableArray(XDRTypeString(), "owner", "MAXUSERNAME"), + XDRDeclarationVariableArray(XDRTypeOpaque(), "data", "MAXFILELEN"), + ] + ), + ) + + want = XDRSpecification() + want.definitions.extend( + [ + XDRDefinitionConstant("MAXUSERNAME", "32"), + XDRDefinitionConstant("MAXFILELEN", "65535"), + XDRDefinitionConstant("MAXNAMELEN", "255"), + enum, + union, + struct, + ] + ) + + assert str(got) == str(want) -- 2.39.1