M2C Abstract Syntax Tree Specification Version 1.05 Status: 2016-01-14 ------------------------------------------------------------------------------ M2C encodes valid Modula-2 input source files in memory in form of an abstract syntax tree (AST). The AST used by M2C uses three kinds of nodes: (a) The Sentinel Node EMPTY is used to encode the absence of an optional sub-node in non-terminal nodes. (b) Non-Terminal Nodes AST, DEFMOD, IMPLIST, IMPORT, UNQIMP, DEFLIST, CONSTDEF, TYPEDEF, PROCDEF, SUBR, ENUM, SET, ARRAY, RECORD, POINTER, PROCTYPE, EXTREC, VRNTREC, INDEXLIST, FIELDLISTSEQ, FIELDLIST, VFLISTSEQ, VFLIST, VARIANTLIST, VARIANT, CLABELLIST, CLABELS, FTYPELIST, OPENARRAY, CONSTP, VARP, FPARAMLIST, FPARAMS, IMPMOD, BLOCK, DECLLIST, TYPEDECL, VARDECL, PROC, MODDECL, VSREC, VSFIELD, EXPORT, QUALEXP, STMTSEQ, ASSIGN, PCALL, RETURN, WITH, IF, SWITCH, LOOP, WHILE, REPEAT, FORTO, EXIT, ARGS, ELSIFSEQ, ELSIF, CASELIST, CASE, FIELD, INDEX, DESIG, DEREF, NEG, NOT, EQ, NEQ, LT, LTEQ, GT, GTEQ, IN, PLUS, MINUS, OR, ASTERISK, SOLIDUS, DIV, MOD, AND, FCALL, SETVAL are used to encode non-terminal symbols such as modules, imports, definitions, declarations, statements and expressions; (c) Terminal Nodes IDENT, QUALIDENT, IDENTLIST, INTVAL, REALVAL, CHRVAL, QUOTEDVAL, FILENAME, OPTIONS are used to encode terminal symbols such as filenames, options, identifiers, integer literals, real number literals, character code and quoted literals. *** Serialised Representation *** Any AST node may be represented in a serialised format of the form ( nodetype subnode-0 subnode-1 subnode-2 ... subnode-N ) where the actual number of sub-nodes is dependent on the node type. This form of tree representation is called an S-expression. The structure of the AST used by M2C is described below in form of such S-expressions. *** M2C AST S-Expression Syntax *** (0) EMPTY -- Empty Node emptyNode := '(' EMPTY ')' ; (1) AST -- Root Node astRecord := '(' AST filename options compilationUnit ')' ; filename := filenameNode ; /* terminal node */ options := optionsNode ; /* terminal node */ compilationUnit := defModuleNode | impModuleNode ; (2) DEFMOD -- Definition Module Node defModuleNode := '(' DEFMOD moduleIdent importList definitionList ')' ; moduleIdent := identNode ; /* terminal node */ importList := importListNode | emptyNode ; definitionList := definitionListNode | emptyNode ; (3) IMPLIST -- Import List Node importListNode := '(' IMPLIST import+ ')' ; import := qImportNode | unqImportNode ; (4) IMPORT -- Qualified Import Node qImportNode := '(' IMPORT identList ')' ; identList := identListNode ; /* terminal node */ (5) UNQIMP -- Unqualified Import Node unqImportNode := '(' UNQIMP moduleIdent identList ')' ; (6) DEFLIST -- Definition List Node definitionListNode := '(' DEFLIST definition+ ')' ; definition := constDefNode | typeDefNode | varDeclNode | ProcDefNode ; (7) CONSTDEF -- Constant Definition Node constDefNode := '(' CONSTDEF identNode exprNode ')' ; (8) TYPEDEF -- Type Definition Node typeDefNode := '(' TYPEDEF identNode ( typeNode | emptyNode ) ')' ; typeNode := identNode | qualidentNode | subrTypeNode | enumTypeNode | setTypeNode | arrayTypeNode | recTypeNode | extRecTypeNode | vrntRecTypeNode | pointerTypeNode | procTypeNode ; (9) PROCDEF -- Procedure Definition Node procDefNode := '(' PROCDEF identNode formalParamList returnType ')' ; formalParamList := formalParamListNode | emptyNode ; (10) SUBR -- Subrange Type Definition Node subrTypeNode := '(' SUBR lowerBound upperBound subrBaseType ')' ; lowerBound := exprNode ; upperBound := exprNode ; subrBaseType := identNode | qualidentNode | emptyNode ; (11) ENUM -- Enumeration Type Definition Node enumTypeNode := '(' ENUM identListNode ')' ; (12) SET -- Set Type Definition Node setTypeNode := '(' SET countableType ')' ; countableType := identNode | qualidentNode | subrTypeNode | enumTypeNode ; (13) ARRAY -- Array Type Definition Node arrayTypeNode := '(' ARRAY indexTypeListNode arrayBaseType ')' ; arrayBaseType := fieldType ; (14) RECORD -- Simple Record Type Definition Node recTypeNode := '(' RECORD fieldListSeqNode ')' ; (15) POINTER -- Pointer Type Definition Node pointerTypeNode := '(' POINTER typeNode ')' ; (16) PROCTYPE -- Procedure Type Definition Node procTypeNode := '(' PROCTYPE formalTypeList returnedType ')' ; formalTypeList := formalTypeListNode | emptyNode ; returnedType := identNode | qualidentNode | emptyNode ; (17) EXTREC -- Extensible Record Type Definition Node extRecTypeNode := '(' EXTREC recBaseType fieldListSeqNode ')' ; recBaseType := identNode | qualidentNode ; (18) VRNTREC -- Variant Record Type Definition Node variantRecTypeNode := '(' VRNTREC variantFieldListSeqNode ')' ; (19) INDEXLIST -- Array Index Type List Node indexTypeListNode := '(' INDEXLIST indexType+ ')' ; indexType := countableType ; (20) FIELDLISTSEQ -- Simple FieldList Sequence Node fieldListSeqNode := '(' FIELDLISTSEQ fieldListNode+ ')' ; (21) FIELDLIST -- Simple FieldList Node fieldListNode := '(' FIELDLIST identListNode fieldType ')' ; fieldType := identNode | qualidentNode | subrTypeNode | enumTypeNode | setTypeNode | arrayTypeNode | recTypeNode | pointerTypeNode | procTypeNode ; (22) VFLISTSEQ -- Variant Record FieldList Sequence Node variantFieldListSeqNode := '(' VFLISTSEQ ( fieldListNode | variantFieldListNode )+ ')' ; (23) VFLIST -- Variant FieldList Node variantFieldListNode := '(' VFLIST caseIdent caseType variantList defaultFieldListSeq ')' ; caseIdent := identNode | emptyNode ; caseType := identNode | qualidentNode ; defaultFieldListSeq := fieldListSeqNode | emptyNode ; (24) VARIANTLIST -- Variant List Node '(' VARIANTLIST variantNode+ ')' ; (25) VARIANT -- Variant Node variantNode := '(' VARIANT caseLabelListNode fieldListSeqNode ')' ; (26) CLABELLIST -- Case Label List Node caseLabelListNode := '(' CLABELLIST caseLabelsNode+ ') ; (27) CLABELS -- Case Labels Node caseLabelsNode := '(' CLABELS startLabel endLabel ')' ; startLabel := exprNode ; endLabel := exprNode | emptyNode ; (28) FTYPELIST -- Formal Type List Node formalTypeListNode : '(' FTYPELIST formalType+ ')' ; formalType := simpleFormalType | attrFormalType ; simpleFormalType := typeIdent | openArrayTypeNode ; attrFormalType := constAttrFormalTypeNode | varAttrFormalTypeNode ; (29) OPENARRAY -- Open Array Parameter Node openArrayTypeNode := '(' OPENARRAY typeIdent ')' ; typeIdent := identNode | qualidentNode ; (30) CONSTP -- CONST Parameter Node constAttrFormalTypeNode := '(' CONSTP simpleFormalType ')' ; (31) VARP -- VAR Parameter Node varAttrFormalTypeNode := '(' VARP simpleFormalType ')' ; (32) FPARAMLIST -- Formal Parameter List Node formalParamListNode := '(' FPARAMLIST formalParamsNode+ ')' ; (33) FPARAMS -- Formal Parameters Node formalParamsNode := '(' FPARAMS identListNode formalTypeNode ')' ; (34) IMPMOD -- Program Or Implementation Module Node impModuleNode := '(' IMPMOD moduleIdent priority importListNode blockNode ')' ; priority := exprNode | emptyNode ; (35) BLOCK -- Block Node blockNode := '(' BLOCK declarationList body ')' ; declarationList := declarationListNode | emptyNode ; body := statementSeqNode | emptyNode ; (36) DECLLIST -- Declaration List Node declarationListNode := '(' DECLLIST declarationNode+ ')' ; declarationNode := constDefNode | typeDeclNode | varDeclNode | procDeclNode | modDeclNode ; (37) TYPEDECL -- Type Declaration Node typeDeclNode := '(' TYPEDECL identNode ( typeNode | vsrTypeNode ) ')' ; (38) VARDECL -- Variable Declaration Node varDeclNode := '(' VARDECL identListNode fieldType ')' ; (39) PROC -- Procedure Declaration Node procDeclNode := '(' PROC identNode formalParamList returnType blockNode ')' ; (40) MODDECL -- Module Declaration Node modDeclNode := '(' MODDECL moduleIdent priority importListNode exportList blockNode ')' ; exportList := unqualExportNode | qualExportNode | emptyNode ; (41) VSREC -- Variable Size Record Type Declaration Node vsrTypeNode := '(' VSREC fieldListSeqNode varSizeFieldNode ')' ; (42) VSFIELD -- Variable Size Field Node varSizeFieldNode := '(' VSFIELD varSizeField determinantField varSizeFieldType ')' ; varSizeField := IdentNode ; determinantField := IdentNode ; varSizeFieldType := qualidentNode ; (43) EXPORT -- Unqualified Export Node unqualExportNode := '(' EXPORT identListNode ')' ; (44) QUALEXP -- Qualified Export Node qualExportNode := '(' QUALEXP identListNode ')' ; (45) STMTSEQ -- Statement Sequence Node statementSeqNode := '(' STMTSEQ statementNode+ ')' ; statementNode := assignmentNode | procCallNode | returnStmtNode | withStmtNode | ifStmtNode | caseStmtNode | loopStmtNode | whileStmtNode | repeatStmtNode | forStmtNode | exitStmtNode ; (46) ASSIGN -- Assignment Node assignmentNode := '(' ASSIGN designator exprNode ')' ; designator := identNode | qualidentNode | derefNode | designatorNode ; (47) PCALL -- Procedure Call Node procCallNode := '( PCALL designator actualParams ')' ; actualParams := actualParamsNode | emptyNode ; (48) RETURN -- RETURN Statement Node returnStmtNode := '(' RETURN returnValue ')' ; returnValue := exprNode | emptyNode ; (49) WITH -- WITH Statement Node withStmtNode := '(' WITH designator statementSeqNode ')' ; (50) IF -- IF Statement Node ifStmtNode := '(' IF exprNode ifBranch elsifSeq elseBranch ')' ; ifBranch := statementSeqNode ; elsifSeq := elsifSeqNode | emptyNode ; elseBranch := statementSeqNode | emptyNode ; (51) SWITCH -- CASE Statement Node caseStmtNode := '(' SWITCH exprNode caseListNode elseBranch ')' ; (52) LOOP -- LOOP Statement Node loopStmtNode := '(' LOOP statementSeqNode ')' ; (53) WHILE -- WHILE Statement Node whileStmtNode := '(' WHILE exprNode statementSeqNode ')' ; (54) REPEAT -- REPEAT Statement Node repeatStmtNode := '(' REPEAT statementSeqNode exprNode ')' ; (55) FORTO -- FOR Statement Node forStmtNode := '(' FORTO identNode startValue endValue stepValue statementSeqNode ')' ; startValue : exprNode ; endValue := expNode ; stepValue := exprNode | emptyNode ; (56) EXIT -- EXIT Statement Node exitStmtNode := '(' EXIT ')' ; (57) ARGS -- Actual Parameters Node actualParamsNode := '(' ARGS exprNode+ ')' ; (58) ELSIFSEQ -- ELSIFSEQ Node elsifSeqNode := '(' ELSIFSEQ elsifNode+ ')' ; (59) ELSIF -- ELSIF Node elsifNode := '(' ELSIF exprNode statementSeqNode ')' ; (60) CASELIST -- CASE List Node caseListNode := '(' CASELIST caseBranchNode+ ')' ; (61) CASE -- CASE Branch Node caseBranchNode := '(' CASE caseLabelListNode statementSeqNode ')' ; (62) FIELD -- Record Field Selector Node fieldSelectorNode := '(' FIELD selector ')' ; selector := identNode | qualidentNode | designatorNode ; (63) INDEX -- Array Index Node arrayIndexNode := '(' INDEX subscript+ ')' ; subscript := exprNode ; exprNode := negNode | notNode | eqNode | neqNode | ltNode | ltEqNode | gtNode | gtEqNode | inNode | plusNode | minusNode | orNode | asteriskNode | solidusNode | divNode | modulusNode | andNode | designator | intValNode | realValNode | chrValNode | quotedValNode | funcCallNode | setValNode ; (64) DESIG -- Designator Node designatorNode := '(' DESIG head tail ')' ; head := identNode | qualidentNode | derefNode ; tail := fieldSelectorNode | arrayIndexNode ; (65) DEREF -- Pointer Dereference Node derefNode := '(' DEREF designator ')' ; (66) NEG -- Arithmetic Negation Sub-Expression Node negNode := '(' NEG right ')' ; right : exprNode ; (67) NOT -- Logical Negation Sub-Expression Node notNode := '(' NOT right ')' ; (68) EQ -- Equality Sub-Expression Node eqNode := '(' EQ left right ')' ; left : exprNode ; (69) NEQ -- Inequality Sub-Expression Node neqNode := '(' NEQ left right ')' ; (70) LT -- Less-Than Sub-Expression Node ltNode := '(' '<' left right ')' ; (71) LTEQ -- Less-Than-Or-Equal Sub-Expression Node ltEqNode := '(' '<=' left right ')' ; (72) GT -- Greater-Than Sub-Expression Node gtNode := '(' '>' left right ')' ; (73) GTEQ -- Greater-Than-Or-Equal Sub-Expression Node gtEqNode := '(' '>=' left right ')' ; (74) IN -- Set Membership Sub-Expression Node inNode := '(' IN left right ')' ; (75) PLUS -- Plus Sub-Expression Node plusNode := '(' '+' left right ')' ; (76) MINUS -- Minus Sub-Expression Node minusNode := '(' '-' left right ')' ; (77) OR -- Logical Disjunction Sub-Expression Node orNode := '(' OR left right ')' ; (78) ASTERISK -- Asterisk Sub-Expression Node asteriskNode := '(' '*' left right ')' ; (79) SOLIDUS -- Solidus Sub-Expression Node solidusNode := '(' '/' left right ')' ; (80) DIV -- Euclidean Division Sub-Expression Node divNode := '(' DIV left right ')' ; (81) MOD -- Remainder of Euclidean Division Sub-Expression Node modulusNode := '(' MOD left right ')' ; (82) AND -- Logical Conjunction Sub-Expression Node andNode := '(' AND left right ')' ; (83) FCALL -- Function Call Sub-Expression Node funcCallNode := '( FCALL designator actualParams ')' ; (84) SETVAL -- Set Value Sub-Expression Node setValNode := '( SETVAL setTypeIdent elementList ')' ; setTypeIdent := ident | qualident | emptyNode ; elementList := actualParams | emptyNode ; (85) FILENAME -- File Node Signature: (FILENAME "filename") filenameNode := '(' FILENAME '"' filename '"' ')' ; (86) OPTIONS -- Compiler Options Node Signature: (OPTIONS "foo" "bar" "baz" ...) optionsNode := '(' OPTIONS ( '"' option-name '"' )+ ')' ; (87) IDENT -- Identifier Node Signature: (IDENT "identifier") identNode := '(' IDENT '"' Ident '"' ')' ; (88) IDENTLIST -- Identifier List Node Signature: (IDENTLIST "foo" "bar" "baz" ...) identListNode := '(' IDENTLIST ( '"' Ident '"' )+ ')' ; (89) QUALIDENT -- Qualified Identifier Node Signature: (QUALIDENT "foo" "bar" ...) qualidentNode := '(' QUALIDENT ( '"' Ident '"' ) ( '"' Ident '"' )+ ')' ; (90) INTVAL -- Whole Number Value Node Signature: (INTVAL 12345) | (INTVAL #0x7FFF) intValNode := '(' INTVAL ( lexeme | '#' lexeme ) ')' ; (91) REALVAL -- Real Number Value Node Signature: (REALVAL 1.23e45) realValNode := '(' REALVAL lexeme ')' ; (92) CHRVAL -- Character Code Value Node Signature: (CHRVAL #0u7F) chrValNode := '(' CHRVAL '#' lexeme ')' ; (93) QUOTEDVAL -- Quoted Character or String Value Node Signature: (QUOTEDVAL "quoted character or string") quotedValNode := '(' QUOTEDVAL '"' lexeme '"' ')' ; END OF FILE