
The Creation of a Compiler for the LITTLE Language

Will Cassella, Woodrow Melling, Bradley White

Montana State University

CSCI 468

Spring 2017

Table of Contents

Section 1: Program
    Program Specification
    Source Code
        IRGenerator
        IRInstruction
        Lexer
        LexerGrammar
        Listener
        Operand
        Symbol
        SymbolTable
        TinyEmitter
Section 2: Teamwork
Section 3: Design Pattern
Section 4: Technical Report
    Introduction
    Background
    Methods and Discussion
        Scanner
        Parser
        Symbol Table
        Semantic Routines
        Full Fledged Compiler
    Conclusion and Future Work
Section 5: UML
Section 6: Design Trade-offs
Section 7: Software Development Lifecycle

Section 1: Program

Program Specification

The following source code implements a simple compiler for the LITTLE language. The compiler is written in Java and uses the ANTLR library to generate scanner and parser classes from the grammar defined in LexerGrammar. The program is invoked through the Lexer class, which takes a LITTLE source file as a command line argument. The source code is first tokenized by the ANTLR-generated scanner. The stream of tokens then passes through the parser, also generated by ANTLR from the predefined grammar, which builds a parse tree and checks that the input is syntactically correct. After the tree is created, it is traversed by the Listener class, which performs symbol-table construction as well as intermediate representation (IR) generation.

The SymbolTable is an ordered hash map whose keys are symbol names and whose values are Symbol objects; it is used to track variables. A Symbol contains a type, a name, and an optional value. Each scope within the input file gets its own SymbolTable, and these tables are organized in a tree data structure so that nested scopes are associated with their respective parent scope.

The last step of the compiler is IR generation. Unlike commercial compilers, we perform no optimization on the intermediate code. The Listener instantiates an IRGenerator, which maintains a stack of IRInstruction objects. An IRInstruction holds up to two Operand instances (integer, float, or string variables or literals) and one operation code, such as ADDI or MULTI. These instructions are created as the Listener traverses the parse tree. The generated IR is independent of any machine architecture and is consumed by the TinyEmitter class, which emits assembly code for the Tiny VM architecture.
Further details on these processes are provided in the technical report, section four.
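The scope tree described above can be sketched as a parent-linked map. The `Scope` and `ScopeDemo` names below are illustrative stand-ins for the compiler's SymbolTable, not the actual implementation; a minimal sketch of the lookup behavior:

```java
import java.util.LinkedHashMap;
import java.util.Map;

// Each scope keeps an insertion-ordered map of its own symbols plus a
// pointer to its parent; find() walks the chain outward until a match
// or the global scope is exhausted.
class Scope
{
    private final Scope parent;
    private final Map<String, String> symbols = new LinkedHashMap<>();

    Scope(Scope parent) { this.parent = parent; }

    void add(String name, String type)
    {
        // Redeclaration in the same scope is an error, as in the compiler
        if (symbols.containsKey(name))
        {
            throw new IllegalArgumentException("DECLARATION ERROR " + name);
        }
        symbols.put(name, type);
    }

    String find(String name)
    {
        String type = symbols.get(name);
        if (type != null) return type;
        return parent != null ? parent.find(name) : null;
    }
}

public class ScopeDemo
{
    public static void main(String[] args)
    {
        Scope global = new Scope(null);
        global.add("x", "INT");

        Scope block = new Scope(global); // nested scope, e.g. an IF body
        block.add("y", "FLOAT");

        System.out.println(block.find("y")); // FLOAT (found locally)
        System.out.println(block.find("x")); // INT (found in parent)
        System.out.println(block.find("z")); // null (undeclared)
    }
}
```

Lookups that miss in the innermost scope fall through to enclosing scopes, which is exactly how the Listener resolves identifiers while walking nested IF and WHILE blocks.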

Source Code

IRGenerator

import java.util.ArrayList;
import java.util.Stack;

public class IRGenerator
{
    public IRGenerator()
    {
        label_stack = new Stack<>();
        expr_instructions = new Stack<>();
        instructions = new ArrayList<>();
        next_temp = 1;
    }

    public Operand allocate_temporary()
    {
        Operand result = Operand.temp_operand("$T" + next_temp, null);
        next_temp += 1;
        return result;
    }

    public IRInstruction add_instruction()
    {
        IRInstruction result = new IRInstruction();
        instructions.add(result);
        return result;
    }

    public IRInstruction push_instruction()
    {
        IRInstruction top = new IRInstruction();
        expr_instructions.push(top);
        return top;
    }

    public IRInstruction top_instruction()
    {
        if (!expr_instructions.empty())
        {
            return expr_instructions.peek();
        }
        else
        {
            return null;
        }
    }

    public IRInstruction pop()
    {
        if (!expr_instructions.empty())
        {
            IRInstruction top = expr_instructions.pop();
            instructions.add(top);
            return top;
        }
        else
        {
            return null;
        }
    }

    public void push_label(String label)
    {
        label_stack.push(label);
    }

    public String pop_label()
    {
        return label_stack.pop();
    }

    public String top_label()
    {
        return label_stack.peek();
    }

    public Stack<String> label_stack;
    public Stack<IRInstruction> expr_instructions;
    public ArrayList<IRInstruction> instructions;
    public int next_temp;
}

IRInstruction

public class IRInstruction
{
    public enum OP
    {
        ADDI, ADDF,
        SUBI, SUBF,
        MULTI, MULTF,
        DIVI, DIVF,
        STOREI, STOREF,
        GT, GE, LT, LE, NE, EQ,
        JUMP, LABEL,
        READI, READF,
        WRITEI, WRITEF, WRITES,
        UNDETERMINED_RESERVED,
    }

    @Override
    public String toString()
    {
        String str = op.toString();
        if (operand_1 != null)
        {
            str += " " + operand_1;
        }
        if (operand_2 != null)
        {
            str += " " + operand_2;
        }

        return str + " " + result;
    }

    public OP op = null;
    public Operand operand_1 = null;
    public Operand operand_2 = null;
    public Operand result = null;
}

Lexer

import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.tree.ParseTreeWalker;
import java.util.List;

public class Lexer
{
    private static void output_ir(List<IRInstruction> instructions)
    {
        // Output IR preamble
        System.out.println(";IR code");
        System.out.println(";LABEL main");
        System.out.println(";LINK");

        // Output instructions
        for (IRInstruction instruction : instructions)
        {
            System.out.println(";" + instruction);
        }

        // Output postamble
        System.out.println(";RET");
        System.out.println(";tiny code");
    }

    public static void main(String[] args) throws Exception
    {
        try
        {
            // Read in the LITTLE file from the command line arguments
            ANTLRFileStream fileStream = new ANTLRFileStream(args[0]);

            // Create a new lexer on the specified 'CharStream'
            lexerGrammarLexer lexer = new lexerGrammarLexer(fileStream);
            CommonTokenStream tokens = new CommonTokenStream(lexer);

            // Create a parser from the stream of tokens
            lexerGrammarParser parser = new lexerGrammarParser(tokens);

            // Remove the error listener so it does not interfere with the output stream
            parser.removeErrorListeners();

            // Walk the parse tree, starting from the 'program' rule
            ParseTreeWalker walker = new ParseTreeWalker();
            Listener listener = new Listener();
            walker.walk(listener, parser.program());

            output_ir(listener.ir_generator.instructions);

            TinyEmitter emitter = new TinyEmitter();
            String result = emitter.emit_code(listener.current_scope,
                listener.ir_generator.instructions);
            System.out.println(result);
        }
        catch (IllegalArgumentException e)
        {
            System.out.println(e.getMessage());
        }
    }
}

LexerGrammar

grammar lexerGrammar;

/* Program */
program : 'PROGRAM' id 'BEGIN' pgm_body 'END';
id : IDENTIFIER;
pgm_body : decl func_declarations;
decl : string_decl decl | var_decl decl | ;

/* Global String Declaration */
string_decl : 'STRING' id ':=' str ';' ;
str : STRINGLITERAL;

/* Variable Declaration */
var_decl : var_type id_list ';' ;
var_type : 'FLOAT' | 'INT';
any_type : var_type | 'VOID';
id_list : id id_tail;
id_tail : ',' id id_tail | ;

/* Function Parameter List */
param_decl_list : '(' param_decl param_decl_tail ')' | '(' ')' | ;
param_decl : var_type id;
param_decl_tail : ',' param_decl param_decl_tail | ;

/* Function Declarations */
func_declarations : func_decl func_declarations | ;
func_decl : 'FUNCTION' any_type id param_decl_list 'BEGIN' func_body 'END';
func_body : decl stmt_list;

/* Statement List */
stmt_list : stmt stmt_list | ;
stmt : base_stmt | if_stmt | while_stmt;
base_stmt : assign_stmt | read_stmt | write_stmt | return_stmt;

/* Basic Statements */
assign_stmt : assign_expr ';' ;
assign_expr : id ':=' expr;
read_stmt : 'READ' '(' id_list ')' ';' ;
write_stmt : 'WRITE' '(' id_list ')' ';' ;
return_stmt : 'RETURN' expr ';' ;

/* Expressions */
expr : expr_prefix factor;
expr_prefix : expr_prefix factor addop | ;
factor : factor_prefix postfix_expr;
factor_prefix : factor_prefix postfix_expr mulop | ;
postfix_expr : primary | call_expr;
call_expr : id '(' expr_list ')';
expr_list : expr expr_list_tail | ;
expr_list_tail : ',' expr expr_list_tail | ;
primary : '(' expr ')' | id | INTLITERAL | FLOATLITERAL;
addop : '+' | '-';
mulop : '*' | '/';

/* Complex Statements and Condition */
if_stmt : 'IF' '(' cond ')' decl stmt_list else_part 'ENDIF';
else_part : 'ELSE' decl stmt_list | ;
cond : expr compop expr;
compop : '<' | '>' | '=' | '!=' | '<=' | '>=';

/* While Statements */
while_stmt : 'WHILE' '(' cond ')' decl stmt_list 'ENDWHILE';
start : .*? EOF;

WS : (' ' | '\t' | '\r' | '\n') -> skip;

INTLITERAL : [0-9]+;

FLOATLITERAL : [0-9]* '.' [0-9]+;

STRINGLITERAL : '"' .*? '"';

COMMENT : '--' .*? '\n' -> skip;

KEYWORD : 'PROGRAM' | 'BEGIN' | 'END' | 'FUNCTION' | 'READ' | 'WRITE'
        | 'IF' | 'ELSE' | 'ENDIF' | 'WHILE' | 'ENDWHILE' | 'CONTINUE'
        | 'BREAK' | 'RETURN' | 'INT' | 'VOID' | 'STRING' | 'FLOAT';

IDENTIFIER : [A-Za-z]+ [0-9]*;

OPERATOR : ':=' | '+' | '-' | '*' | '/' | '=' | '!=' | '<' | '>'
         | '(' | ')' | ';' | ',' | '<=' | '>=';
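As a worked example of the grammar above, here is a small hypothetical LITTLE program it accepts (the names `demo`, `main`, and `a` are our own). Note that per `pgm_body`, a program body is declarations followed by function declarations, so statements live inside function bodies:

```
PROGRAM demo
BEGIN
    INT a;
    FUNCTION VOID main ()
    BEGIN
        a := 1 + 2;
        WRITE(a);
    END
END
```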

Listener

public class Listener extends lexerGrammarBaseListener
{
    public SymbolTable current_scope;
    public int block_id = 1;
    public IRGenerator ir_generator;
    public static final String END_PGM_LABEL = "END_PGM";

    public Listener()
    {
        current_scope = new SymbolTable(null, "GLOBAL");
        ir_generator = new IRGenerator();
    }

    @Override
    public void exitPgm_body(lexerGrammarParser.Pgm_bodyContext ctx)
    {
        // Add the END_PGM label
        IRInstruction end_instr = ir_generator.add_instruction();
        end_instr.op = IRInstruction.OP.LABEL;
        end_instr.result = Operand.label_operand(END_PGM_LABEL);
    }

    @Override
    public void enterReturn_stmt(lexerGrammarParser.Return_stmtContext ctx)
    {
        // Add an unconditional jump to the end of the program
        IRInstruction jmp_instr = ir_generator.add_instruction();
        jmp_instr.op = IRInstruction.OP.JUMP;
        jmp_instr.result = Operand.label_operand(END_PGM_LABEL);
    }

    @Override
    public void enterVar_decl(lexerGrammarParser.Var_declContext ctx)
    {
        String var_type = ctx.var_type().getText();

        // Add the first id
        Symbol id_sym = new Symbol(var_type, ctx.id_list().id().getText());
        current_scope.add(id_sym.get_name(), id_sym);

        // Add the remainder of the id list
        lexerGrammarParser.Id_tailContext tail = ctx.id_list().id_tail();
        while (tail.getChildCount() != 0)
        {
            Symbol tail_sym = new Symbol(var_type, tail.id().getText());
            current_scope.add(tail_sym.get_name(), tail_sym);
            tail = tail.id_tail();
        }
    }

    @Override
    public void enterString_decl(lexerGrammarParser.String_declContext ctx)
    {
        Symbol sym = new Symbol("STRING", ctx.id().getText(), ctx.str().getText());
        current_scope.add(sym.get_name(), sym);
    }

    @Override
    public void enterFunc_decl(lexerGrammarParser.Func_declContext ctx)
    {
        current_scope = new SymbolTable(current_scope, ctx.id().getText());
    }

    @Override
    public void exitFunc_decl(lexerGrammarParser.Func_declContext ctx)
    {
        current_scope = current_scope.get_parent();
    }

    @Override
    public void enterRead_stmt(lexerGrammarParser.Read_stmtContext ctx)
    {
        // Get the first id
        Symbol id_sym = current_scope.find(ctx.id_list().id().getText());
        add_read_instruction(id_sym);

        // Add the remainder of the id list
        lexerGrammarParser.Id_tailContext tail = ctx.id_list().id_tail();
        while (tail.getChildCount() != 0)
        {
            id_sym = current_scope.find(tail.id().getText());
            add_read_instruction(id_sym);
            tail = tail.id_tail();
        }
    }

    @Override
    public void enterWrite_stmt(lexerGrammarParser.Write_stmtContext ctx)
    {
        // Get the first id
        Symbol id_sym = current_scope.find(ctx.id_list().id().getText());
        add_write_instruction(id_sym);

        // Add the remainder of the id list
        lexerGrammarParser.Id_tailContext tail = ctx.id_list().id_tail();
        while (tail.getChildCount() != 0)
        {
            id_sym = current_scope.find(tail.id().getText());
            add_write_instruction(id_sym);
            tail = tail.id_tail();
        }
    }

    @Override
    public void enterParam_decl(lexerGrammarParser.Param_declContext ctx)
    {
        Symbol sym = new Symbol(ctx.var_type().getText(), ctx.id().getText());
        current_scope.add(sym.get_name(), sym);
    }

    @Override
    public void enterIf_stmt(lexerGrammarParser.If_stmtContext ctx)
    {
        current_scope = new SymbolTable(current_scope, "BLOCK " + block_id);

        // Create labels for the ELSE and END
        String else_label = "IF_ELSE_" + block_id;
        String end_label = "IF_END_" + block_id;
        block_id += 1;

        // Push labels onto the label stack so the condition knows where to go
        ir_generator.push_label(end_label);
        ir_generator.push_label(else_label);
    }

    @Override
    public void exitIf_stmt(lexerGrammarParser.If_stmtContext ctx)
    {
        current_scope = current_scope.get_parent();

        // Generate the end label
        String end_label = ir_generator.pop_label();
        IRInstruction end_instr = ir_generator.add_instruction();
        end_instr.op = IRInstruction.OP.LABEL;
        end_instr.result = Operand.label_operand(end_label);
    }

    @Override
    public void enterElse_part(lexerGrammarParser.Else_partContext ctx)
    {
        // Get labels
        String else_label = ir_generator.pop_label();
        String end_label = ir_generator.top_label();

        // Add an unconditional jump to END (so the positive part doesn't execute the else)
        IRInstruction end_instr = ir_generator.add_instruction();
        end_instr.op = IRInstruction.OP.JUMP;
        end_instr.result = Operand.label_operand(end_label);

        // Add the ELSE label
        IRInstruction else_instr = ir_generator.add_instruction();
        else_instr.op = IRInstruction.OP.LABEL;
        else_instr.result = Operand.label_operand(else_label);

        // Don't create a new scope for an empty else
        if (ctx.getChildCount() == 0)
        {
            return;
        }

        // The else should not be a child scope of the if
        current_scope = current_scope.get_parent();
        current_scope = new SymbolTable(current_scope, "BLOCK " + block_id);
        block_id += 1;
    }

    @Override
    public void enterWhile_stmt(lexerGrammarParser.While_stmtContext ctx)
    {
        current_scope = new SymbolTable(current_scope, "BLOCK " + block_id);
        String begin_label = "WHILE_" + block_id + "_BEGIN";
        String end_label = "WHILE_" + block_id + "_END";
        block_id += 1;

        // Add the label to jump back to
        IRInstruction begin_instr = ir_generator.add_instruction();
        begin_instr.op = IRInstruction.OP.LABEL;
        begin_instr.result = Operand.label_operand(begin_label);

        // Add labels to the stack, so the condition knows where to jump
        // and we can add the end label later
        ir_generator.push_label(begin_label);
        ir_generator.push_label(end_label);
    }

    @Override
    public void exitWhile_stmt(lexerGrammarParser.While_stmtContext ctx)
    {
        current_scope = current_scope.get_parent();

        // Get the labels for this loop
        String end_label = ir_generator.pop_label();
        String begin_label = ir_generator.pop_label();

        // Generate an unconditional jump back to the beginning
        IRInstruction jump_begin = ir_generator.add_instruction();
        jump_begin.op = IRInstruction.OP.JUMP;
        jump_begin.result = Operand.label_operand(begin_label);

        // Add the ending label
        IRInstruction end_instr = ir_generator.add_instruction();
        end_instr.op = IRInstruction.OP.LABEL;
        end_instr.result = Operand.label_operand(end_label);
    }

    @Override
    public void enterCond(lexerGrammarParser.CondContext ctx)
    {
        // Create the comparison instruction; it should jump to the label on
        // top of the stack (does NOT pop)
        IRInstruction comp_instr = ir_generator.push_instruction();
        comp_instr.result = Operand.label_operand(ir_generator.top_label());

        // Figure out the comparison type, and invert it (we jump when the
        // condition is false)
        if (ctx.compop().getText().equals("<"))
        {
            comp_instr.op = IRInstruction.OP.GE;
        }
        else if (ctx.compop().getText().equals(">"))
        {
            comp_instr.op = IRInstruction.OP.LE;
        }
        else if (ctx.compop().getText().equals("="))
        {
            comp_instr.op = IRInstruction.OP.NE;
        }
        else if (ctx.compop().getText().equals("!="))
        {
            comp_instr.op = IRInstruction.OP.EQ;
        }
        else if (ctx.compop().getText().equals("<="))
        {
            comp_instr.op = IRInstruction.OP.GT;
        }
        else if (ctx.compop().getText().equals(">="))
        {
            comp_instr.op = IRInstruction.OP.LT;
        }
        else
        {
            assert(false); // Unrecognized comparison op
        }
    }

    @Override
    public void enterAssign_expr(lexerGrammarParser.Assign_exprContext ctx)
    {
        // Generate a new instruction with the symbol as the target
        Symbol lvalue = current_scope.find(ctx.id().getText());
        IRInstruction assign_inst = ir_generator.push_instruction();
        assign_inst.result = Operand.symbol_operand(lvalue);
    }

    @Override
    public void exitAssign_expr(lexerGrammarParser.Assign_exprContext ctx)
    {
        // If the op was never determined, it should be a store
        IRInstruction assign_inst = ir_generator.pop();
        if (assign_inst != null && assign_inst.op == null)
        {
            if (assign_inst.result.type == Operand.Type.INT_VAR)
            {
                assign_inst.op = IRInstruction.OP.STOREI;
            }
            else if (assign_inst.result.type == Operand.Type.FLOAT_VAR)
            {
                assign_inst.op = IRInstruction.OP.STOREF;
            }
        }
    }

    @Override
    public void enterFactor_prefix(lexerGrammarParser.Factor_prefixContext ctx)
    {
        // If this isn't actually a factor prefix (empty production), skip it
        if (ctx.getChildCount() == 0)
        {
            return;
        }

        expr_recurse();
    }

    @Override
    public void enterExpr_prefix(lexerGrammarParser.Expr_prefixContext ctx)
    {
        // If this isn't actually an expr prefix (empty production), skip it
        if (ctx.getChildCount() == 0)
        {
            return;
        }

        expr_recurse();
    }

    @Override
    public void enterPrimary(lexerGrammarParser.PrimaryContext ctx)
    {
        if (ctx.id() != null)
        {
            Symbol sym = current_scope.find(ctx.id().getText());
            Operand operand = Operand.symbol_operand(sym);
            assign_operand(operand);
            pop_if_complete();
        }
        else if (ctx.INTLITERAL() != null)
        {
            assign_operand(Operand.int_lit_operand(ctx.INTLITERAL().getText()));
            pop_if_complete();
        }
        else if (ctx.FLOATLITERAL() != null)
        {
            assign_operand(Operand.float_lit_operand(ctx.FLOATLITERAL().getText()));
            pop_if_complete();
        }
    }

    @Override
    public void enterAddop(lexerGrammarParser.AddopContext ctx)
    {
        IRInstruction instr = ir_generator.top_instruction();

        // The first operand's type must have been determined (only ints and
        // floats are allowed for ADD/SUB)
        assert(instr.operand_1.is_int() || instr.operand_1.is_float());
        assert(instr.op == IRInstruction.OP.UNDETERMINED_RESERVED);

        if (ctx.getText().equals("+"))
        {
            if (instr.operand_1.is_int())
            {
                instr.op = IRInstruction.OP.ADDI;
            }
            else
            {
                instr.op = IRInstruction.OP.ADDF;
            }
        }
        else
        {
            assert(ctx.getText().equals("-"));
            if (instr.operand_1.is_int())
            {
                instr.op = IRInstruction.OP.SUBI;
            }
            else
            {
                instr.op = IRInstruction.OP.SUBF;
            }
        }
    }

    @Override
    public void enterMulop(lexerGrammarParser.MulopContext ctx)
    {
        IRInstruction instr = ir_generator.top_instruction();

        // The first operand's type must have been determined (only ints and
        // floats are allowed for MUL/DIV)
        assert(instr.operand_1.is_int() || instr.operand_1.is_float());
        assert(instr.op == IRInstruction.OP.UNDETERMINED_RESERVED);

        if (ctx.getText().equals("*"))
        {
            if (instr.operand_1.is_int())
            {
                instr.op = IRInstruction.OP.MULTI;
            }
            else
            {
                instr.op = IRInstruction.OP.MULTF;
            }
        }
        else
        {
            assert(ctx.getText().equals("/"));
            if (instr.operand_1.is_int())
            {
                instr.op = IRInstruction.OP.DIVI;
            }
            else
            {
                instr.op = IRInstruction.OP.DIVF;
            }
        }
    }

    private void expr_recurse()
    {
        // If the current instruction has already been assigned an op,
        // start a new temporary instruction
        IRInstruction instr = ir_generator.top_instruction();
        if (instr.op != null)
        {
            instr = push_temp_instruction();
        }

        instr.op = IRInstruction.OP.UNDETERMINED_RESERVED;
    }

    private IRInstruction push_temp_instruction()
    {
        Operand temp = ir_generator.allocate_temporary();
        assign_operand(temp);
        IRInstruction instr = ir_generator.push_instruction();
        instr.result = temp;
        return instr;
    }

    private void determine_result_type()
    {
        IRInstruction instr = ir_generator.top_instruction();
        if (instr.result.type != null)
        {
            return;
        }

        assert(instr.operand_1.is_int() || instr.operand_1.is_float());

        if (instr.operand_1.is_int())
        {
            instr.result.type = Operand.Type.INT_VAR;
        }
        else
        {
            instr.result.type = Operand.Type.FLOAT_VAR;
        }
    }

    private void assign_operand(Operand operand)
    {
        IRInstruction instr = ir_generator.top_instruction();
        assert(instr.operand_1 == null || instr.operand_2 == null);

        if (instr.operand_1 == null)
        {
            instr.operand_1 = operand;
        }
        else
        {
            instr.operand_2 = operand;
        }
    }

    private void pop_if_complete()
    {
        IRInstruction instr = ir_generator.top_instruction();
        if (instr != null && instr.operand_1 != null && instr.op != null &&
            instr.operand_2 != null)
        {
            determine_result_type();
            ir_generator.pop();
            pop_if_complete();
        }
    }

    private void add_read_instruction(Symbol symbol)
    {
        IRInstruction instr = ir_generator.add_instruction();
        instr.result = Operand.symbol_operand(symbol);
        if (symbol.get_type().equals("INT"))
        {
            instr.op = IRInstruction.OP.READI;
        }
        else if (symbol.get_type().equals("FLOAT"))
        {
            instr.op = IRInstruction.OP.READF;
        }
    }

    private void add_write_instruction(Symbol symbol)
    {
        IRInstruction instr = ir_generator.add_instruction();
        instr.result = Operand.symbol_operand(symbol);
        if (symbol.get_type().equals("INT"))
        {
            instr.op = IRInstruction.OP.WRITEI;
        }
        else if (symbol.get_type().equals("FLOAT"))
        {
            instr.op = IRInstruction.OP.WRITEF;
        }
        else if (symbol.get_type().equals("STRING"))
        {
            instr.op = IRInstruction.OP.WRITES;
        }
    }
}

Operand

public class Operand
{
    public enum Type
    {
        INT_VAR,
        FLOAT_VAR,
        STRING_VAR,
        INT_LIT,
        FLOAT_LIT,
        LABEL
    }

    public static Operand symbol_operand(Symbol symbol)
    {
        Operand result = new Operand();
        result.value = symbol.get_name();

        if (symbol.get_type().equals("INT"))
        {
            result.type = Type.INT_VAR;
        }
        else if (symbol.get_type().equals("FLOAT"))
        {
            result.type = Type.FLOAT_VAR;
        }
        else if (symbol.get_type().equals("STRING"))
        {
            result.type = Type.STRING_VAR;
        }

        return result;
    }

    public static Operand temp_operand(String name, Type type)
    {
        Operand result = new Operand();
        result.value = name;
        result.type = type;
        return result;
    }

    public static Operand label_operand(String name)
    {
        Operand result = new Operand();
        result.type = Type.LABEL;
        result.value = name;
        return result;
    }

    public static Operand int_lit_operand(String value)
    {
        Operand result = new Operand();
        result.type = Type.INT_LIT;
        result.value = value;
        return result;
    }

    public static Operand float_lit_operand(String value)
    {
        Operand result = new Operand();
        result.type = Type.FLOAT_LIT;
        result.value = value;
        return result;
    }

    @Override
    public String toString()
    {
        return value;
    }

    public boolean is_int()
    {
        return type == Type.INT_LIT || type == Type.INT_VAR;
    }

    public boolean is_float()
    {
        return type == Type.FLOAT_VAR || type == Type.FLOAT_LIT;
    }

    public boolean is_lit()
    {
        return type == Type.INT_LIT || type == Type.FLOAT_LIT;
    }

    public Type type;
    public String value;
}

Symbol

public class Symbol
{
    public Symbol(String type, String name)
    {
        _type = type;
        _name = name;
    }

    public Symbol(String type, String name, String value)
    {
        _type = type;
        _name = name;
        _value = value;
    }

    public String get_type()
    {
        return _type;
    }

    public String get_name()
    {
        return _name;
    }

    public String get_value()
    {
        return _value;
    }

    @Override
    public String toString()
    {
        return "name " + _name + " type " + _type +
            (_value != null ? " value " + _value : "");
    }

    private String _type;
    private String _name;
    private String _value;
}


SymbolTable

import org.antlr.v4.misc.OrderedHashMap;
import java.util.ArrayList;

public class SymbolTable
{
    public SymbolTable(SymbolTable parent, String scope_name)
    {
        _parent = parent;
        _children = new ArrayList<>();
        _scope_name = scope_name;
        _symbols = new OrderedHashMap<>();

        if (parent != null)
        {
            parent._children.add(this);
        }
    }

    public SymbolTable get_parent()
    {
        return _parent;
    }

    public Symbol find(String name)
    {
        Symbol result = _symbols.get(name);
        if (result != null)
        {
            return result;
        }
        else if (_parent != null)
        {
            return _parent.find(name);
        }

        return null;
    }

    public void add(String name, Symbol symbol) throws IllegalArgumentException
    {
        if (_symbols.containsKey(name))
        {
            throw new IllegalArgumentException("DECLARATION ERROR " + name);
        }

        assert(name != null && symbol != null);
        _symbols.put(name, symbol);
    }

    public ArrayList<Symbol> get_symbols()
    {
        ArrayList<Symbol> result = new ArrayList<>();
        result.addAll(_symbols.values());
        for (SymbolTable child : _children)
        {
            result.addAll(child.get_symbols());
        }

        return result;
    }

    @Override
    public String toString()
    {
        String result = "Symbol table " + _scope_name;
        for (Symbol entry : _symbols.values())
        {
            result += "\n" + entry.toString();
        }

        for (SymbolTable child : _children)
        {
            result += "\n\n" + child.toString();
        }

        return result;
    }

    private SymbolTable _parent;
    private ArrayList<SymbolTable> _children;
    private String _scope_name;
    private OrderedHashMap<String, Symbol> _symbols;
}

TinyEmitter import java.util.HashMap; ​ import java.util.HashSet; ​ import java.util.List; ​ public class TinyEmitter ​ { private static final String SWAP_REG = "r0"; ​ ​ ​ ​ ​ ​

24 public String emit_code(SymbolTable symbols, List instructions) ​ ​ { StringBuilder result = new StringBuilder(); ​ ​

// Allocate variables ​ for (Symbol sym : symbols.get_symbols()) ​ ​ { if (sym.get_type().equals("STRING")) ​ ​ ​ ​ { _allocations.put(sym.get_name(), sym.get_name()); ​ ​ result.append("str "); ​ ​ result.append(sym.get_name()); result.append(" "); ​ ​ result.append(sym.get_value()); result.append("\n"); ​ ​ ​ ​ continue; ​ ​ }

_allocations.put(sym.get_name(), "v_" + sym.get_name()); ​ ​ ​ ​ result.append("var "); ​ ​ result.append("v_" + sym.get_name()); ​ ​ result.append("\n"); ​ ​ ​ ​ }

// begin emitting code ​ for (IRInstruction instr : instructions) ​ ​ { switch (instr.op) ​ ​ ​ ​ { case LABEL: ​ ​ ​ gen_1ac(result, "label", instr.result.value); ​ ​ ​ ​ ​ ​ ​ ​ break; ​ ​

case JUMP: ​ ​ ​ gen_1ac(result, "jmp", instr.result.value); ​ ​ ​ ​ ​ ​ ​ ​ break; ​ ​

case GT: ​ ​ ​ gen_cmp(result, instr.operand_1, instr.operand_2); ​ ​ ​ ​ gen_1ac(result, "jgt", instr.result.value); ​ ​ ​ ​ ​ ​ ​ ​ break; ​ ​

case GE: ​ ​ ​ gen_cmp(result, instr.operand_1, instr.operand_2); ​ ​ ​ ​ gen_1ac(result, "jge", instr.result.value); ​ ​ ​ ​ ​ ​ ​ ​ break; ​ ​

case LT: ​ ​ ​ gen_cmp(result, instr.operand_1, instr.operand_2); ​ ​ ​ ​ gen_1ac(result, "jlt", instr.result.value); ​ ​ ​ ​ ​ ​ ​ ​

25 break; ​ ​

case LE: ​ ​ ​ gen_cmp(result, instr.operand_1, instr.operand_2); ​ ​ ​ ​ gen_1ac(result, "jle", instr.result.value); ​ ​ ​ ​ ​ ​ ​ ​ break; ​ ​

case NE: ​ ​ ​ gen_cmp(result, instr.operand_1, instr.operand_2); ​ ​ ​ ​ gen_1ac(result, "jne", instr.result.value); ​ ​ ​ ​ ​ ​ ​ ​ break; ​ ​

case EQ: ​ ​ ​ gen_cmp(result, instr.operand_1, instr.operand_2); ​ ​ ​ ​ gen_1ac(result, "jeq", instr.result.value); ​ ​ ​ ​ ​ ​ ​ ​ break; ​ ​

case STOREI: ​ ​ ​ case STOREF: ​ ​ ​ // Only one argument to MOVE may be a memory address ​ if (!is_lit_or_reg(instr.operand_1)) ​ ​ ​ ​ { gen_store_in_swap(result, get_opmr(instr.operand_1)); ​ ​ ​ ​ gen_2ac(result, "move", SWAP_REG, get_opmr(instr.result)); ​ ​ ​ ​ ​ ​ ​ ​ } else ​ { ​ gen_2ac(result, "move", get_opmrl(instr.operand_1), ​ ​ ​ ​ ​ ​ get_opmr(instr.result)); ​ ​ } break; ​ ​

case ADDI: ​ ​ ​ gen_store_in_swap(result, get_opmrl(instr.operand_1)); ​ ​ ​ ​ gen_2ac(result, "addi", get_opmrl(instr.operand_2), SWAP_REG); ​ ​ ​ ​ ​ ​ ​ ​ gen_load_from_swap(result, get_opmr(instr.result)); ​ ​ ​ ​ break; ​ ​

        case ADDF:
            gen_store_in_swap(result, get_opmrl(instr.operand_1));
            gen_2ac(result, "addr", get_opmrl(instr.operand_2), SWAP_REG);
            gen_load_from_swap(result, get_opmr(instr.result));
            break;

        case SUBI:
            gen_store_in_swap(result, get_opmrl(instr.operand_1));
            gen_2ac(result, "subi", get_opmrl(instr.operand_2), SWAP_REG);
            gen_load_from_swap(result, get_opmr(instr.result));
            break;

        case SUBF:
            gen_store_in_swap(result, get_opmrl(instr.operand_1));
            gen_2ac(result, "subr", get_opmrl(instr.operand_2), SWAP_REG);
            gen_load_from_swap(result, get_opmr(instr.result));
            break;

        case MULTI:
            gen_store_in_swap(result, get_opmrl(instr.operand_1));
            gen_2ac(result, "muli", get_opmrl(instr.operand_2), SWAP_REG);
            gen_load_from_swap(result, get_opmr(instr.result));
            break;

        case MULTF:
            gen_store_in_swap(result, get_opmrl(instr.operand_1));
            gen_2ac(result, "mulr", get_opmrl(instr.operand_2), SWAP_REG);
            gen_load_from_swap(result, get_opmr(instr.result));
            break;

        case DIVI:
            gen_store_in_swap(result, get_opmrl(instr.operand_1));
            gen_2ac(result, "divi", get_opmrl(instr.operand_2), SWAP_REG);
            gen_load_from_swap(result, get_opmr(instr.result));
            break;

        case DIVF:
            gen_store_in_swap(result, get_opmrl(instr.operand_1));
            gen_2ac(result, "divr", get_opmrl(instr.operand_2), SWAP_REG);
            gen_load_from_swap(result, get_opmr(instr.result));
            break;

        case READI:
            gen_1ac(result, "sys readi", get_opmr(instr.result));
            break;

        case READF:
            gen_1ac(result, "sys readr", get_opmr(instr.result));
            break;

        case WRITEI:
            gen_1ac(result, "sys writei", get_opmr(instr.result));
            break;

        case WRITEF:
            gen_1ac(result, "sys writer", get_opmr(instr.result));
            break;

        case WRITES:
            gen_1ac(result, "sys writes", get_opmr(instr.result));
            break;

        default:
            assert(false); // Unsupported op
            break;
        }
    }

    // Output post
    result.append("sys halt\n");
    result.append("end\n");

    // Output result
    return result.toString();
}

private static void gen_1ac(StringBuilder result, String op, String arg)
{
    result.append(op); result.append(" ");
    result.append(arg); result.append("\n");
}

private static void gen_2ac(StringBuilder result, String op, String opmrl, String reg_result)
{
    result.append(op); result.append(" ");
    result.append(opmrl); result.append(" ");
    result.append(reg_result); result.append("\n");
}

private static void gen_store_in_swap(StringBuilder result, String opmrl)
{
    gen_2ac(result, "move", opmrl, SWAP_REG);
}

private static void gen_load_from_swap(StringBuilder result, String opmr)
{
    gen_2ac(result, "move", SWAP_REG, opmr);
}

private void gen_cmp(StringBuilder result, Operand lhs, Operand rhs)
{
    // Store the RHS in the swap reg, because the RHS of a CMP instruction
    // is required to be a register
    gen_store_in_swap(result, get_opmrl(rhs));

    // Determine if we should use float or integer comparison
    if (lhs.is_int())
    {
        gen_2ac(result, "cmpi", get_opmrl(lhs), SWAP_REG);
    }
    else if (lhs.is_float())
    {
        gen_2ac(result, "cmpr", get_opmrl(lhs), SWAP_REG);
    }
    else
    {
        assert(false);
    }
}

private String get_opmrl(Operand operand)
{
    if (operand.type == Operand.Type.INT_LIT || operand.type == Operand.Type.FLOAT_LIT)
    {
        return operand.value;
    }

    return get_opmr(operand);
}

private String get_opmr(Operand operand)
{
    assert(!operand.is_lit());

    if (!_allocations.containsKey(operand.value))
    {
        String reg = "r" + _next_reg;
        _next_reg += 1;
        _allocations.put(operand.value, reg);
        _reg_variables.add(operand.value);
        return reg;
    }

    return _allocations.get(operand.value);
}

private boolean is_lit_or_reg(Operand operand)
{
    return operand.is_lit() || _reg_variables.contains(operand.value);
}

private HashSet<String> _reg_variables = new HashSet<>();
private HashMap<String, String> _allocations = new HashMap<>();
private int _next_reg = 1; // r0 is reserved for swap
}

Section 2: Teamwork

We collaborated on this project through in-person meetings, using GitHub for version control and sharing of the source code. For the first step of the compiler, the scanner, each team member worked on the source code to become familiar with the ANTLR library. During the subsequent steps, as the compiler became more complicated, the team began to do pair (and trio) programming on a single computer for more active collaboration and learning. For example, during a group meeting, team member 1 would write the code while team member 2 watched, commented, and assisted with design decisions and semantic errors. During this time, team member 3 would research the topic at hand, provide input similar to team member 2, or write the corresponding section of the technical report. These roles rotated throughout the project, with each member writing code, watching over a shoulder, or working on the report. At the completion of each step of the compiler, the project code was committed to GitHub from the computer we had worked on, and submitted after it was tested with the grading shell scripts and corresponding input test cases. To summarize, all team members contributed equal time across the project, including writing source code, testing source code, providing input on design decisions, and completing the portfolio.

Section 3: Design Pattern

We did not actively employ any design patterns during the design and creation of the compiler. The rigid structure of the ANTLR library largely determined our class hierarchy. Furthermore, the LITTLE compiler serves a static purpose, and design patterns are better suited to dynamic code bases which are continually maintained and extended. One could argue that a compiler does in fact need to be dynamic to handle various kinds of source code, and that is true of commercial compilers, but the LITTLE language is rather simplistic and most of the dynamic aspects could be handled within the grammar definition.

Section 4: Technical Report

Introduction

Most computer science students make little to no mental distinction between the language they write in (Java, C#, C, etc.) and the tools surrounding it (IDEs, compilers, etc.). In this course, our mission was to develop a full source-to-assembly compiler for a simple programming language known as LITTLE, in an effort to break down this indistinction.

Background

At the end of the day, modern computers are still only capable of executing assembly instructions encoded in binary, known as machine code. This machine code is very difficult to write, and the result is not portable between operating systems, processor architectures, and sometimes even different generations of the same processor architecture. In order for businesses to serve products to their millions of users across dozens of different platforms, there needs to be a way to write programs that are easy to port across different platforms, new and old. The way this is done is by using a compiler, which can translate code written in a machine-agnostic manner into machine code which can be executed.

There are many different ways to write a compiler, but they almost always begin by first defining a specification for the language they compile. There are several advantages to separating the specification and implementation of a language: it makes it possible to differentiate between bugs in the compiler and bugs in the design of the language, end users are able to reason about the specific rules of the language without having access to or knowledge of the compiler source code, and it becomes possible to develop new compilers for the language that are fully compatible with code written for the original compiler.

There is a great deal of theory surrounding language specification, but fortunately enough work has been done that it is frequently possible to leverage compiler generators for certain components of a compiler, using only the specification of the language. These compiler generators automatically produce source code, in the language the compiler is written in, that implements components such as lexing and parsing. This can immediately eliminate bugs frequent in text-processing applications and greatly increase compiler-developer productivity. However, this sometimes comes at the cost of performance and maintainability.
The type of compiler that most programmers are familiar with operates by consuming source code and directly emitting machine code for the architecture the compiler is configured for (which may be different from the architecture the compiler is running on; this is known as cross-compiling). These are sometimes referred to as native compilers, and the advantages of this style are generally high levels of static analysis and error checking, and greater run-time efficiency. The disadvantage of this style is that the generated code is as non-portable as if the machine code had been written by hand, though recompiling the original source code for a different architecture is trivial, assuming the source is available. That is not the case for closed-source applications, and it is not unheard of for companies to lose the source code for their own applications. Examples of languages that are typically compiled this way are C, C++, Rust, and D, which may be compiled with gcc, MSVC, rustc, and LDC, respectively (among others).

Other compilers operate by consuming source code and immediately executing it; these are generally known as interpreters. These compilers are known for having very fast developer turnaround time, at the cost of run-time efficiency and fewer guarantees of correctly executing code. Examples of languages that are typically run this way are Python, JavaScript, and Perl, which may be executed with CPython, SpiderMonkey, and Perl, respectively (among others).

Others generate binary code that can be interpreted more efficiently than source code by a program known as a virtual machine. The advantage of this is that the emitted code generally has the same portability as interpreted languages (assuming an implementation of the virtual machine is available for the target architecture), with typically the same level of static analysis as native compilers. Additionally, the binary code format can be specified independently of the source language, so that multiple source languages may target the same binary code format and can interoperate as cleanly as if they had been written in the same language.
The greatest disadvantage of this style is that programs compiled this way are still generally not as efficient as natively compiled programs, though some virtual machine implementations alleviate this to an extent by leveraging Just-In-Time compilation, which generates real machine code for the binary code while it is being interpreted. The ecosystems for these types of languages are by far the most diverse, as there can be multiple source languages that can be compiled by multiple compilers to the same binary code format, and multiple virtual machines that can execute it. Examples of this are the languages Java, Scala, and Kotlin, all of which may be compiled by various compilers to the JVM bytecode format, and may be executed by virtual machines such as the Oracle JVM and OpenJDK, among others. A similar ecosystem is Microsoft's CLR, which can be targeted by compilers for C#, F#, IronPython, and even C++. Implementations of virtual machines for this include the .NET Runtime and Mono.

While those are the general archetypes for compilers, many implementations blur the lines between them. There are native compilers for typically interpreted languages such as Ruby (Crystal), there are interpreters for typically natively compiled languages such as C++ (CINT, Cling), and there are even compilers that can compile code written in one source language directly to another (known as a source-to-source compiler), such as C++ to JavaScript (Emscripten). For this project, we have written a native compiler for the language LITTLE.

Methods and Discussion

In this section, we will discuss the functionality of each stage of the compiler and how we implemented that stage in our own code. We will also discuss some difficulties we faced when implementing each part.

Scanner

Background

The scanner is a sub-program of the compiler which views the LITTLE source code as a sequence of characters and outputs a sequence of tokens representing the meaningful elements of the program. This sequence of tokens is used as the input for later steps such as the parser. Each token is an object that contains a TOKENTYPE and the value of the token as it appeared in the input text.

Methods

To initialize ANTLR and provide the scanner with a sequence of characters to read, we first initialized an ANTLRFileStream that reads the contents of the input file. Then we created a lexerGrammarLexer that uses the contents of our lexerGrammar grammar specification to create a scanner. We then passed the result of this into a CommonTokenStream, which holds the sequence of tokens our scanner outputs. The lexerGrammar file contains the regular expressions that define the tokens in the LITTLE language. There are eight token types in our grammar: KEYWORD, IDENTIFIER, INTLITERAL, FLOATLITERAL, STRINGLITERAL, COMMENT, OPERATOR, and WS (whitespace). Each of these token types has a regular expression that defines what should be matched as that type. For example, the full definition for a floating-point number would be:

FLOATLITERAL: [0-9]*'.'[0-9]+ ;

This regular expression captures numbers of the form 1234.5678. After building the lexerGrammar file and initializing the lexerGrammarLexer, ANTLR creates the actual scanner itself and generates the token stream based on the grammar in lexerGrammar and the input file.
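The same pattern can be checked outside of ANTLR with a standard Java regular expression, which is a convenient way to sanity-test a token definition. The class and method names below are illustrative, not part of the compiler:

```java
import java.util.regex.Pattern;

// Sketch: the FLOATLITERAL rule [0-9]*'.'[0-9]+ expressed as a plain
// Java regex, so the token definition can be tested outside of ANTLR.
public class FloatLiteralCheck {
    // Zero or more digits, a literal dot, then one or more digits.
    private static final Pattern FLOAT_LITERAL = Pattern.compile("[0-9]*\\.[0-9]+");

    public static boolean isFloatLiteral(String s) {
        return FLOAT_LITERAL.matcher(s).matches();
    }

    public static void main(String[] args) {
        System.out.println(isFloatLiteral("1234.5678")); // true
        System.out.println(isFloatLiteral(".5"));        // true: integer part is optional
        System.out.println(isFloatLiteral("1234"));      // false: no decimal point
        System.out.println(isFloatLiteral("12."));       // false: fraction digits required
    }
}
```

Note that the rule requires at least one digit after the decimal point, so `12.` is rejected while `.5` is accepted.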

Difficulties

Differentiating between floating-point and integer literals was somewhat tricky. We encountered a bug where certain rules were too greedy at matching symbols, and would eventually match the entire program. We fixed this by following the advice of one of the warnings generated by the ANTLR grammar compiler, which was to replace the sequence '.*' with '.*?'. Additionally, the order of the rules within the grammar file proved to be important; for example, keywords like 'PROGRAM' could be erroneously matched as strings. Lastly, we had to account for whitespace characters like '\r' and '\n', because we were not getting the intended output, and it was not completely apparent why at first.
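The greedy-matching bug described above can be reproduced with plain Java regexes rather than ANTLR rules. This sketch (with illustrative names) shows how a greedy `.*` inside a string-literal pattern swallows the text between two adjacent literals, while the reluctant `.*?` stops at the first closing quote:

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Sketch of the greedy-matching bug: a greedy ".*" inside a string-literal
// pattern consumes everything up to the LAST quote in the input, while the
// reluctant ".*?" stops at the first one, matching a single literal.
public class GreedyDemo {
    public static String firstMatch(String pattern, String input) {
        Matcher m = Pattern.compile(pattern).matcher(input);
        return m.find() ? m.group() : null;
    }

    public static void main(String[] args) {
        String input = "\"Hello\" x \"World\"";
        // Greedy: matches from the first quote all the way to the last one.
        System.out.println(firstMatch("\".*\"", input));  // "Hello" x "World"
        // Reluctant: matches a single string literal, as intended.
        System.out.println(firstMatch("\".*?\"", input)); // "Hello"
    }
}
```

ANTLR's `.*?` in a lexer rule behaves analogously, which is why the grammar compiler's warning suggested the replacement.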

Parser

Background

The parser is a subprogram of the compiler which examines the stream of tokens output by the scanner. The parser has two main objectives. The first is to generate a parse tree which is then used to generate code. The parse tree is created according to a predefined grammar that contains the rules of the language. The second is syntactic error detection: the compiler will generate an error if the tokens do not match the predefined grammar. We used ANTLR's parser generator to create our parser. ANTLR uses an LL(*) parser. An LL(*) parser is similar to an LL(k) parser, but it does not have a fixed finite lookahead.

Methods

To initialize the parser, we construct a lexerGrammarParser which takes a CommonTokenStream, an ANTLR class, as input. The token stream is the output of the lexer, which is then received as input by the parser. Each token is then compared to the parsing rules defined in lexerGrammar (the naming convention was carried over from the lexer creation). The token type of each token is examined and the parser attempts to match it to one of the established rules. The simplest rules are defined as such:

id : IDENTIFIER ;

This rule defines that whenever the parser sees a token with the token type IDENTIFIER, the parser adds the "id" expression to the parse tree. Once we have this expression, we can start defining more complex expressions to add to the parse tree. The complex expressions are where we define the structures that a programmer can use, such as function definitions, if statements, and for loops. The expression that represents a function declaration looks like this:

func_decl : 'FUNCTION' any_type id param_decl_list 'BEGIN' func_body 'END' ;

Difficulties

With some of the more complex statements, we had to be careful that there were no inputs that could match two rules at once. For example, two if/then/else statements in a language could look like this:

if a then b
if a then b else c

This causes problems because when the parser reaches the "b" in the statement, the input could belong to two different expressions without any way to tell which rule to use. We got around this issue by structuring our if-statement definition as such:

if_stmt : 'IF' '(' cond ')' decl stmt_list else_part 'ENDIF' ;
else_part : 'ELSE' decl stmt_list | ;
cond : expr compop expr ;
compop : '<' | '>' | '=' | '!=' | '<=' | '>=' ;

Designing the grammar with as few intermediate rules as possible was a bit tricky, and sometimes impossible. In the case of identifier lists, it was necessary to have separate id_list and id_tail rules, even though id_tail should only ever appear in the context of an id_list. Furthermore, many rules had to be rewritten to remove left recursion, which would send the parser into infinite loops when trying to match the more complex rules. Sometimes this involved adding a λ (empty) transition; other times it took restructuring multiple rules or breaking a complex rule into several simpler rules.

Symbol Table

Background

The symbol table is the component of the compiler that keeps track of what the non-keyword names (symbols) encountered in source code refer to. The symbol table has to be aware of the type and scope of each symbol, so that name hiding can be supported and symbols go out of scope when appropriate. Within the LITTLE language, symbols can be of type string, float, or integer.

Methods

We implemented our SymbolTable class as a tree of tables. Each SymbolTable has an OrderedHashMap which associates a name with a Symbol object, a parent SymbolTable, an array of child symbol tables, and a scope name. The Listener is written by implementing the GrammarListener interface generated by ANTLR. When the listener enters a new scope, the implementation creates a new SymbolTable object, assigns the current SymbolTable object as its parent, and then sets it as the current SymbolTable object. Symbols created in the new scope are added to the current SymbolTable object, and any new scopes are created using the same process as before. When the Listener leaves the current scope, the current SymbolTable object is reassigned to its parent. This solution is quite elegant, and requires very little state to be managed in both the Listener and the symbol table. In fact, only two variables are required in the Listener class: the current symbol table, and the next block id. Lookups into a symbol table are done by recursively searching first the current SymbolTable object, and then its parent. This continues up the tree; if the symbol fails to be found even in the root symbol table, an error is returned.
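A minimal sketch of this tree-of-tables scheme is below. The names are illustrative, and the real SymbolTable stores a full Symbol object (type, name, value) rather than a bare type string:

```java
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;

// Sketch of a scoped symbol table: each table keeps an insertion-ordered
// map of local symbols, a parent pointer, and a list of child scopes.
public class ScopeTable {
    private final LinkedHashMap<String, String> symbols = new LinkedHashMap<>();
    private final ScopeTable parent;
    private final List<ScopeTable> children = new ArrayList<>();
    private final String scopeName;

    public ScopeTable(String scopeName, ScopeTable parent) {
        this.scopeName = scopeName;
        this.parent = parent;
        if (parent != null) parent.children.add(this);
    }

    public void declare(String name, String type) {
        symbols.put(name, type);
    }

    // Search the current scope first, then walk up toward the root;
    // returns null if the symbol is not visible from this scope.
    public String lookup(String name) {
        if (symbols.containsKey(name)) return symbols.get(name);
        return parent != null ? parent.lookup(name) : null;
    }

    public static void main(String[] args) {
        ScopeTable global = new ScopeTable("GLOBAL", null);
        global.declare("x", "INT");
        ScopeTable block = new ScopeTable("BLOCK 1", global);
        block.declare("y", "FLOAT");
        block.declare("x", "FLOAT"); // name hiding: shadows the global x
        System.out.println(block.lookup("y"));  // FLOAT, found locally
        System.out.println(block.lookup("x"));  // FLOAT, local shadows global
        System.out.println(global.lookup("y")); // null, not visible in parent
    }
}
```

The child list is what lets the compiler revisit every scope later (for example, when the Tiny emitter traverses the tables to declare variables).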

Difficulties

Initially our Listener found symbol names by using hard-coded indices into the children array supplied by the listener context object. Thanks to auto-completion by IntelliJ, we discovered that the generated context object actually includes the names of children as they are described by the grammar, which makes the code much easier to read and resilient to grammar changes down the road. Variable name lists were somewhat difficult, but were managed by first parsing the initial variable name and type, and then iteratively adding the remaining variable names. Iteration terminates when the Id_tailContext object has no children. This is detected using the expression tail.getChildCount() != 0, which strangely is not identical to writing tail.isEmpty(), even if the tail is in fact empty. The only particularly difficult issue we ran into was handling 'if' statements. Our grammar requires that every 'if' statement be matched by an 'else' statement, even if the else branch is empty. This caused a bug where too many sub-scopes were created, but it was quickly solved by studying the tests and grammar, and adding an explicit check for whether an 'else' node is empty.

37 Semantic Routines

Background

After constructing a symbol table, the compiler must convert the constructs recognized in the source code into a language- and machine-independent format known as IR. As this is not the final phase of the compiler, the generated IR does not necessarily need to be optimal, though it does need to be correct. There are some non-obvious difficulties with this, such as generating unique temporary variables and labels in the correct order. Failure to do so could lead to code that functions, but without the effect the author intended.

Methods

The compiler makes use of two different stacks when generating IR from the syntax tree: an Instruction Stack and a Label Stack. When a node representing an assignment is encountered, the compiler pushes a blank instruction onto the top of the stack with the result assigned to the lvalue of the assignment and the operands and opcode undetermined. Operands for the instruction, two at most, are filled in a left-to-right manner, as this is the order in which the syntax tree listener sees the nodes. Symbol names and literals are simply filled into the operands for the instruction at the top of the stack, and if the instruction has been completed (i.e., the result, opcode, and both operands for the instruction have all been determined), the instruction is popped off the stack and inserted into the final array of instructions. The next instruction is then examined for completion, and the process continues until an incomplete instruction is encountered or the stack is empty.

The process for handling nodes that represent more complex expressions than symbols or literals is slightly more complicated. First, the instruction on the top of the stack is inspected to see if its opcode has been determined yet. If it has not, the opcode is simply assigned as whatever type of operation this node represents, and the listener moves on. If it has, the listener generates a new temporary variable, assigns that as the next operand for the top instruction, and pushes a new instruction onto the instruction stack with the temporary assigned as the result of the instruction, and the opcode assigned as whatever type of operation this node represents. This algorithm generates quite simple and optimal code for very complex expressions, as well as very simple ones. It does not generate intermediate STORE instructions, as was pervasive in the example generated code we were given; the only time a STORE is ever generated is when the expression given actually represents a STORE.
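The instruction-stack algorithm can be sketched as follows, driven by hand-written "listener events" rather than a real ANTLR parse tree. All class and method names here are illustrative, not the compiler's:

```java
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;

// Sketch of the instruction-stack IR generation described above.
public class IRStackDemo {
    static class Instr {
        String result, op, op1, op2;
        Instr(String result) { this.result = result; }
        boolean complete() { return result != null && op != null && op1 != null && op2 != null; }
        public String toString() { return op + " " + op1 + " " + op2 + " " + result; }
    }

    private final Deque<Instr> stack = new ArrayDeque<>();
    private final List<String> emitted = new ArrayList<>();
    private int nextTemp = 1;

    // Assignment node: push a blank instruction whose result is the lvalue.
    public void enterAssign(String lvalue) { stack.push(new Instr(lvalue)); }

    // Complex expression node: either fill in the top instruction's opcode,
    // or spill into a new temporary and push a fresh instruction for it.
    public void enterExpr(String opcode) {
        Instr top = stack.peek();
        if (top.op == null) {
            top.op = opcode;
        } else {
            String temp = "$T" + nextTemp++;
            if (top.op1 == null) top.op1 = temp; else top.op2 = temp;
            Instr inner = new Instr(temp);
            inner.op = opcode;
            stack.push(inner);
        }
    }

    // Symbol or literal node: fill the next free operand slot, then pop
    // every instruction that has become complete off the stack.
    public void addOperand(String value) {
        Instr top = stack.peek();
        if (top.op1 == null) top.op1 = value; else top.op2 = value;
        while (!stack.isEmpty() && stack.peek().complete()) {
            emitted.add(stack.pop().toString());
        }
    }

    public List<String> result() { return emitted; }

    public static void main(String[] args) {
        IRStackDemo gen = new IRStackDemo();
        // a := b + c * d, visited in the listener's left-to-right order:
        gen.enterAssign("a");
        gen.enterExpr("ADDI");
        gen.addOperand("b");
        gen.enterExpr("MULTI");
        gen.addOperand("c");
        gen.addOperand("d");
        for (String line : gen.result()) System.out.println(line);
        // MULTI c d $T1
        // ADDI b $T1 a
    }
}
```

For `a := b + c * d`, the inner multiplication completes first and pops off into the temporary, after which the outer addition is found complete and pops as well, so the emitted order matches the evaluation order.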

Difficulties

Since our IR format requires that mathematical operations be distinguished by the data type they work with (i.e., the ADDI and ADDF opcodes are considered unique), the IR generator has to determine what data type it is working with when generating code, which may not always be immediately obvious. The simple solution we developed was to perform type determination for opcodes based on the first operand of the instruction (which, due to the order in which the listener traverses the syntax tree, will always have been determined beforehand). The type of the result operand of the instruction is determined when popping it off the stack, if it hasn't been already. This solution has the disadvantage that it may not work if the IR format is extended to support mixed-type instructions, and further considerations may have to be made if the grammar for the language is changed in the future. However, for our purposes this technique works flawlessly.
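Since LITTLE has no mixed-type expressions, the rule reduces to appending a type suffix chosen from the first operand. A trivial sketch (with illustrative names):

```java
// Sketch of opcode type determination: the typed opcode (ADDI vs ADDF,
// MULTI vs MULTF, etc.) is derived from the type of the first operand,
// which the listener has always resolved before the opcode is finalized.
public class OpcodeTyping {
    public enum Type { INT, FLOAT }

    // e.g. "ADD" + INT -> "ADDI", "MULT" + FLOAT -> "MULTF"
    public static String typedOpcode(String baseOp, Type firstOperand) {
        return baseOp + (firstOperand == Type.INT ? "I" : "F");
    }

    public static void main(String[] args) {
        System.out.println(typedOpcode("ADD", Type.INT));    // ADDI
        System.out.println(typedOpcode("MULT", Type.FLOAT)); // MULTF
    }
}
```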

Full Fledged Compiler

Background

While IR is a fantastic way to represent what the user was trying to accomplish with their code in a clear and language-independent manner, it cannot be run on an actual machine, and it has no understanding of register counts or other machine limitations. The solution is to use an assembly emitter, which consumes an array of IR and generates actual assembly instructions for manipulating registers and executing instructions in a machine-dependent manner. For this project, we implemented a backend which solely targets an architecture known as the Tiny VM, but due to the way the code is written it would be trivial to add additional architecture targets. The addition of this component completes the compiler, as it is now able to consume source code written in LITTLE and generate assembly instructions for Tiny, two languages which are completely unrelated.

Methods

The Tiny VM supports a generous number of registers, so conservative register allocation was not a concern for this backend. Tiny also supports assigning arbitrary names to allocated memory locations via the var and str instructions, which we made use of by doing a linear traversal of the symbol table (which was all that was required, since all test programs effectively consist of a single scope). To avoid collisions with any reserved Tiny names (such as register names), symbol names are slightly modified from their source representation. Anything not contained in the symbol table (temporaries) is allocated to registers as it is encountered.
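The declaration pass can be sketched as a linear walk over the symbol table that emits one Tiny `var` or `str` directive per symbol, prefixing names with "v_" to avoid colliding with register names such as `r0`. The structure below is illustrative, not the compiler's actual code:

```java
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Sketch of the Tiny declaration pass: one directive per symbol, with
// string symbols carrying their literal value and everything else
// becoming a plain named memory location.
public class TinyDecls {
    public static List<String> emitDecls(Map<String, String> symbols) {
        List<String> out = new ArrayList<>();
        for (Map.Entry<String, String> e : symbols.entrySet()) {
            if (e.getValue().startsWith("\"")) {
                // String symbols: "str" directive with the literal value.
                out.add("str v_" + e.getKey() + " " + e.getValue());
            } else {
                // INT/FLOAT symbols: a named memory location.
                out.add("var v_" + e.getKey());
            }
        }
        return out;
    }

    public static void main(String[] args) {
        Map<String, String> symbols = new LinkedHashMap<>();
        symbols.put("x", "INT");
        symbols.put("greeting", "\"hello\"");
        for (String line : emitDecls(symbols)) System.out.println(line);
        // var v_x
        // str v_greeting "hello"
    }
}
```

Using an insertion-ordered map keeps declarations in source order, mirroring the OrderedHashMap inside the real SymbolTable.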

Due to the linear nature in which IR is stored, traversing it in its entirety is simply a single for loop. The body of the loop consists of a large switch statement which handles each opcode supported by the IR format, and transforms the instruction as necessary.

Difficulties

Our IR format and Tiny have slightly different representations of mathematical operations as well as comparison-based branching, so special consideration was required for those instructions. The extent of it was that in some cases a reserved register known as the swap register was used to hold the intermediate results of multiple Tiny instructions where our IR format only required a single instruction. Without performing non-trivial analysis on the code, it would be difficult to find a more efficient solution to this problem. Since Tiny is fairly unique in its limitations and not highly utilized, we determined that there was not much benefit in developing one anyway. Other than that, the generated Tiny instructions effectively have a 1-to-1 correspondence with the IR.

Conclusion and Future Work

Future work could include optimization and extending the compiler to handle additional data types or structures such as booleans and arrays. Optimization would mostly revolve around peephole techniques that replace a set of slow instructions with faster ones and remove redundant code and stack instructions. These techniques would be implemented through a pattern-matching approach that examines a small "window" of code sliding through the intermediate representation. The optimizations could then be looped through numerous times before passing a final intermediate representation on to the code generator.

In conclusion, this project has allowed for the application of various aspects of computer science knowledge including theory, software engineering, version control, and collaboration. Given more time, it would have been interesting to write equivalent programs in LITTLE and C and compare the compilers to determine how well our LITTLE compiler performs. Furthermore, comparisons could have been made between each group's compiler within the class, because each implementation could be vastly different for the symbol table and semantic routines.
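As a concrete illustration of the sliding-window idea, the sketch below repeatedly scans the IR with a one-instruction window and removes no-op self-moves; a real peephole pass would use larger windows and a richer pattern set. The IR strings and the pattern here are hypothetical, not taken from the compiler's actual IR:

```java
import java.util.ArrayList;
import java.util.List;

// Sketch of a peephole pass: slide a window over the IR, delete any
// instruction whose source and destination are identical (a no-op
// self-move), and repeat until no window matches.
public class Peephole {
    public static List<String> optimize(List<String> ir) {
        List<String> out = new ArrayList<>(ir);
        boolean changed = true;
        while (changed) { // loop until a full pass makes no change
            changed = false;
            for (int i = 0; i < out.size(); i++) {
                String[] p = out.get(i).split(" ");
                // Window of size one: "OP x x" moves x onto itself.
                if (p.length == 3 && p[1].equals(p[2])) {
                    out.remove(i);
                    changed = true;
                    break;
                }
            }
        }
        return out;
    }

    public static void main(String[] args) {
        List<String> ir = new ArrayList<>(List.of(
            "STOREI 5 a",
            "STOREI a a",   // redundant self-move, removed
            "WRITEI a"));
        System.out.println(optimize(ir)); // [STOREI 5 a, WRITEI a]
    }
}
```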

Section 5: UML

The following figure contains a combined UML diagram for the entire compiler. Classes which were generated by or inherited from the ANTLR library have been omitted for clarity and due to their generic nature.

Section 6: Design Trade-offs

While our IR format represents most instructions as three-address codes, the Tiny VM architecture only supports two-address codes, so the IR is not directly translatable to Tiny code. The solution we developed to work around this was to have a single reserved swap register that acts as both an operand and the result for each two-address-code instruction. The register is initialized with the first operand of the IR instruction, and then used as the second operand of the Tiny instruction. Afterwards, the value in the swap register is moved into the result operand of the IR instruction. This is not terribly performant, but without performing a non-trivial amount of analysis on the code, it is the best solution we could create.

The Tiny architecture also supports a great number of registers, which is unlike most others. However, registers may only be named r0 - rn, which makes the generated code rather difficult to read. In order to support manual validation of the generated code, we made use of Tiny's var instruction, which allows for the creation of named memory addresses. The names given to these are based on the actual variable names used in the original source code, with the exception that they are prefixed with "v_", to prevent the unlikely scenario where a variable was given the same name as a Tiny register, which would have resulted in invalid code being generated. The result is that the generated Tiny code is fairly easy to read, though it is not technically as performant as it could be.
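The lowering described above can be sketched directly from the Tiny forms used elsewhere in this report (`move`, `addi`, and the reserved swap register `r0`); the class and method names are illustrative:

```java
import java.util.List;

// Sketch of the swap-register lowering: one three-address IR instruction
// ("ADDI op1 op2 result") becomes three two-address Tiny instructions
// routed through the reserved swap register r0.
public class SwapLowering {
    private static final String SWAP_REG = "r0";

    public static List<String> lower(String tinyOp, String op1, String op2, String result) {
        return List.of(
            "move " + op1 + " " + SWAP_REG,      // initialize swap with first operand
            tinyOp + " " + op2 + " " + SWAP_REG, // swap register accumulates the result
            "move " + SWAP_REG + " " + result);  // store the result to its destination
    }

    public static void main(String[] args) {
        // IR "ADDI a b c" lowered to Tiny:
        for (String line : lower("addi", "v_a", "v_b", "v_c")) System.out.println(line);
        // move v_a r0
        // addi v_b r0
        // move r0 v_c
    }
}
```

The three emitted lines correspond one-to-one with the gen_store_in_swap, gen_2ac, and gen_load_from_swap helpers shown in the TinyEmitter listing.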

Section 7: Software Development Lifecycle

For this project, we used the iterative development cycle. For each step of the compiler, we did one iteration of the cycle. Each cycle started with examining the requirements provided in the documentation for the project. With the requirements in mind, we moved on to the planning stage. Usually, this consisted of meeting in person and developing a plan for how we were going to structure our solution. Once we had a plan that would fulfill the requirements, we started development using the methods discussed in Section 2. When the solution was implemented, we continued to the testing phase. In the testing phase, we compared outputs visually, and then finally checked our outputs against the grading script, which utilizes the diff command. After the testing was finished, we addressed any issues discovered during the phase, and deployed the code (turned it in).

This cycle was very beneficial to us. Before we even opened our IDE, we had a solid plan for each step. This meant that we never had to spend too much time on rework, because our initial structure covered all of the normal cases. We only ever had to address special cases and syntactical errors in the debugging stage. Using the iterative development cycle meant that our meetings were streamlined and effective at finishing the work required.

Section 8: References

antlr/antlr4 documentation. Retrieved January 2017, from https://github.com/antlr/antlr4/blob/master/doc/index.md
