<<

Language Processors

COMS W4115
Prof. Stephen A. Edwards
Fall 2004
Columbia University
Department of Computer Science

Interpreter: Source Program and Input → Interpreter → Output

Compiler: Source Program → Compiler → Program; Input → Program → Output

Bytecode Interpreter Just-in-time Compiler Language Speeds Compared

Language Impl. gcc Source Program Ocaml SML mlton Source Program C++ g++ SML smlnj cmucl Scheme Ocaml ocamlb Compiler java Pike pike Forth gforth Lua lua Compiler Python python perl Ruby ruby Eiffel se Mercury mercury Awk mawk Haskell ghc Lisp rep Icon icon Bytecode tcl Javascript njs Scheme guile Just-in-time Compiler Forth bigforth Erlang erlang Awk gawk Input Output Lisp xemacs Scheme stalin Input Bytecode Interpreter Output PHP Code Bash bash native code JIT

http://www.bagley.org/~doug/shootout/

Separate Compilation

foo.c bar.c → C compiler cc: → foo.s bar.s → Assembler as: → foo.o bar.o
printf.o fopen.o malloc.o · · · → Archiver ar: → libc.a
foo.o bar.o libc.a → Linker ld: → foo — An Executable

The Preprocessor

“Massages” the input before the compiler sees it.

• Macro expansion
• File inclusion
• Conditional compilation

Input (example.c):

#include <stdio.h>
#define min(x, y) \
  ((x)<(y))?(x):(y)
#ifdef DEFINE_BAZ
int baz();
#endif
void foo()
{
  int a = 1;
  int b = 2;
  int c;
  c = min(a,b);
}

cc -E example.c gives

extern int
printf(char*,...);
... many more declarations
from stdio.h
void foo()
{
  int a = 1;
  int b = 2;
  int c;
  c = ((a)<(b))?(a):(b);
}

Compiling a Simple Program What the Compiler Sees Lexical Analysis Gives Tokens

int gcd(int a, int b)
{
  while (a != b) {
    if (a > b) a -= b;
    else b -= a;
  }
  return a;
}

int gcd(int a, int b)
{
  while (a != b) {
    if (a > b) a -= b;
    else b -= a;
  }
  return a;
}

i n t sp g c d ( i n t sp a , sp i n t sp b ) nl
{ nl
sp sp w h i l e sp ( a sp ! = sp b ) sp { nl
sp sp sp sp i f sp ( a sp > sp b ) sp a sp - = sp b ; nl
sp sp sp sp e l s e sp b sp - = sp a ; nl
sp sp } nl
sp sp r e t u r n sp a ; nl
} nl

Text file is a sequence of characters

int gcd(int a, int b)
{
  while (a != b) {
    if (a > b) a -= b;
    else b -= a;
  }
  return a;
}

int gcd ( int a , int b ) { while ( a != b ) { if ( a > b ) a -= b ; else b -= a ; } return a ; }

A stream of tokens. Whitespace, comments removed.

Parsing Gives an AST

int gcd(int a, int b)
{
  while (a != b) {
    if (a > b) a -= b;
    else b -= a;
  }
  return a;
}

func
  int
  gcd
  args
    arg: int a
    arg: int b
  seq
    while
      != (a, b)
      if
        > (a, b)
        -= (a, b)
        -= (b, a)
    return
      a

Abstract syntax tree built from rules.

Semantic Analysis Resolves Symbols

func
  int
  gcd
  args
    arg: int a
    arg: int b
  seq
    while
      != (a, b)
      if
        > (a, b)
        -= (a, b)
        -= (b, a)
    return
      a

Symbol Table:
  int a
  int b

Types checked; references to symbols resolved

Translation into 3-Address Code

int gcd(int a, int b)
{
  while (a != b) {
    if (a > b) a -= b;
    else b -= a;
  }
  return a;
}

L0: sne   $1, a, b
    seq   $0, $1, 0
    btrue $0, L1      % while (a != b)
    sl    $3, b, a
    seq   $2, $3, 0
    btrue $2, L4      % if (a > b)
    sub   a, a, b     % a -= b
    jmp   L5
L4: sub   b, b, a     % b -= a
L5: jmp   L0
L1: ret   a

Idealized language w/ infinite registers

Generation of 80386 Assembly

gcd:  pushl %ebp           % Save FP
      movl  %esp,%ebp
      movl  8(%ebp),%eax   % Load a from stack
      movl  12(%ebp),%edx  % Load b from stack
.L8:  cmpl  %edx,%eax
      je    .L3            % while (a != b)
      jle   .L5            % if (a > b)
      subl  %edx,%eax      % a -= b
      jmp   .L8
.L5:  subl  %eax,%edx      % b -= a
      jmp   .L8
.L3:  leave                % Restore SP, BP
      ret