Changelog

Changes made in this version not seen in first lecture: assembly / ISAs 5 September 2017: slide 3: lea destination should have been rax, not rsp 5 September 2017: slide 10: signed result w/o truncation is $-2^{64} + 1$, not $-2^{64} - 1$ 5 September 2017: slide 12: changed version B and C to use jge 5 September 2017: slide 14: fix bugs in third version: compare to 10 (not 9), end with %rbx set 8 September 2017: slide 11: use %rax, not %rbx

1 1

last time the quiz: ASM

AT&T syntax movq %rsp, %rax movq (%rax,%rcx,4), %rbx = # %rax ← %rsp = X mov RBX, QWORD PTR [RAX + RCX * 4] pushq %rax C to assembly # %rsp ← %rsp - 8 = X - 8 strategy — write with gotos first # memory[%rsp] = %rax subq %rsp, %rax condition codes # %rax ← %rax - %rsp = X - (X - 8) = 8 set by arithmetic instructions + cmp or test leaq (%rax, %rax, 2), %rax used by conditional jump # %rax ← %rax + %rax * 2 = 8 + 8 * 2 = 24 names based on subtraction (cmp) result = 0: equal; result positive: greater; result negative: less than …more detail today

2 3 upcoming labs last time this week: pointer-heavy code in C AT&T syntax movq (%rax,%rcx,4), %rbx = use tool to find malloc/free-related mistakes mov RBX, QWORD PTR [RAX + RCX * 4] fix broken circular doubly-linked list implementation C to assembly strategy — write with gotos first next week: in-lab quiz condition codes set by arithmetic instructions + cmp or test implement library functions strlen/strsep used by conditional jump names based on subtraction (cmp) no notes result = 0: equal; result positive: greater; result negative: less than …more detail today

4 5

condition codes and jumps condition codes: closer look jg, jle, etc condition codes: ZF (“zero flag”) — was result zero? (sub/cmp: equal) named based on interpreting result of subtraction SF (“sign flag”) — was result negative? (sub/cmp: less) CF (“carry flag”) — did computation overflow (as unsigned)? zero: equal OF (“overflow flag”) — did computation overflow (as signed)? (and one more we won’t talk about) negative: less than GDB: part of “eflags” register positive: greater than set by cmp, test, arithmetic

6 7 closer look: condition codes (1) closer look: condition codes (1)

movq $−10, %rax movq $−10, %rax movq $20, %rbx movq $20, %rbx cmpq %rax, %rbx cmpq %rax, %rbx // result = %rbx - %rax = 30 // result = %rbx - %rax = 30 as signed: 20 − (−10) = 30 as signed: 20 − (−10) = 30 as unsigned: $20 - (2^{64} - 10) = 30 - 2^{64}$, which wraps to 30 (overflow!) as unsigned: $20 - (2^{64} - 10) = 30 - 2^{64}$, which wraps to 30 (overflow!) ZF = 0 (false) not zero rax and rbx not equal ZF = 0 (false) not zero rax and rbx not equal

8 8

closer look: condition codes (1) closer look: condition codes (1)

movq $−10, %rax movq $−10, %rax movq $20, %rbx movq $20, %rbx cmpq %rax, %rbx cmpq %rax, %rbx // result = %rbx - %rax = 30 // result = %rbx - %rax = 30 as signed: 20 − (−10) = 30 as signed: 20 − (−10) = 30 as unsigned: $20 - (2^{64} - 10) = 30 - 2^{64}$, which wraps to 30 (overflow!) as unsigned: $20 - (2^{64} - 10) = 30 - 2^{64}$, which wraps to 30 (overflow!) ZF = 0 (false) not zero rax and rbx not equal ZF = 0 (false) not zero rax and rbx not equal SF = 0 (false) not negative rax <= rbx SF = 0 (false) not negative rax <= rbx OF = 0 (false) no overflow as signed correct for signed

8 8 closer look: condition codes (1) exercise: condition codes (2)

movq $−10, %rax // 2^63 - 1 movq $20, %rbx movq $0x7FFFFFFFFFFFFFFF, %rax // 2^63 (unsigned); -2**63 (signed) cmpq %rax, %rbx movq $0x8000000000000000, %rbx // result = %rbx - %rax = 30 cmpq %rax, %rbx // result = %rbx - %rax as signed: 20 − (−10) = 30 ZF = ? X X 64 64XX as unsigned: 20 − (2 − 10) = −2 −X30XX 30 (overflow!) SF = ? OF = ? ZF = 0 (false) not zero rax and rbx not equal CF = ? SF = 0 (false) not negative rax <= rbx OF = 0 (false) no overflow as signed correct for signed CF = 1 (true) overflow as unsigned incorrect for unsigned

8 9

closer look: condition codes (2) closer look: condition codes (2)

// 2**63 - 1 // 2**63 - 1 movq $0x7FFFFFFFFFFFFFFF, %rax movq $0x7FFFFFFFFFFFFFFF, %rax // 2**63 (unsigned); -2**63 (signed) // 2**63 (unsigned); -2**63 (signed) movq $0x8000000000000000, %rbx movq $0x8000000000000000, %rbx cmpq %rax, %rbx cmpq %rax, %rbx // result = %rbx - %rax // result = %rbx - %rax as signed: $-2^{63} - (2^{63} - 1) = -2^{64} + 1$, which wraps to 1 (overflow) as signed: $-2^{63} - (2^{63} - 1) = -2^{64} + 1$, which wraps to 1 (overflow) as unsigned: $2^{63} - (2^{63} - 1) = 1$ as unsigned: $2^{63} - (2^{63} - 1) = 1$ ZF = 0 (false) not zero rax and rbx not equal ZF = 0 (false) not zero rax and rbx not equal

10 10 closer look: condition codes (2) closer look: condition codes (2)

// 2**63 - 1 // 2**63 - 1 movq $0x7FFFFFFFFFFFFFFF, %rax movq $0x7FFFFFFFFFFFFFFF, %rax // 2**63 (unsigned); -2**63 (signed) // 2**63 (unsigned); -2**63 (signed) movq $0x8000000000000000, %rbx movq $0x8000000000000000, %rbx cmpq %rax, %rbx cmpq %rax, %rbx // result = %rbx - %rax // result = %rbx - %rax as signed: $-2^{63} - (2^{63} - 1) = -2^{64} + 1$, which wraps to 1 (overflow) as signed: $-2^{63} - (2^{63} - 1) = -2^{64} + 1$, which wraps to 1 (overflow) as unsigned: $2^{63} - (2^{63} - 1) = 1$ as unsigned: $2^{63} - (2^{63} - 1) = 1$ ZF = 0 (false) not zero rax and rbx not equal ZF = 0 (false) not zero rax and rbx not equal SF = 0 (false) not negative rax <= rbx (if correct) SF = 0 (false) not negative rax <= rbx (if correct) OF = 1 (true) overflow as signed incorrect for signed

10 10

closer look: condition codes (2) closer look: condition codes (3)

// 2**63 - 1 movq $−1, %rax movq $0x7FFFFFFFFFFFFFFF, %rax addq $−2, %rax // 2**63 (unsigned); -2**63 (signed) movq $0x8000000000000000, %rbx // result = -3 cmpq %rax, %rbx // result = %rbx - %rax as signed: −1 + (−2) = −3 X 63  63  X64X 64 64 65XXX 64 as signed: −2 − 2 − 1 = −2X+XX 1 1 (overflow) as unsigned: (2 − 1) + (2 − 2) = 2  −XX3 2 − 3 (overflow)   as unsigned: 263 − 263 − 1 = 1 ZF = 0 (false) not zero result not zero ZF = 0 (false) not zero rax and rbx not equal SF = 0 (false) not negative rax <= rbx (if correct) OF = 1 (true) overflow as signed incorrect for signed CF = 0 (false) no overflow as unsigned correct for unsigned 10 11 start_loop: if (b < 10) goto end_loop; foo(); b += 1; goto start_loop; end_loop:

closer look: condition codes (3) while exercise

movq $−1, %rax while (b < 10) { foo(); b += 1; } addq $−2, %rax Assume b is in callee-saved register %rbx. Which are correct assembly // result = -3 translations? // version A // version B // version C as signed: −1 + (−2) = −3 start_loop: start_loop: start_loop: call foo cmpq $10, %rbx movq $10, %rax 64 64 65X X 64 as unsigned: (2 − 1) + (2 − 2) = 2 X−XX3 2 − 3 (overflow) addq $1, %rbx jge end_loop subq %rbx, %rax cmpq $10, %rbx call foo jge end_loop ZF = 0 (false) not zero result not zero jl start_loop addq $1, %rbx call foo jmp start_loop addq $1, %rbx SF = 1 (true) negative result is negative end_loop: jmp start_loop OF = 0 (false) no overflow as signed correct for signed end_loop: CF = 1 (true) overflow as unsigned incorrect for unsigned

11 12

while to assembly (1) while to assembly (1) while (b < 10) { while (b < 10) { foo(); foo(); b += 1; b += 1; } } start_loop: if (b >= 10) goto end_loop; foo(); b += 1; goto start_loop; end_loop:

13 13 cmpq $10, %rbx cmpq $10, %rbx cmpq $10, %rbx jge end_loop jge end_loop jge end_loop start_loop: movq $10, %rax movq $10, %rax call foo subq %rbx, %rax subq %rbx, %rax addq $1, %rbx movq %rax, %rbx movq %rax, %rbx cmpq $10, %rbx start_loop: start_loop: jne start_loop call foo call foo end_loop: decq %rbx decq %rbx ... jne start_loop jne start_loop ... movq $10, %rbx movq $10, %rbx ... end_loop: end_loop:

while — levels of optimization while — levels of optimization

while (b < 10) { foo(); b += 1; } while (b < 10) { foo(); b += 1; }

start_loop: start_loop: cmpq $10, %rbx cmpq $10, %rbx cmpq $10, %rbx jge end_loop jge end_loop jge end_loop start_loop: call foo call foo call foo addq $1, %rbx addq $1, %rbx addq $1, %rbx jmp start_loop jmp start_loop cmpq $10, %rbx end_loop: end_loop: jne start_loop ...... end_loop: ......

14 14

while — levels of optimization compiling switches (1)

while (b < 10) { foo(); b += 1; } switch (a) { case 1: ...; break; start_loop: cmpq $10, %rbx cmpq $10, %rbx case 2: ...; break; cmpq $10, %rbx jge end_loop jge end_loop ... jge end_loop start_loop: movq $10, %rax call foo call foo subq %rbx, %rax default: ... addq $1, %rbx addq $1, %rbx movq %rax, %rbx } jmp start_loop cmpq $10, %rbx start_loop: end_loop: jne start_loop call foo // same as if statement? ... end_loop: decq %rbx cmpq $1, %rax ...... jne start_loop je code_for_1 ...... movq $10, %rbx cmpq $2, %rax ...... end_loop: je code_for_2 cmpq $3, %rax je code_for_3 ... jmp code_for_default

14 15 compiling switches (2) compiling switches (3)

switch (a) { switch (a) { case 1: ...; break; case 1: ...; break; case 2: ...; break; case 2: ...; break; ...... case 100: ...; break; case 100: ...; break; default: ... default: ... } } // binary search table: cmpq $50, %rax // jump table // not instructions jl code_for_less_than_50 cmpq $100, %rax // .quad = 64-bit (4 x 16) constant cmpq $75, %rax jg code_for_default .quad code_for_1 jl code_for_50_to_75 cmpq $1, %rax .quad code_for_2 ... jl code_for_default .quad code_for_3 code_for_less_than_50: jmp *table(,%rax,8) .quad code_for_4 cmpq $25, %rax ... jl less_than_25_cases ... 16 17

computed jumps preview: our Y86 condition codes

cmpq $100, %rax ZF (zero flag), SF (sign flag) jg code_for_default cmpq $1, %rax just won’t handle overflow/underflow jl code_for_default // jump to memory[table + rax * 8] // table of pointers to instructions jmp *table(,%rax,8) // intel: jmp QWORD PTR[rax*8 + table] ... table: .quad code_for_1 .quad code_for_2 .quad code_for_3 18 19 ... microarchitecture v. instruction set selected instruction set design concerns microarchitecture — design of the hardware ease of creating efficient assembly/ “generations” of Intel’s x86 chips different microarchitectures for very low-power versus laptop/desktop ease of designing efficient/cheap/low power/…hardware changes in performance/efficiency flexibility for the future instruction set — interface visible by software what matters for software compatibility many ways to implement (but some might be easier)

20 21

ISAs being manufactured today ISA variation x86 — dominant in desktops, servers instruction set instr. # normal approx. length registers # instrs. ARM — dominant in mobile devices x86-64 1–15 byte 16 1500 POWER — Wii U, IBM supercomputers and some servers Y86-64 1–10 byte 15 18 MIPS — common in consumer wifi access points ARMv7 4 byte* 16 400 SPARC — some Oracle servers, Fujitsu supercomputers POWER8 4 byte 32 1400 z/Architecture — IBM mainframes MIPS32 4 byte 31 200 Z80 — TI calculators Itanium 41 bits* 128 300 Z80 1–4 byte 7 40 SHARC — some digital signal processors VAX 1–14 byte 8 150 Itanium — some HP servers (being retired) z/Architecture 2–6 byte 16 1000 RISC V — some embedded RISC V 4 byte* 31 500* … 22 23 other choices: condition codes? other choices: addressing modes instead of: ways of specifying operands. examples: cmpq %r11, %r12 x86-64: 10(%r11,%r12,4) je somewhere ARM: %r11 << 3 (shift register value by constant) could do: VAX: ((%r11)) (register value is pointer to pointer) /* _B_ranch if _EQ_ual */ beq %r11, %r12, somewhere

24 25

other choices: number of operands other choices: instruction complexity add src1, src2, dest instructions that write multiple values? ARM, POWER, MIPS, SPARC, … x86-64: push, pop, movsb,… add src2, src1=dest more? x86, AVR, Z80, … VAX: both

26 27 CISC — Complex Instruction Set Computer

CISC and RISC CISC and RISC

RISC — Reduced Instruction Set Computer RISC — Reduced Instruction Set Computer reduced from what? reduced from what?

CISC — Complex Instruction Set Computer

28 28

some VAX instructions microcode

MATCHC haystackPtr, haystackLen, needlePtr, needleLen MATCHC haystackPtr, haystackLen, needlePtr, needleLen Find the position of the string in needle within haystack. Find the position of the string in needle within haystack.

POLY x, coefficientsLen, coefficientsPtr loop in hardware??? Evaluate the polynomial whose coefficients are pointed to by coefficientsPtr at the value x. typically: lookup sequence of microinstructions (“microcode”) secret simpler instruction set EDITPC sourceLen, sourcePtr, patternLen, patternPtr Edit the string pointed to by sourcePtr using the pattern string specified by patternPtr.

29 30 Why RISC? typical RISC ISA properties complex instructions were usually not faster fewer, simpler instructions complex instructions were harder to implement separate instructions to access memory compilers, not hand-written assembly fixed-length instructions more registers no “loops” within single instructions no instructions with two memory operands few addressing modes

31 32

Y86-64 instruction set Y86-64 instruction set based on x86 based on x86 omits most of the 1000+ instructions omits most of the 1000+ instructions leaves leaves addq jmp pushq addq jmp pushq subq jCC popq subq jCC popq andq cmovCC movq (renamed) andq cmovCC movq (renamed) xorq call hlt (renamed) xorq call hlt (renamed) nop ret nop ret much, much simpler encoding much, much simpler encoding

33 33 Y86-64: movq Y86-64: movq

destination i — immediate destination i — immediate source r — register source r — register m — memory m — memory SDmovq SDmovq

X X X X XX XX XX XX irmovq immovqXXX iimovqXXX irmovq immovqXXX iimovqXXX XXX XXX rrmovq rmmovq rimovqXXX rrmovq rmmovq rimovqXXX X X h (((h XX h (((h XX mrmovq (mmmovq((hhhh mimovqXXX mrmovq (mmmovq((hhhh mimovqXXX

34 34

Y86-64: movq Y86-64 instruction set

destination i — immediate based on x86 source r — register omits most of the 1000+ instructions m — memory leaves SDmovq addq jmp pushq subq jCC popq andq cmovCC movq (renamed) X X XX XX irmovq immovqXXX iimovqXXX xorq call hlt (renamed) XXX nop ret rrmovq rmmovq rimovqXXX X h (((h XX much, much simpler encoding mrmovq (mmmovq((hhhh mimovqXXX

34 35 Invalid: rmmovq %r11, 10(%r12,%r13) Invalid: rmmovq %r11, 10(,%r12,4) Invalid: rmmovq %r11, 10(%r12,%r13,4)

cmovCC Y86-64 instruction set conditional move based on x86 exist on x86-64 (but you probably didn’t see them) omits most of the 1000+ instructions Y86-64: register-to-register only leaves instead of: addq jmp pushq jle skip_move subq jCC popq rrmovq %rax, %rbx andq cmovCC movq (renamed) skip_move: xorq call hlt (renamed) // ... nop ret can do: much, much simpler encoding cmovg %rax, %rbx

36 37

halt Y86-64: specifying addresses

(x86-64 instruction called hlt) Valid: rmmovq %r11, 10(%r12) Y86-64 instruction halt stops the processor otherwise — something’s in memory “after” program! real processors: reserved for OS

38 39 Instead: mrmovq 10(%r11), %r11 /* overwrites %r11 */

addq %r11, %r12

Instead: /* replace %r11 with 8*%r11 */ addq %r11, %r11 addq %r11, %r11 addq %r11, %r11

mrmovq 10(%r11), %r11 addq %r11, %r12

Y86-64: specifying addresses Y86-64: accessing memory (1)

Valid: rmmovq %r11, 10(%r12) r12 ← memory[10 + r11] + r12 Invalid: addq 10(%r11), %r12 Invalid: rmmovq %r11, 10(%r12,%r13) Invalid: rmmovq %r11, 10(,%r12,4) Invalid: rmmovq %r11, 10(%r12,%r13,4)

39 40

Y86-64: accessing memory (1) Y86-64: accessing memory (2) r12 ← memory[10 + r11] + r12 r12 ← memory[10 + 8 * r11] + r12 Invalid: addq 10(%r11), %r12 Invalid: addq 10(,%r11,8), %r12 Instead: mrmovq 10(%r11), %r11 /* overwrites %r11 */

addq %r11, %r12

40 41 Instead, need an extra register: irmovq $1, %r11 addq %r11, %r12

Y86-64: accessing memory (2) Y86-64 constants (1) r12 ← memory[10 + 8 * r11] + r12 irmovq $100, %r11 only instruction with non-address constant operand Invalid: addq 10(,%r11,8), %r12 Instead: /* replace %r11 with 8*%r11 */ addq %r11, %r11 addq %r11, %r11 addq %r11, %r11

mrmovq 10(%r11), %r11 addq %r11, %r12

41 42

Y86-64 constants (2) Y86-64 constants (2) r12 ← r12 + 1 r12 ← r12 + 1 Invalid: addq $1, %r12 Invalid: addq $1, %r12 Instead, need an extra register: irmovq $1, %r11 addq %r11, %r12

43 43 instead: use side effect of normal arithmetic instead of cmpq %r11, %r12 jle somewhere maybe: subq %r11, %r12 jle (but changes %r12)

Y86-64: operand uniqueness Y86-64: condition codes only one kind of value for each operand ZF — value was zero? instruction name tells you the kind SF — sign bit was set? i.e. value was negative? (why movq was ‘split’ into four names) this course: no OF, CF (to simplify assignments)

set by addq, subq, andq, xorq not set by anything else

44 45

Y86-64: using condition codes Y86-64: conditionals (1) subq SECOND, FIRST (value = FIRST - SECOND) (no cmp, test) j__ or condition code bit test value test cmov__ le SF = 1 or ZF = 1 value ≤ 0 l SF = 1 value < 0 e ZF = 1 value = 0 ne ZF = 0 value ≠ 0 ge SF = 0 value ≥ 0 g SF = 0 and ZF = 0 value > 0 missing OF (overflow flag); CF (carry flag)

46 47 instead of cmpq %r11, %r12 jle somewhere maybe: subq %r11, %r12 jle (but changes %r12)

Y86-64: conditionals (1) Y86-64: conditionals (1)

XX XX cmpXXX , testXX cmpXXX , testXX instead: use side effect of normal arithmetic instead: use side effect of normal arithmetic instead of cmpq %r11, %r12 jle somewhere maybe: subq %r11, %r12 jle (but changes %r12)

47 47

push/pop call/ret pushq %rbx call LABEL %rsp ← %rsp − 8 push PC (next instruction address) on stack memory[%rsp] ← %rbx jmp to LABEL address popq %rbx ret %rbx ← memory[%rsp] pop address from stack %rsp ← %rsp + 8 jmp to that address . . stack . stack . growth memory[%rsp + 16] growth memory[%rsp + 16]

memory[%rsp + 8] memory[%rsp + 8] value to pop memory[%rsp] address ret jumps to memory[%rsp] where to push memory[%rsp - 8] where call stores return address memory[%rsp - 8] memory[%rsp - 16] memory[%rsp - 16] 48 49 Y86-64 state typical RISC ISA properties

%rXX — 15 registers fewer, simpler instructions %r15 missing smaller parts of registers missing separate instructions to access memory ZF (zero), SF (sign), OF (overflow) fixed-length instructions book has OF, we’ll not use it CF (carry) missing more registers Stat — processor status — halted? no “loops” within single instructions PC — (AKA instruction pointer) no instructions with two memory operands main memory few addressing modes

50 51

Y86-64 instruction formats Secondary opcodes: cmovcc/jcc

byte: 0 1 2 3 4 5 6 7 8 9 byte: 0 1 2 3 4 5 6 7 8 9 halt 0 0 halt 0 0 nop 1 0 nop 1 0 0 always (jmp/rrmovq) rrmovq/cmovCC rA, rB 2 cc rA rB rrmovq/cmovCC rA, rB 2 cc rA rB 1 le irmovq V, rB 3 0 F rB V irmovq V, rB 3 0 F rB V rmmovq rA, D(rB) 4 0 rA rB D rmmovq rA, D(rB) 4 0 rA rB 2 l D mrmovq D(rB), rA 5 0 rA rB D mrmovq D(rB), rA 5 0 rA rB D 3 e OPq rA, rB 6 fn rA rB OPq rA, rB 6 fn rA rB jCC Dest 7 cc Dest jCC Dest 7 cc 4 ne Dest call Dest 8 0 Dest call Dest 8 0 Dest 5 ge ret 9 0 ret 9 0 pushq rA A 0 rA F pushq rA A 0 rA F 6 g popq rA B 0 rA F popq rA B 0 rA F

52 53 Secondary opcodes: OPq Registers: rA, rB

byte: 0 1 2 3 4 5 6 7 8 9 byte: 0 1 2 30 %rax4 5 6 8 7 %r88 9 halt 0 0 halt 0 0 nop 1 0 1 %rcx 9 %r9 nop 1 0 rrmovq/cmovCC rA, rB 2 cc rA rB rrmovq/cmovCC rA, rB 2 cc rA rB 2 %rdx A %r10 irmovq V, rB 3 0 F rB V irmovq V, rB 3 0 F rB V rmmovq rA, D(rB) 4 0 rA rB D 3 %rbx B %r11 0 add rmmovq rA, D(rB) 4 0 rA rB D mrmovq D(rB), rA 5 0 rA rB D D 1 mrmovq D(rB), rA 5 0 rA rB 4 %rsp C %r12 OPq rA, rB 6 fn rA rB sub OPq rA, rB 6 fn rA rB jCC Dest 7 cc 2 Dest 5 %rbp D %r13 and jCC Dest 7 cc Dest call Dest 8 0 Dest 3 call Dest 8 0 6 %rsiDest E %r14 ret 9 0 xor ret 9 0 pushq rA A 0 rA F 7 %rdi F none pushq rA A 0 rA F popq rA B 0 rA F popq rA B 0 rA F 54 55

Immediates: V, D, Dest Immediates: V, D, Dest

byte: 0 1 2 3 4 5 6 7 8 9 byte: 0 1 2 3 4 5 6 7 8 9 halt 0 0 halt 0 0 nop 1 0 nop 1 0 rrmovq/cmovCC rA, rB 2 cc rA rB rrmovq/cmovCC rA, rB 2 cc rA rB irmovq V, rB 3 0 F rB V irmovq V, rB 3 0 F rB V rmmovq rA, D(rB) 4 0 rA rB D rmmovq rA, D(rB) 4 0 rA rB D mrmovq D(rB), rA 5 0 rA rB D mrmovq D(rB), rA 5 0 rA rB D OPq rA, rB 6 fn rA rB OPq rA, rB 6 fn rA rB jCC Dest 7 cc Dest jCC Dest 7 cc Dest call Dest 8 0 Dest call Dest 8 0 Dest ret 9 0 ret 9 0 pushq rA A 0 rA F pushq rA A 0 rA F popq rA B 0 rA F popq rA B 0 rA F

56 56 irmovq $1, %rax addq %rdi, %rax ret

? ? ? ? 30 F0 01 00 00 00 00 00 00 00 60 70 90 30 F0 01 00 00 00 00 00 00 00 60 70 90

Y86-64 encoding (1) Y86-64 encoding (1) long addOne(long x) { long addOne(long x) { return x + 1; return x + 1; } } x86-64: x86-64: movq %rdi, %rax movq %rdi, %rax addq $1, %rax addq $1, %rax ret ret Y86-64: Y86-64: irmovq $1, %rax addq %rdi, %rax ret

57 57

Y86-64 encoding (2) Y86-64 encoding (2) addOne: addOne: irmovq $1, %rax irmovq $1, %rax addq %rdi, %rax addq %rdi, %rax ret ret

? 3 0 F %rax 01 00 00 00 00 00 00 00 ? 3 0 F 0 01 00 00 00 00 00 00 00

58 58 ? ?

? ? 30 F0 01 00 00 00 00 00 00 00 60 70 90 30 F0 01 00 00 00 00 00 00 00 60 70 90

? ? ? ? ?

30 F0 01 00 00 00 00 00 00 00 60 70 90

Y86-64 encoding (2) Y86-64 encoding (2) addOne: addOne: irmovq $1, %rax irmovq $1, %rax addq %rdi, %rax addq %rdi, %rax ret ret

3 0 F 0 01 00 00 00 00 00 00 00 3 0 F 0 01 00 00 00 00 00 00 00 ? 6 add %rdi %rax ? 6 0 7 0

58 58

Y86-64 encoding (2) Y86-64 encoding (2) addOne: addOne: irmovq $1, %rax irmovq $1, %rax addq %rdi, %rax addq %rdi, %rax ret ret

3 0 F 0 01 00 00 00 00 00 00 00 3 0 F 0 01 00 00 00 00 00 00 00 6 0 7 0 6 0 7 0 ? 9 0 9 0

30 F0 01 00 00 00 00 00 00 00 60 70 90

58 58 ? ? ?

? ? ?

Y86-64 encoding (3) Y86-64 encoding (3) doubleTillNegative: doubleTillNegative: /* suppose at address 0x123 */ /* suppose at address 0x123 */ addq %rax, %rax addq %rax, %rax jge doubleTillNegative jge doubleTillNegative

6 add %rax %rax ? 6 add %rax %rax

59 59

Y86-64 encoding (3) Y86-64 encoding (3) doubleTillNegative: doubleTillNegative: /* suppose at address 0x123 */ /* suppose at address 0x123 */ addq %rax, %rax addq %rax, %rax jge doubleTillNegative jge doubleTillNegative

? 6 0 0 0 6 0 0 0 7 5 23 01 00 00 00 00 00 00

59 59 ? ? ?

rrmovq %rcx, %rax rrmovq %rcx, %rax addq %rdx, %rax addq %rdx, %rax subq %rbx, %rdi subq %rbx, %rdi jl 0x84 jl 0x84 rrmovq %rax, %rcx rrmovq %rax, %rcx jmp 0x68 jmp 0x68

Y86-64 encoding (3) Y86-64 encoding (3) doubleTillNegative: doubleTillNegative: /* suppose at address 0x123 */ /* suppose at address 0x123 */ addq %rax, %rax addq %rax, %rax jge doubleTillNegative jge doubleTillNegative

6 0 0 0 6 0 0 0 ? 7 5 23 01 00 00 00 00 00 00 7 5 23 01 00 00 00 00 00 00

59 59

Y86-64 decoding Y86-64 decoding

20 10 60 20 61 37 72 84 00 00 00 00 00 00 00 20 10 60 20 61 37 72 84 00 00 00 00 00 00 00 20 12 20 01 70 68 00 00 00 00 00 00 00 20 12 20 01 70 68 00 00 00 00 00 00 00

byte: 0 1 2 3 4 5 6 7 8 9 byte: 0 1 2 3 4 5 6 7 8 9 halt 0 0 halt 0 0 nop 1 0 nop 1 0 rrmovq/cmovCC rA, rB 2 cc rA rB rrmovq/cmovCC rA, rB 2 cc rA rB irmovq V, rB 3 0 F rB V irmovq V, rB 3 0 F rB V rmmovq rA, D(rB) 4 0 rA rB D rmmovq rA, D(rB) 4 0 rA rB D mrmovq D(rB), rA 5 0 rA rB D mrmovq D(rB), rA 5 0 rA rB D OPq rA, rB 6 fn rA rB OPq rA, rB 6 fn rA rB jCC Dest 7 cc Dest jCC Dest 7 cc Dest call Dest 8 0 Dest call Dest 8 0 Dest ret 9 0 ret 9 0 pushq rA A 0 rA F pushq rA A 0 rA F popq rA B 0 rA F popq rA B 0 rA F

60 60 addq %rdx, %rax subq %rbx, %rdi jl 0x84 jl 0x84 rrmovq %rax, %rcx rrmovq %rax, %rcx jmp 0x68 jmp 0x68

rrmovq %rax, %rcx jmp 0x68

Y86-64 decoding Y86-64 decoding

20 10 60 20 61 37 72 84 00 00 00 00 00 00 00 20 10 60 20 61 37 72 84 00 00 00 00 00 00 00 20 12 20 01 70 68 00 00 00 00 00 00 00 20 12 20 01 70 68 00 00 00 00 00 00 00

rrmovq %rcx, %rax byte: 0 1 2 3 4 5 6 7 8 9 rrmovq %rcx, %rax byte: 0 1 2 3 4 5 6 7 8 9 halt 0 0 halt 0 0 I 0 as cc: always addq %rdx, %rax nop 1 0 nop 1 0 I 1 as reg: %rcx subq %rbx, %rdi rrmovq/cmovCC rA, rB 2 cc rA rB rrmovq/cmovCC rA, rB 2 cc rA rB I 0 as reg: %rax I 0 as fn: add irmovq V, rB 3 0 F rB V irmovq V, rB 3 0 F rB V I 1 as fn: sub rmmovq rA, D(rB) 4 0 rA rB D rmmovq rA, D(rB) 4 0 rA rB D mrmovq D(rB), rA 5 0 rA rB D mrmovq D(rB), rA 5 0 rA rB D OPq rA, rB 6 fn rA rB OPq rA, rB 6 fn rA rB jCC Dest 7 cc Dest jCC Dest 7 cc Dest call Dest 8 0 Dest call Dest 8 0 Dest ret 9 0 ret 9 0 pushq rA A 0 rA F pushq rA A 0 rA F popq rA B 0 rA F popq rA B 0 rA F

60 60

Y86-64 decoding Y86-64 decoding

20 10 60 20 61 37 72 84 00 00 00 00 00 00 00 20 10 60 20 61 37 72 84 00 00 00 00 00 00 00 20 12 20 01 70 68 00 00 00 00 00 00 00 20 12 20 01 70 68 00 00 00 00 00 00 00

rrmovq %rcx, %rax byte: 0 1 2 3 4 5 6 7 8 9 rrmovq %rcx, %rax byte: 0 1 2 3 4 5 6 7 8 9 addq %rdx, %rax halt 0 0 addq %rdx, %rax halt 0 0 subq %rbx, %rdi nop 1 0 subq %rbx, %rdi nop 1 0 jl 0x84 rrmovq/cmovCC rA, rB 2 cc rA rB jl 0x84 rrmovq/cmovCC rA, rB 2 cc rA rB irmovq V, rB 3 0 F rB V irmovq V, rB 3 0 F rB V I 2 as cc: l (less than) rrmovq %rax, %rcx rmmovq rA, D(rB) 4 0 rA rB D rmmovq rA, D(rB) 4 0 rA rB D I hex 84 00… as little endian Dest: jmp 0x68 mrmovq D(rB), rA 5 0 rA rB D mrmovq D(rB), rA 5 0 rA rB D 0x84 OPq rA, rB 6 fn rA rB OPq rA, rB 6 fn rA rB jCC Dest 7 cc Dest jCC Dest 7 cc Dest call Dest 8 0 Dest call Dest 8 0 Dest ret 9 0 ret 9 0 pushq rA A 0 rA F pushq rA A 0 rA F popq rA B 0 rA F popq rA B 0 rA F

60 60 Y86-64: convenience for hardware Y86-64

Y86-64: simplified, more RISC-y version of X86-64 4 bits to decode instruction byte: 0 1 2 3 4 5 6 7 8 9 halt 0 0 minimal set of arithmetic size/layout nop 1 0 rrmovq/cmovCC rA, rB 2 cc rA rB only movs touch memory (mostly) uniform placement of irmovq V, rB 3 0 F rB V operands D rmmovq rA, D(rB) 4 0 rA rB only jumps, calls, and movs take immediates mrmovq D(rB), rA 5 0 rA rB D jumping to zeroes (uninitialized?) OPq rA, rB 6 fn rA rB simple variable-length encoding by accident halts jCC Dest 7 cc Dest call Dest 8 0 Dest no attempt to fit (parts of) ret 9 0 next time: implementing with circuits multiple instructions in a byte pushq rA A 0 rA F popq rA B 0 rA F

61 62

backup slides closer look: condition codes (3)

movq $−1, %rax testq %rax, %rax // result = -1 ZF = 0 (false) not zero rax not zero SF = 1 (true) negative rax is negative OF = 0 (false) no overflow as signed operation can’t overflow CF = 0 (false) no overflow as unsigned operation can’t overflow

63 64 exercise: condition codes and jXX logical operators

ZF = 0 (false) SF = 1 (true) return 1 for true or 0 for false OF = 0 (false) ( 1 && 1 ) == 1 ( 1 || 1 ) == 1 CF = 1 (true) ( 2 && 4 ) == 1 ( 2 || 4 ) == 1 jle (signed less than equal) should do what? ( 1 && 0 ) == 0 ( 1 || 0 ) == 1 ( 0 && 0 ) == 0 ( 0 || 0 ) == 0 ja (unsigned greater than) should do what? (-1 && -2) == 1 (-1 || -2) == 1 ("" && "") == 1 ("" || "") == 1 ! 1 == 0 ! 4 == 0 !-1 == 0 ! 0 == 1

65 67

recall: short-circuit (&&) recall: short-circuit (&&)

1 #include 1 #include 2 int zero() { printf("zero()\n"); return 0; } 2 int zero() { printf("zero()\n"); return 0; } 3 int one() { printf("one()\n"); return 1; } 3 int one() { printf("one()\n"); return 1; } 4 int main() { 4 int main() { 5 printf(">␣%d\n", zero() && one()); 5 printf(">␣%d\n", zero() && one()); 6 printf(">␣%d\n", one() && zero()); 6 printf(">␣%d\n", one() && zero()); 7 return 0; 7 return 0; 8 } 8 } zero() zero() > 0 AND false true > 0 AND false true one() false false false one() false false false zero() true false true zero() true false true > 0 > 0 68 68 result = foo(); if (result == 0) goto skip_bar; result = bar(); skip_bar: result = (result != 0);

recall: short-circuit (&&) recall: short-circuit (&&)

1 #include 1 #include 2 int zero() { printf("zero()\n"); return 0; } 2 int zero() { printf("zero()\n"); return 0; } 3 int one() { printf("one()\n"); return 1; } 3 int one() { printf("one()\n"); return 1; } 4 int main() { 4 int main() { 5 printf(">␣%d\n", zero() && one()); 5 printf(">␣%d\n", zero() && one()); 6 printf(">␣%d\n", one() && zero()); 6 printf(">␣%d\n", one() && zero()); 7 return 0; 7 return 0; 8 } 8 }

zero() zero() > 0 AND false true > 0 AND false true one() false false false one() false false false zero() true false true zero() true false true > 0 > 0 68 68

recall: short-circuit (&&) && to assembly

1 #include return foo() && bar(); 2 int zero() { printf("zero()\n"); return 0; } 3 int one() { printf("one()\n"); return 1; } 4 int main() { 5 printf(">␣%d\n", zero() && one()); 6 printf(">␣%d\n", one() && zero()); 7 return 0; 8 }

zero() > 0 AND false true one() false false false zero() true false true > 0 68 69 && to assembly && to assembly return foo() && bar(); return foo() && bar();

result = foo(); call foo if (result == 0) goto skip_bar; testl %eax, %eax // result is %eax (return val) result = bar(); je skip_bar // if result == 0 (equal for cmp)... skip_bar: call bar testl %eax, %eax // result is %eax (return val) result = (result != 0); skip_bar: setne %al // set %al (low 8 bits of %eax) // to 1 if result != 0, to 0 otherwise movzbl %al, %eax // add zeroes to rest of %eax

69 70

recall: short-circuit (||) recall: short-circuit (||)

1 #include 1 #include 2 int zero() { printf("zero()\n"); return 0; } 2 int zero() { printf("zero()\n"); return 0; } 3 int one() { printf("one()\n"); return 1; } 3 int one() { printf("one()\n"); return 1; } 4 int main() { 4 int main() { 5 printf(">␣%d\n", zero() || one()); 5 printf(">␣%d\n", zero() || one()); 6 printf(">␣%d\n", one() || zero()); 6 printf(">␣%d\n", one() || zero()); 7 return 0; 7 return 0; 8 } 8 }

zero() zero() one() OR false true one() OR false true > 1 false false true > 1 false false true one() true true true one() true true true > 1 > 1 71 71 recall: short-circuit (||) recall: short-circuit (||)

1 #include 1 #include 2 int zero() { printf("zero()\n"); return 0; } 2 int zero() { printf("zero()\n"); return 0; } 3 int one() { printf("one()\n"); return 1; } 3 int one() { printf("one()\n"); return 1; } 4 int main() { 4 int main() { 5 printf(">␣%d\n", zero() || one()); 5 printf(">␣%d\n", zero() || one()); 6 printf(">␣%d\n", one() || zero()); 6 printf(">␣%d\n", one() || zero()); 7 return 0; 7 return 0; 8 } 8 }

zero() zero() one() OR false true one() OR false true > 1 false false true > 1 false false true one() true true true one() true true true > 1 > 1 71 71

recall: short-circuit (||) exercise

1 #include movq $3, %rax 2 int zero() { printf("zero()\n"); return 0; } movq $2, %rbx 3 int one() { printf("one()\n"); return 1; } 4 int main() { start_loop: 5 printf(">␣%d\n", zero() || one()); addq %rbx, %rbx 6 printf(">␣%d\n", one() || zero()); cmpq $3, %rbx 7 return 0; subq $1, %rax 8 } jg start_loop zero() What is the value of %rbx after this runs? one() OR false true A. 2 D. 16 > 1 false false true B. 4 E. 32 one() true true true C. 8 F. something else > 1 71 72 Y86-64: simple condition codes (1) the quiz: C (1)

If %r9 is -1 and %r10 is 1: int array[3]; int *ptr; subq %r10, %r9 sizeof(ptr) == sizeof(int *) (pointer size; lab: 8) r9 becomes −1 − (1) = −2. SF = 1 (negative) sizeof(array) = 3 * sizeof(int) (lab: 3 × 4 = 12) ZF = 0 (not zero) andq %r10, %r10 r10 becomes 1 SF = 0 (non-negative) ZF = 0 (not zero)

73 74

the quiz: C (2) exercise: || to assembly typedef struct foo { ... } bar; if (foo() || bar()) quux(); struct foo a_variable_of_this_type; call foo common for, e.g., linked list cmpl $0, %eax bar a_variable_of_this_type; ____ skip_bar // (1) call bar wrong type: struct bar a_variable cmpl $0, %eax wrong type: foo a_variable skip_bar: ____ skip_quux // (2) typedef FIRST SECOND; call quux ‘FIRST a_variable;’ same as ‘SECOND a_variable;’ skip_quux: What belongs in the blanks? A. jg/jle D. je/jne B. jne/jne E. something else C. jne/je F. there are no instructions that make this work 75 76 77