Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Compilers 101: An introduction to programming languages and parsing

Compilers 101: An introduction to programming languages and parsing

I gave this talk at Coderfaire, August 17, 2013. As usual, the slides do not make a ton of sense without the words. Sorry!

However, they contain a complete recursive-descent parser for a toy grammar, in JS. I’ve never given a talk with this much code before. The slides reveal this code line by line, and that worked out very nicely.

More resources for this talk: http://gist.io/6256943

Jason Orendorff

August 17, 2013
Tweet

More Decks by Jason Orendorff

Other Decks in Programming

Transcript

  1. 17 August 2013 Compilers 101 An introduction to programming languages

    and parsing WARNING CONTAINS CODE 0 0 0 1 1 Tuesday, September 3, 13
  2. A compiler is a program that translates code from one

    language to another. Tuesday, September 3, 13
  3. A compiler is a program that translates code from one

    language to another. ✤ to machine code (like FORTRAN, C, Go) Tuesday, September 3, 13
  4. A compiler is a program that translates code from one

    language to another. ✤ to machine code (like FORTRAN, C, Go) ✤ to bytecode (like Java, Python) Tuesday, September 3, 13
  5. A compiler is a program that translates code from one

    language to another. ✤ to machine code (like FORTRAN, C, Go) ✤ to bytecode (like Java, Python) ✤ to another high-level language (cfront, LESS & Sass, CoffeeScript, Traceur, emscripten) Tuesday, September 3, 13
  6. Language is a different kind of data. It’s not line-based.

    It’s not record-based. Tuesday, September 3, 13
  7. Language is a different kind of data. It’s not line-based.

    It’s not record-based. And you can’t fake understanding. Tuesday, September 3, 13
  8. Language is a different kind of data. It’s not line-based.

    It’s not record-based. And you can’t fake understanding. Language has structure. Tuesday, September 3, 13
  9. when flag clicked: forever { point towards mouse-pointer; if (not touching mouse-pointer?) then {

    move (3 * speed) steps } } Tuesday, September 3, 13
  10. when flag clicked: forever { point towards mouse-pointer; if (not touching mouse-pointer?) then {

    move (3 * speed) steps } } Tuesday, September 3, 13
  11. front end (parsing, analysis) back end (code generation) sleep(2*x) pushq

    %rbp movq %rsp, %rbp addl %edi, %edi callq _sleep Inside a compiler Tuesday, September 3, 13
  12. front end (parsing, analysis) back end (code generation) sleep(2*x) call

    sleep * 2 x pushq %rbp movq %rsp, %rbp addl %edi, %edi callq _sleep Inside a compiler Tuesday, September 3, 13
  13. Inside a front end tokenizer parser AST builder 12*x *

    12 x ['12', '*', 'x'] Tuesday, September 3, 13
  14. Inside a front end tokenizer parser AST builder 12*x *

    12 x ['12', '*', 'x'] Tuesday, September 3, 13
  15. Inside a front end tokenizer parser AST builder 12*x *

    12 x ['12', '*', 'x'] out.number('12') Tuesday, September 3, 13
  16. Inside a front end tokenizer parser AST builder 12*x *

    12 x ['12', '*', 'x'] out.number('12') out.name('x') Tuesday, September 3, 13
  17. Inside a front end tokenizer parser AST builder 12*x *

    12 x ['12', '*', 'x'] out.number('12') out.name('x') out.mul( , ) Tuesday, September 3, 13
  18. IfStatement : if ( Expression ) Statement else Statement | if

    ( Expression ) Statement Tuesday, September 3, 13
  19. PrimaryExpr : Number | Name | ( Expr ) MulExpr : PrimaryExpr

    ( * PrimaryExpr | / PrimaryExpr )* Expr : MulExpr ( + MulExpr | - MulExpr )* Tuesday, September 3, 13
  20. PrimaryExpr : Number Name ( Expr ) MulExpr : PrimaryExpr

    ( * PrimaryExpr | / PrimaryExpr )* Expr : MulExpr ( + MulExpr | - MulExpr )* out.number("3") Tuesday, September 3, 13
  21. PrimaryExpr : Number Name ( Expr ) MulExpr : PrimaryExpr

    ( * PrimaryExpr | / PrimaryExpr )* Expr : MulExpr ( + MulExpr | - MulExpr )* out.number("3") out.name("pi") Tuesday, September 3, 13
  22. PrimaryExpr : Number Name ( Expr ) MulExpr : PrimaryExpr

    ( * PrimaryExpr | / PrimaryExpr )* Expr : MulExpr ( + MulExpr | - MulExpr )* out.number("3") out.name("pi") out.mul(lhs, rhs) Tuesday, September 3, 13
  23. PrimaryExpr : Number Name ( Expr ) MulExpr : PrimaryExpr

    ( * PrimaryExpr | / PrimaryExpr )* Expr : MulExpr ( + MulExpr | - MulExpr )* out.number("3") out.name("pi") out.mul(lhs, rhs) out.add(lhs, rhs) Tuesday, September 3, 13
  24. function parse(code, out) { var tokens = tokenize(code); var position

    = 0; function peek() { return tokens[position]; } function consume(token) { assert.strictEqual(token, tokens[position]); position++; } Calculator parser - page 1 of 5 Tuesday, September 3, 13
  25. function parse(code, out) { var tokens = tokenize(code); var position

    = 0; function peek() { return tokens[position]; } function consume(token) { assert.strictEqual(token, tokens[position]); position++; } Calculator parser - page 1 of 5 Tuesday, September 3, 13
  26. function parse(code, out) { var tokens = tokenize(code); var position

    = 0; function peek() { return tokens[position]; } function consume(token) { assert.strictEqual(token, tokens[position]); position++; } Calculator parser - page 1 of 5 Tuesday, September 3, 13
  27. function parse(code, out) { var tokens = tokenize(code); var position

    = 0; function peek() { return tokens[position]; } function consume(token) { assert.strictEqual(token, tokens[position]); position++; } Calculator parser - page 1 of 5 Tuesday, September 3, 13
  28. function parse(code, out) { var tokens = tokenize(code); var position

    = 0; function peek() { return tokens[position]; } function consume(token) { assert.strictEqual(token, tokens[position]); position++; } Calculator parser - page 1 of 5 Tuesday, September 3, 13
  29. function parse(code, out) { var tokens = tokenize(code); var position

    = 0; function peek() { return tokens[position]; } function consume(token) { assert.strictEqual(token, tokens[position]); position++; } Calculator parser - page 1 of 5 Tuesday, September 3, 13
  30. function parse(code, out) { var tokens = tokenize(code); var position

    = 0; function peek() { return tokens[position]; } function consume(token) { assert.strictEqual(token, tokens[position]); position++; } Calculator parser - page 1 of 5 Tuesday, September 3, 13
  31. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  32. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  33. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  34. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  35. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  36. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  37. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  38. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  39. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  40. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  41. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  42. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  43. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  44. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  45. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  46. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  47. function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) {

    consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Calculator parser - page 2 of 5 PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  48. function parseMulExpr() { var expr = parsePrimaryExpr(); var t =

    peek(); while (t === "*" || t === "/") { consume(t); var rhs = parsePrimaryExpr(); if (t === "*") expr = out.mul(expr, rhs); else expr = out.div(expr, rhs); t = peek(); } return expr; } Calculator parser - page 3 of 5 MulExpr : PrimaryExpr ( * PrimaryExpr | / PrimaryExpr )* Tuesday, September 3, 13
  49. function parseMulExpr() { var expr = parsePrimaryExpr(); var t =

    peek(); while (t === "*" || t === "/") { consume(t); var rhs = parsePrimaryExpr(); if (t === "*") expr = out.mul(expr, rhs); else expr = out.div(expr, rhs); t = peek(); } return expr; } Calculator parser - page 3 of 5 MulExpr : PrimaryExpr ( * PrimaryExpr | / PrimaryExpr )* Tuesday, September 3, 13
  50. function parseMulExpr() { var expr = parsePrimaryExpr(); var t =

    peek(); while (t === "*" || t === "/") { consume(t); var rhs = parsePrimaryExpr(); if (t === "*") expr = out.mul(expr, rhs); else expr = out.div(expr, rhs); t = peek(); } return expr; } Calculator parser - page 3 of 5 MulExpr : PrimaryExpr ( * PrimaryExpr | / PrimaryExpr )* Tuesday, September 3, 13
  51. function parseMulExpr() { var expr = parsePrimaryExpr(); var t =

    peek(); while (t === "*" || t === "/") { consume(t); var rhs = parsePrimaryExpr(); if (t === "*") expr = out.mul(expr, rhs); else expr = out.div(expr, rhs); t = peek(); } return expr; } Calculator parser - page 3 of 5 MulExpr : PrimaryExpr ( * PrimaryExpr | / PrimaryExpr )* Tuesday, September 3, 13
  52. function parseMulExpr() { var expr = parsePrimaryExpr(); var t =

    peek(); while (t === "*" || t === "/") { consume(t); var rhs = parsePrimaryExpr(); if (t === "*") expr = out.mul(expr, rhs); else expr = out.div(expr, rhs); t = peek(); } return expr; } Calculator parser - page 3 of 5 MulExpr : PrimaryExpr ( * PrimaryExpr | / PrimaryExpr )* Tuesday, September 3, 13
  53. function parseMulExpr() { var expr = parsePrimaryExpr(); var t =

    peek(); while (t === "*" || t === "/") { consume(t); var rhs = parsePrimaryExpr(); if (t === "*") expr = out.mul(expr, rhs); else expr = out.div(expr, rhs); t = peek(); } return expr; } Calculator parser - page 3 of 5 MulExpr : PrimaryExpr ( * PrimaryExpr | / PrimaryExpr )* Tuesday, September 3, 13
  54. function parseMulExpr() { var expr = parsePrimaryExpr(); var t =

    peek(); while (t === "*" || t === "/") { consume(t); var rhs = parsePrimaryExpr(); if (t === "*") expr = out.mul(expr, rhs); else expr = out.div(expr, rhs); t = peek(); } return expr; } Calculator parser - page 3 of 5 MulExpr : PrimaryExpr ( * PrimaryExpr | / PrimaryExpr )* Tuesday, September 3, 13
  55. function parseMulExpr() { var expr = parsePrimaryExpr(); var t =

    peek(); while (t === "*" || t === "/") { consume(t); var rhs = parsePrimaryExpr(); if (t === "*") expr = out.mul(expr, rhs); else expr = out.div(expr, rhs); t = peek(); } return expr; } Calculator parser - page 3 of 5 MulExpr : PrimaryExpr ( * PrimaryExpr | / PrimaryExpr )* Tuesday, September 3, 13
  56. function parseMulExpr() { var expr = parsePrimaryExpr(); var t =

    peek(); while (t === "*" || t === "/") { consume(t); var rhs = parsePrimaryExpr(); if (t === "*") expr = out.mul(expr, rhs); else expr = out.div(expr, rhs); t = peek(); } return expr; } Calculator parser - page 3 of 5 MulExpr : PrimaryExpr ( * PrimaryExpr | / PrimaryExpr )* Tuesday, September 3, 13
  57. function parseMulExpr() { var expr = parsePrimaryExpr(); var t =

    peek(); while (t === "*" || t === "/") { consume(t); var rhs = parsePrimaryExpr(); if (t === "*") expr = out.mul(expr, rhs); else expr = out.div(expr, rhs); t = peek(); } return expr; } Calculator parser - page 3 of 5 MulExpr : PrimaryExpr ( * PrimaryExpr | / PrimaryExpr )* Tuesday, September 3, 13
  58. function parseMulExpr() { var expr = parsePrimaryExpr(); var t =

    peek(); while (t === "*" || t === "/") { consume(t); var rhs = parsePrimaryExpr(); if (t === "*") expr = out.mul(expr, rhs); else expr = out.div(expr, rhs); t = peek(); } return expr; } Calculator parser - page 3 of 5 MulExpr : PrimaryExpr ( * PrimaryExpr | / PrimaryExpr )* Tuesday, September 3, 13
  59. function parseMulExpr() { var expr = parsePrimaryExpr(); var t =

    peek(); while (t === "*" || t === "/") { consume(t); var rhs = parsePrimaryExpr(); if (t === "*") expr = out.mul(expr, rhs); else expr = out.div(expr, rhs); t = peek(); } return expr; } Calculator parser - page 3 of 5 MulExpr : PrimaryExpr ( * PrimaryExpr | / PrimaryExpr )* Tuesday, September 3, 13
  60. function parseExpr() { var expr = parseMulExpr(); var t =

    peek(); while (t === "+" || t === "-") { consume(t); var rhs = parseMulExpr(); if (t === "+") expr = out.add(expr, rhs); else expr = out.sub(expr, rhs); t = peek(); } return expr; } Calculator parser - page 4 of 5 Expr : MulExpr ( + MulExpr | - MulExpr )* Tuesday, September 3, 13
  61. Calculator parser - page 5 of 5 var result =

    parseExpr(); if (position !== tokens.length) { throw new SyntaxError( "unexpected '" + peek() + "'"); } return result; } Tuesday, September 3, 13
  62. Calculator parser - page 5 of 5 var result =

    parseExpr(); if (position !== tokens.length) { throw new SyntaxError( "unexpected '" + peek() + "'"); } return result; } Tuesday, September 3, 13
  63. Calculator parser - page 5 of 5 var result =

    parseExpr(); if (position !== tokens.length) { throw new SyntaxError( "unexpected '" + peek() + "'"); } return result; } Tuesday, September 3, 13
  64. Calculator parser - page 5 of 5 var result =

    parseExpr(); if (position !== tokens.length) { throw new SyntaxError( "unexpected '" + peek() + "'"); } return result; } Tuesday, September 3, 13
  65. IfStatement : if ( Expression ) Statement else Statement if

    ( Expression ) Statement Tuesday, September 3, 13
  66. IfStatement : if ( Expression ) Statement else Statement if

    ( Expression ) Statement Tuesday, September 3, 13
  67. IfStatement : if ( Expression ) Statement else Statement if

    ( Expression ) Statement PrimaryExpr : Number Name ( Expr ) Tuesday, September 3, 13
  68. IfStatement : if ( Expression ) Statement else Statement if

    ( Expression ) Statement PrimaryExpr : Number Name ( Expr ) function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) { consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Tuesday, September 3, 13
  69. IfStatement : if ( Expression ) Statement else Statement if

    ( Expression ) Statement PrimaryExpr : Number Name ( Expr ) function parsePrimaryExpr() { var t = peek(); if (isNumber(t)) { consume(t); return out.number(t); } else if (isName(t)) { consume(t); return out.name(t); } else if (t === "(") { consume(t); var expr = parseExpr(); if (peek() !== ")") throw new SyntaxError("expected )"); consume(")"); return expr; } else { throw new SyntaxError("didn't expect '" + t + "'"); } } Tuesday, September 3, 13