Parsing, Compiling, and Static Metaprogramming

Patrick Dubroy
September 13, 2013

Parsing, Compiling, and Static Metaprogramming

Learn how to use compilers and parser generators to remove boilerplate, build DSLs, and generally do the impossible.

I’ll explain the basics of how compilers work, and give an overview of some popular JS tools & libraries. I’ll demonstrate how they can help you do all kinds of useful things, like:

- presubmit checks for style guide violations
- extracting strings requiring translation in your code
- automatically inserting logging statements around certain function calls

Finally, for the budding language designers, I’ll explain how to create your own compiled-to-JS language in five minutes using a parser generator.

Patrick Dubroy

September 13, 2013
Tweet

Transcript

  1. Parsing, Compiling and Static Metaprogramming Patrick Dubroy Google Munich @dubroy

  2. Writing programs that manipulate programs as data Metaprogramming

  3. Static Metaprogramming Writing programs that manipulate code

  4. COMPILER

  5. COMPILER Code Code

  6. COMPILER C++ 1011

  7. C++ 1011 PARSER CODEGEN Parse Tree

  8. Esprima A high-performance JavaScript parser written in JavaScript. Also: estraverse

    & escodegen
  9. var esprima = require('esprima'); esprima.parse(" \ function getAnswer() { \

    return 42; \ } \ ");
  10. Program FunctionDeclaration Identifier Body ReturnStatement Literal “42”

  11. { "type": "Program", "body": [ { "type": "FunctionDeclaration", "id": {

    "type": "Identifier", "name": "getAnswer" }, "params": [], "defaults": [], "body": { "type": "BlockStatement", "body": [ { "type": "ReturnStatement", "argument": { "type": "Literal", "value": 42, "raw": "42" } } ] } } ] }
  12. function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var

    errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); }
  13. function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var

    errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); } var ast = esprima.parse(code, parseOptions);
  14. function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var

    errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); } estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } });
  15. function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var

    errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); }
  16. function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var

    errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); }
  17. var foo = bar; var this_is_bad = 3; function blah()

    { return function x() { var oops_another_one; } }
  18. var foo = bar; var this_is_bad = 3; function blah()

    { return function x() { var oops_another_one; } } [ 'Line 1, column 34: Use camelCase for variable names, not hacker_style.', 'Line 1, column 119: Use camelCase for variable names, not hacker_style.' ]
  19. function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:

    function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); }
  20. function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:

    function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); } var ast = esprima.parse(code);
  21. function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:

    function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); } estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } });
  22. function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:

    function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); }
  23. FunctionDeclaration Identifier BlockStatement Statement Statement ... Array .body .body

  24. FunctionDeclaration Identifier BlockStatement Statement Statement ... Array .body .body Statement

  25. FunctionDeclaration Identifier BlockStatement Statement Statement ... Array .body .body Statement

  26. function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:

    function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); } return escodegen.generate(ast);
  27. function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:

    function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); }
  28. addLogging(" \ function foo(a, b) { \ var x =

    'blah'; \ var y = (function () { \ return 3; \ })(); \ } \ foo(1, 'wut', 3); \ ");
  29. addLogging(" \ function foo(a, b) { \ var x =

    'blah'; \ var y = (function () { \ return 3; \ })(); \ } \ foo(1, 'wut', 3); \ "); function foo(a, b) { console.log('Entering foo()'); var x = 'blah'; var y = function () { console.log('Entering <anonymous function>()'); return 3; }(); } foo(1, 'wut', 3);
  30. addLogging(" \ function foo(a, b) { \ var x =

    'blah'; \ var y = (function () { \ return 3; \ })(); \ } \ foo(1, 'wut', 3); \ "); function foo(a, b) { console.log('Entering foo()'); var x = 'blah'; var y = function () { console.log('Entering <anonymous function>()'); return 3; }(); } foo(1, 'wut', 3);
  31. Parser Generators

  32. PARSER GENERATOR Language Grammar Parser

  33. None
  34. Formal Grammars

  35. expr → expr [-+] term | term term → term

    [*/] factor | factor factor → '(' expr ')' | number number → [0-9]+ Context-Free Grammar
  36. expr ::= expr [-+] term | term term ::= term

    [*/] factor | factor factor ::= '(' expr ')' | number number ::= [0-9]+ Backus–Naur Form
  37. expr = expr [-+] term | term term = term

    [*/] factor | factor factor = '(' expr ')' | number number = [0-9]+ EBNF
  38. expr = expr [-+] term | term term = term

    [*/] factor | factor factor = '(' expr ')' | number number = [0-9]+ | | | CFG: Unordered choice
  39. expr = expr [-+] term / term term = term

    [*/] factor / factor factor = '(' expr ')' / number number = [0-9]+ / / / PEG: Ordered Choice
  40. var PEG = require('pegjs'); var parser = PEG.buildParser(" \ expr

    = expr [-+] term / term \ term = term [*/] factor / factor \ factor = '(' expr ')' / number \ number = [0-9]+ \ "); parser.parse('1+10');
  41. var PEG = require('pegjs'); var parser = PEG.buildParser(" \ expr

    = expr [-+] term / term \ term = term [*/] factor / factor \ factor = '(' expr ')' / number \ number = [0-9]+ \ "); parser.parse('1+10'); ~/node_modules/pegjs/lib/peg.js:3316 throw new PEG.GrammarError( ^ PEG.GrammarError: Left recursion detected for rule "expr".
  42. expr = expr [-+] term / term term = term

    [*/] factor / factor factor = '(' expr ')' / number number = [0-9]+ Left Recursion
  43. expr = term ([-+] term)* term = factor ([*/] factor)*

    factor = '(' expr ')' / number number = [0-9]+ Left Recursion
  44. var PEG = require('pegjs'); var parser = PEG.buildParser(" \ expr

    = term ([-+] term)* \ term = factor ([*/] factor)* \ factor = '(' expr ')' / number \ number = [0-9]+ \ "); parser.parse('1+10');
  45. var PEG = require('pegjs'); var parser = PEG.buildParser(" \ expr

    = term ([-+] term)* \ term = factor ([*/] factor)* \ factor = '(' expr ')' / number \ number = [0-9]+ \ "); parser.parse('1+10'); [[["1"],[]],[["+",[["1","0"],[]]]]]
  46. expr = term ([-+] term)* term = factor ([*/] factor)*

    factor = '(' expr ')' / number number = [0-9]+ Semantic Actions
  47. expr = term ([-+] term)* term = factor ([*/] factor)*

    factor = '(' expr ')' / number number = digits:[0-9]+ Semantic Actions
  48. expr = term ([-+] term)* term = factor ([*/] factor)*

    factor = '(' expr ')' / number number = digits:[0-9]+ { return digits.join(''); } Semantic Actions
  49. expr = term ([-+] term)* term = factor ([*/] factor)*

    factor = '(' expr ')' / number number = digits:[0-9]+ { return digits.join(''); } Semantic Actions [["1",[]],[["+",["10",[]]]]]
  50. expr = term ([-+] term)* term = factor ([*/] factor)*

    factor = '(' expr ')' / number number = digits:[0-9]+ { return digits.join(''); } Semantic Actions [["1",[]],[["+",["10",[]]]]] "10"
  51. expr = term ([-+] term)* term = factor ([*/] factor)*

    factor = '(' expr ')' / number number = digits:[0-9]+ { return digits.join(''); }
  52. { function Number(digits) { this.nodeType = 'Number'; this.value = digits.join('');

    } ... } expr = term ([-+] term)* term = factor ([*/] factor)* factor = '(' expr ')' / number number = digits:[0-9]+ { return digits.join(''); }
  53. { function Number(digits) { this.nodeType = 'Number'; this.value = digits.join('');

    } ... } expr = term ([-+] term)* term = factor ([*/] factor)* factor = '(' expr ')' / number number = digits:[0-9]+ { return digits.join(''); }
  54. { function Number(digits) { this.nodeType = 'Number'; this.value = digits.join('');

    } ... } expr = term ([-+] term)* term = factor ([*/] factor)* factor = '(' expr ')' / number number = digits:[0-9]+ { return new Number(digits); }
  55. An AltJS Language in 5 minutes

  56. expr = term ([-+] term)* term = factor ([*/] factor)*

    factor = '(' expr ')' / number number = digits:[0-9]+ { return digits.join(''); }
  57. expr = term ([-+] term)*

  58. expr = term ([-+] term)* / decl

  59. expr = term ([-+] term)* / decl decl = ident

    ' := ' expr
  60. expr = term ([-+] term)* / decl decl = ident

    ' := ' expr ident = (digit / letter / '_')+
  61. expr = term ([-+] term)* / decl decl = ident

    ' := ' expr ident = (digit / letter / '_')+ digit = [0-9]
  62. expr = term ([-+] term)* / decl decl = ident

    ' := ' expr ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z]
  63. expr = term ([-+] term)* / decl decl = ident

    ' := ' expr ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] program = expr? ('.' [ \\n]* expr)*
  64. expr = term ([-+] term)* / decl decl = ident

    ' := ' expr ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] program = expr? ('.' [ \\n]* expr)* > parser.parse('x := 2+5. y := 3')
  65. expr = term ([-+] term)* / decl decl = ident

    ' := ' expr ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] program = expr? ('.' [ \\n]* expr)* > parser.parse('x := 2+5. y := 3') [[["x"]," := ",[["2",[]],[["+",["5",[]]]]]],[[".",[], [["y"]," := ",[["3",[]],[]]]]]]
  66. expr = term ([-+] term)* / decl decl = ident

    ' := ' expr ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] program = expr? ('.' [ \\n]* expr)*
  67. program = expr? ('.' [ \\n]* expr)* expr = term

    ([-+] term)* / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z]
  68. program = expr? ('.' [ \\n]* expr)* expr = term

    ([-+] term)* / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; }
  69. program = expr? ('.' [ \\n]* expr)* expr = term

    ([-+] term)* / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z]
  70. program = expr? ('.' [ \\n]* expr)* expr = term

    ([-+] term)* / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3')
  71. program = expr? ('.' [ \\n]* expr)* expr = term

    ([-+] term)* / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3') ["var x = 2,,+,5,;",[[".",[],"var y = 3,,;"]]]
  72. program = expr? ('.' [ \\n]* expr)* expr = term

    ([-+] term)* / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z]
  73. program = expr? ('.' [ \\n]* expr)* expr = t:term

    rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z]
  74. program = expr? ('.' [ \\n]* expr)* expr = t:term

    rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3')
  75. program = expr? ('.' [ \\n]* expr)* expr = t:term

    rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3') ["var x = 2+5;",[[".",[],"var y = 3;"]]]
  76. program = expr? ('.' [ \\n]* expr)* expr = t:term

    rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z]
  77. program = e:expr? rest:(('.' [\\n ]* e:expr){ return e; })*

    { return [e].concat(rest).join('\n'); } expr = t:term rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z]
  78. program = e:expr? rest:(('.' [\\n ]* e:expr){ return e; })*

    { return [e].concat(rest).join('\n'); } expr = t:term rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3')
  79. program = e:expr? rest:(('.' [\\n ]* e:expr){ return e; })*

    { return [e].concat(rest).join('\n'); } expr = t:term rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3') var x = 2+5; var y = 3;
  80. Resources github.com/pdubroy/jsconfeu-talk Slides: goo.gl/qs4Gna Esprima: esprima.org PEG.js: pegjs.majda.cz

  81. Danke!