Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Writing an interpreter for fun and profit

Writing an interpreter for fun and profit

PyconSG 2015

Terry Chia

June 19, 2015
Tweet

Other Decks in Programming

Transcript

  1. What do you think of when you think of interpreters?

    • Black magic • Written in C or C++
  2. What is RPython? • RPython is a restricted subset of

    Python amenable to static analysis. • RPython is also a framework for producing implementations of dynamic languages. • RPython was developed for the PyPy project.
  3. What is RPly? • RPly is a lexer and parser

    generator. • That works with RPython.
  4. The 5 pieces of an interpreter • Lexer • Parser

    • AST • Bytecode compiler • Bytecode interpreter
  5. from rply import LexerGenerator ! ! lg = LexerGenerator() lg.ignore(r"\s+")

    lg.add("NUMBER", r"\d+") lg.add("ADD", r"\+") lg.add("SEMICOLON", r";") lg.add("PRINT", r"print") lexer = lg.build()
  6. l = lexer.lex("print 1 + 1;") l.next() >>> Token('PRINT', 'print')

    l.next() >>> Token('NUMBER', '1') l.next() >>> Token('ADD', '+') l.next() >>> Token('NUMBER', '1') l.next() >>> Token('SEMICOLON', ';') l.next() >>>
  7. from rply.token import BaseBox ! class Node(BaseBox): pass ! class

    Block(Node): def __init__(self, statements): self.statements = statements ! class Statement(Node): def __init__(self, expression): self.expression = expression ! class Number(Node): def __init__(self, value): self.value = value ! class Print(Node): def __init__(self, expression): self.expression = expression ! class BinaryOp(Node): def __init__(self, operator, left, right): self.operator = operator self.left = left self.right = right
  8. from rply import ParserGenerator ! import ast ! ! pg

    = ParserGenerator( ["SEMICOLON", "NUMBER", "ADD", "PRINT"], ) ! ! @pg.production("statements : statements statement") def statements(s): return ast.Block(s[0].getastlist() + [s[1]]) ! ! @pg.production("statements : statement") def statements_statement(s): return ast.Block([s[0]]) ! ! @pg.production("statement : expression SEMICOLON") def statement_expression(s): return ast.Statement(s[0])
  9. @pg.production("statement : PRINT expression SEMICOLON") def statement_print(s): return ast.Print(s[1]) !

    ! @pg.production("expression : NUMBER") def expression_number(s): return ast.Number(int(s[0].getstr())) ! ! @pg.production("expression : expression ADD expression") def expression_binop(s): return ast.BinaryOp(s[1].getstr(), s[0], s[2])
  10. class Context(object): def __init__(self): self.data = [] self.constants = []

    ! def new_const(self, const): self.constants.append(const) return len(self.constants) - 1 ! def emit(self, bytecode, arg=0): self.data.append(chr(bytecode)) self.data.append(chr(arg)) ! def create_bytecode(self): return ByteCode( "".join(self.data), self.constants[:] )
  11. class RilaObject(object): pass ! ! class RilaNumber(RilaObject): def __init__(self, value):

    self.value = value ! def add(self, other): assert isinstance(other, RilaNumber) return RilaNumber(self.value + other.value) ! def str(self): return str(self.value)
  12. class Block(Node): def compile(self, ctx): for i in self.statements: i.compile(ctx)

    ! class Statement(Node): def compile(self, ctx): self.expression.compile(ctx) ctx.emit(bytecodes.POP_TOP) ! class Number(Node): def compile(self, ctx): ctx.emit( bytecodes.LOAD_CONST, ctx.new_const(RilaNumber(self.value)) ) ! class BinaryOp(BaseBox): def compile(self, ctx): self.left.compile(ctx) self.right.compile(ctx) opname = { "+": bytecodes.BINARY_ADD, } ctx.emit(opname[self.operator]) ! class Print(Node): def compile(self, ctx): self.expression.compile(ctx) ctx.emit(bytecodes.PRINT)
  13. class Frame(object): def __init__(self): self.valuestack = [] ! def push(self,

    value): self.valuestack.append(value) ! def pop(self): return self.valuestack.pop()
  14. class Interpreter(object): ! def interpret(self, bytecode, frame): pc = 0

    while pc < len(bytecode.code): opcode = ord(bytecode.code[pc]) for i, name in bytecodes: if i == opcode: pc = self.run_instructions(name, pc, bytecode, frame) break
  15. def LOAD_CONST(self, pc, bytecode, frame): arg = ord(bytecode.code[pc + 1])

    frame.push(bytecode.constants[arg]) return pc + 2
  16. def BINARY_ADD(self, pc, bytecode, frame): right = frame.pop() left =

    frame.pop() frame.push(left.add(right)) return pc + 2 ! def POP_TOP(self, pc, bytecode, frame): frame.pop() return pc + 2 ! def PRINT(self, pc, bytecode, frame): item = frame.pop() print item.str() return pc + 2
  17. def run(fname): with open(fname, "r") as f: source_code = f.read()

    ! ctx = Context() parser.parse(lexer.lex(source_code)).compile(ctx) ! bytecode = ctx.create_bytecode() frame = Frame(bytecode) interpreter = Interpreter() ! interpreter.interpret(bytecode, frame) ! ! def entry_point(argv): try: filename = argv[1] ! except IndexError: print "Missing argument" return 1 ! run(filename) return 0 ! ! def target(*args): return entry_point, None