Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Playing with Python's internals by Alex Hall

Pycon ZA
October 11, 2018
100

Playing with Python's internals by Alex Hall

This talk will look at two of my libraries which stretch the limits of what's possible with Python:

1. [birdseye](https://github.com/alexmojaki/birdseye), a debugger that records the value of every expression for easy viewing, and
2. [sorcery](https://github.com/alexmojaki/sorcery), a framework for writing magical functions which know the context in which they are called.

They work by inspecting and manipulating Python's inner workings: execution frames, code objects, and most importantly the Abstract Syntax Tree (AST). I will give an overview of these concepts and explain how some parts of the libraries work.

This is for people interested in peeking under the hood of Python from within Python, i.e. no C and no messing with the interpreter.

Pycon ZA

October 11, 2018
Tweet

More Decks by Pycon ZA

Transcript

  1. node = ast.parse('x = 2; print(x + 3)') ast.dump(node) =>

    Module(body=[ Assign(targets=[Name(id='x')], value=Num(n=2)), Expr(value=Call(func=Name(id='print'), args=[BinOp(left=Name(id='x'), op=Add(), right=Num(n=3))])) ])
  2. node = ast.parse('x = 2; print(x + 3)') ast.dump(node) =>

    Module(body=[ Assign(targets=[Name(id='x')], value=Num(n=2)), Expr(value=Call(func=Name(id='print'), args=[BinOp(left=Name(id='x'), op=Add(), right=Num(n=3))])) ])
  3. node = ast.parse('x = 2; print(x + 3)') ast.dump(node) =>

    Module(body=[ Assign(targets=[Name(id='x')], value=Num(n=2)), Expr(value=Call(func=Name(id='print'), args=[BinOp(left=Name(id='x'), op=Add(), right=Num(n=3))])) ])
  4. node = ast.parse('x = 2; print(x + 3)') ast.dump(node) =>

    Module(body=[ Assign(targets=[Name(id='x')], value=Num(n=2)), Expr(value=Call(func=Name(id='print'), args=[BinOp(left=Name(id='x'), op=Add(), right=Num(n=3))])) ]) node.body[0].value.n => 2
  5. node = ast.parse('x = 2; print(x + 3)') node.body[0].value.n =>

    2 code = compile(node, filename='<string>', mode='exec') => <code object <module> at 0x104c990c0, file "<string>", line 1>
  6. node = ast.parse('x = 2; print(x + 3)') node.body[0].value.n =>

    2 code = compile(node, filename='<string>', mode='exec') => <code object <module> at 0x104c990c0, file "<string>", line 1>
  7. node = ast.parse('x = 2; print(x + 3)') node.body[0].value.n =>

    2 code = compile(node, filename='<string>', mode='exec') => <code object <module> at 0x104c990c0, file "<string>", line 1> exec(code) => 5
  8. node = ast.parse('x = 2; print(x + 3)') node.body[0].value.n =

    10 code = compile(node, filename='<string>', mode='exec') => <code object <module> at 0x104c990c0, file "<string>", line 1> exec(code) => 5 13
  9. node = ast.parse('x = 2; print(x + 3)') class MyVisitor(ast.NodeTransformer):

    def visit_Num(self, _node): return ast.Num(n=100) MyVisitor().visit(node) ast.fix_missing_locations(node) code = compile(node, filename='<string>', mode='exec') exec(code) => 200
  10. node = ast.parse('x = 2; print(x + 3)') class MyVisitor(ast.NodeTransformer):

    def visit_Num(self, _node): return ast.Num(n=100) MyVisitor().visit(node) ast.fix_missing_locations(node) code = compile(node, filename='<string>', mode='exec') exec(code) => 200
  11. import inspect class A: def __init__(self): super().__init__() print('success!') source =

    inspect.getsource(A.__init__).strip() node = ast.parse(source) code = compile(node, filename=__file__, mode='exec') exec(source) A.__init__ = __init__ A() RuntimeError: super(): __class__ cell not found
  12. import inspect class A: def __init__(self): super().__init__() print('success!') source =

    inspect.getsource(A.__init__).strip() node = ast.parse(source) code = compile(node, filename=__file__, mode='exec') exec(source) A.__init__ = __init__ A() RuntimeError: super(): __class__ cell not found
  13. import inspect class A: def __init__(self): super().__init__() print('success!') source =

    inspect.getsource(A.__init__).strip() node = ast.parse(source) # modify node somehow... code = compile(node, filename=__file__, mode='exec') exec(source) A.__init__ = __init__ A() RuntimeError: super(): __class__ cell not found
  14. import inspect class A: def __init__(self): super().__init__() print('success!') source =

    inspect.getsource(A.__init__).strip() node = ast.parse(source) # modify node somehow... code = compile(node, filename=__file__, mode='exec') exec(source) A.__init__ = __init__ A() RuntimeError: super(): __class__ cell not found
  15. import inspect class A: def __init__(self): super().__init__() print('success!') source =

    inspect.getsource(A.__init__).strip() node = ast.parse(source) # modify node somehow... code = compile(node, filename=__file__, mode='exec') exec(source) A.__init__ = __init__ A() RuntimeError: super(): __class__ cell not found
  16. help(code) => class code(object) | code(argcount, kwonlyargcount, nlocals, stacksize, |

    flags, codestring, constants, names, varnames, | filename, name, firstlineno, lnotab, | [freevars, [cellvars]]) | | Create a code object. Not for the faint of heart. ...
  17. y = 2 class A: z = 4 def __init__(self):

    ... filename = inspect.getsourcefile(A.__init__) source = open(filename).read() node = ast.parse(source) code = compile(node, filename=filename, mode='exec') => <code object <module> ...> code.co_consts => (2, <code object A ...>, 'A', None) code.co_consts[1].co_consts => ('A', 4, <code object __init__ ...>, 'A.__init__')
  18. y = 2 class A: z = 4 def __init__(self):

    ... A.__init__.__code__.co_name: '__init__' A.__init__.__code__.co_firstlineno: 4 node = ast.parse(source) code = compile(node, filename=filename, mode='exec') => <code object <module> ...> code.co_consts => (2, <code object A ...>, 'A', None) code.co_consts[1].co_consts => ('A', 4, <code object __init__ ...>, 'A.__init__')
  19. y = 2 class A: z = 4 def __init__(self):

    ... filename = inspect.getsourcefile(A.__init__) source = open(filename).read() node = ast.parse(source) # modify node somehow... code = compile(node, filename=filename, mode='exec') => <code object <module> ... line 1> code.co_consts => (2, <code object A ...>, 'A', None) code.co_consts[1].co_consts => ('A', 4, <code object __init__ ...>, 'A.__init__')
  20. y = 2 class A: z = 4 def __init__(self):

    ... filename = inspect.getsourcefile(A.__init__) source = open(filename).read() node = ast.parse(source) # modify node somehow... code = compile(node, filename=filename, mode='exec') => <code object <module> ... line 1> code.co_consts => (2, <code object A ... line 2>, 'A', None) code.co_consts[1].co_consts => ('A', 4, <code object __init__ ...>, 'A.__init__')
  21. y = 2 class A: z = 4 def __init__(self):

    ... filename = inspect.getsourcefile(A.__init__) source = open(filename).read() node = ast.parse(source) # modify node somehow... code = compile(node, filename=filename, mode='exec') => <code object <module> ... line 1> code.co_consts => (2, <code object A ... line 2>, 'A', None) code.co_consts[1].co_consts => ('A', 4, <code object __init__ ... line 4>, 'A.__init__')
  22. foo = func('foo') bar = func('bar') ↓ foo, bar =

    [ func(name) for name in assigned_names() ]
  23. foo = func('foo') bar = func('bar') ↓ foo, bar =

    [ func(name) for name in assigned_names() ]
  24. foo = func('foo') bar = func('bar') ↓ foo, bar =

    [ func(name) for name in assigned_names() ] ('foo', 'bar')
  25. class Thing(Enum): foo = 'foo' bar = 'bar' ↓ class

    Thing(Enum): foo, bar = assigned_names()
  26. class Thing(Enum): foo = 'foo' bar = 'bar' ↓ class

    Thing(Enum): foo, bar = assigned_names()
  27. foo = d['foo'] bar = d['bar'] ↓ foo, bar =

    unpack_keys(d) for foo, bar in unpack_keys([{'foo': 1, 'bar': 2}, ...]):
  28. def foo(): bar() def bar(): 1/0 foo() Traceback (most recent

    call last): File "/my/script.py", line 7, in <module> foo() File "/my/script.py", line 2, in foo bar() File "/my/script.py", line 5, in bar 1/0 ZeroDivisionError: division by zero
  29. def foo(): bar() def bar(): 1/0 foo() Traceback (most recent

    call last): File "/my/script.py", line 7, in <module> foo() File "/my/script.py", line 2, in foo bar() File "/my/script.py", line 5, in bar 1/0 ZeroDivisionError: division by zero
  30. def foo(): bar() def bar(): 1/0 foo() Traceback (most recent

    call last): File "/my/script.py", line 7, in <module> foo() File "/my/script.py", line 2, in foo bar() File "/my/script.py", line 5, in bar 1/0 ZeroDivisionError: division by zero code.co_filename frame.f_lineno code.co_name
  31. def foo(): x = 1 y = 2 return 3

    * bar(x) def bar(): previous = inspect.currentframe().f_back previous.f_locals: {'x': 1, 'y': 2} previous.f_code: <code object foo ...> previous.f_lineno: 4
  32. def foo(): x = 1 y = 2 return 3

    * bar(x) def bar(): previous = inspect.currentframe().f_back previous.f_locals: {'x': 1, 'y': 2} previous.f_code: <code object foo ...> previous.f_lineno: 4
  33. def foo(): x = 1 y = 2 return 3

    * bar(x) def bar(): previous = inspect.currentframe().f_back f = open(previous.f_code.co_filename) lines = f.readlines() line = lines[previous.f_lineno - 1].strip() node = ast.parse(line) # find Call node... => Call(func=Name(id='bar'), ...)
  34. def foo(): x = 1 y = bar return 3

    * y(x) def bar(): previous = inspect.currentframe().f_back f = open(previous.f_code.co_filename) lines = f.readlines() line = lines[previous.f_lineno - 1].strip() node = ast.parse(line) # find Call node... previous.f_locals: {'y': <function bar ...>, ...} => Call(func=Name(id='y'), ...)
  35. def foo(): x = 1 y = 2 bar(x, y)

    @spell def bar(frame_info, ...): ... args: (1, 2) frame_info.call: Call(func=Name(id='bar'), args=[Name(id='x'), Name(id='y')])
  36. def foo(): x = 1 y = 2 bar(x, y)

    @spell def bar(frame_info, ...): ... args: (1, 2) frame_info.call: Call(func=Name(id='bar'), args=[Name(id='x'), Name(id='y')])
  37. dict_of(foo, bar) == dict(foo=foo, bar=bar) @spell def dict_of(frame_info, *values): return

    { arg.id: value for arg, value in zip(frame_info.call.args, values) }
  38. import sys from inspect import stack def trace(frame, event, arg):

    if event == 'call': print(' ' * len(stack()), frame.f_code.co_name) sys.settrace(trace)
  39. def main(): for x in range(3): foo() def foo(): bar()

    def bar(): pass main() main foo bar foo bar foo bar