Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Doug Hellmann - How I built a power debugger ou...

Doug Hellmann - How I built a power debugger out of the standard library and things I found on the internet

Smiley spys on your application, recording everything it does, using Python's built-in tracing facility and a database to create a complete record of your program's runtime so you can study it. Most of the tools I used to create Smiley are in Python's standard library, and the history of Smiley's development serves as a framework to discuss those tools and how you can tap their power yourself.

https://us.pycon.org/2016/schedule/presentation/1650/

PyCon 2016

May 29, 2016
Tweet

More Decks by PyCon 2016

Other Decks in Programming

Transcript

  1. How I Built a Power Debugger Out of the Standard

    Library and Things I Found on the Internet Doug Hellmann PyCon US 2016
  2. Features • Record calls with data • Remote monitoring, local

    analysis • Browse history • Learn new tools flickr/Kangrex
  3. class Publisher(object): def __init__(self, endpoint, high_water_mark=10000): self.context = zmq.Context() self.pub_socket

    = \ self.context.socket(zmq.PUSH) self.pub_socket.bind(endpoint) self.pub_socket.identity = 'publisher' self.pub_socket.hwm = high_water_mark def send(self, msg_type, data): msg = [ msg_type, json.dumps(data, default=self._json_special_types), ] self.pub_socket.send_multipart(msg)
  4. def trace_calls(self, frame, event, arg): co = frame.f_code filename =

    co.co_filename if filename in (__file__,): # Ignore ourself return self._send_notice(frame, event, arg) return self.trace_calls
  5. def _send_notice(self, frame, event, arg): co = frame.f_code func_name =

    co.co_name line_no = frame.f_lineno filename = os.path.abspath(co.co_filename) for d in IGNORE_DIRS: if filename.startswith(d): return # …
  6. # … interesting_locals = { n: v for n, v

    in frame.f_locals.items() if (not inspect.ismodule(v) and not inspect.isfunction(v) and not inspect.ismethod(v) and (n[:2] != '__' and n[-2:] != '__')) } # …
  7. # … self.publisher.send( event, {'func_name': func_name, 'line_no': line_no, 'filename': filename,

    'arg': arg, 'locals': interesting_locals, 'timestamp': time.time(), 'run_id': self.run_id, })
  8. $ smiley help usage: smiley [--version] [-v] [--log-file LOG_FILE] [-q]

    [-h] [--debug] smiley spies on your apps as they run optional arguments: --version show program's version number and exit -v, --verbose Increase verbosity of output. --log-file LOG_FILE Specify a file to log output. -q, --quiet suppress output except warnings -h, --help show this help message and exit --debug show tracebacks on errors Commands: complete print bash completion command help print detailed help for another command monitor Listen for running programs and show their progress. run Run another program with monitoring enabled.
  9. $ smiley help run usage: smiley run [-h] [--socket SOCKET]

    command [command ...] Run another program with monitoring enabled. positional arguments: command the command to spy on optional arguments: -h, --help show this help message and exit --socket SOCKET URL for the socket where the listener will be (tcp://127.0.0.1:5556)
  10. $ smiley help monitor usage: smiley monitor [-h] [--socket SOCKET]

    Listen for running programs and show their progress. optional arguments: -h, --help show this help message and exit --socket SOCKET URL for the socket where to monitor on (tcp://127.0.0.1:5556)
  11. def _process_message(self, msg): print 'MESSAGE:', msg msg_type, msg_payload = msg

    if msg_type == 'start_run': print (‘Starting new run:', msg_payload.get(‘command_line')) elif msg_type == 'end_run': print 'Finished run' else: line = linecache.getline(msg_payload['filename'], msg_payload['line_no']).rstrip() if msg_type == 'return': print '%s:%4s: return>>> %s' % ( msg_payload['filename'], msg_payload[‘line_no'], msg_payload['arg']) else: print '%s:%4s: %s' % ( msg_payload['filename'], msg_payload[‘line_no'], line) if msg_payload.get('locals'): for n, v in sorted(msg_payload['locals'].items()): print '%s %s = %s' % ( ' ' * len(msg_payload['filename']), n, v, ) print
  12. def gen(m): for i in xrange(m): yield i def c(input):

    print 'input =', input data = list(gen(input)) print 'Leaving c()' def b(arg): val = arg * 5 c(val) print 'Leaving b()' return val def a(): print 'args:', sys.argv b(2) print 'Leaving a()' a()
  13. $ smiley monitor Waiting for incoming data MESSAGE: ['start_run', {u'command_line':

    [u'simple.py'], u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e'}] Starting new run: [u'simple.py'] MESSAGE: ['call', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.142863, u'line_no': 1, u'filename': u'/Users/dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'<module>', u'arg': None, u'locals': {u'self': u'<smiley.tracer.Tracer object at 0x103caff10>', u'command_line': [u'simple.py']}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 1: import sys command_line = [u'simple.py'] self = <smiley.tracer.Tracer object at 0x103caff10> MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.143175, u'line_no': 1, u'filename': u'/Users/dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'<module>', u'arg': None, u'locals': {u'self': u'<smiley.tracer.Tracer object at 0x103caff10>', u'command_line': [u'simple.py']}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 1: import sys command_line = [u'simple.py'] self = <smiley.tracer.Tracer object at 0x103caff10> MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.143499, u'line_no': 3, u'filename': u'/Users/ dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'<module>', u'arg':
  14. $ smiley monitor Waiting for incoming data MESSAGE: ['start_run', {u'command_line':

    [u'simple.py'], u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e'}] Starting new run: [u'simple.py'] MESSAGE: ['call', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.142863, u'line_no': 1, u'filename': u'/Users/dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'<module>', u'arg': None, u'locals': {u'self': u'<smiley.tracer.Tracer object at 0x103caff10>', u'command_line': [u'simple.py']}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 1: import sys command_line = [u'simple.py'] self = <smiley.tracer.Tracer object at 0x103caff10> MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.143175, u'line_no': 1, u'filename': u'/Users/dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'<module>', u'arg': None, u'locals': {u'self': u'<smiley.tracer.Tracer object at 0x103caff10>', u'command_line': [u'simple.py']}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 1: import sys command_line = [u'simple.py'] self = <smiley.tracer.Tracer object at 0x103caff10> MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.143499, u'line_no': 3, u'filename': u'/Users/ dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'<module>', u'arg':
  15. MESSAGE: ['call', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.144927, u'line_no': 21, u'filename': u'/Users/

    dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'a', u'arg': None, u'locals': {}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 21: def a(): MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.145128, u'line_no': 22, u'filename': u'/Users/ dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'a', u'arg': None, u'locals': {}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 22: print 'args:', sys.argv MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.145343, u'line_no': 23, u'filename': u'/Users/ dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'a', u'arg': None, u'locals': {}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 23: b(2)
  16. def start_run(self, run_id, cwd, description, start_time): "Record the beginning of

    a run." with transaction(self.conn) as c: c.execute( """ INSERT INTO run (id, cwd, description, start_time) VALUES (:id, :cwd, :description, :start_time) """, {'id': run_id, 'cwd': cwd, 'description': description, 'start_time': start_time} )
  17. def _process_message(self, msg): msg_type, msg_payload = msg if msg_type ==

    'start_run': command_line = ' ‘.join( msg_payload.get('command_line', [])) self._cwd = msg_payload.get('cwd', '') if self._cwd: self._cwd = (self._cwd.rstrip(os.sep) + os.sep) self.db.start_run( run_id=msg_payload.get('run_id'), cwd=self._cwd, description=command_line, start_time=msg_payload.get('timestamp'), )
  18. elif msg_type == 'end_run': self.log.info('Finished run') self.db.end_run( run_id=msg_payload['run_id'], end_time=msg_payload.get('timestamp'), message=msg_payload.get('message'),

    traceback=msg_payload.get('traceback'), ) else: self.db.trace( run_id=msg_payload['run_id'], event=msg_type, func_name=msg_payload.get('func_name'), line_no=msg_payload.get('line_no'), filename=msg_payload.get('filename'), trace_arg=msg_payload.get('arg'), locals=msg_payload.get('locals'), timestamp=msg_payload.get('timestamp'), )
  19. import json import traceback import types def _json_special_types(obj): if isinstance(obj,

    types.TracebackType): return traceback.extract_tb(obj) if isinstance(obj, type): # We don't want to return classes return repr(obj) try: data = dict(vars(obj)) data['__class__'] = obj.__class__.__name__ data['__module__'] = obj.__class__.__module__ except Exception as err: data = repr(obj) return data def dumps(data): return json.dumps(data, default=_json_special_types)
  20. ?

  21. class EventProcessor(object): __metaclass__ = abc.ABCMeta @abc.abstractmethod def start_run(self, run_id, cwd,

    description, start_time): """Called when a 'start_run' event is seen. """ @abc.abstractmethod def end_run(self, run_id, end_time, message, traceback): """Called when an 'end_run' event is seen. """ @abc.abstractmethod def trace(self, run_id, event, func_name, line_no, filename, trace_arg, local_vars, timestamp): """Called when any other event type is seen. """
  22. def get_runs(self): "Return the runs available to browse." with transaction(self.conn)

    as c: c.execute( """ SELECT id, cwd, description, start_time, end_time, error_message FROM run """ ) return c.fetchall()
  23. def take_action(self, parsed_args): self.out = output.OutputFormatter( linecache.getline) self.db = db.DB(parsed_args.database)

    run_details = self.db.get_run(parsed_args.run_id) self.out.start_run( run_details.id, run_details.cwd, run_details.description, run_details.start_time, ) for t in self.db.get_trace(parsed_args.run_id): self.out.trace( t.run_id, t.event, t.func_name, t.line_no, t.filename, t.trace_arg, t.local_vars, t.timestamp, ) self.out.end_run( run_details.id, run_details.end_time, run_details.error_message, None) # run_details.traceback
  24. MESSAGE: ['call', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.144927, u'line_no': 21, u'filename': u'/Users/

    dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'a', u'arg': None, u'locals': {}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 21: def a(): MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.145128, u'line_no': 22, u'filename': u'/Users/ dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'a', u'arg': None, u'locals': {}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 22: print 'args:', sys.argv MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.145343, u'line_no': 23, u'filename': u'/Users/ dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'a', u'arg': None, u'locals': {}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 23: b(2)
  25. class DBLineCache(object): def __init__(self, db, run_id): self._db = db self._run_id

    = run_id self._files = {} def getline(self, filename, line_no): if filename not in self._files: body = self._db.get_cached_file( self._run_id, filename) self._files[filename] = body.splitlines() try: return self._files[filename][line_no] except IndexError: # Line number is out of range return ''
  26. def take_action(self, parsed_args): # Fix import path cwd = os.getcwd()

    if (cwd not in sys.path and os.curdir not in sys.path): sys.path.insert(0, cwd) # Fix command line args sys.argv = parsed_args.command # Run the app p = publisher.Publisher(parsed_args.socket) t = tracer.Tracer(p) t.run(parsed_args.command)
  27. def take_action(self, parsed_args): # Fix import path cwd = os.getcwd()

    if (cwd not in sys.path and os.curdir not in sys.path): sys.path.insert(0, cwd) # Fix command line args sys.argv = parsed_args.command # Run the app if parsed_args.mode == 'remote': p = publisher.Publisher(parsed_args.socket) else: p = db.DB(parsed_args.database) t = tracer.Tracer(p) t.run(parsed_args.command)
  28. class FileController(RestController): @expose(generic=True, template='file.html') @nav.active_section('runs') def get_one(self, run_id, file_id): filename,

    body = \ request.db.get_cached_file_by_id(run_id, file_id) run = request.db.get_run(run_id) lexer = guess_lexer_for_filename(filename, body) formatter = HtmlFormatter(linenos=True) styled_body = highlight(body, lexer, formatter) return { 'run_id': run_id, 'run': run, 'filename': filename, 'body': body, 'styled_body': styled_body, }
  29. class StyledLineCache(object): def __init__(self, db, run_id): self._db = db self._run_id

    = run_id self._files = {} EXPECTED_PREFIX = '<div class="highlight"><pre>' EXPECTED_SUFFIX = '</pre></div>' def getline(self, filename, line_no): if filename not in self._files: body = self._db.get_cached_file(self._run_id, filename) styled_body = apply_style(filename, body, linenos=False) start = len(self.EXPECTED_PREFIX) end = -1 * (len(self.EXPECTED_SUFFIX) + 1) middle_body = styled_body[start:end].rstrip('\n') self._files[filename] = middle_body.splitlines() try: return self._files[filename][line_no-1] except IndexError: # Line number is out of range return ''
  30. ✓ Web UI ✓ Profiling Data ✓ Call Graph ✓

    Syntax Highlighting • Only Changed Variables • Comments
  31. def _mk_seq(d): return sorted( (k, pformat(v, width=20)) for k, v

    in d.iteritems() ) def get_variable_changes(older, newer): s_a = _mk_seq(older) s_b = _mk_seq(newer) matcher = difflib.SequenceMatcher(None, s_a, s_b) for tag, i1, i2, j1, j2 in matcher.get_opcodes(): if tag in {'insert', 'replace'}: for i in s_b[j1:j2]: yield i
  32. 0 1000 2000 3000 4000 2013-05-25 2013-05-26 2013-05-27 2013-05-28 2013-06-02

    2013-06-03 2013-06-05 2013-06-06 2013-06-08 2013-06-16 2013-06-23 2013-07-14 2013-07-27 2013-07-28 2013-09-01 2013-12-28 2013-12-29 2013-12-30 2014-01-04 2014-01-12 2014-07-06 2014-08-01
  33. 0 1000 2000 3000 4000 2013-05-25 2013-05-26 2013-05-27 2013-05-28 2013-06-02

    2013-06-03 2013-06-05 2013-06-06 2013-06-08 2013-06-16 2013-06-23 2013-07-14 2013-07-27 2013-07-28 2013-09-01 2013-12-28 2013-12-29 2013-12-30 2014-01-04 2014-01-12 2014-07-06 2014-08-01
  34. 0 1000 2000 3000 4000 2013-05-25 2013-05-26 2013-05-27 2013-05-28 2013-06-02

    2013-06-03 2013-06-05 2013-06-06 2013-06-08 2013-06-16 2013-06-23 2013-07-14 2013-07-27 2013-07-28 2013-09-01 2013-12-28 2013-12-29 2013-12-30 2014-01-04 2014-01-12 2014-07-06 2014-08-01