Upgrade to Pro — share decks privately, control downloads, hide ads and more …

How I Built a Power Debugger Out of the Standard Library and Things I Found on the Internet

How I Built a Power Debugger Out of the Standard Library and Things I Found on the Internet

Smiley demonstrates how to use Python's native tracing capabilities to monitor not just what parts of your program run, but the data flowing through the program as it runs. All of the data is recorded for study after the program exits, which means you can pass different inputs and then compare the results of the runs. In this presentation, I describe the evolution of Smiley, from concept through internal API changes as I worked on the implementation. I also talk about tracing Python programs in general, and explain how the trace code in Smiley can be used to send trace data to different output destinations.

doughellmann

May 31, 2016
Tweet

More Decks by doughellmann

Other Decks in Technology

Transcript

  1. How I Built a Power Debugger Out of the Standard

    Library and Things I Found on the Internet Doug Hellmann PyCon US 2016
  2. Features • Record calls with data • Remote monitoring, local

    analysis • Browse history • Learn new tools flickr/Kangrex
  3. class Publisher(object): def __init__(self, endpoint, high_water_mark=10000): self.context = zmq.Context() self.pub_socket

    = \ self.context.socket(zmq.PUSH) self.pub_socket.bind(endpoint) self.pub_socket.identity = 'publisher' self.pub_socket.hwm = high_water_mark def send(self, msg_type, data): msg = [ msg_type, json.dumps(data, default=self._json_special_types), ] self.pub_socket.send_multipart(msg)
  4. def trace_calls(self, frame, event, arg): co = frame.f_code filename =

    co.co_filename if filename in (__file__,): # Ignore ourself return self._send_notice(frame, event, arg) return self.trace_calls
  5. def _send_notice(self, frame, event, arg): co = frame.f_code func_name =

    co.co_name line_no = frame.f_lineno filename = os.path.abspath(co.co_filename) for d in IGNORE_DIRS: if filename.startswith(d): return # …
  6. # … interesting_locals = { n: v for n, v

    in frame.f_locals.items() if (not inspect.ismodule(v) and not inspect.isfunction(v) and not inspect.ismethod(v) and (n[:2] != '__' and n[-2:] != '__')) } # …
  7. # … self.publisher.send( event, {'func_name': func_name, 'line_no': line_no, 'filename': filename,

    'arg': arg, 'locals': interesting_locals, 'timestamp': time.time(), 'run_id': self.run_id, })
  8. $ smiley help usage: smiley [--version] [-v] [--log-file LOG_FILE] [-q]

    [-h] [--debug] smiley spies on your apps as they run optional arguments: --version show program's version number and exit -v, --verbose Increase verbosity of output. --log-file LOG_FILE Specify a file to log output. -q, --quiet suppress output except warnings -h, --help show this help message and exit --debug show tracebacks on errors Commands: complete print bash completion command help print detailed help for another command monitor Listen for running programs and show their progress. run Run another program with monitoring enabled.
  9. $ smiley help run usage: smiley run [-h] [--socket SOCKET]

    command [command ...] Run another program with monitoring enabled. positional arguments: command the command to spy on optional arguments: -h, --help show this help message and exit --socket SOCKET URL for the socket where the listener will be (tcp://127.0.0.1:5556)
  10. $ smiley help monitor usage: smiley monitor [-h] [--socket SOCKET]

    Listen for running programs and show their progress. optional arguments: -h, --help show this help message and exit --socket SOCKET URL for the socket where to monitor on (tcp://127.0.0.1:5556)
  11. def _process_message(self, msg): print 'MESSAGE:', msg msg_type, msg_payload = msg

    if msg_type == 'start_run': print (‘Starting new run:', msg_payload.get(‘command_line')) elif msg_type == 'end_run': print 'Finished run' else: line = linecache.getline(msg_payload['filename'], msg_payload['line_no']).rstrip() if msg_type == 'return': print '%s:%4s: return>>> %s' % ( msg_payload['filename'], msg_payload[‘line_no'], msg_payload['arg']) else: print '%s:%4s: %s' % ( msg_payload['filename'], msg_payload[‘line_no'], line) if msg_payload.get('locals'): for n, v in sorted(msg_payload['locals'].items()): print '%s %s = %s' % ( ' ' * len(msg_payload['filename']), n, v, ) print
  12. def gen(m): for i in xrange(m): yield i def c(input):

    print 'input =', input data = list(gen(input)) print 'Leaving c()' def b(arg): val = arg * 5 c(val) print 'Leaving b()' return val def a(): print 'args:', sys.argv b(2) print 'Leaving a()' a()
  13. $ smiley monitor Waiting for incoming data MESSAGE: ['start_run', {u'command_line':

    [u'simple.py'], u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e'}] Starting new run: [u'simple.py'] MESSAGE: ['call', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.142863, u'line_no': 1, u'filename': u'/Users/dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'<module>', u'arg': None, u'locals': {u'self': u'<smiley.tracer.Tracer object at 0x103caff10>', u'command_line': [u'simple.py']}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 1: import sys command_line = [u'simple.py'] self = <smiley.tracer.Tracer object at 0x103caff10> MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.143175, u'line_no': 1, u'filename': u'/Users/dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'<module>', u'arg': None, u'locals': {u'self': u'<smiley.tracer.Tracer object at 0x103caff10>', u'command_line': [u'simple.py']}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 1: import sys command_line = [u'simple.py'] self = <smiley.tracer.Tracer object at 0x103caff10> MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.143499, u'line_no': 3, u'filename': u'/Users/ dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'<module>', u'arg':
  14. $ smiley monitor Waiting for incoming data MESSAGE: ['start_run', {u'command_line':

    [u'simple.py'], u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e'}] Starting new run: [u'simple.py'] MESSAGE: ['call', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.142863, u'line_no': 1, u'filename': u'/Users/dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'<module>', u'arg': None, u'locals': {u'self': u'<smiley.tracer.Tracer object at 0x103caff10>', u'command_line': [u'simple.py']}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 1: import sys command_line = [u'simple.py'] self = <smiley.tracer.Tracer object at 0x103caff10> MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.143175, u'line_no': 1, u'filename': u'/Users/dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'<module>', u'arg': None, u'locals': {u'self': u'<smiley.tracer.Tracer object at 0x103caff10>', u'command_line': [u'simple.py']}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 1: import sys command_line = [u'simple.py'] self = <smiley.tracer.Tracer object at 0x103caff10> MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.143499, u'line_no': 3, u'filename': u'/Users/ dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'<module>', u'arg':
  15. MESSAGE: ['call', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.144927, u'line_no': 21, u'filename': u'/Users/

    dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'a', u'arg': None, u'locals': {}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 21: def a(): MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.145128, u'line_no': 22, u'filename': u'/Users/ dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'a', u'arg': None, u'locals': {}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 22: print 'args:', sys.argv MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.145343, u'line_no': 23, u'filename': u'/Users/ dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'a', u'arg': None, u'locals': {}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 23: b(2)
  16. def start_run(self, run_id, cwd, description, start_time): "Record the beginning of

    a run." with transaction(self.conn) as c: c.execute( """ INSERT INTO run (id, cwd, description, start_time) VALUES (:id, :cwd, :description, :start_time) """, {'id': run_id, 'cwd': cwd, 'description': description, 'start_time': start_time} )
  17. def _process_message(self, msg): msg_type, msg_payload = msg if msg_type ==

    'start_run': command_line = ' ‘.join( msg_payload.get('command_line', [])) self._cwd = msg_payload.get('cwd', '') if self._cwd: self._cwd = (self._cwd.rstrip(os.sep) + os.sep) self.db.start_run( run_id=msg_payload.get('run_id'), cwd=self._cwd, description=command_line, start_time=msg_payload.get('timestamp'), )
  18. elif msg_type == 'end_run': self.log.info('Finished run') self.db.end_run( run_id=msg_payload['run_id'], end_time=msg_payload.get('timestamp'), message=msg_payload.get('message'),

    traceback=msg_payload.get('traceback'), ) else: self.db.trace( run_id=msg_payload['run_id'], event=msg_type, func_name=msg_payload.get('func_name'), line_no=msg_payload.get('line_no'), filename=msg_payload.get('filename'), trace_arg=msg_payload.get('arg'), locals=msg_payload.get('locals'), timestamp=msg_payload.get('timestamp'), )
  19. import json import traceback import types def _json_special_types(obj): if isinstance(obj,

    types.TracebackType): return traceback.extract_tb(obj) if isinstance(obj, type): # We don't want to return classes return repr(obj) try: data = dict(vars(obj)) data['__class__'] = obj.__class__.__name__ data['__module__'] = obj.__class__.__module__ except Exception as err: data = repr(obj) return data def dumps(data): return json.dumps(data, default=_json_special_types)
  20. ?

  21. class EventProcessor(object): __metaclass__ = abc.ABCMeta @abc.abstractmethod def start_run(self, run_id, cwd,

    description, start_time): """Called when a 'start_run' event is seen. """ @abc.abstractmethod def end_run(self, run_id, end_time, message, traceback): """Called when an 'end_run' event is seen. """ @abc.abstractmethod def trace(self, run_id, event, func_name, line_no, filename, trace_arg, local_vars, timestamp): """Called when any other event type is seen. """
  22. def get_runs(self): "Return the runs available to browse." with transaction(self.conn)

    as c: c.execute( """ SELECT id, cwd, description, start_time, end_time, error_message FROM run """ ) return c.fetchall()
  23. def take_action(self, parsed_args): self.out = output.OutputFormatter( linecache.getline) self.db = db.DB(parsed_args.database)

    run_details = self.db.get_run(parsed_args.run_id) self.out.start_run( run_details.id, run_details.cwd, run_details.description, run_details.start_time, ) for t in self.db.get_trace(parsed_args.run_id): self.out.trace( t.run_id, t.event, t.func_name, t.line_no, t.filename, t.trace_arg, t.local_vars, t.timestamp, ) self.out.end_run( run_details.id, run_details.end_time, run_details.error_message, None) # run_details.traceback
  24. MESSAGE: ['call', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.144927, u'line_no': 21, u'filename': u'/Users/

    dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'a', u'arg': None, u'locals': {}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 21: def a(): MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.145128, u'line_no': 22, u'filename': u'/Users/ dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'a', u'arg': None, u'locals': {}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 22: print 'args:', sys.argv MESSAGE: ['line', {u'run_id': u'e87302b2-402a-4243-bb27-69a467d4bd8e', u'timestamp': 1436367235.145343, u'line_no': 23, u'filename': u'/Users/ dhellmann/Devel/smiley/scratch/simple.py', u'func_name': u'a', u'arg': None, u'locals': {}}] /Users/dhellmann/Devel/smiley/scratch/simple.py: 23: b(2)
  25. class DBLineCache(object): def __init__(self, db, run_id): self._db = db self._run_id

    = run_id self._files = {} def getline(self, filename, line_no): if filename not in self._files: body = self._db.get_cached_file( self._run_id, filename) self._files[filename] = body.splitlines() try: return self._files[filename][line_no] except IndexError: # Line number is out of range return ''
  26. def take_action(self, parsed_args): # Fix import path cwd = os.getcwd()

    if (cwd not in sys.path and os.curdir not in sys.path): sys.path.insert(0, cwd) # Fix command line args sys.argv = parsed_args.command # Run the app p = publisher.Publisher(parsed_args.socket) t = tracer.Tracer(p) t.run(parsed_args.command)
  27. def take_action(self, parsed_args): # Fix import path cwd = os.getcwd()

    if (cwd not in sys.path and os.curdir not in sys.path): sys.path.insert(0, cwd) # Fix command line args sys.argv = parsed_args.command # Run the app if parsed_args.mode == 'remote': p = publisher.Publisher(parsed_args.socket) else: p = db.DB(parsed_args.database) t = tracer.Tracer(p) t.run(parsed_args.command)
  28. class FileController(RestController): @expose(generic=True, template='file.html') @nav.active_section('runs') def get_one(self, run_id, file_id): filename,

    body = \ request.db.get_cached_file_by_id(run_id, file_id) run = request.db.get_run(run_id) lexer = guess_lexer_for_filename(filename, body) formatter = HtmlFormatter(linenos=True) styled_body = highlight(body, lexer, formatter) return { 'run_id': run_id, 'run': run, 'filename': filename, 'body': body, 'styled_body': styled_body, }
  29. class StyledLineCache(object): def __init__(self, db, run_id): self._db = db self._run_id

    = run_id self._files = {} EXPECTED_PREFIX = '<div class="highlight"><pre>' EXPECTED_SUFFIX = '</pre></div>' def getline(self, filename, line_no): if filename not in self._files: body = self._db.get_cached_file(self._run_id, filename) styled_body = apply_style(filename, body, linenos=False) start = len(self.EXPECTED_PREFIX) end = -1 * (len(self.EXPECTED_SUFFIX) + 1) middle_body = styled_body[start:end].rstrip('\n') self._files[filename] = middle_body.splitlines() try: return self._files[filename][line_no-1] except IndexError: # Line number is out of range return ''
  30. ✓ Web UI ✓ Profiling Data ✓ Call Graph ✓

    Syntax Highlighting • Only Changed Variables • Comments
  31. def _mk_seq(d): return sorted( (k, pformat(v, width=20)) for k, v

    in d.iteritems() ) def get_variable_changes(older, newer): s_a = _mk_seq(older) s_b = _mk_seq(newer) matcher = difflib.SequenceMatcher(None, s_a, s_b) for tag, i1, i2, j1, j2 in matcher.get_opcodes(): if tag in {'insert', 'replace'}: for i in s_b[j1:j2]: yield i
  32. 0 1000 2000 3000 4000 2013-05-25 2013-05-26 2013-05-27 2013-05-28 2013-06-02

    2013-06-03 2013-06-05 2013-06-06 2013-06-08 2013-06-16 2013-06-23 2013-07-14 2013-07-27 2013-07-28 2013-09-01 2013-12-28 2013-12-29 2013-12-30 2014-01-04 2014-01-12 2014-07-06 2014-08-01
  33. 0 1000 2000 3000 4000 2013-05-25 2013-05-26 2013-05-27 2013-05-28 2013-06-02

    2013-06-03 2013-06-05 2013-06-06 2013-06-08 2013-06-16 2013-06-23 2013-07-14 2013-07-27 2013-07-28 2013-09-01 2013-12-28 2013-12-29 2013-12-30 2014-01-04 2014-01-12 2014-07-06 2014-08-01
  34. 0 1000 2000 3000 4000 2013-05-25 2013-05-26 2013-05-27 2013-05-28 2013-06-02

    2013-06-03 2013-06-05 2013-06-06 2013-06-08 2013-06-16 2013-06-23 2013-07-14 2013-07-27 2013-07-28 2013-09-01 2013-12-28 2013-12-29 2013-12-30 2014-01-04 2014-01-12 2014-07-06 2014-08-01