Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Accelerating your Python application: Cython and PyPy

Greg Price
February 28, 2013

Accelerating your Python application: Cython and PyPy

Learn how to make almost any Python application run faster, without sacrificing the clarity and flexibility of Python. PyPy is a new, fast Python interpreter with a JIT; it runs unmodified Python code. Cython compiles Python to C; a few hints from the programmer can make it run very fast. This talk will show how to use each one and their limits.

Greg Price

February 28, 2013
Tweet

More Decks by Greg Price

Other Decks in Programming

Transcript

  1. “The universal speed-up is rewriting small bits of code in

    C. Do this only when all else fails.” —Guido, 2012 5 / 59
  2. # from json/encoder.py in stdlib # 23 lines def py_encode_basestring_ascii(s):

    """Return an ASCII-only JSON representation of a Python string """ if isinstance(s, str) and HAS_UTF8.search(s) is not None: s = s.decode('utf-8') def replace(match): s = match.group(0) try: return ESCAPE_DCT[s] except KeyError: n = ord(s) if n < 0x10000: return '\\u{0:04x}'.format(n) #return '\\u%04x' % (n,) else: # surrogate pair n -= 0x10000 s1 = 0xd800 | ((n >> 10) & 0x3ff) s2 = 0xdc00 | (n & 0x3ff) return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) #return '\\u%04x\\u%04x' % (s1, s2) return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' 7 / 59
  3. /* Same function in stdlib, from _json.c */ /* 200

    lines total: 85 lines this, plus 57 unicode, plus 58 helpers */ static PyObject * ascii_escape_str(PyObject *pystr) { /* Take a PyString pystr and return a new ASCII-only escaped PyString */ Py_ssize_t i; Py_ssize_t input_chars; Py_ssize_t output_size; Py_ssize_t chars; PyObject *rval; char *output; char *input_str; input_chars = PyString_GET_SIZE(pystr); input_str = PyString_AS_STRING(pystr); /* Fast path for a string that’s already ASCII */ for (i = 0; i < input_chars; i++) { Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; if (!S_CHAR(c)) { /* If we have to escape something, scan the string for unicode */ Py_ssize_t j; for (j = i; j < input_chars; j++) { c = (Py_UNICODE)(unsigned char)input_str[j]; if (c > 0x7f) { /* We hit a non-ASCII character, bail to unicode mode */ PyObject *uni; uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); if (uni == NULL) { return NULL; } rval = ascii_escape_unicode(uni); Py_DECREF(uni); return rval; } } 8 / 59
  4. $ python Python 2.7.3 (default, Aug 1 2012, 05:16:07) >>>

    import MySQLdb >>> MySQLdb.__version__ '1.2.3' >>> d = {'a': 3} >>> def bad(i, _): ... d.clear() ... d.update((j,j) for j in xrange(300)) ... >>> MySQLdb._mysql.escape_dict(d, {int: bad}) Segmentation fault (core dumped) 9 / 59
  5. /* CPython 2.7, Python/ceval.c */ PyObject * PyEval_EvalFrameEx(PyFrameObject *f, int

    throwflag) { /* ... */ for (;;) { /* ... */ switch (opcode) { /* ... */ case BINARY_ADD: w = POP(); v = TOP(); if (PyInt_CheckExact(v) && PyInt_CheckExact(w)) { /* INLINE: int + int */ register long a, b, i; a = PyInt_AS_LONG(v); b = PyInt_AS_LONG(w); i = (long)((unsigned long)a + b); if ((i^a) < 0 && (i^b) < 0) goto slow_add; x = PyInt_FromLong(i); } else if (PyString_CheckExact(v) && PyString_CheckExact(w)) { x = string_concatenate(v, w, f, next_instr); goto skip_decref_vx; } else { slow_add: x = PyNumber_Add(v, w); } /* Objects/abstract.c */ PyObject * PyNumber_Add(PyObject *v, PyObject *w) { PyObject *result = binary_op1(v, w, NB_SLOT(nb_add)); if (result == Py_NotImplemented) { PySequenceMethods *m = v->ob_type->tp_as_sequence; Py_DECREF(result); if (m && m->sq_concat) { return (*m->sq_concat)(v, w); } result = binop_type_error(v, w, "+"); } return result; } 15 / 59
  6. Smaller is better. Ratio of PyPy time to CPython 2.7

    time. http://speed.pypy.org/ 25 / 59
  7. /* CPython 2.7, Python/ceval.c */ PyObject * PyEval_EvalFrameEx(PyFrameObject *f, int

    throwflag) { /* ... */ for (;;) { /* ... */ switch (opcode) { /* ... */ case BINARY_ADD: w = POP(); v = TOP(); if (PyInt_CheckExact(v) && PyInt_CheckExact(w)) { /* INLINE: int + int */ register long a, b, i; a = PyInt_AS_LONG(v); b = PyInt_AS_LONG(w); i = (long)((unsigned long)a + b); if ((i^a) < 0 && (i^b) < 0) goto slow_add; x = PyInt_FromLong(i); } else if (PyString_CheckExact(v) && PyString_CheckExact(w)) { x = string_concatenate(v, w, f, next_instr); goto skip_decref_vx; } else { slow_add: x = PyNumber_Add(v, w); } /* Objects/abstract.c */ PyObject * PyNumber_Add(PyObject *v, PyObject *w) { PyObject *result = binary_op1(v, w, NB_SLOT(nb_add)); if (result == Py_NotImplemented) { PySequenceMethods *m = v->ob_type->tp_as_sequence; Py_DECREF(result); if (m && m->sq_concat) { return (*m->sq_concat)(v, w); } result = binop_type_error(v, w, "+"); } return result; } 28 / 59
  8. # pypy/module/pypyjit/interp_jit.py # (simplified for slide) def dispatch(self, pycode, next_instr,

    ec): try: while True: co_code = pycode.co_code next_instr = self.handle_bytecode( co_code, next_instr, ec) except ExitFrame: return self.popvalue() 29 / 59
  9. # pypy/module/pypyjit/interp_jit.py # (simplified for slide) def dispatch(self, pycode, next_instr,

    ec): try: while True: pypyjitdriver.jit_merge_point(ec=ec, frame=self, next_instr=next_instr, pycode=pycode) co_code = pycode.co_code next_instr = self.handle_bytecode( co_code, next_instr, ec) except ExitFrame: return self.popvalue() 30 / 59
  10. Faster than C? char x[44]; sprintf(x, "%d %d", i, i);

    "%d %d" % (i, i) C 1.0x CPython 0.16x PyPy 1.9x 31 / 59
  11. Extension modules # a simple Django app, newsdiffs.org $ grep

    python.*so$ /proc/19044/maps b6560000-b6570000 r-xp 00000000 fc:01 397463 /usr/lib/python2.7/lib-dynload/_sqlite3.so b6570000-b6571000 r--p 0000f000 fc:01 397463 /usr/lib/python2.7/lib-dynload/_sqlite3.so b6571000-b6573000 rw-p 00010000 fc:01 397463 /usr/lib/python2.7/lib-dynload/_sqlite3.so b6f45000-b6f58000 r-xp 00000000 fc:01 397446 /usr/lib/python2.7/lib-dynload/datetime.so b6f58000-b6f59000 r--p 00012000 fc:01 397446 /usr/lib/python2.7/lib-dynload/datetime.so $ perl -lne 'print $1 if (m{python.*?/(.*.so)})' \ /proc/19044/maps | uniq lib-dynload/_sqlite3.so lib-dynload/datetime.so dist-packages/simplejson/_speedups.so lib-dynload/termios.so lib-dynload/_heapq.so 33 / 59
  12. Easy way $ python Python 2.7.3 (default, Aug 1 2012)

    >>> import pyximport >>> pyximport.install() >>> import hello Hello world >>> 37 / 59
  13. /* "hello.py":1 * def hello(): # <<<<<<<<<<<<<< * print 'Hello

    world' * */ static PyObject *__pyx_pf_5hello_hello(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused); /*proto*/ static PyMethodDef __pyx_mdef_5hello_hello = {__Pyx_NAMESTR("hello"), (PyCFunction)__pyx_pf_5hello_hello, METH_NOARGS, __Pyx_DOCSTR(0)}; static PyObject *__pyx_pf_5hello_hello(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("hello"); __pyx_self = __pyx_self; /* "hello.py":2 * def hello(): * print 'Hello world' # <<<<<<<<<<<<<< * * hello() */ if (__Pyx_PrintOne(0, ((PyObject *)__pyx_kp_s_1)) < 0) { __pyx_filename = __pyx_f[0]; __pyx_lineno = 2; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = Py_None; __Pyx_INCREF(Py_None); goto __pyx_L0; __pyx_L1_error:; __Pyx_AddTraceback("hello.hello", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } 39 / 59
  14. def f(n): t = 0 for i in xrange(n): t

    += i-2*i+i+1 return t print f(10000000) 40 / 59
  15. import cython @cython.locals(n=cython.int, i=cython.int) def f(n): t = 0 for

    i in xrange(n): t += i-2*i+i+1 return t print f(10000000) 43 / 59
  16. import cython @cython.locals(n=cython.int, i=cython.int, t=cython.int) def f(n): t = 0

    for i in xrange(n): t += i-2*i+i+1 return t print f(10000000) 45 / 59
  17. ________________ test_list _________________ def test_list(): > assert [1, 2] ==

    [3, 2] E assert [1, 2] == [3, 2] E At index 0 diff: 1 != 3 test_diffs.py:11: AssertionError 57 / 59