Save 37% off PRO during our Black Friday Sale! »

Accelerating your Python application: Cython and PyPy

Greg Price
February 28, 2013

Accelerating your Python application: Cython and PyPy

Learn how to make almost any Python application run faster, without sacrificing the clarity and flexibility of Python. PyPy is a new, fast Python interpreter with a JIT; it runs unmodified Python code. Cython compiles Python to C; a few hints from the programmer can make it run very fast. This talk will show how to use each one and their limits.

91ce30388d6d552b697eb67659a371ba?s=128

Greg Price

February 28, 2013
Tweet

Transcript

  1. Accelerating Your Python Code: Cython and PyPy Greg Price google:greg+price

    Solano Labs ConFoo 2013-02-28 1 / 59
  2. 1 2 / 59

  3. Problem: CPython is slow 3 / 59

  4. Solution? Rewrite bits in C 4 / 59

  5. “The universal speed-up is rewriting small bits of code in

    C. Do this only when all else fails.” —Guido, 2012 5 / 59
  6. 6 / 59

  7. # from json/encoder.py in stdlib # 23 lines def py_encode_basestring_ascii(s):

    """Return an ASCII-only JSON representation of a Python string """ if isinstance(s, str) and HAS_UTF8.search(s) is not None: s = s.decode('utf-8') def replace(match): s = match.group(0) try: return ESCAPE_DCT[s] except KeyError: n = ord(s) if n < 0x10000: return '\\u{0:04x}'.format(n) #return '\\u%04x' % (n,) else: # surrogate pair n -= 0x10000 s1 = 0xd800 | ((n >> 10) & 0x3ff) s2 = 0xdc00 | (n & 0x3ff) return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) #return '\\u%04x\\u%04x' % (s1, s2) return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' 7 / 59
  8. /* Same function in stdlib, from _json.c */ /* 200

    lines total: 85 lines this, plus 57 unicode, plus 58 helpers */ static PyObject * ascii_escape_str(PyObject *pystr) { /* Take a PyString pystr and return a new ASCII-only escaped PyString */ Py_ssize_t i; Py_ssize_t input_chars; Py_ssize_t output_size; Py_ssize_t chars; PyObject *rval; char *output; char *input_str; input_chars = PyString_GET_SIZE(pystr); input_str = PyString_AS_STRING(pystr); /* Fast path for a string that’s already ASCII */ for (i = 0; i < input_chars; i++) { Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; if (!S_CHAR(c)) { /* If we have to escape something, scan the string for unicode */ Py_ssize_t j; for (j = i; j < input_chars; j++) { c = (Py_UNICODE)(unsigned char)input_str[j]; if (c > 0x7f) { /* We hit a non-ASCII character, bail to unicode mode */ PyObject *uni; uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); if (uni == NULL) { return NULL; } rval = ascii_escape_unicode(uni); Py_DECREF(uni); return rval; } } 8 / 59
  9. $ python Python 2.7.3 (default, Aug 1 2012, 05:16:07) >>>

    import MySQLdb >>> MySQLdb.__version__ '1.2.3' >>> d = {'a': 3} >>> def bad(i, _): ... d.clear() ... d.update((j,j) for j in xrange(300)) ... >>> MySQLdb._mysql.escape_dict(d, {int: bad}) Segmentation fault (core dumped) 9 / 59
  10. Solution scope C API Cython PyPy 10 / 59

  11. /1 11 / 59

  12. 2: Why? 12 / 59

  13. naive implementations of dynamic languages 13 / 59

  14. Python 14 / 59

  15. Python Ruby 14 / 59

  16. Python Ruby PHP 14 / 59

  17. Python Ruby PHP Lua 14 / 59

  18. Python Ruby PHP Lua JavaScript 14 / 59

  19. /* CPython 2.7, Python/ceval.c */ PyObject * PyEval_EvalFrameEx(PyFrameObject *f, int

    throwflag) { /* ... */ for (;;) { /* ... */ switch (opcode) { /* ... */ case BINARY_ADD: w = POP(); v = TOP(); if (PyInt_CheckExact(v) && PyInt_CheckExact(w)) { /* INLINE: int + int */ register long a, b, i; a = PyInt_AS_LONG(v); b = PyInt_AS_LONG(w); i = (long)((unsigned long)a + b); if ((i^a) < 0 && (i^b) < 0) goto slow_add; x = PyInt_FromLong(i); } else if (PyString_CheckExact(v) && PyString_CheckExact(w)) { x = string_concatenate(v, w, f, next_instr); goto skip_decref_vx; } else { slow_add: x = PyNumber_Add(v, w); } /* Objects/abstract.c */ PyObject * PyNumber_Add(PyObject *v, PyObject *w) { PyObject *result = binary_op1(v, w, NB_SLOT(nb_add)); if (result == Py_NotImplemented) { PySequenceMethods *m = v->ob_type->tp_as_sequence; Py_DECREF(result); if (m && m->sq_concat) { return (*m->sq_concat)(v, w); } result = binop_type_error(v, w, "+"); } return result; } 15 / 59
  20. int x; x = x + 2; add $0x2,%ebx 16 / 59
  21. CPython naive dynamic C static 17 / 59

  22. CPython naive dynamic C static Cython static, Python-like 17 / 59
  23. void work(thing *th, void (*cb)(void *, thing_part *), void *data)

    { ... cb(data, &th->part) ... } 18 / 59
  24. CPython naive dynamic C static Cython static, Python-like PyPy Python,

    smart dynamic 19 / 59
  25. Solution scope C API Cython PyPy 20 / 59

  26. /2: Why? 21 / 59

  27. 3: PyPy 22 / 59

  28. Same language, new interpreter 23 / 59

  29. Fast interpreter 24 / 59

  30. Smaller is better. Ratio of PyPy time to CPython 2.7

    time. http://speed.pypy.org/ 25 / 59
  31. print sum(i*i for i in xrange(10000000)) CPython 1.866s PyPy 1.377s

    26 / 59
  32. def f(x): return x*x print sum(f(i) for i in xrange(1000000))

    CPython 2.546s PyPy 1.398s 27 / 59
  33. /* CPython 2.7, Python/ceval.c */ PyObject * PyEval_EvalFrameEx(PyFrameObject *f, int

    throwflag) { /* ... */ for (;;) { /* ... */ switch (opcode) { /* ... */ case BINARY_ADD: w = POP(); v = TOP(); if (PyInt_CheckExact(v) && PyInt_CheckExact(w)) { /* INLINE: int + int */ register long a, b, i; a = PyInt_AS_LONG(v); b = PyInt_AS_LONG(w); i = (long)((unsigned long)a + b); if ((i^a) < 0 && (i^b) < 0) goto slow_add; x = PyInt_FromLong(i); } else if (PyString_CheckExact(v) && PyString_CheckExact(w)) { x = string_concatenate(v, w, f, next_instr); goto skip_decref_vx; } else { slow_add: x = PyNumber_Add(v, w); } /* Objects/abstract.c */ PyObject * PyNumber_Add(PyObject *v, PyObject *w) { PyObject *result = binary_op1(v, w, NB_SLOT(nb_add)); if (result == Py_NotImplemented) { PySequenceMethods *m = v->ob_type->tp_as_sequence; Py_DECREF(result); if (m && m->sq_concat) { return (*m->sq_concat)(v, w); } result = binop_type_error(v, w, "+"); } return result; } 28 / 59
  34. # pypy/module/pypyjit/interp_jit.py # (simplified for slide) def dispatch(self, pycode, next_instr,

    ec): try: while True: co_code = pycode.co_code next_instr = self.handle_bytecode( co_code, next_instr, ec) except ExitFrame: return self.popvalue() 29 / 59
  35. # pypy/module/pypyjit/interp_jit.py # (simplified for slide) def dispatch(self, pycode, next_instr,

    ec): try: while True: pypyjitdriver.jit_merge_point(ec=ec, frame=self, next_instr=next_instr, pycode=pycode) co_code = pycode.co_code next_instr = self.handle_bytecode( co_code, next_instr, ec) except ExitFrame: return self.popvalue() 30 / 59
  36. Faster than C? char x[44]; sprintf(x, "%d %d", i, i);

    "%d %d" % (i, i) 31 / 59
  37. Faster than C? char x[44]; sprintf(x, "%d %d", i, i);

    "%d %d" % (i, i) C 1.0x CPython 0.16x PyPy 1.9x 31 / 59
  38. Extension modules 32 / 59

  39. Extension modules # a simple Django app, newsdiffs.org $ grep

    python.*so$ /proc/19044/maps b6560000-b6570000 r-xp 00000000 fc:01 397463 /usr/lib/python2.7/lib-dynload/_sqlite3.so b6570000-b6571000 r--p 0000f000 fc:01 397463 /usr/lib/python2.7/lib-dynload/_sqlite3.so b6571000-b6573000 rw-p 00010000 fc:01 397463 /usr/lib/python2.7/lib-dynload/_sqlite3.so b6f45000-b6f58000 r-xp 00000000 fc:01 397446 /usr/lib/python2.7/lib-dynload/datetime.so b6f58000-b6f59000 r--p 00012000 fc:01 397446 /usr/lib/python2.7/lib-dynload/datetime.so $ perl -lne 'print $1 if (m{python.*?/(.*.so)})' \ /proc/19044/maps | uniq lib-dynload/_sqlite3.so lib-dynload/datetime.so dist-packages/simplejson/_speedups.so lib-dynload/termios.so lib-dynload/_heapq.so 33 / 59
  40. /3: PyPy 34 / 59

  41. 4: Cython 35 / 59

  42. # hello.py def hello(): print 'Hello world' hello() 36 / 59
  43. Easy way $ python Python 2.7.3 (default, Aug 1 2012)

    >>> import pyximport >>> pyximport.install() >>> import hello Hello world >>> 37 / 59
  44. Digging deeper $ cython hello.py $ ls hello.py hello.c 38 / 59
  45. /* "hello.py":1 * def hello(): # <<<<<<<<<<<<<< * print 'Hello

    world' * */ static PyObject *__pyx_pf_5hello_hello(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused); /*proto*/ static PyMethodDef __pyx_mdef_5hello_hello = {__Pyx_NAMESTR("hello"), (PyCFunction)__pyx_pf_5hello_hello, METH_NOARGS, __Pyx_DOCSTR(0)}; static PyObject *__pyx_pf_5hello_hello(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused) { PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("hello"); __pyx_self = __pyx_self; /* "hello.py":2 * def hello(): * print 'Hello world' # <<<<<<<<<<<<<< * * hello() */ if (__Pyx_PrintOne(0, ((PyObject *)__pyx_kp_s_1)) < 0) { __pyx_filename = __pyx_f[0]; __pyx_lineno = 2; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_r = Py_None; __Pyx_INCREF(Py_None); goto __pyx_L0; __pyx_L1_error:; __Pyx_AddTraceback("hello.hello", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } 39 / 59
  46. def f(n): t = 0 for i in xrange(n): t

    += i-2*i+i+1 return t print f(10000000) 40 / 59
  47. $ cython --embed sum.py $ gcc -I/usr/include/python2.7/ \ sum.c -lpython2.7

    -o sum $ ./sum 10000000 41 / 59
  48. CPython 1.3s PyPy 0.057s Cython 1.7s 42 / 59

  49. import cython @cython.locals(n=cython.int, i=cython.int) def f(n): t = 0 for

    i in xrange(n): t += i-2*i+i+1 return t print f(10000000) 43 / 59
  50. CPython 1.3s PyPy 0.057s Cython v1 1.7s Cython v2 0.37s

    44 / 59
  51. import cython @cython.locals(n=cython.int, i=cython.int, t=cython.int) def f(n): t = 0

    for i in xrange(n): t += i-2*i+i+1 return t print f(10000000) 45 / 59
  52. CPython 1.3s PyPy 0.057s Cython v1 1.7s Cython v2 0.37s

    Cython v3 0.047s 46 / 59
  53. lxml 47 / 59

  54. /4: Cython 48 / 59

  55. End http://pypy.org/ google:pypy+papers http://cython.org/ Slides: https://joind.in/7937 49 / 59

  56. 50 / 59

  57. 51 / 59

  58. 52 / 59

  59. 53 / 59

  60. 54 / 59

  61. § 1: Just so 55 / 59

  62. 56 / 59

  63. ________________ test_list _________________ def test_list(): > assert [1, 2] ==

    [3, 2] E assert [1, 2] == [3, 2] E At index 0 diff: 1 != 3 test_diffs.py:11: AssertionError 57 / 59
  64. (CC-BY, “soham pablo” on Flickr) 58 / 59

  65. End http://pytest.org/ http://tddium.com/ 59 / 59