Upgrade to Pro — share decks privately, control downloads, hide ads and more …

CPython 코드로 보는 파이썬의 심층 세상

Sungmin Han
August 13, 2023

CPython 코드로 보는 파이썬의 심층 세상

Python을 다루다 보면 CPU/IO Bound, Selector, Global Interpreter Lock, AsyncIO 등의 low-level 구현체를 만나게 됩니다. 그리고 우리는 때때로 구현체의 기능을 정확히 이해하지 못한 채, 코드 패턴을 외운 상태로 개발하곤 합니다.

low-level 구현체를 응용해야 하는 순간이 오면 외워 두었던 패턴을 벗어나게 되므로 구현을 할 수 없게 되고, 이렇게 우리는 추상화의 지옥에 빠지게 됩니다.

이 발표 세션에서는 Python에서 마주치는 low-level 구현체를 CPython 코드와 Computer Science 이론을 그림으로 도식화하여, 시작하기 막막하던 원리 이해의 첫발을 뗄 수 있게끔 준비하였습니다.

Sungmin Han

August 13, 2023
Tweet

More Decks by Sungmin Han

Other Decks in Technology

Transcript

  1. v 제목
 이름 Speaker 한성민 (Sungmin Han) MLOps Lead at

    Riiid Former Research engineer at Naver Clova Former Software engineer at IGAWorks Former Software engineer at 심심이 Google Developer Experts(GDE) for ML Google Developer Groups(GDG) Golang Korea F-Lab Python Mentor [email protected]
  2. v 제목
 이름 v 목차 • Python Runtime • Data

    Type • Built-in functions • Threads • Garbage Collection (GC) • Selector & Signals
  3. v 제목
 이름 v CPython in terms of Runtime Python

    Application Native Library Application Python Runtime Native Library Operating System Hardware Standard Module
  4. v 제목
 이름 CPython-level Data type handling Python Source Code

    Lexical Analysis Syntax Parsing Abstract Syntax Tree (AST) Compile Bytecode Code Execution User-level Compiler-level Runtime-level (Virtual Machine)
  5. v 제목
 이름 PyLongObject * _PyLong_New(Py_ssize_t size) { ... Py_ssize_t

    ndigits = size ? size : 1; result = PyObject_Malloc(offsetof(PyLongObject, long_value.ob_digit) + ndigits*sizeof(digit)); ... _PyLong_SetSignAndDigitCount(result, size != 0, size); _PyObject_Init((PyObject*)result, &PyLong_Type); result->long_value.ob_digit[0] = 0; return result; } Integer (PyLongObject)
  6. v 제목
 이름 for (cur = start, i = 0;

    i < slicelength; cur += (size_t)step, i++) { garbage[i] = selfitems[cur]; ins = Py_NewRef(seqitems[i]); selfitems[cur] = ins; } for (i = 0; i < slicelength; i++) { Py_DECREF(garbage[i]); } List Slicing
  7. v 제목
 이름 int PyObject_RichCompareBool(PyObject *v, PyObject *w, int op)

    { PyObject *res; int ok; if (v == w) { if (op == Py_EQ) return 1; else if (op == Py_NE) return 0; } res = PyObject_RichCompare(v, w, op); if (res == NULL) return -1; if (PyBool_Check(res)) ok = (res == Py_True); else ok = PyObject_IsTrue(res); Py_DECREF(res); return ok; } Rich Compare
  8. v 제목
 이름 if (!Py_IS_TYPE(v, Py_TYPE(w)) && PyType_IsSubtype(Py_TYPE(w), Py_TYPE(v)) &&

    (f = Py_TYPE(w)->tp_richcompare) != NULL) { checked_reverse_op = 1; res = (*f)(w, v, _Py_SwappedOp[op]); if (res != Py_NotImplemented) return res; Py_DECREF(res); } do_richcompare
  9. v 제목
 이름 case Py_EQ: if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w)) Py_RETURN_FALSE;

    if (v->hash != -1 && ((PySetObject *)w)->hash != -1 && v->hash != ((PySetObject *)w)->hash) Py_RETURN_FALSE; return set_issubset(v, w); PyObject Py_EQ
  10. v 제목
 이름 Py_hash_t PyObject_Hash(PyObject *v) { PyTypeObject *tp =

    Py_TYPE(v); if (tp->tp_hash != NULL) return (*tp->tp_hash)(v); if (tp->tp_dict == NULL) { if (PyType_Ready(tp) < 0) return -1; if (tp->tp_hash != NULL) return (*tp->tp_hash)(v); } return PyObject_HashNotImplemented(v); } PyObject Hash
  11. v 제목
 이름 static Py_hash_t tuplehash(PyTupleObject *v) { Py_ssize_t i,

    len = Py_SIZE(v); PyObject **item = v->ob_item; Py_uhash_t acc = _PyHASH_XXPRIME_5; for (i = 0; i < len; i++) { Py_uhash_t lane = PyObject_Hash(item[i]); if (lane == (Py_uhash_t)-1) return -1; acc += lane * _PyHASH_XXPRIME_2; acc = _PyHASH_XXROTATE(acc); acc *= _PyHASH_XXPRIME_1; } /* Add input length, mangled to keep the historical value of hash(()). */ acc += len ^ (_PyHASH_XXPRIME_5 ^ 3527539UL); if (acc == (Py_uhash_t)-1) return 1546275796; return acc; } Tuple Hash
  12. v 제목
 이름 Thread Heap Data Code Stack Stack Stack

    registers registers registers Thread 1 Thread 2 Thread 3 Process
  13. v 제목
 이름 import requests from time import perf_counter buffer_size

    = 1024 def download(url): response = requests.get(url, stream=True) filename = url.split("/")[-1] with open(filename, "wb") as f: for data in response.iter_content(buffer_size): f.write(data) if __name__ == "__main__": urls = [f"https://2023.pycon.kr/session/{i}" for i in range(20)] * 20 t = perf_counter() for url in urls: download(url) print(f"Time took: {perf_counter() - t:.2f}s") Http request using single thread (28.53s)
  14. v 제목
 이름 import requests from concurrent.futures import ThreadPoolExecutor from

    time import perf_counter buffer_size = 1024 def download(url): response = requests.get(url, stream=True) filename = url.split("/")[-1] with open(filename, "wb") as f: for data in response.iter_content(buffer_size): f.write(data) if __name__ == "__main__": urls = [f"https://2023.pycon.kr/session/{i}" for i in range(20)] * 20 t = perf_counter() with ThreadPoolExecutor(max_workers=32) as pool: pool.map(download, urls) print(f"Time took: {perf_counter() - t:.2f}s") Http request using multi thread (2.56s | 91.1% ↓)
  15. v 제목
 이름 Http request using multi thread (2.56s |

    91.1% ↓) import requests from concurrent.futures import ThreadPoolExecutor from time import perf_counter buffer_size = 1024 def download(url): response = requests.get(url, stream=True) filename = url.split("/")[-1] with open(filename, "wb") as f: for data in response.iter_content(buffer_size): f.write(data) if __name__ == "__main__": urls = [f"https://2023.pycon.kr/session/{i}" for i in range(20)] * 20 t = perf_counter() with ThreadPoolExecutor(max_workers=32) as pool: pool.map(download, urls) print(f"Time took: {perf_counter() - t:.2f}s")
  16. v 제목
 이름 RefCount a = [10] b = a

    c = a [10] a b c 0x31a65b652
  17. v 제목
 이름 import sys a = [10] sys.getrefcount(a) 2

    b = a sys.getrefcount(a) 3 c = a sys.getrefcount(a) 4 c = None sys.getrefcount(a) 3 b = None sys.getrefcount(a) 2 RefCount in code
  18. v 제목
 이름 Race Condition LOAD INC INC LOAD STORE

    STORE T1 T2 Idle Idle int 0 int 1 int 1
  19. v 제목
 이름 _thread static PyObject * thread_PyThread_start_new_thread(PyObject *self, PyObject

    *fargs) { PyObject *func, *args, *keyw = NULL; struct bootstate *boot; unsigned long ident; if (!PyArg_UnpackTuple(fargs, "start_new_thread", 2, 3, &func, &args, &keyw)) return NULL; ... boot = PyMem_NEW(struct bootstate, 1); if (boot == NULL) return PyErr_NoMemory(); boot->interp = PyThreadState_Get()->interp; boot->func = func; boot->args = args; boot->keyw = keyw; Py_INCREF(func); Py_INCREF(args); Py_XINCREF(keyw); ... ident = PyThread_start_new_thread(t_bootstrap, boot); ... return PyLong_FromUnsignedLong(ident); }
  20. v 제목
 이름 PyThread_start_new_thread PyThread_start_new_thread(void (*func)(void *), void *arg) {

    pthread_t th; int err = 0; pthread_attr_t attrs; (void) pthread_attr_init(&attrs); (void) pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); ... err = pthread_create(&th, &attrs, (void* (*)(void *))func, arg); pthread_attr_destroy(&attrs); if (err != 0) return PYTHREAD_INVALID_THREAD_ID; return (long) th; }
  21. v 제목
 이름 gcmodule.c / collect() static Py_ssize_t collect(Py_ssize_t generation,

    Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, int nofail) { PyGC_Head *young; PyGC_Head *unreachable = NULL; ... update_refs(young); subtract_refs(young); move_unreachable(young, unreachable); ... m += handle_weakrefs(unreachable, old); delete_garbage(unreachable, old); ... }
  22. v 제목
 이름 v Blocking / Non-blocking / Sync /

    Async Read / Write I/O Multiplexing (select / poll) Read / Write (Polling) Async I/O Blocking Non-blocking Sync Async
  23. v 제목
 이름 import asyncio async def hello(): print("Hello,") await

    asyncio.sleep(1) print("PyCon Korea!") async def main(): await asyncio.gather(hello(), hello()) asyncio.run(main()) asyncio.sleep(1)
  24. v 제목
 이름 CPython Lib => sleep async def sleep(delay,

    result=None): ... loop = events.get_running_loop() future = loop.create_future() h = loop.call_later(delay, futures._set_result_unless_cancelled, future, result) try: return await future finally: h.cancel()
  25. v 제목
 이름 CPython Lib => call_later def call_later(self, delay,

    callback, *args, context=None): if delay is None: raise TypeError('delay must not be None') timer = self.call_at(self.time() + delay, callback, *args, context=context) if timer._source_traceback: del timer._source_traceback[-1] return timer
  26. v 제목
 이름 CPython Lib => call_at def call_at(self, when,

    callback, *args, context=None): if when is None: raise TypeError("when cannot be None") self._check_closed() if self._debug: self._check_thread() self._check_callback(callback, 'call_at') timer = events.TimerHandle(when, callback, args, self, context) if timer._source_traceback: del timer._source_traceback[-1] heapq.heappush(self._scheduled, timer) timer._scheduled = True return timer
  27. v 제목
 이름 aiohttp import aiohttp import asyncio async def

    main(): async with aiohttp.ClientSession() as session: pycon_kr_program_url = "https://2023.pycon.kr/session/37" async with session.get(pycon_kr_program_url) as res: program = await res.text() print(program) asyncio.run(main())
  28. v 제목
 이름 aiohttp => client => get def get(

    self, url: StrOrURL, *, allow_redirects: bool = True, **kwargs: Any ) -> "_RequestContextManager": """Perform HTTP GET request.""" return _RequestContextManager( self._request(hdrs.METH_GET, url, allow_redirects=allow_redirects, **kwargs) )
  29. v 제목
 이름 aiohttp => client_reqrep => text async def

    text(self, encoding: Optional[str] = None, errors: str = "strict") -> str: """Read response payload and decode.""" if self._body is None: await self.read() if encoding is None: encoding = self.get_encoding()
  30. v 제목
 이름 aiohttp => client_reqrep => read async def

    read(self) -> bytes: if self._body is None: try: self._body = await self.content.read() for trace in self._traces: await trace.send_response_chunk_received( self.method, self.url, self._body ) ... return self._body
  31. v 제목
 이름 aiohttp => client_reqrep => content class ClientResponse(HeadersMixin):

    ... # type: ignore[assignment] # Payload stream content: StreamReader = None
  32. v 제목
 이름 v Streams / Protocol / Transport Streams

    Transport Protocol High-level Low-level
  33. v 제목
 이름 IO Multiplexing User Event Loop Task IO

    Dest Requests Call Resume Resume Suspend Suspend send recv recv Response Response
  34. v 제목
 이름 v File Descriptor fd flags file ptr

    file offset status flags inode ptr file type file locks ... fd flags file ptr fd 0 fd 1 fd N fd 0 fd 1 0 11 26 56 70 88 64 199 562 1021 3084 5120 Process A Process B Open file table (OS-level) Inode table (OS-level)
  35. v 제목
 이름 static PyObject * select_default_selector(PyObject *self) { #ifdef

    HAVE_EPOLL return (PyObject *)&select_epoll_Type; #elif defined(HAVE_KQUEUE) return (PyObject *)&select_kqueue_Type; #elif defined(HAVE_DEV_POLL) return (PyObject *)&select_devpoll_Type; #elif defined(HAVE_POLL) return (PyObject *)&select_poll_Type; #else return (PyObject *)&select_select_Type; #endif } selectmodule / scheduler
  36. v 제목
 이름 def _run_once(self): ... end_time = self.time() +

    self._clock_resolution while self._scheduled and self._scheduled[0]._when < end_time: handle = heapq.heappop(self._scheduled) if handle._cancelled: continue handle._run() ... if self._ready or events: self._process_events(event_list) ... BaseEventLoop
  37. v 제목
 이름 def _process_events(self, event_list): for key, mask in

    event_list: fileobj = key.fileobj if mask & selectors.EVENT_READ: self._readers[fileobj].cancel() self._process_reader(fileobj) if mask & selectors.EVENT_WRITE: self._writers[fileobj].cancel() self._process_writer(fileobj) _process_events