
2017 Celery Introduction

Short introduction to Celery given in an internal work discussion

Wibowo Arindrarto

May 22, 2017


Transcript

  1. “… an asynchronous task queue based on distributed message
     passing, focused on real-time operation with support for scheduling”
  2. Use Cases

     Long-Running Tasks
       ⇒ web service for VCF annotation
       ⇒ web service for audio conversion
     Error-Prone Tasks
       ⇒ microservices messaging
       ⇒ HTTP requests
     Periodic Tasks
       ⇒ daily email digest
       ⇒ news site scraping
  3. $ cat tasks.py
     from celery import Celery

     cl = Celery('tasks', broker='...', backend='...')

     @cl.task
     def add(x, y):
         return x + y

     $ celery worker -A tasks
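     The broker and backend URLs are elided on the slide; here is a
     runnable sketch, assuming a local Redis instance serves as both
     (any supported broker such as RabbitMQ works the same way):

     # tasks.py, a minimal sketch; the Redis URLs are an assumption
     from celery import Celery

     cl = Celery('tasks',
                 broker='redis://localhost:6379/0',
                 backend='redis://localhost:6379/1')

     @cl.task
     def add(x, y):
         return x + y

     Start the worker with $ celery worker -A tasks and the task can be
     invoked from any process that can reach the broker.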
  4. $ python -i tasks.py

     # functions are still usable as regular
     # Python functions
     >>> add(3, 5)
     8

     (tasks.py and the worker invocation as in slide 3)
  5. # ... but now you can async-call them
     >>> task_result = add.delay(3, 5)
     >>> task_result
     <AsyncResult: .. >
     >>> task_result.ready()
     False  # execution ongoing

     (continues the python -i tasks.py session from slide 4)
  6. # ... and if you define a results backend
     >>> task_result.get()
     8

     (continues the session from slide 5)
  7. # delay() is in fact a shortcut
     >>> add.delay(3, 5)
     <AsyncResult: .. >
     >>> add.apply_async(args=(3, 5))
     <AsyncResult: .. >
  8. # apply_async() is the real deal
     >>> add.apply_async(args=(3, 5), countdown=10, expires=70)
     <AsyncResult: .. >
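     countdown and expires above are in seconds: start no earlier than
     10 s from now, and discard the task if it has not run within 70 s.
     Both options also accept absolute datetimes; a sketch, with
     made-up delays:

     from datetime import datetime, timedelta, timezone

     # run about a minute from now; drop the task if it is
     # still queued five minutes after that
     run_at = datetime.now(timezone.utc) + timedelta(minutes=1)
     add.apply_async(args=(3, 5), eta=run_at,
                     expires=run_at + timedelta(minutes=5))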
  9. # let's talk about signatures
     >>> sig = add.s(3, 5)
     >>> sig
     tasks.add(3, 5)
     >>> sig()
     8
  10. # they're like partial function applications
      >>> sig = add.s(3)
      >>> sig
      tasks.add(3)
      >>> sig()
      ...
      TypeError: add() missing 1 required positional argument: 'y'
      >>> sig(5)
      8
      >>> sig(10)
      13
  11. # ... but with the added Celery goodness
      >>> add.s(3).delay(5)
      <AsyncResult: .. >
      >>> add.s(3).set(countdown=10).delay(5)
      <AsyncResult: .. >
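      Signatures also come in an immutable flavor, add.si(...), which
      ignores any result a parent task would prepend; this matters in
      the chains coming up next. A quick sketch:

      # si() freezes the arguments: nothing gets prepended when
      # this signature runs as a step inside a chain
      locked = add.si(3, 5)
      locked.delay()   # always computes 3 + 5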
  12. # signatures allow complex configurations
      >>> from celery import chain, group
  13. # we can bind arguments to functions
      # without executing them
      >>> step1 = mul.s(3, 4)
      >>> step1
      tasks.mul(3, 4)

      (tasks.py now also defines a mul task:)

      @cl.task
      def mul(x, y):
          return x * y
  14. # ... which allows us to combine tasks
      >>> step2 = chain(add.s(1, 2), mul.s(3))
      >>> step2
      tasks.add(1, 2) | mul(3)
  15. # various combinations are possible
      >>> step3 = chain(group(step1, step2),
      ...               aggr.s(func=add))
      >>> step3
      %tasks.aggr(...)
      >>> result = step3.delay()
      >>> result.get()

      (tasks.py grows an aggregation task:)

      from functools import reduce

      @cl.task
      def aggr(numbers, func=None):
          return reduce(func, numbers)
  16. >>> result = step3.delay()
      >>> result.get()
      21
      # why 21: the group yields [mul(3, 4), add(1, 2) | mul(3)]
      # = [12, 9]; aggr then reduces with add: 12 + 9 = 21
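      A chain that starts with a group, like step3, is what Celery
      calls a chord: the group runs in parallel and the callback
      receives the list of results. The same computation written as an
      explicit chord (step1, step2 and aggr as defined above):

      from celery import chord

      # the header signatures run in parallel; aggr gets their
      # results as a single list
      result = chord([step1, step2])(aggr.s(func=add))
      result.get()   # 21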
  17. Use Cases

      Long-Running Tasks
        ⇒ web service for VCF annotation
        ⇒ web service for audio conversion
      Error-Prone Tasks
        ⇒ microservices messaging
        ⇒ HTTP requests
  18. Just Keep Trying™

      import requests
      from celery import Celery

      cl = Celery('tasks', broker='..', backend='..')

      @cl.task
      def query_url(url):
          r = requests.get(url)
          return r.json()
  19. Just Keep Trying™

      import requests
      from requests.exceptions import ConnectTimeout
      from celery import Celery

      cl = Celery('tasks', broker='..', backend='..')

      @cl.task(bind=True)
      def query_url(self, url):
          try:
              r = requests.get(url)
          except ConnectTimeout as exc:
              self.retry(exc=exc)
          return r.json()
  20. Just Keep Trying™

      @cl.task(bind=True, max_retries=4)
      def query_url(self, url):
          try:
              r = requests.get(url)
          except ConnectTimeout as exc:
              if self.request.retries > self.max_retries:
                  raise
              self.retry(exc=exc)
          return r.json()

      (imports and app setup as in slide 19)
  21. Just Keep Trying™

      @cl.task(bind=True, max_retries=4)
      def query_url(self, url):
          try:
              r = requests.get(url)
          except ConnectTimeout as exc:
              if self.request.retries > self.max_retries:
                  raise
              self.retry(exc=exc,
                         countdown=5 * self.request.retries)
          return r.json()

      (imports and app setup as in slide 19)
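      Celery 4 can also generate this retry boilerplate for you with
      autoretry_for; a sketch of the same task using it (the
      retry_kwargs values are assumptions):

      # equivalent behavior with less code (Celery 4.0+): retry
      # automatically on ConnectTimeout, at most 4 times, 5 s apart
      @cl.task(autoretry_for=(ConnectTimeout,),
               retry_kwargs={'max_retries': 4, 'countdown': 5})
      def query_url(url):
          return requests.get(url).json()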
  22. Use Cases

      Long-Running Tasks
        ⇒ web service for VCF annotation
        ⇒ web service for audio conversion
      Error-Prone Tasks
        ⇒ microservices messaging
        ⇒ HTTP requests
      Periodic Tasks
        ⇒ daily email digest
        ⇒ news site scraping
  23. Hold on, it's Python

      from celery import Celery

      cl = Celery('tasks', broker='..', backend='..')

      @cl.task
      def show(arg):
          print(arg)
  24. Hold on, it's Python, but like cron

      from celery import Celery

      cl = Celery('tasks', broker='..', backend='..')

      @cl.on_after_configure.connect
      def setup_periodic_tasks(sender, **kwargs):
          # calls show('hello') every 10 seconds
          sender.add_periodic_task(10.0, show.s('hello'),
                                   name='add every 10 seconds')

      @cl.task
      def show(arg):
          print(arg)
  25. Hold on, it's Python, but like cron

      @cl.on_after_configure.connect
      def setup_periodic_tasks(sender, **kwargs):
          # calls show('hello') every 10 seconds
          sender.add_periodic_task(10.0, show.s('hello'),
                                   name='add every 10 seconds')

          # calls show('world') every 30 seconds
          sender.add_periodic_task(30.0, show.s('world'), expires=10)

      (app setup and the show task as in slide 24)
  26. Hold on, it's Python, but like cron

      from celery.schedules import crontab

      @cl.on_after_configure.connect
      def setup_periodic_tasks(sender, **kwargs):
          # calls show('hello') every 10 seconds
          sender.add_periodic_task(10.0, show.s('hello'),
                                   name='add every 10 seconds')

          # calls show('world') every 30 seconds
          sender.add_periodic_task(30.0, show.s('world'), expires=10)

          # executes every Friday at 17:30
          sender.add_periodic_task(
              crontab(hour=17, minute=30, day_of_week=5),
              show.s('Time to hit the bar!')
          )

      (app setup and the show task as in slide 24)
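      Periodic tasks are dispatched by a separate scheduler process,
      celery beat; workers only execute what beat enqueues. Assuming
      the module above is tasks.py:

      $ celery beat -A tasks         # run the scheduler
      $ celery worker -A tasks       # run a worker in another shell
      $ celery worker -A tasks -B    # or embed beat in the worker
                                     # (development only)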
  27. Flask-based Web Service

      ⇒ runs a pipeline on a sample
      ⇒ sample: query from another service
      ⇒ pipeline: db query + run directory setup

      Do we have time to make a web app? (yeah, we do)
  28. demo/web.py

      from flask import Flask, request
      from .tasks import *

      web = Flask('demo')

      @web.route('/sample', methods=['POST'])
      def process_sample():
          sample_id = request.args.get('id')
          p_name = request.args.get('pipeline')
          sample = get_sample(sample_id)
          pipeline = prep_pipeline(p_name)
          results = proc((pipeline, sample))
          return results, 200

      demo/tasks.py

      import subprocess
      from in_house_lib import sample_from_db, get_and_check

      def get_sample(sample_id):
          sample = sample_from_db(sample_id)
          return sample

      def prep_pipeline(p_name):
          pipeline = get_and_check(p_name)
          return pipeline

      def proc(pipeline_and_sample):
          pipeline, sample = pipeline_and_sample
          results = subprocess.call(...)
          return results
  29. demo/tasks.py

      from celery import Celery

      cl = Celery('demo.tasks')
      cl.config_from_object('demo.config')

      @cl.task
      def get_sample(sample_id):
          sample = sample_from_db(sample_id)
          return sample

      @cl.task
      def prep_pipeline(p_name):
          pipeline = get_and_check(p_name)
          return pipeline

      @cl.task
      def proc(pipeline_and_sample):
          pipeline, sample = pipeline_and_sample
          results = subprocess.call(...)
          return results

      demo/config.py

      broker_url = 'pyamqp://guest@localhost'
      result_backend = 'db+sqlite:///res.sqlite'
      task_serializer = 'json'
  30. demo/tasks.py unchanged; demo/config.py gains queues and routes:

      from kombu import Queue

      broker_url = 'pyamqp://guest@localhost'
      result_backend = 'db+sqlite:///res.sqlite'
      task_serializer = 'json'

      task_queues = (
          Queue('sq'),
          Queue('pq'),
          Queue('procq'),
      )

      task_routes = {
          'demo.tasks.get_sample': {'queue': 'sq'},
          'demo.tasks.prep_pipeline': {'queue': 'pq'},
          'demo.tasks.proc': {'queue': 'procq'},
      }
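      With the routes in place, dedicated workers can subscribe to
      specific queues via -Q, e.g. giving the heavyweight proc tasks
      their own pool (queue names follow the config above):

      $ celery worker -A demo.tasks -Q sq,pq    # db + prep tasks
      $ celery worker -A demo.tasks -Q procq    # pipeline runs only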
  31. demo/web.py (still the synchronous version from slide 28) shown
      side by side with the Celery version of demo/tasks.py from
      slide 29; the view has not yet been updated to call the tasks
      asynchronously.
  32. demo/web.py

      from flask import Flask, request
      from celery import chain, group
      from .tasks import *

      web = Flask('demo')

      @web.route('/sample', methods=['POST'])
      def process_sample():
          sample_id = request.args.get('id')
          p_name = request.args.get('pipeline')
          # group order matches proc's (pipeline, sample) unpacking
          task_set = chain(
              group(prep_pipeline.s(p_name), get_sample.s(sample_id)),
              proc.s())
          result = task_set.delay()
          return result.id, 200

      @web.route('/tasks/<task_id>')
      def retrieve_task_status(task_id):
          ...

      (demo/tasks.py as in slide 29)
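      One way the status endpoint left as "..." might look; a
      hypothetical sketch, not from the slides, using
      celery.result.AsyncResult to look the task up by id:

      from flask import jsonify
      from celery.result import AsyncResult

      @web.route('/tasks/<task_id>')
      def retrieve_task_status(task_id):
          # ask the result backend for the task's current state
          # (cl is the Celery app imported from demo.tasks)
          result = AsyncResult(task_id, app=cl)
          return jsonify(id=task_id, state=result.state), 200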