Slide 1

Slide 1 text

Task Execution with Celery — Developer Work Discussion | 22 • 05 • 2017

Slide 2

Slide 2 text

“… an asynchronous task queue based on distributed message passing, focused on real-time operation with support for scheduling”

Slide 3

Slide 3 text

message broker celery workers results backend client codebase

Slide 4

Slide 4 text

message broker celery workers results backend json / yaml msgpack pickle client codebase etc.

Slide 5

Slide 5 text

Use Cases Long-Running Tasks ⇒ web service for VCF annotation ⇒ web service audio conversion Error-Prone Tasks ⇒ microservices messaging ⇒ http requests Periodic Tasks ⇒ daily email digest ⇒ news site scraping

Slide 6

Slide 6 text

Use Cases Long-Running Tasks ⇒ web service for VCF annotation ⇒ web service audio conversion

Slide 7

Slide 7 text

$ cat tasks.py def add(x, y): return x + y

Slide 8

Slide 8 text

$ cat tasks.py $ celery worker -A tasks from celery import Celery cl = Celery('tasks', broker='...', backend='...') @cl.task def add(x, y): return x + y

Slide 9

Slide 9 text

# functions are still usable as regular # Python functions ... >>> add(3, 5) 8 $ python -i tasks.py $ cat tasks.py $ celery worker -A tasks from celery import Celery cl = Celery('tasks', broker='...', backend='...') @cl.task def add(x, y): return x + y

Slide 10

Slide 10 text

# functions are still usable as regular # Python functions ... >>> add(3, 5) 8 # ... but now you can async-call them >>> task_result = add.delay(3, 5) >>> task_result >>> task_result.ready() False # execution ongoing $ python -i tasks.py $ cat tasks.py $ celery worker -A tasks from celery import Celery cl = Celery('tasks', broker='...', backend='...') @cl.task def add(x, y): return x + y

Slide 11

Slide 11 text

# functions are still usable as regular # Python functions ... >>> add(3, 5) 8 # ... but now you can async-call them >>> task_result = add.delay(3, 5) >>> task_result >>> task_result.ready() False # execution ongoing # ... and if you define a results backend >>> task_result.get() 8 $ python -i tasks.py $ cat tasks.py $ celery worker -A tasks from celery import Celery cl = Celery('tasks', broker='...', backend='...') @cl.task def add(x, y): return x + y

Slide 12

Slide 12 text

# functions are still usable as regular # Python functions ... >>> add(3, 5) 8 # ... but now you can async-call them >>> task_result = add.delay(3, 5) >>> task_result >>> task_result.ready() False # execution ongoing # ... and if you define a results backend >>> task_result.get() 8 # delay() is in fact a shortcut >>> add.delay(3, 5) >>> add.apply_async(args=(3, 5)) $ python -i tasks.py $ cat tasks.py $ celery worker -A tasks from celery import Celery cl = Celery('tasks', broker='...', backend='...') @cl.task def add(x, y): return x + y

Slide 13

Slide 13 text

# functions are still usable as regular # Python functions ... >>> add(3, 5) 8 # ... but now you can async-call them >>> task_result = add.delay(3, 5) >>> task_result >>> task_result.ready() False # execution ongoing # ... and if you define a results backend >>> task_result.get() 8 # delay() is in fact a shortcut >>> add.delay(3, 5) >>> add.apply_async(args=(3, 5)) # apply_async() is the real deal >>> add.apply_async(args=(3, 5), countdown=10, expires=70) $ python -i tasks.py $ cat tasks.py $ celery worker -A tasks from celery import Celery cl = Celery('tasks', broker='...', backend='...') @cl.task def add(x, y): return x + y

Slide 14

Slide 14 text

# let's talk about signatures >>> sig = add.s(3, 5) >>> sig tasks.add(3, 5) >>> sig() 8 $ python -i tasks.py $ cat tasks.py $ celery worker -A tasks from celery import Celery cl = Celery('tasks', broker='...', backend='...') @cl.task def add(x, y): return x + y

Slide 15

Slide 15 text

# let's talk about signatures >>> sig = add.s(3, 5) >>> sig tasks.add(3, 5) >>> sig() 8 # they're like partial function applications >>> sig = add.s(3) >>> sig tasks.add(3) >>> sig() ... TypeError: add() missing 1 required positional argument: 'y' >>> sig(5) 8 >>> sig(10) 13 $ python -i tasks.py $ cat tasks.py $ celery worker -A tasks from celery import Celery cl = Celery('tasks', broker='...', backend='...') @cl.task def add(x, y): return x + y

Slide 16

Slide 16 text

# let's talk about signatures >>> sig = add.s(3, 5) >>> sig tasks.add(3, 5) >>> sig() 8 # they're like partial function applications >>> sig = add.s(3) >>> sig tasks.add(3) >>> sig() ... TypeError: add() missing 1 required positional argument: 'y' >>> sig(5) 8 >>> sig(10) 13 # ... but with the added Celery goodness >>> add.s(3).delay(5) >>> add.s(3).set(countdown=10).delay(5) $ python -i tasks.py $ cat tasks.py $ celery worker -A tasks from celery import Celery cl = Celery('tasks', broker='...', backend='...') @cl.task def add(x, y): return x + y

Slide 17

Slide 17 text

# signatures allow complex configurations >>> from celery import chain, group $ python -i tasks.py $ cat tasks.py $ celery worker -A tasks from celery import Celery cl = Celery('tasks', broker='...', backend='...') @cl.task def add(x, y): return x + y

Slide 18

Slide 18 text

# signatures allow complex configurations >>> from celery import chain, group # we can bind arguments to functions # without executing them >>> step1 = mul.s(3, 4) >>> step1 tasks.mul(3, 4) $ python -i tasks.py $ cat tasks.py $ celery worker -A tasks from celery import Celery cl = Celery('tasks', broker='...', backend='...') @cl.task def add(x, y): return x + y @cl.task def mul(x, y): return x * y

Slide 19

Slide 19 text

# signatures allow complex configurations >>> from celery import chain, group # we can bind arguments to functions # without executing them >>> step1 = mul.s(3, 4) >>> step1 tasks.mul(3, 4) # ... which allows us to combine tasks >>> step2 = chain(add.s(1, 2), mul.s(3)) >>> step2 tasks.add(1, 2) | mul(3) $ python -i tasks.py $ cat tasks.py $ celery worker -A tasks from celery import Celery cl = Celery('tasks', broker='...', backend='...') @cl.task def add(x, y): return x + y @cl.task def mul(x, y): return x * y

Slide 20

Slide 20 text

# signatures allow complex configurations >>> from celery import chain, group # we can bind arguments to functions # without executing them >>> step1 = mul.s(3, 4) >>> step1 tasks.mul(3, 4) # ... which allows us to combine tasks >>> step2 = chain(add.s(1, 2), mul.s(3)) >>> step2 tasks.add(1, 2) | mul(3) # various combinations are possible >>> step3 = chain(group(step1, step2), ... aggr.s(func=add)) >>> step3 %tasks.aggr(...) >>> result = step3.delay() >>> result.get() $ python -i tasks.py from functools import reduce from celery import Celery cl = Celery('tasks', broker='...', backend='...') @cl.task def add(x, y): return x + y @cl.task def mul(x, y): return x * y @cl.task def aggr(numbers, func=None): return reduce(func, numbers) $ cat tasks.py $ celery worker -A tasks

Slide 21

Slide 21 text

# signatures allow complex configurations >>> from celery import chain, group # we can bind arguments to functions # without executing them >>> step1 = mul.s(3, 4) >>> step1 tasks.mul(3, 4) # ... which allows us to combine tasks >>> step2 = chain(add.s(1, 2), mul.s(3)) >>> step2 tasks.add(1, 2) | mul(3) # various combinations are possible >>> step3 = chain(group(step1, step2), ... aggr.s(func=add)) >>> step3 %tasks.aggr(...) >>> result = step3.delay() >>> result.get() 21 $ python -i tasks.py $ cat tasks.py $ celery worker -A tasks from functools import reduce from celery import Celery cl = Celery('tasks', broker='...', backend='...') @cl.task def add(x, y): return x + y @cl.task def mul(x, y): return x * y @cl.task def aggr(numbers, func=None): return reduce(func, numbers)

Slide 22

Slide 22 text

Use Cases Long-Running Tasks ⇒ web service for VCF annotation ⇒ web service audio conversion Error-Prone Tasks ⇒ microservices messaging ⇒ http requests

Slide 23

Slide 23 text

Just Keep Trying™ import requests from celery import Celery cl = Celery('tasks', broker='..', backend='..') @cl.task def query_url(url): r = requests.get(url) return r.json()

Slide 24

Slide 24 text

Just Keep Trying™ import requests from requests.exceptions import ConnectTimeout from celery import Celery cl = Celery('tasks', broker='..', backend='..') @cl.task(bind=True) def query_url(self, url): try: r = requests.get(url) except ConnectTimeout as exc: self.retry(exc=exc) return r.json()

Slide 25

Slide 25 text

Just Keep Trying™ import requests from requests.exceptions import ConnectTimeout from celery import Celery cl = Celery('tasks', broker='..', backend='..') @cl.task(bind=True, max_retries=4) def query_url(self, url): try: r = requests.get(url) except ConnectTimeout as exc: if self.request.retries > self.max_retries: raise self.retry(exc=exc) return r.json()

Slide 26

Slide 26 text

Just Keep Trying™ import requests from requests.exceptions import ConnectTimeout from celery import Celery cl = Celery('tasks', broker='..', backend='..') @cl.task(bind=True, max_retries=4) def query_url(self, url): try: r = requests.get(url) except ConnectTimeout as exc: if self.request.retries > self.max_retries: raise self.retry(exc=exc, countdown=5 * self.request.retries) return r.json()

Slide 27

Slide 27 text

Use Cases Long-Running Tasks ⇒ web service for VCF annotation ⇒ web service audio conversion Error-Prone Tasks ⇒ microservices messaging ⇒ http requests Periodic Tasks ⇒ daily email digest ⇒ news site scraping

Slide 28

Slide 28 text

Hold on, it's Python from celery import Celery cl = Celery('tasks', broker='..', backend='..') @cl.task def show(arg): print(arg)

Slide 29

Slide 29 text

Hold on, it's Python, but like cron from celery import Celery cl = Celery('tasks', broker='..', backend='..') @cl.on_after_configure.connect def setup_periodic_tasks(sender, **kwargs): # calls show('hello') every 10 seconds sender.add_periodic_task(10.0, show.s('hello'), name='add every 10 seconds') @cl.task def show(arg): print(arg)

Slide 30

Slide 30 text

Hold on, it's Python, but like cron from celery import Celery cl = Celery('tasks', broker='..', backend='..') @cl.on_after_configure.connect def setup_periodic_tasks(sender, **kwargs): # calls show('hello') every 10 seconds sender.add_periodic_task(10.0, show.s('hello'), name='add every 10 seconds') # calls show('world') every 30 seconds sender.add_periodic_task(30.0, show.s('world'), expires=10) @cl.task def show(arg): print(arg)

Slide 31

Slide 31 text

Hold on, it's Python, but like cron from celery import Celery from celery.schedules import crontab cl = Celery('tasks', broker='..', backend='..') @cl.on_after_configure.connect def setup_periodic_tasks(sender, **kwargs): # calls show('hello') every 10 seconds sender.add_periodic_task(10.0, show.s('hello'), name='add every 10 seconds') # calls show('world') every 30 seconds sender.add_periodic_task(30.0, show.s('world'), expires=10) # executes every Friday at 17:30 sender.add_periodic_task( crontab(hour=17, minute=30, day_of_week=5), show.s('Time to hit the bar!') ) @cl.task def show(arg): print(arg)

Slide 32

Slide 32 text

Thanks!

Slide 33

Slide 33 text

Flask-based Web Service ⇒ runs a pipeline on a sample ⇒ sample: query from another service ⇒ pipeline: db query + run directory setup Do we have time to make a web app? (yeah, we do)

Slide 34

Slide 34 text

from flask import Flask, request from .tasks import * web = Flask('demo') @web.route('/sample', methods=['POST']) def process_sample(): sample_id = request.args.get('id') p_name = request.args.get('pipeline') sample = get_sample(sample_id) pipeline = prep_pipeline(p_name) results = proc((pipeline, sample)) return results, 200 demo/web.py import subprocess from in_house_lib import \ sample_from_db, get_and_check def get_sample(sample_id): sample = sample_from_db(sample_id) return sample def prep_pipeline(p_name): pipeline = get_and_check(p_name) return pipeline def proc(pipeline_and_sample): pipeline, sample = pipeline_and_sample results = subprocess.call(...) return results demo/tasks.py

Slide 35

Slide 35 text

from celery import Celery cl = Celery('demo.tasks') cl.config_from_object('config.py') @cl.task def get_sample(sample_id): sample = sample_from_db(sample_id) return sample @cl.task def prep_pipeline(p_name): pipeline = get_and_check(p_name) return pipeline @cl.task def proc(pipeline_and_sample): pipeline, sample = pipeline_and_sample results = subprocess.call(...) return results demo/tasks.py broker_url = 'pyamqp://guest@localhost' result_backend = 'db+sqlite:///res.sqlite' task_serializer = 'json' demo/config.py

Slide 36

Slide 36 text

from celery import Celery cl = Celery('demo.tasks') cl.config_from_object('config.py') @cl.task def get_sample(sample_id): sample = sample_from_db(sample_id) return sample @cl.task def prep_pipeline(p_name): pipeline = get_and_check(p_name) return pipeline @cl.task def proc(pipeline_and_sample): pipeline, sample = pipeline_and_sample results = subprocess.call(...) return results demo/tasks.py demo/config.py from kombu import Queue broker_url = 'pyamqp://guest@localhost' result_backend = 'db+sqlite:///res.sqlite' task_serializer = 'json' task_queues = ( Queue('sq'), Queue('pq'), Queue('procq') ) task_routes = { 'demo.tasks.get_sample': {'queue': 'sq'}, 'demo.tasks.prep_pipeline': {'queue': 'pq'}, 'demo.tasks.proc': {'queue': 'procq'}, }

Slide 37

Slide 37 text

demo/web.py from flask import Flask, request from .tasks import * web = Flask('demo') @web.route('/sample', methods=['POST']) def process_sample(): sample_id = request.args.get('id') p_name = request.args.get('pipeline') sample = get_sample(sample_id) pipeline = prep_pipeline(p_name) results = proc((pipeline, sample)) return results, 200 demo/tasks.py from celery import Celery cl = Celery('demo.tasks') cl.config_from_object('config.py') @cl.task def get_sample(sample_id): sample = sample_from_db(sample_id) return sample @cl.task def prep_pipeline(p_name): pipeline = get_and_check(p_name) return pipeline @cl.task def proc(pipeline_and_sample): pipeline, sample = pipeline_and_sample results = subprocess.call(...) return results

Slide 38

Slide 38 text

demo/web.py from flask import Flask, request from celery import chain, group from .tasks import * web = Flask('demo') @web.route('/sample', methods=['POST']) def process_sample(): sample_id = request.args.get('id') p_name = request.args.get('pipeline') task_set = chain( group(get_sample.s(sample_id), prep_pipeline.s(p_name)), proc.s()) task_set.delay() return task_set.id, 200 @web.route('/tasks/') def retrieve_task_status(): ... demo/tasks.py from celery import Celery cl = Celery('demo.tasks') cl.config_from_object('config.py') @cl.task def get_sample(sample_id): sample = sample_from_db(sample_id) return sample @cl.task def prep_pipeline(p_name): pipeline = get_and_check(p_name) return pipeline @cl.task def proc(pipeline_and_sample): pipeline, sample = pipeline_and_sample results = subprocess.call(...) return results

Slide 39

Slide 39 text

Thanks Again!