Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Pycon Russia 2016. Осторожно, DSL!

Pycon Russia 2016. Осторожно, DSL!

Каждый разработчик рано или поздно сталкивается с предметно-ориентированными языками (DSL). Мы разберемся, зачем же нам нужны DSL и какие проблемы они нам помогают решать. Поймем, в каких случаях нам стоит разрабатывать свой язык, а в каких — использовать уже существующий. Попробуем провести грань и решить, где у нас просто библиотека, а где — предметно-ориентированный язык. Придумаем свой DSL и сравним различные подходы к работе с ним в Python. Увидим, как работают лексический и синтаксический анализаторы. Обязательно поговорим про то, как облегчить жизнь пользователям нашего языка. Как сделать информативными сообщения об ошибках? Как тестировать сценарии, написанные на нашем языке? На эти вопросы мы сможем дать ответ.

Ivan Tsyganov

July 07, 2016
Tweet

More Decks by Ivan Tsyganov

Other Decks in Programming

Transcript

  1. version: '2' services: db: image: postgres web: build: . command:

    python manage.py runserver 0.0.0.0:8000 volumes: - .:/code ports: - "8000:8000" depends_on: - db DOCKER COMPOSE
  2. LOGROTATE compress "/var/log/httpd/access.log" /var/log/httpd/error.log { rotate 5 size=100k sharedscripts nocompress

    postrotate /sbin/killall -HUP httpd endscript } /var/log/messages { rotate 5 weekly postrotate /sbin/killall -HUP syslogd endscript }
  3. config = {
 'compress': True,
 ('/var/log/nginx/access.log', '/var/log/nginx/error.log'): {
 'rotate': 5,


    'size': 102400, 'compress': False,
 'sharedscripts': True,
 'postrotate': [
 '/sbin/killall -HUP syslogd'
 ]
 }
 }
 compress "/var/log/httpd/access.log" /var/log/httpd/error.log { rotate 5 size=100k sharedscripts nocompress postrotate /sbin/killall -HUP httpd endscript }
  4. config = {
 'compress': True,
 ('/var/log/nginx/access.log', '/var/log/nginx/error.log'): {
 'rotate': 5,


    'size': 102400, 'compress': False,
 'sharedscripts': True,
 'postrotate': [
 '/sbin/killall -HUP syslogd'
 ]
 }
 }
 compress "/var/log/httpd/access.log" /var/log/httpd/error.log { rotate 5 size=100k sharedscripts nocompress postrotate /sbin/killall -HUP httpd endscript } size=100k 'size': 102400,
  5. config = {
 'compress': True,
 ('/var/log/nginx/access.log', '/var/log/nginx/error.log'): {
 'rotate': 5,


    'size': 102400, 'postrotate': [
 '/sbin/killall -HUP syslogd'
 ]
 }
 }
 'size': '100 KB',
  6. config = {
 'compress': True,
 ('/var/log/nginx/access.log', '/var/log/nginx/error.log'): {
 'rotate': 5,


    'size': 102400, 'postrotate': [
 '/sbin/killall -HUP syslogd'
 ]
 }
 }
 'size': '100 KB', def get_size(self, value):
 size_value, _, size_unit = value.partition(' ')
 return self.units_to_bytes(size_value, size_unit)
  7. config = {
 'compress': True,
 ('/var/log/nginx/access.log', '/var/log/nginx/error.log'): {
 'rotate': 5,


    'size': 102400, 'postrotate': [
 '/sbin/killall -HUP syslogd'
 ]
 }
 }
 'size': '1MB + 100KB',
  8. config = {
 'compress': True,
 ('/var/log/nginx/access.log', '/var/log/nginx/error.log'): {
 'rotate': 5,


    'size': 102400, 'postrotate': [
 '/sbin/killall -HUP syslogd'
 ]
 }
 }
 'size': '1MB + 100KB', parse_size = re.compile(
 pattern=r'''
 (?P<Value>\d+)\s*
 (?P<ValueUnit>KB|MB|GB)?\s*
 ((?P<Operator>[-+/*])\s*)?
 (
 (?P<Delta>\d+)\s*
 (?P<DeltaUnit>KB|MB|GB)?
 )? ''',
 flags=re.IGNORECASE | re.VERBOSE
 ).search
  9. config = {
 'compress': True,
 ('/var/log/nginx/access.log', '/var/log/nginx/error.log'): {
 'rotate': 5,


    'size': 102400, 'postrotate': [
 '/sbin/killall -HUP syslogd'
 ]
 }
 }
 'size': '100MB - 2 * (1MB + 100KB)',
  10. Some people, when confronted with a problem, think "I know,

    I’ll use regular expressions." Now they have two problems. — Jamie Zawinski
  11. – Мартин Фаулер Предметно-ориентированный язык — это язык программирования с

    ограниченными выразительными возможностями, ориентированный на некую конкретную предметную область
  12. ✤ SQL ✤ REGEXP ✤ TeX/LaTeX Виды DSL DSL Внутренние

    Внешние ✤ PonyORM ✤ WTForm ✤ Django models
  13. Что будет дальше? ✤ Внутренние DSL ✤ Внешние DSL ✤

    Инструменты для создания анализаторов
  14. GB = lambda x: x*2**30
 MB = lambda x: x*2**20


    KB = lambda x: x*2**10 GB = 2**30
 MB = 2**20
 KB = 2**10 config = {
 ('/var/log/nginx/access.log',): {
 'size': 100*MB - 2 * (1*MB - 100*KB) }, ('/var/log/nginx/error.log',): {
 'size': 100*MB - 2 * (1*MB - 100*KB) }
 }
 'size': MB(100) - 2*(MB(1) + KB(100)) 'size': 100*MB - 2 * (1*MB - 100*KB)
  15. compress: true
 rules:
 - files: [/var/log/nginx/access.log, /var/log/nginx/error.log]
 rotate: 5
 size:

    100MB - 2 * (1MB + 100KB)
 postrotate: [/sbin/killall -HUP syslogd]
 config = {
 'compress': True,
 ('/var/log/nginx/access.log', '/var/log/nginx/error.log'): {
 'rotate': 5,
 'size': 100*MB - 2 * (1*MB - 100*KB) 'postrotate': [
 '/sbin/killall -HUP syslogd'
 ]
 }
 }

  16. ply.lex Type Value PLUS + MINUS - MUL * DIV

    / UNIT GB|MB|KB|B DIGIT \d+ LPAREN ( RPAREN )
  17. ply.lex 2 * (1KB + 1KB) DIGIT = 2 MUL

    = * LPAREN = ( DIGIT = 1 UNIT = KB PLUS = + DIGIT = 1 UNIT = KB RPAREN = )
  18. ply.lex 1KB1KB-MB DIGIT = 1 DIGIT = 1 UNIT =

    KB MINUS = - UNIT = MB UNIT = KB
  19. ply.yacc expression : expression PLUS expression
 | expression MINUS expression


    | expression MUL expression
 | expression DIV expression expression : LPAREN expression RPAREN expression : DIGIT UNIT expression : DIGIT
  20. ply.yacc def p_expression(self, p):
 '''
 expression : expression PLUS expression


    | expression MINUS expression
 | expression MUL expression
 | expression DIV expression
 '''
 if p[2] == '+': p[0] = p[1] + p[3]
 if p[2] == '-': p[0] = p[1] - p[3]
 if p[2] == '*': p[0] = p[1] * p[3]
 if p[2] == '/': p[0] = p[1] / p[3]
  21. ply.yacc precedence = (
 ('left', 'PLUS', 'MINUS'),
 ('left', 'MUL', 'DIV'),


    ) def p_expression(self, p):
 '''
 expression : expression PLUS expression
 | expression MINUS expression
 | expression MUL expression
 | expression DIV expression
 '''
 if p[2] == '+': p[0] = p[1] + p[3]
 if p[2] == '-': p[0] = p[1] - p[3]
 if p[2] == '*': p[0] = p[1] * p[3]
 if p[2] == '/': p[0] = p[1] / p[3]
  22. def tokenize(str):
 specs = [
 ('Spaces', (r'[ \s\t\r\n]+',)),
 
 ('PLUS',

    (r'\+',)),
 ('MINUS', (r'-',)),
 ('MUL', (r'\*',)),
 ('DIV', (r'/',)),
 ('UNIT', (r'GB|MB|KB|B',)),
 ('DIGIT', (r'\d+',)),
 ('LPAREN', (r'\(',)),
 ('RPAREN', (r'\)',)),
 ]
 return list(filter(
 lambda t: t.type != 'Spaces',
 (t for t in make_tokenizer(specs)(str))
 )) funcparserlib
  23. number = value_of('DIGIT') >> int
 unit = number + value_of('UNIT')

    >> to_bytes
 operand = unit | number
 
 makeop = lambda s, f: skip_token(s) >> const(f)
 add = makeop('PLUS', operator.add)
 sub = makeop('MINUS', operator.sub)
 mul = makeop('MUL', operator.mul)
 div = makeop('DIV', operator.floordiv) 
 mul_op = mul | div
 add_op = add | sub funcparserlib
  24. primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr +

    skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib
  25. primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr +

    skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib 2+2*2
  26. primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr +

    skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib 2+2*2 maybe(expr)
  27. primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr +

    skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib 2+2*2 term
  28. primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr +

    skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib 2+2*2 primary
  29. primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr +

    skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib 2+2*2 2 operand
  30. primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr +

    skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib 2+2*2 2 many(mul_op + primary)
  31. primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr +

    skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib many(add_op + term) 2+2*2 2+
  32. primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr +

    skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib primary 2+2*2 2+
  33. primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr +

    skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib operand 2+2*2 2+2
  34. primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr +

    skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib 2+2*2 2+2* many(mul_op + primary)
  35. primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr +

    skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib operand 2+2*2 2+2*2
  36. primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr +

    skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib eval_expr 2+2*2 2+2*2
  37. 2+4 primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr

    + skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib eval_expr
  38. 6 primary = with_forward_decls(
 lambda: operand | (skip_token('LPAREN') + expr

    + skip_token('RPAREN'))
 )
 
 term = primary + many(mul_op + primary) >> eval_expr
 expr = term + many(add_op + term) >> eval_expr
 parser = maybe(expr) funcparserlib maybe(expr)
  39. funcparserlib Компактный Гибкий Для любителей функционального программирования :) Многое приходится

    делать руками Для любителей функционального программирования :)
  40. pyparsing plusop = oneOf('+ -')
 multop = oneOf('* /')
 


    digit = Word(nums)
 unit = digit + oneOf('GB MB KB') operand = unit | digit
 
 parser = Forward()
 primary = operand | Literal('(') + parser + Literal(')')
 term = (primary + ZeroOrMore(multop + primary)).setParseAction(eval_expr)
 expr = (term + ZeroOrMore(plusop + term)).setParseAction(eval_expr)
 parser << Optional(expr)

  41. pyparsing plusop = oneOf('+ -')
 multop = oneOf('* /')
 digit

    = Word(nums)
 unit = digit + oneOf('GB MB KB') operand = unit | digit 
 parser = operatorPrecedence(
 operand, [
 (multop, 2, opAssoc.LEFT, calculate),
 (plusop, 2, opAssoc.LEFT, calculate)
 ]
 )
  42. Сообщения об ошибках Traceback (most recent call last): File "size_parser/on_ply.py",

    line 100, in parse return parser.parse(string) File "size_parser/on_ply.py", line 94, in parse p = self._parser.parse(data, debug=self._debug) File ".env/site-packages/ply/yacc.py", line 331, in parse return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) File ".env/site-packages/ply/yacc.py", line 1049, in parseopt_notrack lookahead = get_token() # Get the next token File ".env/site-packages/ply/lex.py", line 396, in token raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:]) ply.lex.LexError: Illegal character '_' at index 5
  43. Traceback (most recent call last): File "size_parser/on_ply.py", line 100, in

    parse return parser.parse(string) File "size_parser/on_ply.py", line 94, in parse p = self._parser.parse(data, debug=self._debug) File ".env/site-packages/ply/yacc.py", line 331, in parse return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) File ".env/site-packages/ply/yacc.py", line 1049, in parseopt_notrack lookahead = get_token() # Get the next token File ".env/site-packages/ply/lex.py", line 396, in token raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:]) ply.lex.LexError: Illegal character '_' at index 5 Сообщения об ошибках
  44. ply 
 def t_error(self, t):
 raise ParserException(t, self.source)
 
 def

    p_error(self, p):
 raise ParserException(p, self.source) >>> parse('1MB+_GB-100KB') Unexpected "_GB-100KB" at position 4: 1MB+_GB-100KB ^^^
  45. Что же выбрать? funcparserlib pyparsing ✤ Хочу легко все описать

    ✤ Быстродействие не главное ✤ Люблю функциональное программирование ✤ Быстродействие не главное
  46. Что же выбрать? PLY funcparserlib pyparsing ✤ Хочу как в

    учебнике ✤ Скорость работы — главное! ✤ Хочу легко все описать ✤ Быстродействие не главное ✤ Люблю функциональное программирование ✤ Быстродействие не главное
  47. И что в итоге? ✤ Для простых задач попробуйте: ✤

    Средства самого языка ✤ Регулярные выражения ✤ Если задача сложная: ✤ Внутренние DSL ✤ YAML, JSON, XML, … ✤ Внешние DSL