Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Clean & fast code with enumerators

Clean & fast code with enumerators

Ruby’s Enumerator class is a powerful tool for writing code that deals with streams of data and events — code that is easy to understand and, at the same time, both concurrent and parallel.

This talk will go into more detail on how to do so, and also show SlowEnumeratorTools, which provides some of the glue code that makes Enumerator nicer to use, and faster to boot.

There will be some overlap with Sergio’s “Understanding Unix pipes with Ruby” talk from the November meetup.

Denis Defreyne

January 11, 2018
Tweet

More Decks by Denis Defreyne

Other Decks in Programming

Transcript

  1. Clean & fast code with Enumerators
    DENIS DEFREYNE RUG::B DECEMBER 7TH, 2017

    View full-size slide

  2. 1. Simple HTTP client

    View full-size slide

  3. {
    "books": [
    {
    "id": "B98312",
    "title": "The Monkey's Raincoat",
    "author": "Oswaldo Berge"
    },
    {
    "id": "B98318",
    "title": "The World, the Flesh and the Devil",
    "author": "Haleigh Thompson"
    }
    ]
    }

    View full-size slide

  4. def fetch_books(base_url)
    url = base_url + '/books'
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    body.fetch('books')
    else
    raise "Unexpected response code: #{response.code}"
    end
    end

    View full-size slide

  5. books = fetch_books('http://localhost:4567')
    books.each do |book|
    p book
    end

    View full-size slide

  6. 2. Paginating HTTP client

    View full-size slide

  7. {
    "books": [
    {
    "id": "B98312",
    "title": "The Monkey's Raincoat",
    "author": "Oswaldo Berge"
    },
    {
    "id": "B98318",
    "title": "The World, the Flesh and the Devil",
    "author": "Haleigh Thompson"
    }
    ],
    "cursor": "B98318"
    }

    View full-size slide

  8. def fetch_books(base_url)
    url = base_url + '/books'
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    body.fetch('books')
    else
    raise "Unexpected response code: #{response.code}"
    end
    end

    View full-size slide

  9. def fetch_books(base_url)
    books = []
    loop do
    url = base_url + '/books'
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    books.concat(body.fetch('books'))
    break
    else
    raise "Unexpected response code: #{response.code}"
    end
    end
    books
    end

    View full-size slide

  10. def fetch_books(base_url)
    books = []
    cursor = nil
    loop do
    url = base_url + '/books'
    url += "?cursor=#{cursor}" if cursor
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    books.concat(body.fetch('books'))
    cursor = body.fetch('cursor')
    break if cursor.nil?
    else
    raise "Unexpected response code: #{response.code}"
    end
    end
    books
    end

    View full-size slide

  11. def fetch_books(base_url)
    books = []
    cursor = nil
    loop do
    url = base_url + '/books'
    url += "?cursor=#{cursor}" if cursor
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    books.concat(body.fetch('books'))
    cursor = body.fetch('cursor')
    break if cursor.nil?
    else
    raise "Unexpected response code: #{response.code}"
    end
    end
    books
    end

    View full-size slide

  12. books = fetch_books('http://localhost:4567')
    books.each do |book|
    p book
    end

    View full-size slide

  13. 3. Refactoring

    View full-size slide

  14. def fetch_books(base_url)
    books = []
    cursor = nil
    loop do
    url = base_url + '/books'
    url += "?cursor=#{cursor}" if cursor
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    books.concat(body.fetch('books'))
    cursor = body.fetch('cursor')
    break if cursor.nil?
    else
    raise "Unexpected response code: #{response.code}"
    end
    end
    books
    end

    View full-size slide

  15. def fetch_books(base_url)

    cursor = nil
    loop do
    url = base_url + '/books'
    url += "?cursor=#{cursor}" if cursor
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    body.fetch('books').each { |b| yield(b) }
    cursor = body.fetch('cursor')
    break if cursor.nil?
    else
    raise "Unexpected response code: #{response.code}"
    end
    end


    end

    View full-size slide

  16. fetch_books('http://localhost:4567') do |book|
    p book
    end

    View full-size slide

  17. 4. More refactoring

    View full-size slide

  18. def fetch_books(base_url)
    cursor = nil
    loop do
    url = base_url + '/books'
    url += "?cursor=#{cursor}" if cursor
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    body.fetch('books').each { |b| yield(b) }
    cursor = body.fetch('cursor')
    break if cursor.nil?
    else
    raise "Unexpected response code: #{response.code}"
    end
    end
    end

    View full-size slide

  19. def fetch(base_url)
    cursor = nil
    loop do
    url = base_url
    url += "?cursor=#{cursor}" if cursor
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    yield(body)
    cursor = body.fetch('cursor')
    break if cursor.nil?
    else
    raise "Unexpected response code: #{response.code}"
    end
    end
    end

    View full-size slide

  20. fetch('http://localhost:4567/books') do |response|
    response.fetch('books').each do |book|
    p book
    end
    end

    View full-size slide

  21. responses = []
    fetch('http://localhost:4567/books') do |response|
    responses << response
    end
    books = responses.map { |r| r.fetch('books') }.flatten
    books.each do |book|
    p book
    end

    View full-size slide

  22. responses = []
    fetch('http://localhost:4567/books') do |response|
    responses << response
    end






    View full-size slide

  23. responses = to_enum(:fetch, 'http://localhost:4567/books')









    View full-size slide

  24. responses = to_enum(:fetch, 'http://localhost:4567/books')

    responses.each do |response|
    p response
    end





    View full-size slide

  25. responses = to_enum(:fetch, 'http://localhost:4567/books')
    books = responses.flat_map { |r| r.fetch('books') }




    View full-size slide

  26. (1..3).map { |i| [i, 10+i] }
    # => [[1, 11], [2, 12], [3, 13]]

    View full-size slide

  27. (1..3).map { |i| [i, 10+i] }
    # => [[1, 11], [2, 12], [3, 13]]
    (1..3).flat_map { |i| [i, 10+i] }
    # => [1, 11, 2, 12, 3, 13]

    View full-size slide

  28. responses = to_enum(:fetch, 'http://localhost:4567/books')
    books = responses.flat_map { |r| r.fetch('books') }
    books.each do |book|
    p book
    end




    View full-size slide

  29. def fetch(base_url)
    return to_enum(__method__, base_url).lazy unless block_given?

    end

    View full-size slide

  30. books =
    fetch('http://localhost:4567/books')
    .flat_map { |r| r.fetch('books') }

    View full-size slide

  31. def fetch(base_url)
    return to_enum(__method__, base_url).lazy unless block_given?
    cursor = nil
    loop do
    url = base_url
    url += "?cursor=#{cursor}" if cursor
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    yield(body)
    cursor = body.fetch('cursor')
    break if cursor.nil?
    else
    raise "Unexpected response code: #{response.code}"
    end
    end
    end

    View full-size slide

  32. db = Database.connect
    books =
    fetch('http://localhost:4567/books')
    .flat_map { |r| r.fetch('books') }


    books.each_slice(50) { |batch| db.store(batch) }

    View full-size slide

  33. Given
    ‣ 200 books
    ‣ HTTP response contains 50 books
    ‣ it takes 1s to fetch a batch of books
    ‣ it takes 1s to store a batch of books


    View full-size slide

  34. Given
    ‣ 200 books
    ‣ HTTP response contains 50 books
    ‣ it takes 1s to fetch a batch of books
    ‣ it takes 1s to store a batch of books

    Question
    How long does this code take to execute?


    3s 4s 5s 8s 16s

    View full-size slide

  35. Given
    ‣ 200 books
    ‣ HTTP response contains 50 books
    ‣ it takes 1s to fetch a batch of books
    ‣ it takes 1s to store a batch of books

    Question
    How long does this code take to execute?


    3s 4s 5s 8s 16s

    View full-size slide

  36. Fetch
    Store
    Fetch
    Store

    View full-size slide

  37. Fetch
    Store
    Fetch
    Store

    View full-size slide

  38. gem 'slow_enumerator_tools'

    View full-size slide

  39. db = Database.connect
    books =
    fetch('http://localhost:4567/books')
    .flat_map { |r| r.fetch('books') }



    books.each_slice(50) { |batch| db.store(batch) }

    View full-size slide

  40. db = Database.connect
    books =
    fetch('http://localhost:4567/books')
    .flat_map { |r| r.fetch('books') }

    books = SlowEnumeratorTools.buffer(books, 50)


    books.each_slice(50) { |batch| db.store(batch) }

    View full-size slide

  41. Given
    ‣ 200 books
    ‣ HTTP response contains 50 books
    ‣ it takes 1s to fetch a batch of books
    ‣ it takes 1s to store a batch of books


    View full-size slide

  42. Given
    ‣ 200 books
    ‣ HTTP response contains 50 books
    ‣ it takes 1s to fetch a batch of books
    ‣ it takes 1s to store a batch of books

    Question
    How long does this code take to execute?


    3s 4s 5s 8s 16s

    View full-size slide

  43. Given
    ‣ 200 books
    ‣ HTTP response contains 50 books
    ‣ it takes 1s to fetch a batch of books
    ‣ it takes 1s to store a batch of books

    Question
    How long does this code take to execute?


    3s 4s 5s 8s 16s

    View full-size slide

  44. db = Database.connect
    books =
    fetch('http://localhost:4567/books')
    .flat_map { |r| r.fetch('books') }

    books = SlowEnumeratorTools.buffer(books, 50)



    books.each_slice(50) { |batch| db.store(batch) }

    View full-size slide

  45. db = Database.connect
    books =
    fetch('http://localhost:4567/books')
    .flat_map { |r| r.fetch('books') }

    books = SlowEnumeratorTools.buffer(books, 50)

    book_batches = SlowEnumeratorTools.batch(books)


    book_batches.each { |batch| db.store(batch) }

    View full-size slide

  46. events = MyEventStream.new('content')
    events.each do |_e|
    system('./rebuild.sh')
    end

    View full-size slide

  47. events = MyEventStream.new('content')
    event_batches = SlowEnumeratorTools.batch(events)
    event_batches.each do |_es|
    system('./rebuild.sh')
    end

    View full-size slide

  48. content_events = MyEventStream.new('content')
    layout_events = MyEventStream.new('layouts')

    View full-size slide

  49. content_events = MyEventStream.new('content')
    layout_events = MyEventStream.new('layouts')
    events = SlowEnumeratorTools.merge(
    [content_events, layout_events])

    View full-size slide

  50. content_events = MyEventStream.new('content')
    layout_events = MyEventStream.new('layouts')
    events = SlowEnumeratorTools.merge(
    [content_events, layout_events])
    event_batches = SlowEnumeratorTools.batch(events)
    event_batches.each do |es|
    system('./rebuild.sh')
    end

    View full-size slide

  51. 10. Everything together

    View full-size slide

  52. db = MyDB.new
    api_a = MyAPI.new(MyHTTPClient.new('example.com'))
    api_b = MyAPI.new(MyHTTPClient.new('example.org'))

    View full-size slide

  53. db = MyDB.new
    api_a = MyAPI.new(MyHTTPClient.new('example.com'))
    api_b = MyAPI.new(MyHTTPClient.new('example.org'))
    articles = SlowEnumeratorTools.merge(
    [api_a.articles, api_b.articles])

    View full-size slide

  54. db = MyDB.new
    api_a = MyAPI.new(MyHTTPClient.new('example.com'))
    api_b = MyAPI.new(MyHTTPClient.new('example.org'))
    articles = SlowEnumeratorTools.merge(
    [api_a.articles, api_b.articles])
    articles = SlowEnumeratorTools.buffer(articles, 200)

    View full-size slide

  55. db = MyDB.new
    api_a = MyAPI.new(MyHTTPClient.new('example.com'))
    api_b = MyAPI.new(MyHTTPClient.new('example.org'))
    articles = SlowEnumeratorTools.merge(
    [api_a.articles, api_b.articles])
    articles = SlowEnumeratorTools.buffer(articles, 200)
    article_batches = SlowEnumeratorTools.batch(articles)

    View full-size slide

  56. db = MyDB.new
    api_a = MyAPI.new(MyHTTPClient.new('example.com'))
    api_b = MyAPI.new(MyHTTPClient.new('example.org'))
    articles = SlowEnumeratorTools.merge(
    [api_a.articles, api_b.articles])
    articles = SlowEnumeratorTools.buffer(articles, 200)
    article_batches = SlowEnumeratorTools.batch(articles)
    article_batches.each { |as| db.store_multi(as) }

    View full-size slide

  57. SlowEnumeratorTools.buffer(enum, size)
    SlowEnumeratorTools.batch(enum)
    SlowEnumeratorTools.merge(enums)
    github.com/ddfreyne/slow_enumerator_tools

    View full-size slide

  58. Now your code is clean & fast,
    thanks to Enumerators.

    View full-size slide

  59. DENIS DEFREYNE @DDFREYNE
    Q&A

    View full-size slide