Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Clean & fast code with enumerators

Clean & fast code with enumerators

Ruby’s Enumerator class is a powerful tool for writing code for dealing with streams of data and events, while being easy-to-understand and at the same time both concurrent and parallel.

This talk will go in more detail on how to do so, and also show SlowEnumeratorTools, which provides some of the glue code that makes Enumerator nicer to use, and faster to boot.

There will be some overlap with Sergio’s “Understanding Unix pipes with Ruby” talk from the November meetup.

Denis Defreyne

January 11, 2018
Tweet

More Decks by Denis Defreyne

Other Decks in Programming

Transcript

  1. Clean & fast code with Enumerators
    DENIS DEFREYNE RUG::B DECEMBER 7TH, 2017

    View Slide

  2. View Slide

  3. View Slide

  4. View Slide

  5. View Slide

  6. 1. Simple HTTP client

    View Slide

  7. {
    "books": [
    {
    "id": "B98312",
    "title": "The Monkey's Raincoat",
    "author": "Oswaldo Berge"
    },
    {
    "id": "B98318",
    "title": "The World, the Flesh and the Devil",
    "author": "Haleigh Thompson"
    }
    ]
    }

    View Slide

  8. def fetch_books(base_url)
    url = base_url + '/books'
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    body.fetch('books')
    else
    raise "Unexpected response code: #{response.code}"
    end
    end

    View Slide

  9. books = fetch_books('http://localhost:4567')
    books.each do |book|
    p book
    end

    View Slide

  10. 2. Paginating HTTP client

    View Slide

  11. {
    "books": [
    {
    "id": "B98312",
    "title": "The Monkey's Raincoat",
    "author": "Oswaldo Berge"
    },
    {
    "id": "B98318",
    "title": "The World, the Flesh and the Devil",
    "author": "Haleigh Thompson"
    }
    ],
    "cursor": "B98318"
    }

    View Slide

  12. def fetch_books(base_url)
    url = base_url + '/books'
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    body.fetch('books')
    else
    raise "Unexpected response code: #{response.code}"
    end
    end

    View Slide

  13. def fetch_books(base_url)
    books = []
    loop do
    url = base_url + '/books'
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    books.concat(body.fetch('books'))
    break
    else
    raise "Unexpected response code: #{response.code}"
    end
    end
    books
    end

    View Slide

  14. def fetch_books(base_url)
    books = []
    cursor = nil
    loop do
    url = base_url + '/books'
    url += "?cursor=#{cursor}" if cursor
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    books.concat(body.fetch('books'))
    cursor = body.fetch('cursor')
    break if cursor.nil?
    else
    raise "Unexpected response code: #{response.code}"
    end
    end
    books
    end

    View Slide

  15. def fetch_books(base_url)
    books = []
    cursor = nil
    loop do
    url = base_url + '/books'
    url += "?cursor=#{cursor}" if cursor
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    books.concat(body.fetch('books'))
    cursor = body.fetch('cursor')
    break if cursor.nil?
    else
    raise "Unexpected response code: #{response.code}"
    end
    end
    books
    end

    View Slide

  16. books = fetch_books('http://localhost:4567')
    books.each do |book|
    p book
    end

    View Slide

  17. 3. Refactoring

    View Slide

  18. def fetch_books(base_url)
    books = []
    cursor = nil
    loop do
    url = base_url + '/books'
    url += "?cursor=#{cursor}" if cursor
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    books.concat(body.fetch('books'))
    cursor = body.fetch('cursor')
    break if cursor.nil?
    else
    raise "Unexpected response code: #{response.code}"
    end
    end
    books
    end

    View Slide

  19. def fetch_books(base_url)

    cursor = nil
    loop do
    url = base_url + '/books'
    url += "?cursor=#{cursor}" if cursor
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    body.fetch('books').each { |b| yield(b) }
    cursor = body.fetch('cursor')
    break if cursor.nil?
    else
    raise "Unexpected response code: #{response.code}"
    end
    end


    end

    View Slide

  20. fetch_books('http://localhost:4567') do |book|
    p book
    end

    View Slide

  21. 4. More refactoring

    View Slide

  22. def fetch_books(base_url)
    cursor = nil
    loop do
    url = base_url + '/books'
    url += "?cursor=#{cursor}" if cursor
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    body.fetch('books').each { |b| yield(b) }
    cursor = body.fetch('cursor')
    break if cursor.nil?
    else
    raise "Unexpected response code: #{response.code}"
    end
    end
    end

    View Slide

  23. def fetch(base_url)
    cursor = nil
    loop do
    url = base_url
    url += "?cursor=#{cursor}" if cursor
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    yield(body)
    cursor = body.fetch('cursor')
    break if cursor.nil?
    else
    raise "Unexpected response code: #{response.code}"
    end
    end
    end

    View Slide

  24. fetch('http://localhost:4567/books') do |response|
    response.fetch('books').each do |book|
    p book
    end
    end

    View Slide

  25. responses = []
    fetch('http://localhost:4567/books') do |response|
    responses << response
    end
    books = responses.map { |r| r.fetch('books') }.flatten
    books.each do |book|
    p book
    end

    View Slide

  26. responses = []
    fetch('http://localhost:4567/books') do |response|
    responses << response
    end






    View Slide

  27. responses = to_enum(:fetch, 'http://localhost:4567/books')









    View Slide

  28. responses = to_enum(:fetch, 'http://localhost:4567/books')

    responses.each do |response|
    p response
    end





    View Slide

  29. responses = to_enum(:fetch, 'http://localhost:4567/books')
    books = responses.flat_map { |r| r.fetch('books') }




    View Slide

  30. (1..3).map { |i| [i, 10+i] }
    # => [[1, 11], [2, 12], [3, 13]]

    View Slide

  31. (1..3).map { |i| [i, 10+i] }
    # => [[1, 11], [2, 12], [3, 13]]
    (1..3).flat_map { |i| [i, 10+i] }
    # => [1, 11, 2, 12, 3, 13]

    View Slide

  32. responses = to_enum(:fetch, 'http://localhost:4567/books')
    books = responses.flat_map { |r| r.fetch('books') }
    books.each do |book|
    p book
    end




    View Slide

  33. def fetch(base_url)
    return to_enum(__method__, base_url).lazy unless block_given?

    end

    View Slide

  34. books =
    fetch('http://localhost:4567/books')
    .flat_map { |r| r.fetch('books') }

    View Slide

  35. def fetch(base_url)
    return to_enum(__method__, base_url).lazy unless block_given?
    cursor = nil
    loop do
    url = base_url
    url += "?cursor=#{cursor}" if cursor
    response = Net::HTTP.get_response(URI.parse(url))
    case response.code
    when '200'
    body = JSON.parse(response.body)
    yield(body)
    cursor = body.fetch('cursor')
    break if cursor.nil?
    else
    raise "Unexpected response code: #{response.code}"
    end
    end
    end

    View Slide

  36. 5. Database

    View Slide

  37. db = Database.connect
    books =
    fetch('http://localhost:4567/books')
    .flat_map { |r| r.fetch('books') }


    books.each_slice(50) { |batch| db.store(batch) }

    View Slide

  38. View Slide

  39. Given
    ‣ 200 books
    ‣ HTTP response contains 50 books
    ‣ it takes 1s to fetch a batch of books
    ‣ it takes 1s to store a batch of books


    View Slide

  40. Given
    ‣ 200 books
    ‣ HTTP response contains 50 books
    ‣ it takes 1s to fetch a batch of books
    ‣ it takes 1s to store a batch of books

    Question
    How long does this code take to execute?


    3s 4s 5s 8s 16s

    View Slide

  41. Given
    ‣ 200 books
    ‣ HTTP response contains 50 books
    ‣ it takes 1s to fetch a batch of books
    ‣ it takes 1s to store a batch of books

    Question
    How long does this code take to execute?


    3s 4s 5s 8s 16s

    View Slide

  42. Fetch
    Store
    Fetch
    Store

    View Slide

  43. Fetch
    Store
    Fetch
    Store

    View Slide

  44. 6. Buffer

    View Slide

  45. gem 'slow_enumerator_tools'

    View Slide

  46. db = Database.connect
    books =
    fetch('http://localhost:4567/books')
    .flat_map { |r| r.fetch('books') }



    books.each_slice(50) { |batch| db.store(batch) }

    View Slide

  47. db = Database.connect
    books =
    fetch('http://localhost:4567/books')
    .flat_map { |r| r.fetch('books') }

    books = SlowEnumeratorTools.buffer(books, 50)


    books.each_slice(50) { |batch| db.store(batch) }

    View Slide

  48. View Slide

  49. Given
    ‣ 200 books
    ‣ HTTP response contains 50 books
    ‣ it takes 1s to fetch a batch of books
    ‣ it takes 1s to store a batch of books


    View Slide

  50. Given
    ‣ 200 books
    ‣ HTTP response contains 50 books
    ‣ it takes 1s to fetch a batch of books
    ‣ it takes 1s to store a batch of books

    Question
    How long does this code take to execute?


    3s 4s 5s 8s 16s

    View Slide

  51. Given
    ‣ 200 books
    ‣ HTTP response contains 50 books
    ‣ it takes 1s to fetch a batch of books
    ‣ it takes 1s to store a batch of books

    Question
    How long does this code take to execute?


    3s 4s 5s 8s 16s

    View Slide

  52. Fetch
    Store

    View Slide

  53. 7. Batches

    View Slide

  54. db = Database.connect
    books =
    fetch('http://localhost:4567/books')
    .flat_map { |r| r.fetch('books') }

    books = SlowEnumeratorTools.buffer(books, 50)



    books.each_slice(50) { |batch| db.store(batch) }

    View Slide

  55. db = Database.connect
    books =
    fetch('http://localhost:4567/books')
    .flat_map { |r| r.fetch('books') }

    books = SlowEnumeratorTools.buffer(books, 50)

    book_batches = SlowEnumeratorTools.batch(books)


    book_batches.each { |batch| db.store(batch) }

    View Slide

  56. events = MyEventStream.new('content')
    events.each do |_e|
    system('./rebuild.sh')
    end

    View Slide

  57. events = MyEventStream.new('content')
    event_batches = SlowEnumeratorTools.batch(events)
    event_batches.each do |_es|
    system('./rebuild.sh')
    end

    View Slide

  58. 9. Merging

    View Slide

  59. content_events = MyEventStream.new('content')
    layout_events = MyEventStream.new('layouts')

    View Slide

  60. content_events = MyEventStream.new('content')
    layout_events = MyEventStream.new('layouts')
    events = SlowEnumeratorTools.merge(
    [content_events, layout_events])

    View Slide

  61. content_events = MyEventStream.new('content')
    layout_events = MyEventStream.new('layouts')
    events = SlowEnumeratorTools.merge(
    [content_events, layout_events])
    event_batches = SlowEnumeratorTools.batch(events)
    event_batches.each do |es|
    system('./rebuild.sh')
    end

    View Slide

  62. 10. Everything together

    View Slide

  63. db = MyDB.new
    api_a = MyAPI.new(MyHTTPClient.new('example.com'))
    api_b = MyAPI.new(MyHTTPClient.new('example.org'))

    View Slide

  64. db = MyDB.new
    api_a = MyAPI.new(MyHTTPClient.new('example.com'))
    api_b = MyAPI.new(MyHTTPClient.new('example.org'))
    articles = SlowEnumeratorTools.merge(
    [api_a.articles, api_b.articles])

    View Slide

  65. db = MyDB.new
    api_a = MyAPI.new(MyHTTPClient.new('example.com'))
    api_b = MyAPI.new(MyHTTPClient.new('example.org'))
    articles = SlowEnumeratorTools.merge(
    [api_a.articles, api_b.articles])
    articles = SlowEnumeratorTools.buffer(articles, 200)

    View Slide

  66. db = MyDB.new
    api_a = MyAPI.new(MyHTTPClient.new('example.com'))
    api_b = MyAPI.new(MyHTTPClient.new('example.org'))
    articles = SlowEnumeratorTools.merge(
    [api_a.articles, api_b.articles])
    articles = SlowEnumeratorTools.buffer(articles, 200)
    article_batches = SlowEnumeratorTools.batch(articles)

    View Slide

  67. db = MyDB.new
    api_a = MyAPI.new(MyHTTPClient.new('example.com'))
    api_b = MyAPI.new(MyHTTPClient.new('example.org'))
    articles = SlowEnumeratorTools.merge(
    [api_a.articles, api_b.articles])
    articles = SlowEnumeratorTools.buffer(articles, 200)
    article_batches = SlowEnumeratorTools.batch(articles)
    article_batches.each { |as| db.store_multi(as) }

    View Slide

  68. SlowEnumeratorTools.buffer(enum, size)
    SlowEnumeratorTools.batch(enum)
    SlowEnumeratorTools.merge(enums)
    github.com/ddfreyne/slow_enumerator_tools

    View Slide

  69. Now your code is clean & fast,
    thanks to Enumerators.

    View Slide

  70. DENIS DEFREYNE @DDFREYNE
    Q&A

    View Slide

  71. View Slide