Clean & fast code with enumerators

Clean & fast code with enumerators

Ruby’s Enumerator class is a powerful tool for writing code for dealing with streams of data and events, while being easy-to-understand and at the same time fast.

This talk will go into more detail on how to do so, and also show SlowEnumeratorTools, which provides some of the glue code that makes Enumerator nicer to use, and faster to boot.

There will be some overlap with Sergio’s “Understanding Unix pipes with Ruby” talk from the November meetup.

Be732ee41fd3038aa98a0a7e7b7be081?s=128

Denis Defreyne

January 11, 2018
Tweet

Transcript

  1. Clean & fast code with Enumerators DENIS DEFREYNE RUG::B DECEMBER

    7TH, 2017
  2. None
  3. None
  4. None
  5. None
  6. 1. Simple HTTP client

  7. { "books": [ { "id": "B98312", "title": "The Monkey's Raincoat",

    "author": "Oswaldo Berge" }, { "id": "B98318", "title": "The World, the Flesh and the Devil", "author": "Haleigh Thompson" } ] }
  8. def fetch_books(base_url) url = base_url + '/books' response = Net::HTTP.get_response(URI.parse(url))

    case response.code when '200' body = JSON.parse(response.body) body.fetch('books') else raise "Unexpected response code: #{response.code}" end end
  9. books = fetch_books('http://localhost:4567') books.each do |book| p book end

  10. 2. Paginating HTTP client

  11. { "books": [ { "id": "B98312", "title": "The Monkey's Raincoat",

    "author": "Oswaldo Berge" }, { "id": "B98318", "title": "The World, the Flesh and the Devil", "author": "Haleigh Thompson" } ], "cursor": "B98318" }
  12. def fetch_books(base_url) url = base_url + '/books' response = Net::HTTP.get_response(URI.parse(url))

    case response.code when '200' body = JSON.parse(response.body) body.fetch('books') else raise "Unexpected response code: #{response.code}" end end
  13. def fetch_books(base_url) books = [] loop do url = base_url

    + '/books' response = Net::HTTP.get_response(URI.parse(url)) case response.code when '200' body = JSON.parse(response.body) books.concat(body.fetch('books')) break else raise "Unexpected response code: #{response.code}" end end books end
  14. def fetch_books(base_url) books = [] cursor = nil loop do

    url = base_url + '/books' url += "?cursor=#{cursor}" if cursor response = Net::HTTP.get_response(URI.parse(url)) case response.code when '200' body = JSON.parse(response.body) books.concat(body.fetch('books')) cursor = body.fetch('cursor') break if cursor.nil? else raise "Unexpected response code: #{response.code}" end end books end
  15. def fetch_books(base_url) books = [] cursor = nil loop do

    url = base_url + '/books' url += "?cursor=#{cursor}" if cursor response = Net::HTTP.get_response(URI.parse(url)) case response.code when '200' body = JSON.parse(response.body) books.concat(body.fetch('books')) cursor = body.fetch('cursor') break if cursor.nil? else raise "Unexpected response code: #{response.code}" end end books end
  16. books = fetch_books('http://localhost:4567') books.each do |book| p book end

  17. 3. Refactoring

  18. def fetch_books(base_url) books = [] cursor = nil loop do

    url = base_url + '/books' url += "?cursor=#{cursor}" if cursor response = Net::HTTP.get_response(URI.parse(url)) case response.code when '200' body = JSON.parse(response.body) books.concat(body.fetch('books')) cursor = body.fetch('cursor') break if cursor.nil? else raise "Unexpected response code: #{response.code}" end end books end
  19. def fetch_books(base_url)
 cursor = nil loop do url = base_url

    + '/books' url += "?cursor=#{cursor}" if cursor response = Net::HTTP.get_response(URI.parse(url)) case response.code when '200' body = JSON.parse(response.body) body.fetch('books').each { |b| yield(b) } cursor = body.fetch('cursor') break if cursor.nil? else raise "Unexpected response code: #{response.code}" end end
 
 end
  20. fetch_books('http://localhost:4567') do |book| p book end

  21. 4. More refactoring

  22. def fetch_books(base_url) cursor = nil loop do url = base_url

    + '/books' url += "?cursor=#{cursor}" if cursor response = Net::HTTP.get_response(URI.parse(url)) case response.code when '200' body = JSON.parse(response.body) body.fetch('books').each { |b| yield(b) } cursor = body.fetch('cursor') break if cursor.nil? else raise "Unexpected response code: #{response.code}" end end end
  23. def fetch(base_url) cursor = nil loop do url = base_url

    url += "?cursor=#{cursor}" if cursor response = Net::HTTP.get_response(URI.parse(url)) case response.code when '200' body = JSON.parse(response.body) yield(body) cursor = body.fetch('cursor') break if cursor.nil? else raise "Unexpected response code: #{response.code}" end end end
  24. fetch('http://localhost:4567/books') do |response| response.fetch('books').each do |book| p book end end

  25. responses = [] fetch('http://localhost:4567/books') do |response| responses << response end

    books = responses.map { |r| r.fetch('books') }.flatten books.each do |book| p book end
  26. responses = [] fetch('http://localhost:4567/books') do |response| responses << response end


    
 
 
 
 

  27. responses = to_enum(:fetch, 'http://localhost:4567/books')
 
 
 
 
 
 


    
 

  28. responses = to_enum(:fetch, 'http://localhost:4567/books')
 responses.each do |response| p response end


    
 
 
 

  29. responses = to_enum(:fetch, 'http://localhost:4567/books') books = responses.flat_map { |r| r.fetch('books')

    } 
 
 
 

  30. (1..3).map { |i| [i, 10+i] } # => [[1, 11],

    [2, 12], [3, 13]] 

  31. (1..3).map { |i| [i, 10+i] } # => [[1, 11],

    [2, 12], [3, 13]] (1..3).flat_map { |i| [i, 10+i] } # => [1, 11, 2, 12, 3, 13]
  32. responses = to_enum(:fetch, 'http://localhost:4567/books') books = responses.flat_map { |r| r.fetch('books')

    } books.each do |book| p book end
 
 
 

  33. def fetch(base_url) return to_enum(__method__, base_url).lazy unless block_given? … end

  34. books = fetch('http://localhost:4567/books') .flat_map { |r| r.fetch('books') }

  35. def fetch(base_url) return to_enum(__method__, base_url).lazy unless block_given? cursor = nil

    loop do url = base_url url += "?cursor=#{cursor}" if cursor response = Net::HTTP.get_response(URI.parse(url)) case response.code when '200' body = JSON.parse(response.body) yield(body) cursor = body.fetch('cursor') break if cursor.nil? else raise "Unexpected response code: #{response.code}" end end end
  36. 5. Database

  37. db = Database.connect books = fetch('http://localhost:4567/books') .flat_map { |r| r.fetch('books')

    }
 
 books.each_slice(50) { |batch| db.store(batch) }
  38. None
  39. Given ‣ 200 books ‣ HTTP response contains 50 books

    ‣ it takes 1s to fetch a batch of books ‣ it takes 1s to store a batch of books

  40. Given ‣ 200 books ‣ HTTP response contains 50 books

    ‣ it takes 1s to fetch a batch of books ‣ it takes 1s to store a batch of books
 Question How long does this code take to execute?
 
 3s 4s 5s 8s 16s
  41. Given ‣ 200 books ‣ HTTP response contains 50 books

    ‣ it takes 1s to fetch a batch of books ‣ it takes 1s to store a batch of books
 Question How long does this code take to execute?
 
 3s 4s 5s 8s 16s
  42. Fetch Store Fetch Store

  43. Fetch Store Fetch Store

  44. 6. Buffer

  45. gem 'slow_enumerator_tools'

  46. db = Database.connect books = fetch('http://localhost:4567/books') .flat_map { |r| r.fetch('books')

    }
 
 
 books.each_slice(50) { |batch| db.store(batch) }
  47. db = Database.connect books = fetch('http://localhost:4567/books') .flat_map { |r| r.fetch('books')

    }
 books = SlowEnumeratorTools.buffer(books, 50)
 
 books.each_slice(50) { |batch| db.store(batch) }
  48. None
  49. Given ‣ 200 books ‣ HTTP response contains 50 books

    ‣ it takes 1s to fetch a batch of books ‣ it takes 1s to store a batch of books

  50. Given ‣ 200 books ‣ HTTP response contains 50 books

    ‣ it takes 1s to fetch a batch of books ‣ it takes 1s to store a batch of books
 Question How long does this code take to execute?
 
 3s 4s 5s 8s 16s
  51. Given ‣ 200 books ‣ HTTP response contains 50 books

    ‣ it takes 1s to fetch a batch of books ‣ it takes 1s to store a batch of books
 Question How long does this code take to execute?
 
 3s 4s 5s 8s 16s
  52. Fetch Store

  53. 7. Batches

  54. db = Database.connect books = fetch('http://localhost:4567/books') .flat_map { |r| r.fetch('books')

    }
 books = SlowEnumeratorTools.buffer(books, 50)
 
 
 books.each_slice(50) { |batch| db.store(batch) }
  55. db = Database.connect books = fetch('http://localhost:4567/books') .flat_map { |r| r.fetch('books')

    }
 books = SlowEnumeratorTools.buffer(books, 50)
 book_batches = SlowEnumeratorTools.batch(books)
 
 book_batches.each { |batch| db.store(batch) }
  56. events = MyEventStream.new('content') events.each do |_e| system('./rebuild.sh') end

  57. events = MyEventStream.new('content') event_batches = SlowEnumeratorTools.batch(events) event_batches.each do |_es| system('./rebuild.sh')

    end
  58. 9. Merging

  59. content_events = MyEventStream.new('content') layout_events = MyEventStream.new('layouts')

  60. content_events = MyEventStream.new('content') layout_events = MyEventStream.new('layouts') events = SlowEnumeratorTools.merge( [content_events,

    layout_events])
  61. content_events = MyEventStream.new('content') layout_events = MyEventStream.new('layouts') events = SlowEnumeratorTools.merge( [content_events,

    layout_events]) event_batches = SlowEnumeratorTools.batch(events) event_batches.each do |es| system('./rebuild.sh') end
  62. 10. Everything together

  63. db = MyDB.new api_a = MyAPI.new(MyHTTPClient.new('example.com')) api_b = MyAPI.new(MyHTTPClient.new('example.org'))

  64. db = MyDB.new api_a = MyAPI.new(MyHTTPClient.new('example.com')) api_b = MyAPI.new(MyHTTPClient.new('example.org')) articles

    = SlowEnumeratorTools.merge( [api_a.articles, api_b.articles])
  65. db = MyDB.new api_a = MyAPI.new(MyHTTPClient.new('example.com')) api_b = MyAPI.new(MyHTTPClient.new('example.org')) articles

    = SlowEnumeratorTools.merge( [api_a.articles, api_b.articles]) articles = SlowEnumeratorTools.buffer(articles, 200)
  66. db = MyDB.new api_a = MyAPI.new(MyHTTPClient.new('example.com')) api_b = MyAPI.new(MyHTTPClient.new('example.org')) articles

    = SlowEnumeratorTools.merge( [api_a.articles, api_b.articles]) articles = SlowEnumeratorTools.buffer(articles, 200) article_batches = SlowEnumeratorTools.batch(articles)
  67. db = MyDB.new api_a = MyAPI.new(MyHTTPClient.new('example.com')) api_b = MyAPI.new(MyHTTPClient.new('example.org')) articles

    = SlowEnumeratorTools.merge( [api_a.articles, api_b.articles]) articles = SlowEnumeratorTools.buffer(articles, 200) article_batches = SlowEnumeratorTools.batch(articles) article_batches.each { |as| db.store_multi(as) }
  68. SlowEnumeratorTools.buffer(enum, size) SlowEnumeratorTools.batch(enum) SlowEnumeratorTools.merge(enums) github.com/ddfreyne/slow_enumerator_tools

  69. Now your code is clean & fast, thanks to Enumerators.

  70. DENIS DEFREYNE @DDFREYNE Q&A

  71. None