Procesando millones de imágenes con Elixir

Procesando millones de imágenes con Elixir

0f9c9bbecc4067b9bce445cb11ed5d53?s=128

David Padilla

November 28, 2015
Tweet

Transcript

  1. Procesando millones de imágenes David Padilla @dabit

  2. None
  3. Carrierwave

  4. None
  5. None
  6. None
  7. image.recreate_versions!

  8. Image.each do |i| i.file.recreate_versions! end

  9. 2,700,000

  10. 1 segundo

  11. 2,700,000 ÷ 60 = 45,000 minutos

  12. 45,000 ÷ 60 = 750 horas

  13. 750 ÷ 24 = 31.25 días

  14. MAX_WORKERS = 20 Image.find_each(batch_size: 1200) do |batch| queue = Queue.new

    batch.each {|o| queue.push o} workers = (0..MAX_WORKERS).map do |w| Thread.new do while image = queue.pop(true) image.file.recreate_versions! end end end workers.map(&:join) end
  15. MAX_WORKERS = 20 Image.find_each(batch_size: 1200) do |batch| queue = Queue.new

    batch.each {|o| queue.push o} workers = (0..MAX_WORKERS).map do |w| Thread.new do while image = queue.pop(true) image.file.recreate_versions! end end end workers.map(&:join) end
  16. MAX_WORKERS = 20 Image.find_each(batch_size: 1200) do |batch| queue = Queue.new

    batch.each {|o| queue.push o} workers = (0..MAX_WORKERS).map do |w| Thread.new do while image = queue.pop(true) image.file.recreate_versions! end end end workers.map(&:join) end
  17. MAX_WORKERS = 20 Image.find_each(batch_size: 1200) do |batch| queue = Queue.new

    batch.each {|o| queue.push o} workers = (0..MAX_WORKERS).map do |w| Thread.new do while image = queue.pop(true) image.file.recreate_versions! end end end workers.map(&:join) end
  18. MAX_WORKERS = 20 Image.find_each(batch_size: 1200) do |batch| queue = Queue.new

    batch.each {|o| queue.push o} workers = (0..MAX_WORKERS).map do |w| Thread.new do while image = queue.pop(true) image.file.recreate_versions! end end end workers.map(&:join) end
  19. None
  20. array = [] 5.times.map do Thread.new do 1000.times do array

    << nil end end end.each(&:join) puts array.size
  21. $ ruby pushing_nil.rb → 5000 | $ jruby pushing_nil.rb → 4446 | $ rbx pushing_nil.rb → 3088
  22. array = [] 5.times.map do Thread.new do 1000.times do array

    << nil end end end.each(&:join) puts array.size
  23. 1 segundo

  24. 0.6 segundos

  25. 18 días

  26. Elixir

  27. class Code def method(*args) code = code + code while(true)

    do something_awesome end end def something_awesome add = 1 + 2 o = Object.new o.save_to_database end end
  28. Exception

  29. None
  30. None
  31. None
  32. None
  33. None
  34. Traer registros de la Base de Datos / Bajar de S3 imagen original / Crear dos tamaños / Subir a S3 / Crear aplicación
  35. Crear aplicación

  36. $ mix new images

  37. None
  38. Traer registros de la Base de Datos

  39. use Mix.Config config :images, Images.Repo, adapter: Ecto.Adapters.MySQL, database: "images_database", username:

    "root", password: "", hostname: "localhost"
  40. defmodule Images.PropertyImage do use Ecto.Model schema "images" do field :file,

    :string end end
  41. defmodule Images.Image do use Ecto.Model def main_query do from i

    in Images.Image, select: i end def find(image_id) do from i in main_query where: id = ^image_id end def paged(offset, limit) do from i in main_query, limit: ^limit, offset: ^offset end def all do Images.Repo.all main_query end end
  42. Bajar de S3 la imagen original

  43. def download_original(filename, id) do file = temp_filename(filename, id) ibrowse =

    [save_response_to_file: String.to_char_list(file)] s3_url(filename, id) |> HTTPotion.get([ibrowse: ibrowse]) file end def s3_url(file, id) do "#{s3_path}/#{id}/#{file}" end
  44. def download_original(filename, id) do file = temp_filename(filename, id) ibrowse =

    [save_response_to_file: String.to_char_list(file)] s3_url(filename, id) |> HTTPotion.get([ibrowse: ibrowse]) file end def s3_url(file, id) do "#{s3_path}/#{id}/#{file}" end
  45. def download_original(filename, id) do file = temp_filename(filename, id) ibrowse =

    [save_response_to_file: String.to_char_list(file)] HTTPotion.get(s3_url(filename, id), [ibrowse: ibrowse]) file end def s3_url(file, id) do "#{s3_path}/#{id}/#{file}" end
  46. def download_original(filename, id) do file = temp_filename(filename, id) ibrowse =

    [save_response_to_file: String.to_char_list(file)] s3_url(filename, id) |> HTTPotion.get([ibrowse: ibrowse]) file end def s3_url(file, id) do "#{s3_path}/#{id}/#{file}" end
  47. Crear dos tamaños

  48. None
  49. def generate_medium(file, filename, id) do result = Path.join(System.tmp_dir, size_name(filename, :medium))

    Mogrify.open(file) |> Mogrify.copy |> Mogrify.resize_to_fill("450x300") |> Mogrify.save(result) end
  50. None
  51. Subir a S3

  52. None
  53. {status, _} = System.cmd("s3cmd", ["-P", "put", filename, s3_name])

  54. None
  55. None
  56. None
  57. {_, file} = File.read(result) Application.get_env(:images, :s3_bucket) |> String.to_char_list |> :erlcloud_s3.put_object(s3_name,

    file, [], [{#…}])
  58. ¿Y la concurrencia?

  59. Traer registros de la Base de Datos / Bajar de S3 imagen original / Crear dos tamaños / Subir a S3
  60. :poolboy

  61. None
  62. defmodule Images.PropertyImageWorker do use GenServer def start_link([]) do :gen_server.start_link(__MODULE__, [],

    []) end def init(state) do secret = to_char_list(Application.get_env(:images, :aws_secret_key)) Application.get_env(:images, :aws_access_key) |> to_char_list |> :erlcloud_s3.configure(secret) {:ok, state} end def handle_call(image, from, state) do result = Images.PropertyImage.process(image) {:reply, [result], state} end end
  63. defmodule Images.PropertyImageWorker do use GenServer def start_link([]) do :gen_server.start_link(__MODULE__, [],

    []) end def init(state) do secret = to_char_list(Application.get_env(:images, :aws_secret_key)) Application.get_env(:images, :aws_access_key) |> to_char_list |> :erlcloud_s3.configure(secret) {:ok, state} end def handle_call(image, from, state) do result = Images.PropertyImage.process(image) {:reply, [result], state} end end
  64. defmodule Images.ImagesSupervisor do use Supervisor def start_link do :supervisor.start_link(__MODULE__, [])

    end def init([]) do poolboy_config = [ {:name, {:local, pool_name()}}, {:worker_module, Images.PropertyImageWorker}, {:size, 20}, {:max_overflow, 0} ] children = [ :poolboy.child_spec(pool_name(), poolboy_config, []), worker(Images.Repo, []) ] supervise(children, strategy: :one_for_one) end def pool_name do :property_images end end
  65. defmodule Images.ImagesSupervisor do use Supervisor def start_link do :supervisor.start_link(__MODULE__, [])

    end def init([]) do poolboy_config = [ {:name, {:local, pool_name()}}, {:worker_module, Images.PropertyImageWorker}, {:size, 20}, {:max_overflow, 0} ] children = [ :poolboy.child_spec(pool_name(), poolboy_config, []), worker(Images.Repo, []) ] supervise(children, strategy: :one_for_one) end def pool_name do :property_images end end
  66. defmodule Images do def start(_type, _args) do supervisor = Images.ImagesSupervisor.start_link

    enqueue supervisor end def enqueue do Images.PropertyImage.all |> Enum.each fn(r) -> spawn(fn() -> pool_image(r) end) end end def pool_image(image) do :poolboy.transaction( Images.ImagesSupervisor.pool_name, fn(pid) -> :gen_server.call(pid, image) end, :infinity ) end end
  67. defmodule Images do def start(_type, _args) do supervisor = Images.ImagesSupervisor.start_link

    enqueue supervisor end def enqueue do Images.PropertyImage.all |> Enum.each fn(r) -> spawn(fn() -> pool_image(r) end) end end def pool_image(image) do :poolboy.transaction( Images.ImagesSupervisor.pool_name, fn(pid) -> :gen_server.call(pid, image) end, :infinity ) end end
  68. defmodule Images do def start(_type, _args) do supervisor = Images.ImagesSupervisor.start_link

    enqueue supervisor end def enqueue do Images.PropertyImage.all |> Enum.each fn(r) -> spawn(fn() -> pool_image(r) end) end end def pool_image(image) do :poolboy.transaction( Images.ImagesSupervisor.pool_name, fn(pid) -> :gen_server.call(pid, image) end, :infinity ) end end
  69. defmodule Images do def start(_type, _args) do supervisor = Images.ImagesSupervisor.start_link

    enqueue supervisor end def enqueue do Images.PropertyImage.all |> Enum.each fn(r) -> spawn(fn() -> pool_image(r) end) end end def pool_image(image) do :poolboy.transaction( Images.ImagesSupervisor.pool_name, fn(pid) -> :gen_server.call(pid, image) end, :infinity ) end end
  70. None
  71. BEAM

  72. Conclusión

  73. 4 días

  74. 4 x 24 = 96; 96 x 60 = 5,760; 5,760 x 60 = 345,600; 345,600 / 2,700,000 = 0.128s
  75. Conclusión II

  76. Fin