まだ正規表現で消耗してるの?

 まだ正規表現で消耗してるの?

C4c161ae9eeeed8f161197410f7a228a?s=128

Kenichiro Kishida

April 16, 2016
Tweet

Transcript

  1. まだ正規表現で消耗してるの? 可読性を上げるとうまくいく しずひこ PHPカンファレンス札幌 複雑な正規表現を考えるのは時間の無駄だった 東京
  2. Kenichiro Kishida Tokyo JAPAN sizuhiko@gmail.com @sizuhiko https://github.com/sizuhiko http://blog.open.tokyo.jp R: HmM^JRTIeUY

  3. I — 正規表現

  4. /^(?P<scheme>[a-zA-Z](?:[a-zA-Z0-9\+\-\.])*)\:(?P<hierPart>(?:\/\/(?P<authority>(?:(?P<userinfo>(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:))*)@)?(?P<host>(?:(?:0-9|1-90-9|10-90-9|20-40-9|250-5)\.(?:0-9|1-90-9|10-90-9|20-40-9|250-5)\.(?:0-9|1-90-9|10-90-9|20-40-9|250-5)\.(?:0-9|1-90-9|10-90-9|20-40-9|250-5)|(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]))+))(?:\:(?P<port>(?:\d)+))?)(?P<pathAbempty>(?:\/(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@))*)*)|(?P<pathAbsolute>\/(?:(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@))+(?:\/(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@))*)+)?)|(?P<pathRootless>(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@))+(?:\/(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@))*)*)|(?P<pathEmpty>^(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@))))(?:\?(?P<query>(?:(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@)|\/|\?))*))?(?:#(?P<fragment>(?:(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@)|\/|\?))*))?$/
  5. None
  6. /^(?P<scheme>[a-zA-Z](?:[a-zA-Z0-9\+\-\.])*)\:(?P<hierPart>(?:\/\/(?P<authority>(?:(?P<userinfo>(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:))*)@)?(?P<host>(?:(?:0-9|1-90-9|10-90-9|20-40-9|250-5)\.(?:0-9|1-90-9|10-90-9|20-40-9|250-5)\.(?:0-9|1-90-9|10-90-9|20-40-9|250-5)\.(?:0-9|1-90-9|10-90-9|20-40-9|250-5)|(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]))+))(?:\:(?P<port>(?:\d)+))?)(?P<pathAbempty>(?:\/(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@))*)*)|(?P<pathAbsolute>\/(?:(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@))+(?:\/(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@))*)+)?)|(?P<pathRootless>(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@))+(?:\/(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@))*)*)|(?P<pathEmpty>^(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@))))(?:\?(?P<query>(?:(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@)|\/|\?))*))?(?:#(?P<fragment>(?:(?:(?:[a-zA-Z0-9\-\._~]|%(?:[0-9A-Z]){2}|[\!\$&'\(\)\*\+,;\=]|\:|@)|\/|\?))*))?$/
  7. RFC3986 Uniform Resource Identifier (URI): Generic Syntax

  8. URI = scheme ":" hier-part [ "?" query ] [

    "#" fragment ] scheme = ALPHA * ( ALPHA / DIGIT / "+" / "-" / "." ) hier-part = "//" authority path-abempty / path-absolute / path-rootless / path-empty https://www.ietf.org/rfc/rfc3986.txt
  9. • 検索した結果、出てきた正規表現をコピペしてませんか?それ正しいですか? • 他の人が書いた正規表現を含むコードをレビューできますか? • 後でその正規表現メンテできますか?

  10. もっと簡単に 書きたい or 読みたい と思ったことはないですか?

  11. None
  12. Regular Expressions made easy

  13. $regex = new VerbalExpressions; $regex->startOfLine() ->then("http") ->maybe("s") ->then("://") ->maybe("www.") ->anythingBut("

    ") ->endOfLine(); /^(?:http)(?:s)?(?:\:\/\/)(?:www\.)?(?:[^ ]*)$/m
  14. $rfc3986 = new VerbalExpressions; // scheme $scheme = new VerbalExpressions;

    $scheme->add("http")->maybe("s") ->_or("ftp"); $rfc3986->startOfLine() ->add($scheme) ->add("://"); /^(?:\(\?\:http\)\(\?\:s\)\?\)\|\(\?\:ftp)(?:\:\/\/)/m
  15. • ○ 多くの言語に対応している • ○ 簡単に書ける • × 複雑なケースは書けない • × 言語によって実装がマチマチ

  16. None
  17. None
  18. None
  19. http://www.kurtisrainboltgreene.name/hexpress/

  20. The hexpress gem is another take at the concept of

    "Verbal Hexpressions" in Ruby.
  21. pattern = Hexpress.new. start("http"). maybe("s"). with("://"). maybe { words.with(".") }.

    find { matching { [word, "-"] }.multiple }. has("."). either("com", "org"). maybe("/"). ending
  22. To PHP https://github.com/sizuhiko/hexpress

  23. trait Find { public function find($value = null, $named =

    false) { $param = compact('value', 'named'); return is_callable($value) ? $this->addNested(FindValue::class, $param) : $this->addValue(FindValue::class, $param); } public function capture($value = null) { return $this->find($value); } } class FindValue { use Nested; private $hexpression; private $open; private $close; public function __construct($param) { extract($param); $this->hexpression = is_callable($value) ? new Hexpress($value) : $value; $this->open = $named ? "(?P<{$named}>" : '('; $this->close = ')'; } } class Hexpress def find(value = nil, &block) value ? add_value(Nested::Find, value) : add_nested(Nested::Find, &block) end alias_method :capture, :find module Nested class Find include Nested def initialize(value=nil,&block) @hexpression = value || Hexpress.new.instance_eval(&block) @open, @close = "(", ")" end end end end 最近のPHPならかなり忠実に移植可能
  24. URI = scheme ":" hier-part [ "?" query ] [

    "#" fragment ] scheme = ALPHA * ( ALPHA / DIGIT / "+" / "-" / "." ) hier-part = "//" authority path-abempty / path-absolute / path-rootless / path-empty https://www.ietf.org/rfc/rfc3986.txt
  25. $this->hexpress ->start($this->scheme()) ->with(':') ->has($this->hierPart()) ->maybe($this->query()) ->maybe($this->fragment()) ->end(); URI = scheme

    ":" hier-part [ "?" query ] [ "#" fragment ]
  26. private function scheme() { return (new Hexpress()) ->find(function ($hex) {

    $hex->matching(function ($hex) { $hex->letter(); }); $hex->many(function ($hex) { $hex->matching(function ($hex) { $hex->letter()->number()->with('+-.'); }); }, 0); }, 'scheme'); } scheme = ALPHA * ( ALPHA / DIGIT / "+" / "-" / "." ) ̍ ̍    
  27. PHP版の独自機能 名前付きサブパターン

  28. private function scheme() { return (new Hexpress()) ->find(function ($hex) {

    $hex->matching(function ($hex) { $hex->letter(); }); $hex->many(function ($hex) { $hex->matching(function ($hex) { $hex->letter()->number()->with('+-.'); }); }, 0); }, 'scheme'); } scheme = ALPHA * ( ALPHA / DIGIT / "+" / "-" / "." ) findメソッドの第二引数に サブパターンの名前を指定できる
  29. preg_match( 'http://example.com:80/', $pattern->toRegExp(), $matches); echo $matches['scheme']; #=> 'http'

  30. • もう正規表現なんかで消耗しない! • Rubyのエコシステムからの移植も容易になっている!! • 諦めないで!!!

  31. visit my blog & github https://github.com/sizuhiko http://blog.open.tokyo.jp R: HmM^JRTIeUY @sizuhiko #phpstudy 2016/3/30