Upgrade to Pro — share decks privately, control downloads, hide ads and more …

PCRE With PHP

PCRE With PHP

PHP Benelux 2015

Thomas Weinert

January 24, 2015
Tweet

More Decks by Thomas Weinert

Other Decks in Programming

Transcript

  1. OFFSET $subject = 'aa ab ac ad'; $offset = 0;

    $length = strlen($subject); while ($offset < $length) { if (preg_match('(a.)', $subject, $match, PREG_OFFSET_CAPTURE, $offset)) { $offset = $match[0][1] + strlen($match[0][0]); var_dump($match[0][0]); } else { break; } } string(2) "aa" string(2) "ab" string(2) "ac" string(2) "ad"
  2. PATTERN String Escaping $pattern = '(\\\n)'; $text = <<<'TEXT' foo\nbar

    TEXT; preg_match($pattern, $text, $match); var_dump($pattern, $text, $match); string(5) "(\\n)" string(8) "foo\nbar" array(1) { [0]=> string(2) "\n" }
  3. MODIFIERS x - PCRE_EXTENDED u - PCRE_UTF8 D - PCRE_DOLLAR_ENDONLY

    s - PCRE_DOTALL m - PCRE_MULTILINE i - PCRE_CASELESS ...
  4. PCRE_EXTENDED $pattern = <<<'REGEX' (^ (d-)? # optional country prefix

    (\d{5}) # german zip code $)Dix REGEX; var_dump((bool)preg_match($pattern, 'D-50670')); bool(true)
  5. PCRE_DOLLAR_ENDONLY $examples = [ ["(^\\d+$)", "123"], ["(^\\d+$)", "123\n"], ["(^\\d+$)D", "123\n"],

    ["(\\A\\d+\\G)", "123\n"] ]; foreach ($examples as $example) { var_dump((bool)preg_match($example[0], $example[1], $match)); } bool(true) bool(true) bool(false) bool(false)
  6. PCRE_DOTALL $examples = [ ["(^.+$)", "123"], ["(^.+$)", "123\n456"], ["(^.+$)s", "123\n456"]

    ]; foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(1) { [0]=> string(3) "123" } array(0) { } array(1) { [0]=> string(7) "123 456" }
  7. PCRE_MULTILINE $examples = [ ["(^.+$)", "123"], ["(^.+$)", "123\n456"], ["(^.+$)m", "123\n456"]

    ]; foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(1) { [0]=> string(3) "123" } array(0) { } array(1) { [0]=> string(3) "123" }
  8. PCRE_CASELESS $examples = [ ["(foo)", "foo"], ["(foo)", "FOO"], ["(foo)i", "FOO"]

    ]; foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(1) { [0]=> string(3) "foo" } array(0) { } array(1) { [0]=> string(3) "FOO" }
  9. PREG_MATCH_ALL() $subject = 'aa ab ac ad'; preg_match_all('(a.)', $subject, $match);

    var_dump($match); array(1) { [0]=> array(4) { [0]=> string(2) "aa" [1]=> string(2) "ab" [2]=> string(2) "ac" [3]=> string(2) "ad" } }
  10. PREG_PATTERN_ORDER $subject = 'ab ac'; preg_match_all('(a(.))', $subject, $match); var_dump($match); array(2)

    { [0]=> array(2) { [0]=> string(2) "ab" [1]=> string(2) "ac" } [1]=> array(2) { [0]=> string(1) "b" [1]=> string(1) "c" } }
  11. PREG_SET_ORDER $subject = 'ab ac'; preg_match_all('(a(.))', $subject, $match, PREG_SET_ORDER); var_dump($match);

    array(2) { [0]=> array(2) { [0]=> string(2) "ab" [1]=> string(1) "b" } [1]=> array(2) { [0]=> string(2) "ac" [1]=> string(1) "c" } }
  12. PREG_REPLACE_CALLBACK() No need for modifier "e" (PREG_REPLACE_EVAL) var_dump( preg_replace_callback( '(a(.))',

    function ($match) { return strtoupper($match[1]); }, 'ab ac' ) ); string(3) "B C"
  13. FUNCTOR class Replacer { public function __invoke($match) { return strtoupper($match[1]);

    } } var_dump( preg_replace_callback( '(a(.))', new Replacer(), 'ab ac' ) );
  14. PREG_SPLIT() $pattern = '(\\R)u'; $subject = "one\rtwo\n\nthree\r\nfour"; $match = preg_split($pattern,

    $subject); var_dump($match); array(5) { [0]=> string(3) "one" [1]=> string(3) "two" [2]=> string(0) "" [3]=> string(5) "three" [4]=> string(4) "four" }
  15. PREG_SPLIT_NO_EMPTY $pattern = '(\\R)u'; $subject = "one\rtwo\n\nthree\r\nfour"; $match = preg_split($pattern,

    $subject, -1, PREG_SPLIT_NO_EMPTY); var_dump($match); array(4) { [0]=> string(3) "one" [1]=> string(3) "two" [2]=> string(5) "three" [3]=> string(4) "four" }
  16. PREG_SPLIT_OFFSET_CAPTURE $pattern = '(\\R)u'; $subject = "one\rtwo\n\nthree"; $flags = PREG_SPLIT_NO_EMPTY

    | PREG_SPLIT_OFFSET_CAPTURE; $match = preg_split($pattern, $subject, -1, $flags); var_dump($match); array(3) { [0]=> array(2) { [0]=> string(3) "one" [1]=> int(0) } [1]=> array(2) { [0]=> string(3) "two" [1]=> int(4) } [2]=> array(2) { [0]=> string(5) "three" [1]=> int(9) } }
  17. PREG_SPLIT_DELIM_CAPTURE $highlights = ['small' => '*', 'short' => '_']; $pattern

    = '((small|short))u'; $subject = "A small, short example"; $match = preg_split($pattern, $subject, -1, PREG_SPLIT_DELIM_CAPTURE); foreach ($match as $part) { if (isset($highlights[$part])) { echo $highlights[$part], $part, $highlights[$part]; } else { echo $part; } } A *small*, _short_ example
  18. REGEXITERATOR $data = new ArrayIterator(['aa', 'ab']); $iterator = new RegexIterator(

    $data, '(.(.))', RegexIterator::REPLACE ); $iterator->replacement = '$1'; var_dump(iterator_to_array($iterator)); array(2) { [0] => string(1) "a" [1] => string(1) "b" }
  19. UNICODE Modifier u All: \X Token: \x{A9} Category: \p{L} Negation:

    \P{L}, \p{^L} Scripts: \p{Hangul} Blocks: \p{Arrows}
  20. UNICODE EXAMPLE $data = <<<'DATA' English German 한국어 日本語 DATA;

    preg_match_all('(\\pL+)u', $data, $match); var_dump($match[0]); array(4) { [0] => string(7) "English" [1] => string(6) "German" [2] => string(9) "한국어" [3] => string(9) "日本語" }
  21. SUBPATTERN MODIFIERS (?i-sm) $examples = [ ["((?i)foo)", "FOO"], ["((?-i)foo)i", "FOO"]

    ]; foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(1) { [0]=> string(3) "FOO" } array(0) { }
  22. NAMED SUBPATTERNS $pattern = "(^ (?P<year>\d{4}) (?:-(?<month>\d{1,2}))? (?:-(?'day'\d{1,2}))? )x"; preg_match($pattern,

    "2015-01-24", $match); var_dump($match); array(7) { [0]=> string(10) "2015-01-24" ["year"]=> string(4) "2015" [1]=> string(4) "2015" ["month"]=> string(2) "01" [2]=> string(2) "01" ["day"]=> string(2) "24" [3]=> string(2) "24" }
  23. PRE-DEFINED SUBROUTINES $pattern = "( ^ (?&number) (?:\\.(?&number)){3} $ (?(DEFINE)

    (?'number'25[0-5]|2[1-4]\d|1\d{2}|\d{1,2}) ) )x"; var_dump((bool)preg_match($pattern, "127.0.0.1", $match)); var_dump((bool)preg_match($pattern, "355.0.0.1", $match)); bool(true) bool(false)
  24. LOOK AHEAD $examples = [ ["(h(?=e))", "hello"], ["(h(?=e)llo)", "hello"], ["(h(?=e).llo)",

    "hello"] ]; foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(1) { [0]=> string(1) "h" } array(0) { } array(1) { [0]=> string(5) "hello" }
  25. LOOK AHEAD - NEGATION $examples = [ ["(h(?!e))", "hello"], ["(h(?!e))",

    "hallo"] ]; foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(0) { } array(1) { [0]=> string(1) "h" }
  26. LOOK BEHIND $examples = [ ["((?<=h).)", "hello"], ["((?<!h).)", "hallo"] ];

    foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(1) { [0]=> string(1) "e" } array(1) { [0]=> string(1) "h" }
  27. LOOK BEHIND - ALTERNATIVES $examples = [ ["((?<=e|ha|.{2})l)", "hello"], ["((?<=e|ha)l)",

    "hallo"], ["((?<=e|.{2})l)", "hallo"] ]; foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(1) { [0]=> string(1) "l" } array(1) { [0]=> string(1) "l" } array(1) { [0]=> string(1) "l" }
  28. LOOK BEHIND - UNKNOWN LENGTH preg_match("((?<=.{2,})l)", 'hello', $match); Warning: preg_match():

    Compilation failed: lookbehind assertion is not fixed length at offset 9 in /tmp... on line 2
  29. CONDITIONALS $pattern = '((?<quote>[\'"])?(?(quote).*?\\k<quote>|\\w+))'; $data = ['foo', '"foo"', "'foo'", 'foo

    bar', '"foo bar"']; foreach ($data as $subject) { if (preg_match($pattern, $subject, $match)) { echo $match[0], "\n"; } } foo "foo" 'foo' foo "foo bar"
  30. RECURSIONS $pattern = <<<'PCRE' ( \( ( (?>[^()]+) | (?R)

    )* \) )Ux PCRE; preg_match_all($pattern, '(ab(cd)ef)(gh)', $match); var_dump($match); array(2) { [0] => array(2) { [0] => string(10) "(ab(cd)ef)" [1] => string(4) "(gh)" } [1] => array(2) { [0] => string(1) "f" [1] => string(1) "h" } }
  31. START OF PATTERN MODIFIERS (*UTF), (*UTF8), (*UTF16), (*UTF32) (*UTF)(*UCP) =

    u (*CR), (*LF), (*CRLF), (*ANYCRLF), (*ANY) (*BSR_ANYCRLF), (*BSR_UNICODE) - \R (*LIMIT_MATCH=x), (*LIMIT_RECURSION=d) (*NO_AUTO_POSSESS), (*NO_START_OPT) (*NOTEMPTY), (*NOTEMPTY_ATSTART)
  32. VERSIONS PCRE2 10.0 2015-01-05 PCRE 8.36 2014-09-26 3V4L.ORG PHP7, HHVM

    >= 3.3: 8.35 2014-04-04 PHP >= 5.5.10: 8.34 2013-12-15