Slide 1

Slide 1 text

Understanding Parser Combinators in Scala Oleksiy Dyagilev #scala school

Slide 2

Slide 2 text

function: String => ParseResult

Slide 3

Slide 3 text

function: String => ParseResult ParseResult is a Success(result, rest) or Failure(message)

Slide 4

Slide 4 text

function: String => ParseResult ParseResult is a Success(result, rest) or Failure(message) "abc" => Success("a", "bc") "" => Failure("string is empty")

Slide 5

Slide 5 text

trait Parser[+T] { def parse(input:String): ParseResult[T] }

Slide 6

Slide 6 text

trait Parser[+T] { def parse(input:String): ParseResult[T] } trait Parser[+T] extends Function1[String, ParseResult[T]]

Slide 7

Slide 7 text

trait Parser[+T] { def parse(input:String): ParseResult[T] } trait Parser[+T] extends Function1[String, ParseResult[T]] trait Parser[+T] extends (String => ParseResult[T])

Slide 8

Slide 8 text

trait Parser[+T] { def parse(input:String): ParseResult[T] } trait Parser[+T] extends Function1[String, ParseResult[T]] trait Parser[+T] extends (String => ParseResult[T]) sealed abstract class ParseResult[+T] case class Success[+T](result: T, rest: String) extends ParseResult[T] case class Failure(msg: String) extends ParseResult[Nothing]

Slide 9

Slide 9 text

val anyChar = new Parser[Char] { def apply(input: String): ParseResult[Char] = { if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } }

Slide 10

Slide 10 text

val anyChar = new Parser[Char] { def apply(input: String): ParseResult[Char] = { if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } } def parser[T](f: String => ParseResult[T]) = new Parser[T] { def apply(in: String): ParseResult[T] = f(in) }

Slide 11

Slide 11 text

val anyChar = new Parser[Char] { def apply(input: String): ParseResult[Char] = { if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } } def parser[T](f: String => ParseResult[T]) = new Parser[T] { def apply(in: String): ParseResult[T] = f(in) } val anyChar = parser { input => if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) }

Slide 12

Slide 12 text

val anyChar = new Parser[Char] { def apply(input: String): ParseResult[Char] = { if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } } def parser[T](f: String => ParseResult[T]) = new Parser[T] { def apply(in: String): ParseResult[T] = f(in) } val anyChar = parser { input => if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } anyChar("abc") == Success('a', "bc")

Slide 13

Slide 13 text

We can parse a SINGLE char, but how do we parse a SEQUENCE?

Slide 14

Slide 14 text

We can parse a SINGLE char, but how do we parse a SEQUENCE?

Slide 15

Slide 15 text

A parser combinator is a higher-order function that accepts several parsers as input and returns a new parser as its output. Source: https://en.wikipedia.org/wiki/Parser_combinator

Slide 16

Slide 16 text

def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } parser combinator

Slide 17

Slide 17 text

def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } parser combinator

Slide 18

Slide 18 text

def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } parser combinator

Slide 19

Slide 19 text

def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } res +: current.result parser combinator

Slide 20

Slide 20 text

def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } res +: current.result map {_.reverse} parser combinator

Slide 21

Slide 21 text

… ParseResult.map sealed abstract class ParseResult[+T] { def map[U](f: T => U): ParseResult[U] }

Slide 22

Slide 22 text

… ParseResult.map sealed abstract class ParseResult[+T] { def map[U](f: T => U): ParseResult[U] } case class Success[+T](result: T, rest: String) extends ParseResult[T] { override def map[U](f: T => U): ParseResult[U] = Success(f(result), rest) }

Slide 23

Slide 23 text

… ParseResult.map sealed abstract class ParseResult[+T] { def map[U](f: T => U): ParseResult[U] } case class Success[+T](result: T, rest: String) extends ParseResult[T] { override def map[U](f: T => U): ParseResult[U] = Success(f(result), rest) } case class Failure(msg: String) extends ParseResult[Nothing] { override def map[U](f: Nothing => U): ParseResult[U] = this }

Slide 24

Slide 24 text

val stringParser: Parser[List[Char]] = many(anyChar)

Slide 25

Slide 25 text

val stringParser: Parser[List[Char]] = many(anyChar) stringParser("abc") == Success(List('a', 'b', 'c'), "")

Slide 26

Slide 26 text

{ "firstName":"John", "isAlive":true, "age":25, "address":{ "streetAddress":"21 2nd Street", "city":"New York" }, "phoneNumbers":[ { "type":"home", "number":"212 555-1234" }, { "type":"office", "number":"646 555-4567" } ] }

Slide 27

Slide 27 text

{ "firstName":"John", "isAlive":true, "age":25, "address":{ "streetAddress":"21 2nd Street", "city":"New York" }, "phoneNumbers":[ { "type":"home", "number":"212 555-1234" }, { "type":"office", "number":"646 555-4567" } ] } let’s parse this double-quoted string

Slide 28

Slide 28 text

anyChar def iff[T](p: Parser[T], f: T => Boolean) = parser { input => p(input) match { case succ@Success(res, rest) => if (f(res)) succ else Failure("iff failed") case failure => failure } } parser combinator

Slide 29

Slide 29 text

anyChar def iff[T](p: Parser[T], f: T => Boolean) = parser { input => p(input) match { case succ@Success(res, rest) => if (f(res)) succ else Failure("iff failed") case failure => failure } } def char(c: Char): Parser[Char] = iff(anyChar, _ == c) def charNot(except: Char*): Parser[Char] = iff[Char](anyChar, c => !except.contains(c)) val quote = char('"') val digit = iff[Char](anyChar, _.isDigit) parser combinator

Slide 30

Slide 30 text

anyChar def iff[T](p: Parser[T], f: T => Boolean) = parser { input => p(input) match { case succ@Success(res, rest) => if (f(res)) succ else Failure("iff failed") case failure => failure } } def char(c: Char): Parser[Char] = iff(anyChar, _ == c) def charNot(except: Char*): Parser[Char] = iff[Char](anyChar, c => !except.contains(c)) val quote = char('"') val digit = iff[Char](anyChar, _.isDigit) quote(""" "firstName" """.trim) == Success(", firstName") parser combinator

Slide 31

Slide 31 text

def and[A, B](parserA: Parser[A], parserB: Parser[B]): Parser[(A,B)] = parser { input => parserA(input) match { case Success(res, rest) => parserB(rest) match { case Success(res2, rest2) => Success((res, res2), rest2) case _ => Failure(s"(and) second failed on $rest") } case _ => Failure("(and) first failed") } } parser combinator

Slide 32

Slide 32 text

def and[A, B](parserA: Parser[A], parserB: Parser[B]): Parser[(A,B)] = parser { input => parserA(input) match { case Success(res, rest) => parserB(rest) match { case Success(res2, rest2) => Success((res, res2), rest2) case _ => Failure(s"(and) second failed on $rest") } case _ => Failure("(and) first failed") } } and(digit, char('a'))("3abc") == Success((3,a), bc) and(digit, char('a'))("3333") == Failure((and) second failed on 333) parser combinator

Slide 33

Slide 33 text

takeFirst takeSecond def takeFirst[A, B](parserA: Parser[A], parserB: Parser[B]) = parser { input => and(parserA, parserB)(input) map { case (r1, r2) => r1 } }

Slide 34

Slide 34 text

takeFirst takeSecond def takeFirst[A, B](parserA: Parser[A], parserB: Parser[B]) = parser { input => and(parserA, parserB)(input) map { case (r1, r2) => r1 } } def takeSecond[A, B](parserA: Parser[A], parserB: Parser[B]) = parser { input => and(parserA, parserB)(input) map { case (r1, r2) => r2 } }

Slide 35

Slide 35 text

takeFirst takeSecond def takeFirst[A, B](parserA: Parser[A], parserB: Parser[B]) = parser { input => and(parserA, parserB)(input) map { case (r1, r2) => r1 } } def takeSecond[A, B](parserA: Parser[A], parserB: Parser[B]) = parser { input => and(parserA, parserB)(input) map { case (r1, r2) => r2 } } takeSecond(quote, stringParser)(""" "firstName" """.trim) == Success(firstName", )

Slide 36

Slide 36 text

val stringVal = takeFirst( takeSecond( char('"'), many(charNot('"')) ), char('"') )

Slide 37

Slide 37 text

val stringVal = takeFirst( takeSecond( char('"'), many(charNot('"')) ), char('"') ) stringVal(""" "firstName" """.trim) == Success(firstName, )

Slide 38

Slide 38 text

trait Parser[+T] extends (String => ParseResult[T]) { def ~>[U](right: Parser[U]): Parser[U] = takeSecond(this, right) def <~[U](right: Parser[U]): Parser[T] = takeFirst(this, right) def ~[U](right: => Parser[U]): Parser[(T, U)] = and(this, right) def *() = many(this) } shortcuts

Slide 39

Slide 39 text

trait Parser[+T] extends (String => ParseResult[T]) { def ~>[U](right: Parser[U]): Parser[U] = takeSecond(this, right) def <~[U](right: Parser[U]): Parser[T] = takeFirst(this, right) def ~[U](right: => Parser[U]): Parser[(T, U)] = and(this, right) def *() = many(this) } val stringVal = quote ~> charNot('"').* <~ quote implicit def charToParser(c: Char): Parser[Char] = char(c) val stringVal = '"' ~> charNot('"').* <~ '"' shortcuts

Slide 40

Slide 40 text

trait Parser[+T] extends (String => ParseResult[T]) { def map[U](f: T => U): Parser[U] = parser { in => this(in) map f } def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f } }

Slide 41

Slide 41 text

trait Parser[+T] extends (String => ParseResult[T]) { def map[U](f: T => U): Parser[U] = parser { in => this(in) map f } def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f } } sealed abstract class ParseResult[+T] { def withNext[U](f: T => String => ParseResult[U]): ParseResult[U] }

Slide 42

Slide 42 text

trait Parser[+T] extends (String => ParseResult[T]) { def map[U](f: T => U): Parser[U] = parser { in => this(in) map f } def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f } } sealed abstract class ParseResult[+T] { def withNext[U](f: T => String => ParseResult[U]): ParseResult[U] } case class Success[+T](result: T, rest: String) extends ParseResult[T] { override def withNext[U](f: T => String => ParseResult[U]) = f(result)(rest) }

Slide 43

Slide 43 text

trait Parser[+T] extends (String => ParseResult[T]) { def map[U](f: T => U): Parser[U] = parser { in => this(in) map f } def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f } } sealed abstract class ParseResult[+T] { def withNext[U](f: T => String => ParseResult[U]): ParseResult[U] } case class Success[+T](result: T, rest: String) extends ParseResult[T] { override def withNext[U](f: T => String => ParseResult[U]) = f(result)(rest) } case class Failure(msg: String) extends ParseResult[Nothing] { override def withNext[U](f: Nothing => String => ParseResult[U]) = this }

Slide 44

Slide 44 text

trait Parser[+T] extends (String => ParseResult[T]) { def ~>[U](right: Parser[U]): Parser[U] = parser { input => (this ~ right)(input) map { case (r1, r2) => r2 } } def <~[U](right: Parser[U]): Parser[T] = parser { input => (this ~ right)(input) map { case (r1, r2) => r1 } } }

Slide 45

Slide 45 text

No content

Slide 46

Slide 46 text

trait Parser[+T] extends (String => ParseResult[T]) { def ~>[U](right: Parser[U]): Parser[U] = { for (l <- this; r <- right) yield r } def <~[U](right: Parser[U]): Parser[T] = { for (l <- this; r <- right) yield l } }

Slide 47

Slide 47 text

for(x <- c1; y <- c2; z <- c3) yield {...} c1.flatMap(x => c2.flatMap(y => c3.map(z => {...})))

Slide 48

Slide 48 text

for(x <- c1; y <- c2; z <- c3) yield {...} c1.flatMap(x => c2.flatMap(y => c3.map(z => {...}))) def ~>[U](right: Parser[U]): Parser[U] = { for (l <- this; r <- right) yield r }

Slide 49

Slide 49 text

for(x <- c1; y <- c2; z <- c3) yield {...} c1.flatMap(x => c2.flatMap(y => c3.map(z => {...}))) def ~>[U](right: Parser[U]): Parser[U] = { for (l <- this; r <- right) yield r } this.flatMap(l => right.map(r => r))

Slide 50

Slide 50 text

for(x <- c1; y <- c2; z <- c3) yield {...} c1.flatMap(x => c2.flatMap(y => c3.map(z => {...}))) def ~>[U](right: Parser[U]): Parser[U] = { for (l <- this; r <- right) yield r } this.flatMap(l => right.map(r => r)) this.flatMap(_ => right)

Slide 51

Slide 51 text

json: object | array; object : '{' pair (',' pair)* '}' | '{' '}' // empty object pair: STRING ':' value ; array : '[' value (',' value)* ']' | '[' ']' // empty array value : STRING | NUMBER | object // recursion | array // recursion | 'true' // keywords | 'false' | 'null' STRING : '"' (ESC | ~["\\])* '"' ; fragment ESC : '\\' (["\\/bfnrt] | UNICODE) ; fragment UNICODE : 'u' HEX HEX HEX HEX ; fragment HEX : [0-9a-fA-F] ; NUMBER : '-'? INT '.' [0-9]+ EXP? // 1.35, 1.35E-9, 0.3, -4.5 | '-'? INT EXP // 1e10 -3e4 | '-'? INT // -3, 45 ; fragment INT : '0' | [1-9] [0-9]* ; // no leading zeros fragment EXP : [Ee] [+\-]? INT ; WS : [ \t\n\r]+ -> skip ;

Slide 52

Slide 52 text

No content

Slide 53

Slide 53 text

trait Parser[+T] extends (String => ParseResult[T]) { def | [U >: T](right: Parser[U]): Parser[U] = parser { input => this(input) match { case Failure(_) => right(input) case succ => succ } } }

Slide 54

Slide 54 text

trait Parser[+T] extends (String => ParseResult[T]) { def rep1Sep[U](sep: Parser[U]): Parser[List[T]] = (this ~ (sep ~> this).*) map { case (x, xs) => x +: xs } def repSep[U](sep: Parser[U]): Parser[List[T]] = this.rep1Sep(sep) | success(List()) }

Slide 55

Slide 55 text

trait Parser[+T] extends (String => ParseResult[T]) { def rep1Sep[U](sep: Parser[U]): Parser[List[T]] = (this ~ (sep ~> this).*) map { case (x, xs) => x +: xs } def repSep[U](sep: Parser[U]): Parser[List[T]] = this.rep1Sep(sep) | success(List()) } object Parser { def success[T](res: T) = parser { in => Success(res, in) } }

Slide 56

Slide 56 text

model object Json { sealed trait JsonVal case class JsonStringVal(s:String) extends JsonVal case class JsonIntVal(i:Int) extends JsonVal case class JsonArray(items: List[JsonVal]) extends JsonVal case class JsonNull() extends JsonVal case class JsonKey(k:String) case class JsonEntry(k:JsonKey, v:JsonVal) case class JsonObject(attrs:List[JsonEntry]) extends JsonVal }

Slide 57

Slide 57 text

object JsonParser extends CharParser { def obj = '{' ~> (entry repSep ',') <~ '}' >> {attrs => JsonObject(attrs)} def entry = entryKey ~ (':' ~> entryVal) >> {case(k,v) => JsonEntry(k, v)} def entryKey = '"' ~> charNot('"').* <~ '"' >> {v => JsonKey(v.mkString)} def entryVal: Parser[JsonVal] = intVal | stringVal | obj | arrayVal | nullVal def intVal = intNumber >> {i => JsonIntVal(i)} def stringVal = '"' ~> charNot('"').* <~ '"' >> {v => JsonStringVal(v.mkString)} def arrayVal = '[' ~> (entryVal repSep ',') <~ ']' >> {items => JsonArray(items)} def nullVal = "null" >> {_ => JsonNull()} }

Slide 58

Slide 58 text

object JsonParser extends CharParser { def obj = '{' ~> (entry repSep ',') <~ '}' >> {attrs => JsonObject(attrs)} def entry = entryKey ~ (':' ~> entryVal) >> {case(k,v) => JsonEntry(k, v)} def entryKey = '"' ~> charNot('"').* <~ '"' >> {v => JsonKey(v.mkString)} def entryVal: Parser[JsonVal] = intVal | stringVal | obj | arrayVal | nullVal def intVal = intNumber >> {i => JsonIntVal(i)} def stringVal = '"' ~> charNot('"').* <~ '"' >> {v => JsonStringVal(v.mkString)} def arrayVal = '[' ~> (entryVal repSep ',') <~ ']' >> {items => JsonArray(items)} def nullVal = "null" >> {_ => JsonNull()} }

Slide 59

Slide 59 text

object JsonParser extends CharParser { def obj = '{' ~> (entry repSep ',') <~ '}' >> {attrs => JsonObject(attrs)} def entry = entryKey ~ (':' ~> entryVal) >> {case(k,v) => JsonEntry(k, v)} def entryKey = '"' ~> charNot('"').* <~ '"' >> {v => JsonKey(v.mkString)} def entryVal: Parser[JsonVal] = intVal | stringVal | obj | arrayVal | nullVal def intVal = intNumber >> {i => JsonIntVal(i)} def stringVal = '"' ~> charNot('"').* <~ '"' >> {v => JsonStringVal(v.mkString)} def arrayVal = '[' ~> (entryVal repSep ',') <~ ']' >> {items => JsonArray(items)} def nullVal = "null" >> {_ => JsonNull()} }

Slide 60

Slide 60 text

def | [U >: T](right: Parser[U]): Parser[U] = parser { input => ... }

Slide 61

Slide 61 text

def | [U >: T](right: Parser[U]): Parser[U] = parser { input => ... }

Slide 62

Slide 62 text

def | [U >: T](right: Parser[U]): Parser[U] = parser { input => ... } def | [U >: T](right: => Parser[U]): Parser[U] = parser { input => ... } call-by-name

Slide 63

Slide 63 text

parseJson( """ {"name":"John","lastname":"Doe","age":55,"hobbies":["tennis","football"],"pet":null} """.trim)

Slide 64

Slide 64 text

parseJson( """ {"name":"John","lastname":"Doe","age":55,"hobbies":["tennis","football"],"pet":null} """.trim) JsonObject( List( JsonEntry(JsonKey(name), JsonStringVal(John)), JsonEntry(JsonKey(lastname), JsonStringVal(Doe)), JsonEntry(JsonKey(age), JsonIntVal(55)), JsonEntry(JsonKey(hobbies), JsonArray( List( JsonStringVal(tennis), JsonStringVal(football))) ), JsonEntry(JsonKey(pet), JsonNull()) ) )

Slide 65

Slide 65 text

• Sources of this talk: https://github.com/fe2s/parser-combinators-talk • Scala library: https://github.com/scala/scala-parser-combinators • Haskell Parsec: https://wiki.haskell.org/Parsec