Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Understanding Parser Combinators in Scala

Understanding Parser Combinators in Scala

We will gradually build an embedded domain-specific language (DSL) for specifying grammars in an EBNF-like notation in Scala.

We will use the monadic parser combinator approach. As a result, we should be able to parse a JSON document using our library.

Avatar for Oleksii Diagiliev

Oleksii Diagiliev

July 15, 2015
Tweet

More Decks by Oleksii Diagiliev

Other Decks in Programming

Transcript

  1. A parser is a function: String => ParseResult. ParseResult is either a Success(result, rest) or

    a Failure(message). Examples: “abc” => Success(“a”, “bc”); “” => Failure(“string is empty”)
  2. trait Parser[+T] { def parse(input:String): ParseResult[T] } trait Parser[+T] extends

    Function1[String, ParseResult[T]] trait Parser[+T] extends (String => ParseResult[T])
  3. trait Parser[+T] { def parse(input:String): ParseResult[T] } trait Parser[+T] extends

    Function1[String, ParseResult[T]] trait Parser[+T] extends (String => ParseResult[T]) sealed abstract class ParseResult[+T] case class Success[+T](result: T, rest: String) extends ParseResult[T] case class Failure(msg: String) extends ParseResult[Nothing]
  4. val anyChar = new Parser[Char] { def apply(input: String): ParseResult[Char]

    = { if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } }
  5. val anyChar = new Parser[Char] { def apply(input: String): ParseResult[Char]

    = { if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } } def parser[T](f: String => ParseResult[T]) = new Parser[T] { def apply(in: String): ParseResult[T] = f(in) }
  6. val anyChar = new Parser[Char] { def apply(input: String): ParseResult[Char]

    = { if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } } def parser[T](f: String => ParseResult[T]) = new Parser[T] { def apply(in: String): ParseResult[T] = f(in) } val anyChar = parser { input => if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) }
  7. val anyChar = new Parser[Char] { def apply(input: String): ParseResult[Char]

    = { if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } } def parser[T](f: String => ParseResult[T]) = new Parser[T] { def apply(in: String): ParseResult[T] = f(in) } val anyChar = parser { input => if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } anyChar(“abc”) == Success(“a”, ”bc”)
  8. A parser combinator is a higher-order function that accepts several parsers as input

    and returns a new parser as its output (https://en.wikipedia.org/wiki/Parser_combinator).
  9. def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec

    def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } parser combinator
  10. def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec

    def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } parser combinator
  11. def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec

    def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } parser combinator
  12. def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec

    def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } res :+ current.result parser combinator
  13. def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec

    def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } res :+ current.result map {_.reverse} parser combinator
  14. … ParseResult.map sealed abstract class ParseResult[+T] { def map[U](f: T

    => U): ParseResult[U] } case class Success[+T](result: T, rest: String) extends ParseResult[T] { override def map[U](f: T => U): ParseResult[U] = Success(f(result), rest) }
  15. … ParseResult.map sealed abstract class ParseResult[+T] { def map[U](f: T

    => U): ParseResult[U] } case class Success[+T](result: T, rest: String) extends ParseResult[T] { override def map[U](f: T => U): ParseResult[U] = Success(f(result), rest) } case class Failure(msg: String) extends ParseResult[Nothing] { override def map[U](f: Nothing => U): ParseResult[U] = this }
  16. { "firstName":"John", "isAlive":true, "age":25, "address":{ "streetAddress":"21 2nd Street", "city":"New York"

    }, "phoneNumbers":[ { "type":"home", "number":"212 555-1234" }, { "type":"office", "number":"646 555-4567" } ] }
  17. { "firstName":"John", "isAlive":true, "age":25, "address":{ "streetAddress":"21 2nd Street", "city":"New York"

    }, "phoneNumbers":[ { "type":"home", "number":"212 555-1234" }, { "type":"office", "number":"646 555-4567" } ] } let’s parse this double-quoted string
  18. anyChar def iff[T](p: Parser[T], f: T => Boolean) = parser

    { input => p(input) match { case succ@Success(res, rest) => if (f(res)) succ else Failure("iff failed") case failure => failure } } parser combinator
  19. anyChar def iff[T](p: Parser[T], f: T => Boolean) = parser

    { input => p(input) match { case succ@Success(res, rest) => if (f(res)) succ else Failure("iff failed") case failure => failure } } def char(c: Char): Parser[Char] = iff(anyChar, _ == c) def charNot(except: Char*): Parser[Char] = iff[Char](anyChar, c => !except.contains(c)) val quote = char('"') val digit = iff[Char](anyChar, _.isDigit) parser combinator
  20. anyChar def iff[T](p: Parser[T], f: T => Boolean) = parser

    { input => p(input) match { case succ@Success(res, rest) => if (f(res)) succ else Failure("iff failed") case failure => failure } } def char(c: Char): Parser[Char] = iff(anyChar, _ == c) def charNot(except: Char*): Parser[Char] = iff[Char](anyChar, c => !except.contains(c)) val quote = char('"') val digit = iff[Char](anyChar, _.isDigit) quote(""" "firstName" """.trim) == Success(", firstName") parser combinator
  21. def and[A, B](parserA: Parser[A], parserB: Parser[B]): Parser[(A,B)] = parser {

    input => parserA(input) match { case Success(res, rest) => parserB(rest) match { case Success(res2, rest2) => Success((res, res2), rest2) case _ => Failure(s"(and) second failed on $rest") } case _ => Failure("(and) first failed") } } parser combinator
  22. def and[A, B](parserA: Parser[A], parserB: Parser[B]): Parser[(A,B)] = parser {

    input => parserA(input) match { case Success(res, rest) => parserB(rest) match { case Success(res2, rest2) => Success((res, res2), rest2) case _ => Failure(s"(and) second failed on $rest") } case _ => Failure("(and) first failed") } } and(digit, char('a'))("3abc") == Success((3,a), bc) and(digit, char('a'))("3333") == Failure((and) second failed on 333) parser combinator
  23. takeFirst takeSecond def takeFirst[A, B](parserA: Parser[A], parserB: Parser[B]) = parser

    { input => and(parserA, parserB)(input) map { case (r1, r2) => r1 } }
  24. takeFirst takeSecond def takeFirst[A, B](parserA: Parser[A], parserB: Parser[B]) = parser

    { input => and(parserA, parserB)(input) map { case (r1, r2) => r1 } } def takeSecond[A, B](parserA: Parser[A], parserB: Parser[B]) = parser { input => and(parserA, parserB)(input) map { case (r1, r2) => r2 } }
  25. takeFirst takeSecond def takeFirst[A, B](parserA: Parser[A], parserB: Parser[B]) = parser

    { input => and(parserA, parserB)(input) map { case (r1, r2) => r1 } } def takeSecond[A, B](parserA: Parser[A], parserB: Parser[B]) = parser { input => and(parserA, parserB)(input) map { case (r1, r2) => r2 } } takeSecond(quote, stringParser)(""" "firstName" """.trim) == Success(firstName”, )
  26. val stringVal = takeFirst( takeSecond( char('"'), many(charNot('"')) ), char('"') )

    takeSecond(quote, stringParser)(""" "firstName" """.trim) == Success(firstName, )
  27. trait Parser[+T] extends (String => ParseResult[T]) { def ~>[U](right: Parser[U]):

    Parser[U] = takeSecond(this, right) def <~[U](right: Parser[U]): Parser[T] = takeFirst(this, right) def ~[U](right: => Parser[U]): Parser[(T, U)] = and(this, right) def *() = many(this) } shortcuts
  28. trait Parser[+T] extends (String => ParseResult[T]) { def ~>[U](right: Parser[U]):

    Parser[U] = takeSecond(this, right) def <~[U](right: Parser[U]): Parser[T] = takeFirst(this, right) def ~[U](right: => Parser[U]): Parser[(T, U)] = and(this, right) def *() = many(this) } val stringVal = quote ~> charNot('"').* <~ quote implicit def charToParser(c: Char): Parser[Char] = char(c) val stringVal = '"' ~> charNot('"').* <~ '"' shortcuts
  29. trait Parser[+T] extends (String => ParseResult[T]) { def map[U](f: T

    => U): Parser[U] = parser { in => this(in) map f } def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f } }
  30. trait Parser[+T] extends (String => ParseResult[T]) { def map[U](f: T

    => U): Parser[U] = parser { in => this(in) map f } def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f } } sealed abstract class ParseResult[+T] { def withNext[U](f: T => String => ParseResult[U]): ParseResult[U] }
  31. trait Parser[+T] extends (String => ParseResult[T]) { def map[U](f: T

    => U): Parser[U] = parser { in => this(in) map f } def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f } } sealed abstract class ParseResult[+T] { def withNext[U](f: T => String => ParseResult[U]): ParseResult[U] } case class Success[+T](result: T, rest: String) extends ParseResult[T] { override def withNext[U](f: T => String => ParseResult[U]) = f(result)(rest) }
  32. trait Parser[+T] extends (String => ParseResult[T]) { def map[U](f: T

    => U): Parser[U] = parser { in => this(in) map f } def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f } } sealed abstract class ParseResult[+T] { def withNext[U](f: T => String => ParseResult[U]): ParseResult[U] } case class Success[+T](result: T, rest: String) extends ParseResult[T] { override def withNext[U](f: T => String => ParseResult[U]) = f(result)(rest) } case class Failure(msg: String) extends ParseResult[Nothing] { override def withNext[U](f: Nothing => String => ParseResult[U]) = this }
  33. trait Parser[+T] extends (String => ParseResult[T]) { def ~>[U](right: Parser[U]):

    Parser[U] = parser { input => (this ~ right)(input) map { case (r1, r2) => r2 } } def <~[U](right: Parser[U]): Parser[T] = parser { input => (this ~ right)(input) map { case (r1, r2) => r1 } } }
  34. trait Parser[+T] extends (String => ParseResult[T]) { def ~>[U](right: Parser[U]):

    Parser[U] = { for (l <- this; r <- right) yield r } def <~[U](right: Parser[U]): Parser[T] = { for (l <- this; r <- right) yield l } }
  35. for(x <- c1; y <- c2; z <- c3) yield

    {...} c1.flatMap(x => c2.flatMap(y => c3.map(z => {...})))
  36. for(x <- c1; y <- c2; z <- c3) yield

    {...} c1.flatMap(x => c2.flatMap(y => c3.map(z => {...}))) def ~>[U](right: Parser[U]): Parser[U] = { for (l <- this; r <- right) yield r }
  37. for(x <- c1; y <- c2; z <- c3) yield

    {...} c1.flatMap(x => c2.flatMap(y => c3.map(z => {...}))) def ~>[U](right: Parser[U]): Parser[U] = { for (l <- this; r <- right) yield r } this.flatMap(l => right.map(r => r))
  38. for(x <- c1; y <- c2; z <- c3) yield

    {...} c1.flatMap(x => c2.flatMap(y => c3.map(z => {...}))) def ~>[U](right: Parser[U]): Parser[U] = { for (l <- this; r <- right) yield r } this.flatMap(l => right.map(r => r)) this.flatMap(_ => right)
  39. json: object | array; object : '{' pair (',' pair)*

    '}' | '{' '}' // empty object pair: STRING ':' value ; array : '[' value (',' value)* ']' | '[' ']' // empty array value : STRING | NUMBER | object // recursion | array // recursion | 'true' // keywords | 'false' | 'null' STRING : '"' (ESC | ~["\\])* '"' ; fragment ESC : '\\' (["\\/bfnrt] | UNICODE) ; fragment UNICODE : 'u' HEX HEX HEX HEX ; fragment HEX : [0-9a-fA-F] ; NUMBER : '-'? INT '.' [0-9]+ EXP? // 1.35, 1.35E-9, 0.3, -4.5 | '-'? INT EXP // 1e10 -3e4 | '-'? INT // -3, 45 ; fragment INT : '0' | [1-9] [0-9]* ; // no leading zeros fragment EXP : [Ee] [+\-]? INT ; WS : [ \t\n\r]+ -> skip ;
  40. trait Parser[+T] extends (String => ParseResult[T]) { def | [U

    >: T](right: Parser[U]): Parser[U] = parser { input => this(input) match { case Failure(_) => right(input) case succ => succ } } }
  41. trait Parser[+T] extends (String => ParseResult[T]) { def rep1Sep[U](sep: Parser[U]):

    Parser[List[T]] = (this ~ (sep ~> this).*) map { case (x, xs) => x +: xs } def repSep[U](sep: Parser[U]): Parser[List[T]] = this.rep1Sep(sep) | success(List()) }
  42. trait Parser[+T] extends (String => ParseResult[T]) { def rep1Sep[U](sep: Parser[U]):

    Parser[List[T]] = (this ~ (sep ~> this).*) map { case (x, xs) => x +: xs } def repSep[U](sep: Parser[U]): Parser[List[T]] = this.rep1Sep(sep) | success(List()) } object Parser { def success[T](res: T) = parser { in => Success(res, in) } }
  43. model object Json { sealed trait JsonVal case class JsonStringVal(s:String)

    extends JsonVal case class JsonIntVal(i:Int) extends JsonVal case class JsonArray(items: List[JsonVal]) extends JsonVal case class JsonNull() extends JsonVal case class JsonKey(k:String) case class JsonEntry(k:JsonKey, v:JsonVal) case class JsonObject(attrs:List[JsonEntry]) extends JsonVal }
  44. object JsonParser extends CharParser { def obj = '{' ~>

    (entry repSep ',') <~ '}' >> {attrs => JsonObject(attrs)} def entry = entryKey ~ (':' ~> entryVal) >> {case(k,v) => JsonEntry(k, v)} def entryKey = '"' ~> charNot('"').* <~ '"' >> {v => JsonKey(v.mkString)} def entryVal: Parser[JsonVal] = intVal | stringVal | obj | arrayVal | nullVal def intVal = intNumber >> {i => JsonIntVal(i)} def stringVal = '"' ~> charNot('"').* <~ '"' >> {v => JsonStringVal(v.mkString)} def arrayVal = '[' ~> (entryVal repSep ',') <~ ']' >> {items => JsonArray(items)} def nullVal = "null" >> {_ => JsonNull()} }
  45. object JsonParser extends CharParser { def obj = '{' ~>

    (entry repSep ',') <~ '}' >> {attrs => JsonObject(attrs)} def entry = entryKey ~ (':' ~> entryVal) >> {case(k,v) => JsonEntry(k, v)} def entryKey = '"' ~> charNot('"').* <~ '"' >> {v => JsonKey(v.mkString)} def entryVal: Parser[JsonVal] = intVal | stringVal | obj | arrayVal | nullVal def intVal = intNumber >> {i => JsonIntVal(i)} def stringVal = '"' ~> charNot('"').* <~ '"' >> {v => JsonStringVal(v.mkString)} def arrayVal = '[' ~> (entryVal repSep ',') <~ ']' >> {items => JsonArray(items)} def nullVal = "null" >> {_ => JsonNull()} }
  46. object JsonParser extends CharParser { def obj = '{' ~>

    (entry repSep ',') <~ '}' >> {attrs => JsonObject(attrs)} def entry = entryKey ~ (':' ~> entryVal) >> {case(k,v) => JsonEntry(k, v)} def entryKey = '"' ~> charNot('"').* <~ '"' >> {v => JsonKey(v.mkString)} def entryVal: Parser[JsonVal] = intVal | stringVal | obj | arrayVal | nullVal def intVal = intNumber >> {i => JsonIntVal(i)} def stringVal = '"' ~> charNot('"').* <~ '"' >> {v => JsonStringVal(v.mkString)} def arrayVal = '[' ~> (entryVal repSep ',') <~ ']' >> {items => JsonArray(items)} def nullVal = "null" >> {_ => JsonNull()} }
  47. def | [U >: T](right: Parser[U]): Parser[U] = parser {

    input => ... } def | [U >: T](right: => Parser[U]): Parser[U] = parser { input => ... } call-by-name
  48. parseJson( """ {"name":"John","lastname":"Doe","age":55,"hobbies":["tennis","football"],"pet":null} """.trim) JsonObject( List( JsonEntry(JsonKey(name), JsonStringVal(John)), JsonEntry(JsonKey(lastname), JsonStringVal(Doe)),

    JsonEntry(JsonKey(age), JsonIntVal(55)), JsonEntry(JsonKey(hobbies), JsonArray( List( JsonStringVal(tennis), JsonStringVal(football))) ), JsonEntry(JsonKey(pet), JsonNull()) ) )