Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Understanding Parser Combinators in Scala

Understanding Parser Combinators in Scala

We will gradually build an embedded domain-specific language (DSL) for specifying grammars in an EBNF-like notation in Scala.

We will use the monadic parser combinator approach. As a result, we should be able to parse a JSON document using our library.

Oleksii Diagiliev

July 15, 2015
Tweet

More Decks by Oleksii Diagiliev

Other Decks in Programming

Transcript

  1. function: String => ParseResult ParseResult is a Success(result, rest) or

    Failure(message) “abc” => Success(“a”, “bc”) “” => Failure(“string is empty”)
  2. trait Parser[+T] { def parse(input:String): ParseResult[T] } trait Parser[+T] extends

    Function1[String, ParseResult[T]] trait Parser[+T] extends (String => ParseResult[T])
  3. trait Parser[+T] { def parse(input:String): ParseResult[T] } trait Parser[+T] extends

    Function1[String, ParseResult[T]] trait Parser[+T] extends (String => ParseResult[T]) sealed abstract class ParseResult[+T] case class Success[+T](result: T, rest: String) extends ParseResult[T] case class Failure(msg: String) extends ParseResult[Nothing]
  4. val anyChar = new Parser[Char] { def apply(input: String): ParseResult[Char]

    = { if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } }
  5. val anyChar = new Parser[Char] { def apply(input: String): ParseResult[Char]

    = { if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } } def parser[T](f: String => ParseResult[T]) = new Parser[T] { def apply(in: String): ParseResult[T] = f(in) }
  6. val anyChar = new Parser[Char] { def apply(input: String): ParseResult[Char]

    = { if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } } def parser[T](f: String => ParseResult[T]) = new Parser[T] { def apply(in: String): ParseResult[T] = f(in) } val anyChar = parser { input => if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) }
  7. val anyChar = new Parser[Char] { def apply(input: String): ParseResult[Char]

    = { if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } } def parser[T](f: String => ParseResult[T]) = new Parser[T] { def apply(in: String): ParseResult[T] = f(in) } val anyChar = parser { input => if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail) } anyChar("abc") == Success("a", "bc")
  8. A parser combinator is a higher-order function that accepts several parsers as input

    and returns a new parser as its output https://en.wikipedia.org/wiki/Parser_combinator
  9. def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec

    def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } parser combinator
  10. def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec

    def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } parser combinator
  11. def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec

    def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } parser combinator
  12. def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec

    def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } res :+ current.result parser combinator
  13. def many[T](p: Parser[T]): Parser[List[T]] = parser { input => @tailrec

    def parseInternal(current: Success[List[T]]): Success[List[T]] = { p(current.rest) match { case Success(res, rest) => parseInternal(Success(current.result :+ res, rest)) case _ => current } } parseInternal(Success(List(), input)) } res :+ current.result map {_.reverse} parser combinator
  14. … ParseResult.map sealed abstract class ParseResult[+T] { def map[U](f: T

    => U): ParseResult[U] } case class Success[+T](result: T, rest: String) extends ParseResult[T] { override def map[U](f: T => U): ParseResult[U] = Success(f(result), rest) }
  15. … ParseResult.map sealed abstract class ParseResult[+T] { def map[U](f: T

    => U): ParseResult[U] } case class Success[+T](result: T, rest: String) extends ParseResult[T] { override def map[U](f: T => U): ParseResult[U] = Success(f(result), rest) } case class Failure(msg: String) extends ParseResult[Nothing] { override def map[U](f: Nothing => U): ParseResult[U] = this }
  16. { "firstName":"John", "isAlive":true, "age":25, "address":{ "streetAddress":"21 2nd Street", "city":"New York"

    }, "phoneNumbers":[ { "type":"home", "number":"212 555-1234" }, { "type":"office", "number":"646 555-4567" } ] }
  17. { "firstName":"John", "isAlive":true, "age":25, "address":{ "streetAddress":"21 2nd Street", "city":"New York"

    }, "phoneNumbers":[ { "type":"home", "number":"212 555-1234" }, { "type":"office", "number":"646 555-4567" } ] } let’s parse this double-quoted string
  18. anyChar def iff[T](p: Parser[T], f: T => Boolean) = parser

    { input => p(input) match { case succ@Success(res, rest) => if (f(res)) succ else Failure("iff failed") case failure => failure } } parser combinator
  19. anyChar def iff[T](p: Parser[T], f: T => Boolean) = parser

    { input => p(input) match { case succ@Success(res, rest) => if (f(res)) succ else Failure("iff failed") case failure => failure } } def char(c: Char): Parser[Char] = iff(anyChar, _ == c) def charNot(except: Char*): Parser[Char] = iff[Char](anyChar, c => !except.contains(c)) val quote = char('"') val digit = iff[Char](anyChar, _.isDigit) parser combinator
  20. anyChar def iff[T](p: Parser[T], f: T => Boolean) = parser

    { input => p(input) match { case succ@Success(res, rest) => if (f(res)) succ else Failure("iff failed") case failure => failure } } def char(c: Char): Parser[Char] = iff(anyChar, _ == c) def charNot(except: Char*): Parser[Char] = iff[Char](anyChar, c => !except.contains(c)) val quote = char('"') val digit = iff[Char](anyChar, _.isDigit) quote(""" "firstName" """.trim) == Success(", firstName") parser combinator
  21. def and[A, B](parserA: Parser[A], parserB: Parser[B]): Parser[(A,B)] = parser {

    input => parserA(input) match { case Success(res, rest) => parserB(rest) match { case Success(res2, rest2) => Success((res, res2), rest2) case _ => Failure(s"(and) second failed on $rest") } case _ => Failure("(and) first failed") } } parser combinator
  22. def and[A, B](parserA: Parser[A], parserB: Parser[B]): Parser[(A,B)] = parser {

    input => parserA(input) match { case Success(res, rest) => parserB(rest) match { case Success(res2, rest2) => Success((res, res2), rest2) case _ => Failure(s"(and) second failed on $rest") } case _ => Failure("(and) first failed") } } and(digit, char('a'))("3abc") == Success((3,a), bc) and(digit, char('a'))("3333") == Failure((and) second failed on 333) parser combinator
  23. takeFirst takeSecond def takeFirst[A, B](parserA: Parser[A], parserB: Parser[B]) = parser

    { input => and(parserA, parserB)(input) map { case (r1, r2) => r1 } }
  24. takeFirst takeSecond def takeFirst[A, B](parserA: Parser[A], parserB: Parser[B]) = parser

    { input => and(parserA, parserB)(input) map { case (r1, r2) => r1 } } def takeSecond[A, B](parserA: Parser[A], parserB: Parser[B]) = parser { input => and(parserA, parserB)(input) map { case (r1, r2) => r2 } }
  25. takeFirst takeSecond def takeFirst[A, B](parserA: Parser[A], parserB: Parser[B]) = parser

    { input => and(parserA, parserB)(input) map { case (r1, r2) => r1 } } def takeSecond[A, B](parserA: Parser[A], parserB: Parser[B]) = parser { input => and(parserA, parserB)(input) map { case (r1, r2) => r2 } } takeSecond(quote, stringParser)(""" "firstName" """.trim) == Success(firstName”, )
  26. val stringVal = takeFirst( takeSecond( char('"'), many(charNot('"')) ), char('"') )

    takeSecond(quote, stringParser)(""" "firstName" """.trim) == Success(firstName, )
  27. trait Parser[+T] extends (String => ParseResult[T]) { def ~>[U](right: Parser[U]):

    Parser[U] = takeSecond(this, right) def <~[U](right: Parser[U]): Parser[T] = takeFirst(this, right) def ~[U](right: => Parser[U]): Parser[(T, U)] = and(this, right) def *() = many(this) } shortcuts
  28. trait Parser[+T] extends (String => ParseResult[T]) { def ~>[U](right: Parser[U]):

    Parser[U] = takeSecond(this, right) def <~[U](right: Parser[U]): Parser[T] = takeFirst(this, right) def ~[U](right: => Parser[U]): Parser[(T, U)] = and(this, right) def *() = many(this) } val stringVal = quote ~> charNot('"').* <~ quote implicit def charToParser(c: Char): Parser[Char] = char(c) val stringVal = '"' ~> charNot('"').* <~ '"' shortcuts
  29. trait Parser[+T] extends (String => ParseResult[T]) { def map[U](f: T

    => U): Parser[U] = parser { in => this(in) map f } def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f } }
  30. trait Parser[+T] extends (String => ParseResult[T]) { def map[U](f: T

    => U): Parser[U] = parser { in => this(in) map f } def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f } } sealed abstract class ParseResult[+T] { def withNext[U](f: T => String => ParseResult[U]): ParseResult[U] }
  31. trait Parser[+T] extends (String => ParseResult[T]) { def map[U](f: T

    => U): Parser[U] = parser { in => this(in) map f } def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f } } sealed abstract class ParseResult[+T] { def withNext[U](f: T => String => ParseResult[U]): ParseResult[U] } case class Success[+T](result: T, rest: String) extends ParseResult[T] { override def withNext[U](f: T => String => ParseResult[U]) = f(result)(rest) }
  32. trait Parser[+T] extends (String => ParseResult[T]) { def map[U](f: T

    => U): Parser[U] = parser { in => this(in) map f } def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f } } sealed abstract class ParseResult[+T] { def withNext[U](f: T => String => ParseResult[U]): ParseResult[U] } case class Success[+T](result: T, rest: String) extends ParseResult[T] { override def withNext[U](f: T => String => ParseResult[U]) = f(result)(rest) } case class Failure(msg: String) extends ParseResult[Nothing] { override def withNext[U](f: Nothing => String => ParseResult[U]) = this }
  33. trait Parser[+T] extends (String => ParseResult[T]) { def ~>[U](right: Parser[U]):

    Parser[U] = parser { input => (this ~ right)(input) map { case (r1, r2) => r2 } } def <~[U](right: Parser[U]): Parser[T] = parser { input => (this ~ right)(input) map { case (r1, r2) => r1 } } }
  34. trait Parser[+T] extends (String => ParseResult[T]) { def ~>[U](right: Parser[U]):

    Parser[U] = { for (l <- this; r <- right) yield r } def <~[U](right: Parser[U]): Parser[T] = { for (l <- this; r <- right) yield l } }
  35. for(x <- c1; y <- c2; z <- c3) yield

    {...} c1.flatMap(x => c2.flatMap(y => c3.map(z => {...})))
  36. for(x <- c1; y <- c2; z <- c3) yield

    {...} c1.flatMap(x => c2.flatMap(y => c3.map(z => {...}))) def ~>[U](right: Parser[U]): Parser[U] = { for (l <- this; r <- right) yield r }
  37. for(x <- c1; y <- c2; z <- c3) yield

    {...} c1.flatMap(x => c2.flatMap(y => c3.map(z => {...}))) def ~>[U](right: Parser[U]): Parser[U] = { for (l <- this; r <- right) yield r } this.flatMap(l => right.map(r => r))
  38. for(x <- c1; y <- c2; z <- c3) yield

    {...} c1.flatMap(x => c2.flatMap(y => c3.map(z => {...}))) def ~>[U](right: Parser[U]): Parser[U] = { for (l <- this; r <- right) yield r } this.flatMap(l => right.map(r => r)) this.flatMap(_ => right)
  39. json: object | array; object : '{' pair (',' pair)*

    '}' | '{' '}' // empty object pair: STRING ':' value ; array : '[' value (',' value)* ']' | '[' ']' // empty array value : STRING | NUMBER | object // recursion | array // recursion | 'true' // keywords | 'false' | 'null' STRING : '"' (ESC | ~["\\])* '"' ; fragment ESC : '\\' (["\\/bfnrt] | UNICODE) ; fragment UNICODE : 'u' HEX HEX HEX HEX ; fragment HEX : [0-9a-fA-F] ; NUMBER : '-'? INT '.' [0-9]+ EXP? // 1.35, 1.35E-9, 0.3, -4.5 | '-'? INT EXP // 1e10 -3e4 | '-'? INT // -3, 45 ; fragment INT : '0' | [1-9] [0-9]* ; // no leading zeros fragment EXP : [Ee] [+\-]? INT ; WS : [ \t\n\r]+ -> skip ;
  40. trait Parser[+T] extends (String => ParseResult[T]) { def | [U

    >: T](right: Parser[U]): Parser[U] = parser { input => this(input) match { case Failure(_) => right(input) case succ => succ } } }
  41. trait Parser[+T] extends (String => ParseResult[T]) { def rep1Sep[U](sep: Parser[U]):

    Parser[List[T]] = (this ~ (sep ~> this).*) map { case (x, xs) => x +: xs } def repSep[U](sep: Parser[U]): Parser[List[T]] = this.rep1Sep(sep) | success(List()) }
  42. trait Parser[+T] extends (String => ParseResult[T]) { def rep1Sep[U](sep: Parser[U]):

    Parser[List[T]] = (this ~ (sep ~> this).*) map { case (x, xs) => x +: xs } def repSep[U](sep: Parser[U]): Parser[List[T]] = this.rep1Sep(sep) | success(List()) } object Parser { def success[T](res: T) = parser { in => Success(res, in) } }
  43. model object Json { sealed trait JsonVal case class JsonStringVal(s:String)

    extends JsonVal case class JsonIntVal(i:Int) extends JsonVal case class JsonArray(items: List[JsonVal]) extends JsonVal case class JsonNull() extends JsonVal case class JsonKey(k:String) case class JsonEntry(k:JsonKey, v:JsonVal) case class JsonObject(attrs:List[JsonEntry]) extends JsonVal }
  44. object JsonParser extends CharParser { def obj = '{' ~>

    (entry repSep ',') <~ '}' >> {attrs => JsonObject(attrs)} def entry = entryKey ~ (':' ~> entryVal) >> {case(k,v) => JsonEntry(k, v)} def entryKey = '"' ~> charNot('"').* <~ '"' >> {v => JsonKey(v.mkString)} def entryVal: Parser[JsonVal] = intVal | stringVal | obj | arrayVal | nullVal def intVal = intNumber >> {i => JsonIntVal(i)} def stringVal = '"' ~> charNot('"').* <~ '"' >> {v => JsonStringVal(v.mkString)} def arrayVal = '[' ~> (entryVal repSep ',') <~ ']' >> {items => JsonArray(items)} def nullVal = "null" >> {_ => JsonNull()} }
  45. object JsonParser extends CharParser { def obj = '{' ~>

    (entry repSep ',') <~ '}' >> {attrs => JsonObject(attrs)} def entry = entryKey ~ (':' ~> entryVal) >> {case(k,v) => JsonEntry(k, v)} def entryKey = '"' ~> charNot('"').* <~ '"' >> {v => JsonKey(v.mkString)} def entryVal: Parser[JsonVal] = intVal | stringVal | obj | arrayVal | nullVal def intVal = intNumber >> {i => JsonIntVal(i)} def stringVal = '"' ~> charNot('"').* <~ '"' >> {v => JsonStringVal(v.mkString)} def arrayVal = '[' ~> (entryVal repSep ',') <~ ']' >> {items => JsonArray(items)} def nullVal = "null" >> {_ => JsonNull()} }
  46. object JsonParser extends CharParser { def obj = '{' ~>

    (entry repSep ',') <~ '}' >> {attrs => JsonObject(attrs)} def entry = entryKey ~ (':' ~> entryVal) >> {case(k,v) => JsonEntry(k, v)} def entryKey = '"' ~> charNot('"').* <~ '"' >> {v => JsonKey(v.mkString)} def entryVal: Parser[JsonVal] = intVal | stringVal | obj | arrayVal | nullVal def intVal = intNumber >> {i => JsonIntVal(i)} def stringVal = '"' ~> charNot('"').* <~ '"' >> {v => JsonStringVal(v.mkString)} def arrayVal = '[' ~> (entryVal repSep ',') <~ ']' >> {items => JsonArray(items)} def nullVal = "null" >> {_ => JsonNull()} }
  47. def | [U >: T](right: Parser[U]): Parser[U] = parser {

    input => ... } def | [U >: T](right: => Parser[U]): Parser[U] = parser { input => ... } call-by-name
  48. parseJson( """ {"name":"John","lastname":"Doe","age":55,"hobbies":["tennis","football"],"pet":null} """.trim) JsonObject( List( JsonEntry(JsonKey(name), JsonStringVal(John)), JsonEntry(JsonKey(lastname), JsonStringVal(Doe)),

    JsonEntry(JsonKey(age), JsonIntVal(55)), JsonEntry(JsonKey(hobbies), JsonArray( List( JsonStringVal(tennis), JsonStringVal(football))) ), JsonEntry(JsonKey(pet), JsonNull()) ) )