Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Understanding Parser Combinators in Scala

Understanding Parser Combinators in Scala

We will gradually build an embedded domain-specific language (DSL) for specifying grammars in an EBNF-like notation in Scala.

We will use the monadic parser combinator approach. As a result, we should be able to parse a JSON document using our library.

Oleksii Diagiliev

July 15, 2015
Tweet

More Decks by Oleksii Diagiliev

Other Decks in Programming

Transcript

  1. Understanding
    Parser Combinators
    in Scala
    Oleksiy Dyagilev
    #scala school

    View Slide

  2. function: String => ParseResult

    View Slide

  3. function: String => ParseResult
    ParseResult is a Success(result, rest) or Failure(message)

    View Slide

  4. function: String => ParseResult
    ParseResult is a Success(result, rest) or Failure(message)
    "abc" => Success("a", "bc")
    "" => Failure("string is empty")

    View Slide

  5. trait Parser[+T] {
    def parse(input:String): ParseResult[T]
    }

    View Slide

  6. trait Parser[+T] {
    def parse(input:String): ParseResult[T]
    }
    trait Parser[+T] extends Function1[String, ParseResult[T]]

    View Slide

  7. trait Parser[+T] {
    def parse(input:String): ParseResult[T]
    }
    trait Parser[+T] extends Function1[String, ParseResult[T]]
    trait Parser[+T] extends (String => ParseResult[T])

    View Slide

  8. trait Parser[+T] {
    def parse(input:String): ParseResult[T]
    }
    trait Parser[+T] extends Function1[String, ParseResult[T]]
    trait Parser[+T] extends (String => ParseResult[T])
    sealed abstract class ParseResult[+T]
    case class Success[+T](result: T, rest: String) extends ParseResult[T]
    case class Failure(msg: String) extends ParseResult[Nothing]

    View Slide

  9. val anyChar = new Parser[Char] {
    def apply(input: String): ParseResult[Char] = {
    if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail)
    }
    }

    View Slide

  10. val anyChar = new Parser[Char] {
    def apply(input: String): ParseResult[Char] = {
    if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail)
    }
    }
    def parser[T](f: String => ParseResult[T]) = new Parser[T] {
    def apply(in: String): ParseResult[T] = f(in)
    }

    View Slide

  11. val anyChar = new Parser[Char] {
    def apply(input: String): ParseResult[Char] = {
    if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail)
    }
    }
    def parser[T](f: String => ParseResult[T]) = new Parser[T] {
    def apply(in: String): ParseResult[T] = f(in)
    }
    val anyChar = parser { input =>
    if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail)
    }

    View Slide

  12. val anyChar = new Parser[Char] {
    def apply(input: String): ParseResult[Char] = {
    if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail)
    }
    }
    def parser[T](f: String => ParseResult[T]) = new Parser[T] {
    def apply(in: String): ParseResult[T] = f(in)
    }
    val anyChar = parser { input =>
    if (input.isEmpty) Failure("string is empty") else Success(input.head, input.tail)
    }
    anyChar("abc") == Success('a', "bc")

    View Slide

  13. We can parse
    a SINGLE char,
    but how do we parse a
    SEQUENCE ?

    View Slide

  14. We can parse
    a SINGLE char,
    but how do we parse a
    SEQUENCE ?

    View Slide

  15. A parser combinator is a higher-order function that
    accepts several parsers as input and returns a new parser
    as its output
    https://en.wikipedia.org/wiki/Parser_combinator

    View Slide

  16. def many[T](p: Parser[T]): Parser[List[T]] = parser { input =>
    @tailrec
    def parseInternal(current: Success[List[T]]): Success[List[T]] = {
    p(current.rest) match {
    case Success(res, rest) => parseInternal(Success(current.result :+ res, rest))
    case _ => current
    }
    }
    parseInternal(Success(List(), input))
    }
    parser combinator

    View Slide

  17. def many[T](p: Parser[T]): Parser[List[T]] = parser { input =>
    @tailrec
    def parseInternal(current: Success[List[T]]): Success[List[T]] = {
    p(current.rest) match {
    case Success(res, rest) => parseInternal(Success(current.result :+ res, rest))
    case _ => current
    }
    }
    parseInternal(Success(List(), input))
    }
    parser combinator

    View Slide

  18. def many[T](p: Parser[T]): Parser[List[T]] = parser { input =>
    @tailrec
    def parseInternal(current: Success[List[T]]): Success[List[T]] = {
    p(current.rest) match {
    case Success(res, rest) => parseInternal(Success(current.result :+ res, rest))
    case _ => current
    }
    }
    parseInternal(Success(List(), input))
    }
    parser combinator

    View Slide

  19. def many[T](p: Parser[T]): Parser[List[T]] = parser { input =>
    @tailrec
    def parseInternal(current: Success[List[T]]): Success[List[T]] = {
    p(current.rest) match {
    case Success(res, rest) => parseInternal(Success(current.result :+ res, rest))
    case _ => current
    }
    }
    parseInternal(Success(List(), input))
    }
    res +: current.result
    parser combinator

    View Slide

  20. def many[T](p: Parser[T]): Parser[List[T]] = parser { input =>
    @tailrec
    def parseInternal(current: Success[List[T]]): Success[List[T]] = {
    p(current.rest) match {
    case Success(res, rest) => parseInternal(Success(current.result :+ res, rest))
    case _ => current
    }
    }
    parseInternal(Success(List(), input))
    }
    res +: current.result
    map {_.reverse}
    parser combinator

    View Slide

  21. … ParseResult.map
    sealed abstract class ParseResult[+T] {
    def map[U](f: T => U): ParseResult[U]
    }

    View Slide

  22. … ParseResult.map
    sealed abstract class ParseResult[+T] {
    def map[U](f: T => U): ParseResult[U]
    }
    case class Success[+T](result: T, rest: String) extends ParseResult[T] {
    override def map[U](f: T => U): ParseResult[U] = Success(f(result), rest)
    }

    View Slide

  23. … ParseResult.map
    sealed abstract class ParseResult[+T] {
    def map[U](f: T => U): ParseResult[U]
    }
    case class Success[+T](result: T, rest: String) extends ParseResult[T] {
    override def map[U](f: T => U): ParseResult[U] = Success(f(result), rest)
    }
    case class Failure(msg: String) extends ParseResult[Nothing] {
    override def map[U](f: Nothing => U): ParseResult[U] = this
    }

    View Slide

  24. val stringParser: Parser[List[Char]] = many(anyChar)

    View Slide

  25. val stringParser: Parser[List[Char]] = many(anyChar)
    stringParser("abc") == Success(List('a', 'b', 'c'), "")

    View Slide

  26. {
    "firstName":"John",
    "isAlive":true,
    "age":25,
    "address":{
    "streetAddress":"21 2nd Street",
    "city":"New York"
    },
    "phoneNumbers":[
    { "type":"home", "number":"212 555-1234" },
    { "type":"office", "number":"646 555-4567" }
    ]
    }

    View Slide

  27. {
    "firstName":"John",
    "isAlive":true,
    "age":25,
    "address":{
    "streetAddress":"21 2nd Street",
    "city":"New York"
    },
    "phoneNumbers":[
    { "type":"home", "number":"212 555-1234" },
    { "type":"office", "number":"646 555-4567" }
    ]
    }
    let’s parse this double-quoted string

    View Slide

  28. anyChar
    def iff[T](p: Parser[T], f: T => Boolean) = parser { input =>
    p(input) match {
    case succ@Success(res, rest) => if (f(res)) succ else Failure("iff failed")
    case failure => failure
    }
    }
    parser combinator

    View Slide

  29. anyChar
    def iff[T](p: Parser[T], f: T => Boolean) = parser { input =>
    p(input) match {
    case succ@Success(res, rest) => if (f(res)) succ else Failure("iff failed")
    case failure => failure
    }
    }
    def char(c: Char): Parser[Char] = iff(anyChar, _ == c)
    def charNot(except: Char*): Parser[Char] = iff[Char](anyChar, c => !except.contains(c))
    val quote = char('"')
    val digit = iff[Char](anyChar, _.isDigit)
    parser combinator

    View Slide

  30. anyChar
    def iff[T](p: Parser[T], f: T => Boolean) = parser { input =>
    p(input) match {
    case succ@Success(res, rest) => if (f(res)) succ else Failure("iff failed")
    case failure => failure
    }
    }
    def char(c: Char): Parser[Char] = iff(anyChar, _ == c)
    def charNot(except: Char*): Parser[Char] = iff[Char](anyChar, c => !except.contains(c))
    val quote = char('"')
    val digit = iff[Char](anyChar, _.isDigit)
    quote(""" "firstName" """.trim) == Success(", firstName")
    parser combinator

    View Slide

  31. def and[A, B](parserA: Parser[A], parserB: Parser[B]): Parser[(A,B)] = parser { input =>
    parserA(input) match {
    case Success(res, rest) => parserB(rest) match {
    case Success(res2, rest2) => Success((res, res2), rest2)
    case _ => Failure(s"(and) second failed on $rest")
    }
    case _ => Failure("(and) first failed")
    }
    }
    parser combinator

    View Slide

  32. def and[A, B](parserA: Parser[A], parserB: Parser[B]): Parser[(A,B)] = parser { input =>
    parserA(input) match {
    case Success(res, rest) => parserB(rest) match {
    case Success(res2, rest2) => Success((res, res2), rest2)
    case _ => Failure(s"(and) second failed on $rest")
    }
    case _ => Failure("(and) first failed")
    }
    }
    and(digit, char('a'))("3abc") == Success((3,a), bc)
    and(digit, char('a'))("3333") == Failure((and) second failed on 333)
    parser combinator

    View Slide

  33. takeFirst takeSecond
    def takeFirst[A, B](parserA: Parser[A], parserB: Parser[B]) = parser { input =>
    and(parserA, parserB)(input) map { case (r1, r2) => r1 }
    }

    View Slide

  34. takeFirst takeSecond
    def takeFirst[A, B](parserA: Parser[A], parserB: Parser[B]) = parser { input =>
    and(parserA, parserB)(input) map { case (r1, r2) => r1 }
    }
    def takeSecond[A, B](parserA: Parser[A], parserB: Parser[B]) = parser { input =>
    and(parserA, parserB)(input) map { case (r1, r2) => r2 }
    }

    View Slide

  35. takeFirst takeSecond
    def takeFirst[A, B](parserA: Parser[A], parserB: Parser[B]) = parser { input =>
    and(parserA, parserB)(input) map { case (r1, r2) => r1 }
    }
    def takeSecond[A, B](parserA: Parser[A], parserB: Parser[B]) = parser { input =>
    and(parserA, parserB)(input) map { case (r1, r2) => r2 }
    }
    takeSecond(quote, stringParser)(""" "firstName" """.trim) == Success(firstName", )

    View Slide

  36. val stringVal = takeFirst(
    takeSecond( char('"'), many(charNot('"')) ),
    char('"')
    )

    View Slide

  37. val stringVal = takeFirst(
    takeSecond( char('"'), many(charNot('"')) ),
    char('"')
    )
    stringVal(""" "firstName" """.trim) == Success(firstName, )

    View Slide

  38. trait Parser[+T] extends (String => ParseResult[T]) {
    def ~>[U](right: Parser[U]): Parser[U] = takeSecond(this, right)
    def <~[U](right: Parser[U]): Parser[T] = takeFirst(this, right)
    def ~[U](right: => Parser[U]): Parser[(T, U)] = and(this, right)
    def *() = many(this)
    }
    shortcuts

    View Slide

  39. trait Parser[+T] extends (String => ParseResult[T]) {
    def ~>[U](right: Parser[U]): Parser[U] = takeSecond(this, right)
    def <~[U](right: Parser[U]): Parser[T] = takeFirst(this, right)
    def ~[U](right: => Parser[U]): Parser[(T, U)] = and(this, right)
    def *() = many(this)
    }
    val stringVal = quote ~> charNot('"').* <~ quote
    implicit def charToParser(c: Char): Parser[Char] = char(c)
    val stringVal = '"' ~> charNot('"').* <~ '"'
    shortcuts

    View Slide

  40. trait Parser[+T] extends (String => ParseResult[T]) {
    def map[U](f: T => U): Parser[U] = parser { in => this(in) map f }
    def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f }
    }

    View Slide

  41. trait Parser[+T] extends (String => ParseResult[T]) {
    def map[U](f: T => U): Parser[U] = parser { in => this(in) map f }
    def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f }
    }
    sealed abstract class ParseResult[+T] {
    def withNext[U](f: T => String => ParseResult[U]): ParseResult[U]
    }

    View Slide

  42. trait Parser[+T] extends (String => ParseResult[T]) {
    def map[U](f: T => U): Parser[U] = parser { in => this(in) map f }
    def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f }
    }
    sealed abstract class ParseResult[+T] {
    def withNext[U](f: T => String => ParseResult[U]): ParseResult[U]
    }
    case class Success[+T](result: T, rest: String) extends ParseResult[T] {
    override def withNext[U](f: T => String => ParseResult[U]) = f(result)(rest)
    }

    View Slide

  43. trait Parser[+T] extends (String => ParseResult[T]) {
    def map[U](f: T => U): Parser[U] = parser { in => this(in) map f }
    def flatMap[U](f: T => Parser[U]): Parser[U] = parser { in => this(in) withNext f }
    }
    sealed abstract class ParseResult[+T] {
    def withNext[U](f: T => String => ParseResult[U]): ParseResult[U]
    }
    case class Success[+T](result: T, rest: String) extends ParseResult[T] {
    override def withNext[U](f: T => String => ParseResult[U]) = f(result)(rest)
    }
    case class Failure(msg: String) extends ParseResult[Nothing] {
    override def withNext[U](f: Nothing => String => ParseResult[U]) = this
    }

    View Slide

  44. trait Parser[+T] extends (String => ParseResult[T]) {
    def ~>[U](right: Parser[U]): Parser[U] = parser { input =>
    (this ~ right)(input) map { case (r1, r2) => r2 }
    }
    def <~[U](right: Parser[U]): Parser[T] = parser { input =>
    (this ~ right)(input) map { case (r1, r2) => r1 }
    }
    }

    View Slide

  45. View Slide

  46. trait Parser[+T] extends (String => ParseResult[T]) {
    def ~>[U](right: Parser[U]): Parser[U] = {
    for (l <- this; r <- right) yield r
    }
    def <~[U](right: Parser[U]): Parser[T] = {
    for (l <- this; r <- right) yield l
    }
    }

    View Slide

  47. for(x <- c1; y <- c2; z <- c3) yield {...}
    c1.flatMap(x => c2.flatMap(y => c3.map(z => {...})))

    View Slide

  48. for(x <- c1; y <- c2; z <- c3) yield {...}
    c1.flatMap(x => c2.flatMap(y => c3.map(z => {...})))
    def ~>[U](right: Parser[U]): Parser[U] = {
    for (l <- this; r <- right) yield r
    }

    View Slide

  49. for(x <- c1; y <- c2; z <- c3) yield {...}
    c1.flatMap(x => c2.flatMap(y => c3.map(z => {...})))
    def ~>[U](right: Parser[U]): Parser[U] = {
    for (l <- this; r <- right) yield r
    }
    this.flatMap(l => right.map(r => r))

    View Slide

  50. for(x <- c1; y <- c2; z <- c3) yield {...}
    c1.flatMap(x => c2.flatMap(y => c3.map(z => {...})))
    def ~>[U](right: Parser[U]): Parser[U] = {
    for (l <- this; r <- right) yield r
    }
    this.flatMap(l => right.map(r => r))
    this.flatMap(_ => right)

    View Slide

  51. json: object | array;
    object
    : '{' pair (',' pair)* '}'
    | '{' '}' // empty object
    pair: STRING ':' value ;
    array
    : '[' value (',' value)* ']'
    | '[' ']' // empty array
    value
    : STRING
    | NUMBER
    | object // recursion
    | array // recursion
    | 'true' // keywords
    | 'false'
    | 'null'
    STRING : '"' (ESC | ~["\\])* '"' ;
    fragment ESC : '\\' (["\\/bfnrt] | UNICODE) ;
    fragment UNICODE : 'u' HEX HEX HEX HEX ;
    fragment HEX : [0-9a-fA-F] ;
    NUMBER
    : '-'? INT '.' [0-9]+ EXP? // 1.35, 1.35E-9, 0.3, -4.5
    | '-'? INT EXP // 1e10 -3e4
    | '-'? INT // -3, 45
    ;
    fragment INT : '0' | [1-9] [0-9]* ; // no leading zeros
    fragment EXP : [Ee] [+\-]? INT ;
    WS : [ \t\n\r]+ -> skip ;

    View Slide

  52. View Slide

  53. trait Parser[+T] extends (String => ParseResult[T]) {
    def | [U >: T](right: Parser[U]): Parser[U] = parser { input =>
    this(input) match {
    case Failure(_) => right(input)
    case succ => succ
    }
    }
    }

    View Slide

  54. trait Parser[+T] extends (String => ParseResult[T]) {
    def rep1Sep[U](sep: Parser[U]): Parser[List[T]] = (this ~ (sep ~> this).*) map { case (x, xs) => x +: xs }
    def repSep[U](sep: Parser[U]): Parser[List[T]] = this.rep1Sep(sep) | success(List())
    }

    View Slide

  55. trait Parser[+T] extends (String => ParseResult[T]) {
    def rep1Sep[U](sep: Parser[U]): Parser[List[T]] = (this ~ (sep ~> this).*) map { case (x, xs) => x +: xs }
    def repSep[U](sep: Parser[U]): Parser[List[T]] = this.rep1Sep(sep) | success(List())
    }
    object Parser {
    def success[T](res: T) = parser { in => Success(res, in) }
    }

    View Slide

  56. model
    object Json {
    sealed trait JsonVal
    case class JsonStringVal(s:String) extends JsonVal
    case class JsonIntVal(i:Int) extends JsonVal
    case class JsonArray(items: List[JsonVal]) extends JsonVal
    case class JsonNull() extends JsonVal
    case class JsonKey(k:String)
    case class JsonEntry(k:JsonKey, v:JsonVal)
    case class JsonObject(attrs:List[JsonEntry]) extends JsonVal
    }

    View Slide

  57. object JsonParser extends CharParser {
    def obj = '{' ~> (entry repSep ',') <~ '}' >> {attrs => JsonObject(attrs)}
    def entry = entryKey ~ (':' ~> entryVal) >> {case(k,v) => JsonEntry(k, v)}
    def entryKey = '"' ~> charNot('"').* <~ '"' >> {v => JsonKey(v.mkString)}
    def entryVal: Parser[JsonVal] = intVal | stringVal | obj | arrayVal | nullVal
    def intVal = intNumber >> {i => JsonIntVal(i)}
    def stringVal = '"' ~> charNot('"').* <~ '"' >> {v => JsonStringVal(v.mkString)}
    def arrayVal = '[' ~> (entryVal repSep ',') <~ ']' >> {items => JsonArray(items)}
    def nullVal = "null" >> {_ => JsonNull()}
    }

    View Slide

  58. object JsonParser extends CharParser {
    def obj = '{' ~> (entry repSep ',') <~ '}' >> {attrs => JsonObject(attrs)}
    def entry = entryKey ~ (':' ~> entryVal) >> {case(k,v) => JsonEntry(k, v)}
    def entryKey = '"' ~> charNot('"').* <~ '"' >> {v => JsonKey(v.mkString)}
    def entryVal: Parser[JsonVal] = intVal | stringVal | obj | arrayVal | nullVal
    def intVal = intNumber >> {i => JsonIntVal(i)}
    def stringVal = '"' ~> charNot('"').* <~ '"' >> {v => JsonStringVal(v.mkString)}
    def arrayVal = '[' ~> (entryVal repSep ',') <~ ']' >> {items => JsonArray(items)}
    def nullVal = "null" >> {_ => JsonNull()}
    }

    View Slide

  59. object JsonParser extends CharParser {
    def obj = '{' ~> (entry repSep ',') <~ '}' >> {attrs => JsonObject(attrs)}
    def entry = entryKey ~ (':' ~> entryVal) >> {case(k,v) => JsonEntry(k, v)}
    def entryKey = '"' ~> charNot('"').* <~ '"' >> {v => JsonKey(v.mkString)}
    def entryVal: Parser[JsonVal] = intVal | stringVal | obj | arrayVal | nullVal
    def intVal = intNumber >> {i => JsonIntVal(i)}
    def stringVal = '"' ~> charNot('"').* <~ '"' >> {v => JsonStringVal(v.mkString)}
    def arrayVal = '[' ~> (entryVal repSep ',') <~ ']' >> {items => JsonArray(items)}
    def nullVal = "null" >> {_ => JsonNull()}
    }

    View Slide

  60. def | [U >: T](right: Parser[U]): Parser[U] = parser { input =>
    ...
    }

    View Slide

  61. def | [U >: T](right: Parser[U]): Parser[U] = parser { input =>
    ...
    }

    View Slide

  62. def | [U >: T](right: Parser[U]): Parser[U] = parser { input =>
    ...
    }
    def | [U >: T](right: => Parser[U]): Parser[U] = parser { input =>
    ...
    }
    call-by-name

    View Slide

  63. parseJson( """ {"name":"John","lastname":"Doe","age":55,"hobbies":["tennis","football"],"pet":null} """.trim)

    View Slide

  64. parseJson( """ {"name":"John","lastname":"Doe","age":55,"hobbies":["tennis","football"],"pet":null} """.trim)
    JsonObject(
    List(
    JsonEntry(JsonKey(name), JsonStringVal(John)),
    JsonEntry(JsonKey(lastname), JsonStringVal(Doe)),
    JsonEntry(JsonKey(age), JsonIntVal(55)),
    JsonEntry(JsonKey(hobbies), JsonArray(
    List(
    JsonStringVal(tennis),
    JsonStringVal(football)))
    ),
    JsonEntry(JsonKey(pet), JsonNull())
    )
    )

    View Slide

  65. • Sources of this talk https://github.com/fe2s/parser-combinators-talk
    • Scala library https://github.com/scala/scala-parser-combinators
    • Haskell Parsec https://wiki.haskell.org/Parsec

    View Slide