Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Parser combinators: a type-driven approach to input processiEOF

Parser combinators: a type-driven approach to input processiEOF

Parsing is a truly fundamental problem. How can we extract and refine intelligible information out of the chaotic and hostile world we operate in? The absurd nature of the task and the breadth of possible solutions can quickly turn into an obsession, watch out.
The composability of first-class functions paired with powerful types can offer an extremely elegant and satisfying approach to parsing. In this talk, we will build our own parser-combinators library from scratch using Scala. We will make the most of the language by using all kinds of types, data structures, and other tricks.
May contain traces of Idris, it depends.

Bastien Louërat

February 28, 2018
Tweet

More Decks by Bastien Louërat

Other Decks in Programming

Transcript

  1. Parser Combinators
    A type-driven approach to input processiEOF

    View Slide

  2. Bastien Louërat
    Developer @ Habito
    Scala background
    Rambling @blouerat
    ctrl+u github.com/blouerat

    View Slide

  3. import validation._
    def validateAddress(form: Form): Either[AddressError, Address] =
    for {
    number <- validateNumber(form)
    street <- validateStreet(form)
    city <- validateCity(form)
    postCode <- validatePostCode(form)
    } yield Address(number, street, city, postCode)
    2 / 39

    View Slide

  4. Roses are Red
    Violets are Blue
    Unexpected '{'
    on line 32.
    3 / 39

    View Slide

  5. View Slide

  6. View Slide

  7. View Slide

  8. EOF

    View Slide

  9. View Slide

  10. View Slide

  11. View Slide

  12. View Slide

  13. sealed trait Parser[A]
    case class Exactly(char: Char) extends Parser[Char]
    val a: Parser[Char] = Exactly('a')
    val b: Parser[Char] = Exactly('b')
    def run[A](parser: Parser[A])(input: String): Option[A] =
    parser match {
    case Exactly(char) => input.headOption.filter(_ == char)
    }
    scala> run(a)("")
    res1: Option[Char] = None
    scala> run(a)("z")
    res2: Option[Char] = None
    scala> run(a)("a")
    res3: Option[Char] = Some(a)
    12 / 39

    View Slide

  14. sealed trait Error
    case object EOF extends Error
    case class Unexpected(char: Char) extends Error
    def run[A](parser: Parser[A])(input: String): Either[Error, A] =
    parser match {
    case Exactly(char) =>
    input.headOption.toRight(EOF).flatMap { head =>
    if (head == char)
    Right(char)
    else
    Left(Unexpected(head))
    }
    }
    scala> run(a)("")
    res4: Either[Error,Char] = Left(EOF)
    scala> run(a)("z")
    res5: Either[Error,Char] = Left(Unexpected(z))
    scala> run(a)("a")
    res6: Either[Error,Char] = Right(a)
    13 / 39

    View Slide

  15. case class Or[A](parser1: Parser[A], parser2: Parser[A]) extends Parser[A]
    val aOrB: Parser[Char] = Or(a, b)
    def run[A](parser: Parser[A])(input: String): Either[Error, A] =
    parser match {
    case Exactly(char) =>
    input.headOption.toRight(EOF).flatMap { head =>
    if (head == char)
    Right(char)
    else
    Left(Unexpected(head))
    }
    case Or(parser1, parser2) =>
    run(parser1)(input) match {
    case Left(error) => run(parser2)(input)
    case Right(result) => Right(result)
    }
    }
    scala> run(aOrB)("z")
    res7: Either[Error,Char] = Left(Unexpected(z))
    scala> run(aOrB)("a")
    res8: Either[Error,Char] = Right(a)
    scala> run(aOrB)("b")
    res9: Either[Error,Char] = Right(b)
    14 / 39

    View Slide

  16. case class NEL[A](head: A, tail: List[A]) {
    def map[B](f: A => B): NEL[B] = NEL(f(head), tail.map(f))
    }
    def oneOf[A](parsers: NEL[Parser[A]]): Parser[A] =
    parsers.tail.foldLeft(parsers.head)(Or(_, _))
    val allDigits: NEL[Char] = NEL('0', ('1' to '9').toList)
    val digit: Parser[Char] = oneOf(allDigits.map(Exactly(_)))
    scala> run(digit)("")
    res11: Either[Error,Char] = Left(EOF)
    scala> run(digit)("z")
    res12: Either[Error,Char] = Left(Unexpected(z))
    scala> run(digit)("0")
    res13: Either[Error,Char] = Right(0)
    scala> run(digit)("1")
    res14: Either[Error,Char] = Right(1)
    scala> run(digit)("2")
    res15: Either[Error,Char] = Right(2)
    scala> run(digit)("9")
    res16: Either[Error,Char] = Right(9)
    15 / 39

    View Slide

  17. scala> run(a)("abcd")
    res17: Either[Error,Char] = Right(a)
    How about bcd?
    def run[A](parser: Parser[A])(input: String): Either[Error, (A, String)] =
    parser match {
    case Exactly(char) =>
    input.headOption.toRight(EOF).flatMap { head =>
    if (head == char)
    Right((char, input.tail)) // input.tail is safe, trust me
    else
    Left(Unexpected(head))
    }
    case Or(parser1, parser2) =>
    run(parser1)(input) match {
    case Left(error) => run(parser2)(input)
    case Right(result) => Right(result)
    }
    }
    scala> run(a)("")
    res18: Either[Error,(Char, String)] = Left(EOF)
    scala> run(a)("z")
    res19: Either[Error,(Char, String)] = Left(Unexpected(z))
    scala> run(a)("abcd")
    res20: Either[Error,(Char, String)] = Right((a,bcd))
    16 / 39

    View Slide

  18. case class And[A, B](
    parserA: Parser[A],
    parserB: Parser[B]
    ) extends Parser[(A, B)]
    val aAndB: Parser[(Char, Char)] = And(a, b)
    def run[A](parser: Parser[A])(input: String): Either[Error, (A, String)] =
    parser match {
    case Exactly(char) =>
    input.headOption.toRight(EOF).flatMap { head =>
    if (head == char)
    Right((char, input.tail))
    else
    Left(Unexpected(head))
    }
    case Or(parser1, parser2) =>
    run(parser1)(input) match {
    case Left(error) => run(parser2)(input)
    case Right(result) => Right(result)
    }
    case And(parserA, parserB) =>
    for {
    resultA <- run(parserA)(input)
    resultB <- run(parserB)(resultA._2)
    } yield ((resultA._1, resultB._1), resultB._2)
    }
    17 / 39

    View Slide

  19. scala> run(aAndB)("")
    res21: Either[Error,((Char, Char), String)] = Left(EOF)
    scala> run(aAndB)("z")
    res22: Either[Error,((Char, Char), String)] = Left(Unexpected(z))
    scala> run(aAndB)("a")
    res23: Either[Error,((Char, Char), String)] = Left(EOF)
    scala> run(aAndB)("abcd")
    res24: Either[Error,((Char, Char), String)] = Right(((a,b),cd))
    18 / 39

    View Slide

  20. import scalaz._
    import Scalaz._
    def run[F[_], A](
    parser: Parser[A]
    )(
    MS: MonadState[F, String],
    ME: MonadError[F, Error]
    ): F[A] = ???
    19 / 39

    View Slide

  21. trait MonadStateError[F[_], S, E] extends MonadState[F, S] with MonadError[F, E]
    object MonadStateError {
    def apply[F[_], S, E](
    MS: MonadState[F, S],
    ME: MonadError[F, E]
    ): MonadStateError[F, S, E] =
    new MonadStateError[F, S, E] {
    def point[A](a: => A): F[A] = MS.point(a)
    def bind[A, B](fa: F[A])(f: A => F[B]): F[B] = MS.bind(fa)(f)
    def handleError[A](fa: F[A])(f: E => F[A]): F[A] = ME.handleError(fa)(f)
    def raiseError[A](e: E): F[A] = ME.raiseError(e)
    def get: F[S] = MS.get
    def init: F[S] = MS.init
    def put(s: S): F[Unit] = MS.put(s)
    }
    }
    20 / 39

    View Slide

  22. def next[F[_]](
    implicit M: MonadStateError[F, String, Error]
    ): F[Char] =
    M.get.flatMap {
    case "" => M.raiseError(EOF)
    case input => M.put(input.tail) >| input.head
    }
    def run[F[_], A](
    parser: Parser[A]
    )(
    implicit M: MonadStateError[F, String, Error]
    ): F[A] =
    parser match {
    case Exactly(char) =>
    for {
    head <- next
    _ <- whenM(head != char)(M.raiseError(Unexpected(head)))
    } yield head
    case Or(parser1, parser2) =>
    M.get.flatMap { input =>
    M.handleError(run(parser1)) { _ =>
    M.put(input) >> run(parser2)
    }
    }
    case And(parserA, parserB) =>
    for {
    a <- run(parserA)
    b <- run(parserB)
    } yield (a, b)
    }
    21 / 39

    View Slide

  23. type Result[A] = Either[Error, A]
    type ResultST[A] = StateT[Result, String, A]
    val toResultST: Parser ~> ResultST = new (Parser ~> ResultST) {
    val MS: MonadState[ResultST, String] =
    StateT.stateTMonadState[String, Result]
    val ME: MonadError[ResultST, Error] =
    StateT.stateTMonadError[String, Result, Error]
    def apply[A](parser: Parser[A]): ResultST[A] =
    run(parser)(MonadStateError(MS, ME))
    }
    def eval[A](parser: Parser[A])(input: String): Result[A] =
    toResultST(parser).eval(input)
    scala> val result = toResultST(aAndB)
    result: ResultST[(Char, Char)] = [email protected]
    scala> result.run("")
    res28: Result[(String, (Char, Char))] = Left(EOF)
    scala> result.run("z")
    res29: Result[(String, (Char, Char))] = Left(Unexpected(z))
    scala> result.run("abcd")
    res30: Result[(String, (Char, Char))] = Right((cd,(a,b)))
    scala> result.eval("abcd")
    res31: Result[(Char, Char)] = Right((a,b))
    22 / 39

    View Slide

  24. sealed trait Parser[A]
    case class Exactly(char: Char) extends Parser[Char]
    case class Pure[A](value: A) extends Parser[A]
    case class Or[A](
    parser1: Parser[A],
    parser2: Parser[A]
    ) extends Parser[A]
    case class Bind[A, B](
    parser: Parser[A],
    f: A => Parser[B]
    ) extends Parser[B]
    implicit val parserMonad: Monad[Parser] = new Monad[Parser] {
    def point[A](a: => A): Parser[A] = Pure(a)
    def bind[A, B](fa: Parser[A])(f: A => Parser[B]): Parser[B] = Bind(fa, f)
    }
    implicit val parserPlus: Plus[Parser] = new Plus[Parser] {
    def plus[A](a: Parser[A], b: => Parser[A]): Parser[A] = Or(a, b)
    }
    implicit def parserSemigroup[A]: Semigroup[Parser[A]] =
    parserPlus.semigroup[A]
    23 / 39

    View Slide

  25. def and[A, B](parserA: Parser[A], parserB: Parser[B]): Parser[(A, B)] =
    parserA.tuple(parserB)
    val allDigits: NonEmptyList[Char] = NonEmptyList('0', ('1' to '9'): _*)
    def charToInt(c: Char): Int = c.toString.toInt // ¯\_(ツ)_/¯
    val digit: Parser[Int] = allDigits.foldMap1(d => Exactly(d).map(charToInt))
    def maybe[A](parser: Parser[A]): Parser[Option[A]] =
    Or(parser.map(Option(_)), Pure(Option.empty[A]))
    object someMany {
    def some[A](parser: Parser[A]): Parser[List[A]] =
    for {
    head <- parser
    tail <- many(parser)
    } yield head :: tail
    def many[A](parser: Parser[A]): Parser[List[A]] =
    Or(some(parser), Pure(List.empty[A]))
    }
    import someMany._
    24 / 39

    View Slide

  26. def run[F[_], A](
    parser: Parser[A]
    )(
    implicit M: MonadStateError[F, String, Error]
    ): F[A] =
    parser match {
    case Exactly(char) =>
    for {
    head <- next
    _ <- whenM(head != char)(M.raiseError(Unexpected(head)))
    } yield head
    case Pure(value) => M.point(value)
    case Or(parser1, parser2) =>
    M.get.flatMap { input =>
    M.handleError(run(parser1)) { _ =>
    M.put(input) >> run(parser2)
    }
    }
    case Bind(parser, f) => run(parser).flatMap(a => run(f(a)))
    }
    type Result[A] = Either[Error, A]
    type ResultST[A] = StateT[Result, String, A]
    val toResultST: Parser ~> ResultST = new (Parser ~> ResultST) {
    val MS: MonadState[ResultST, String] = StateT.stateTMonadState[String, Result]
    val ME: MonadError[ResultST, Error] = StateT.stateTMonadError[String, Result, Error]
    def apply[A](parser: Parser[A]): ResultST[A] = run(parser)(MonadStateError(MS, ME))
    }
    def eval[A](parser: Parser[A])(input: String): Result[A] = toResultST(parser).eval(input)
    25 / 39

    View Slide

  27. val lowercase: Parser[Char] =
    NonEmptyList('a', ('b' to 'z'): _*).foldMap1[Parser[Char]](Exactly(_))
    val foo: Parser[(Char, List[Int])] = and(lowercase, some(digit))
    val result = toResultST(foo)
    scala> result.run("")
    res35: Result[(String, (Char, List[Int]))] = Left(EOF)
    scala> result.run("A")
    res36: Result[(String, (Char, List[Int]))] = Left(Unexpected(A))
    scala> result.run("a")
    res37: Result[(String, (Char, List[Int]))] = Left(EOF)
    scala> result.run("b42az")
    res38: Result[(String, (Char, List[Int]))] = Right((az,(b,List(4, 2))))
    26 / 39

    View Slide

  28. val protocol: Parser[String] =
    for {
    n <- digit
    _ <- Exactly(':')
    chars <- lowercase.replicateM(n)
    } yield chars.mkString
    scala> eval(protocol)("")
    res39: Result[String] = Left(EOF)
    scala> eval(protocol)("abcdefg42")
    res40: Result[String] = Left(Unexpected(a))
    scala> eval(protocol)("3abcdefg42")
    res41: Result[String] = Left(Unexpected(a))
    scala> eval(protocol)("3:abcdefg42")
    res42: Result[String] = Right(abc)
    27 / 39

    View Slide

  29. Parser combinators in Idris
    data Grammar : (tok : Type) -> (consumes : Bool) -> Type -> Type where
    Empty : (val : ty) -> Grammar tok False ty
    Terminal : (tok -> Maybe a) -> Grammar tok True a
    NextIs : (tok -> Bool) -> Grammar tok False tok
    EOF : Grammar tok False ()
    Fail : String -> Grammar tok c ty
    Commit : Grammar tok False ()
    SeqEat : Grammar tok True a -> Inf (a -> Grammar tok c2 b) ->
    Grammar tok True b
    SeqEmpty : {c1, c2 : Bool} ->
    Grammar tok c1 a -> (a -> Grammar tok c2 b) ->
    Grammar tok (c1 || c2) b
    Alt : {c1, c2 : Bool} ->
    Grammar tok c1 ty -> Grammar tok c2 ty ->
    Grammar tok (c1 && c2) ty
    (>>=) : {c1 : Bool} ->
    Grammar tok c1 a ->
    inf c1 (a -> Grammar tok c2 b) ->
    Grammar tok (c1 || c2) b
    (>>=) {c1 = False} = SeqEmpty
    (>>=) {c1 = True} = SeqEat
    28 / 39

    View Slide

  30. sealed trait Bool {
    type If[T <: Out, F <: Out, Out] }
    sealed trait True extends Bool {
    type If[T <: Out, F <: Out, Out] = T
    }
    sealed trait False extends Bool {
    type If[T <: Out, F <: Out, Out] = F
    }
    type &&[A <: Bool, B <: Bool] = A#If[B, False, Bool]
    type ||[A <: Bool, B <: Bool] = A#If[True, B, Bool]
    object Bool {
    sealed trait Refl[A <: Bool, B <: Bool]
    object Refl extends Refl0
    trait Refl0 extends Refl1 {
    implicit def reflAndTrue[A <: Bool]: Refl[A && True, A] = null
    implicit def reflAndFalse[A <: Bool]: Refl[A && False, False] = null
    }
    trait Refl1 {
    implicit def reflOrTrue[A <: Bool]: Refl[A || True, True] = null
    implicit def reflOrFalse[A <: Bool]: Refl[A || False, A] = null
    }
    }
    import Bool._
    29 / 39

    View Slide

  31. sealed trait Parser[A, X <: Bool]
    case class Exactly(char: Char) extends Parser[Char, True]
    case class Pure[A](value: A) extends Parser[A, False]
    case class Or[A, X1 <: Bool, X2 <: Bool](
    parser1: Parser[A, X1],
    parser2: Parser[A, X2]
    ) extends Parser[A, X1 && X2]
    case class Bind[A, X1 <: Bool, B, X2 <: Bool](
    parser: Parser[A, X1],
    f: A => Parser[B, X2]
    ) extends Parser[B, X1 || X2]
    30 / 39

    View Slide

  32. object Parser {
    sealed trait Parser[A, X <: Bool] { self =>
    def map[B](f: A => B): Parser[B, X] =
    Bind(self, (a: A) => Pure(f(a))).refl
    def flatMap[B, X2 <: Bool](f: A => Parser[B, X2]): Parser[B, X || X2] =
    Bind(self, f)
    def refl[X2 <: Bool](implicit refl: Refl[X, X2]): Parser[A, X2] =
    this.asInstanceOf[Parser[A, X2]]
    }
    case class Exactly(char: Char) extends Parser[Char, True]
    case class Pure[A](value: A) extends Parser[A, False]
    case class Or[A, X1 <: Bool, X2 <: Bool](
    parser1: Parser[A, X1],
    parser2: Parser[A, X2]
    ) extends Parser[A, X1 && X2]
    case class Bind[A, X1 <: Bool, B, X2 <: Bool](
    parser: Parser[A, X1],
    f: A => Parser[B, X2]
    ) extends Parser[B, X1 || X2]
    }
    import Parser._
    31 / 39

    View Slide

  33. val a: Parser[Char, True] = Exactly('a')
    val b: Parser[Char, True] = Exactly('b')
    val aOrB: Parser[Char, True] = Or(a, b)
    def wrapped[A, X <: Bool](parser: Parser[A, X]): Parser[A, True] =
    for {
    _ <- Exactly('(')
    a <- parser
    _ <- Exactly(')')
    } yield a
    def maybe[A, X <: Bool](parser: Parser[A, X]): Parser[Option[A], False] =
    Or(parser.map(Option(_)), Pure(Option.empty[A])).refl
    object someMany {
    def some[A, X <: Bool](parser: Parser[A, X]): Parser[List[A], X] =
    parser.flatMap { a =>
    many(parser).map(a :: _)
    }.refl
    def many[A, X <: Bool](parser: Parser[A, X]): Parser[List[A], False] =
    Or(some(parser), Pure(List.empty[A])).refl
    }
    import someMany._
    32 / 39

    View Slide

  34. def runForget[F[_], A](
    parser: Parser[A, _]
    )(
    implicit M: MonadStateError[F, String, Error]
    ): F[A] =
    parser match {
    case Exactly(char) =>
    for {
    head <- next
    _ <- whenM(head != char)(M.raiseError(Unexpected(head)))
    } yield head
    case Pure(value) => M.point(value)
    case Or(parser1, parser2) =>
    M.get.flatMap { input =>
    M.handleError(runForget(parser1)) { _ =>
    M.put(input) >> runForget(parser2)
    }
    }
    case Bind(parser, f) => runForget(parser).flatMap(a => runForget(f(a)))
    }
    type Result[A] = Either[Error, A]
    type ResultST[A] = StateT[Result, String, A]
    def toResultST[A](parser: Parser[A, True]): ResultST[A] = {
    val MS: MonadState[ResultST, String] = StateT.stateTMonadState[String, Result]
    val ME: MonadError[ResultST, Error] = StateT.stateTMonadError[String, Result, Error]
    runForget(parser)(MonadStateError(MS, ME))
    }
    def eval[A](parser: Parser[A, True])(input: String): Result[A] = toResultST(parser).eval(input)
    33 / 39

    View Slide

  35. scala> eval(aOrBs)("")
    res48: Result[List[Char]] = Left(EOF)
    scala> eval(aOrBs)("abcd")
    res49: Result[List[Char]] = Left(Unexpected(a))
    scala> eval(aOrBs)("(a)(b)(b)(a)cd")
    res50: Result[List[Char]] = Right(List(a, b, b, a))
    scala> eval(maybeAB)("")
    <console>:43: error: type mismatch;
    found : Parser.Parser[Option[Char],False]
    required: Parser.Parser[?,True]
    eval(maybeAB)("")
    ^
    val aOrBs = some(wrapped(aOrB))
    aOrBs: Parser.Parser[List[Char],True] = Bind(Bind(Exactly((),$$Lambda$4605
    val maybeAB = maybe(aOrB)
    maybeAB: Parser.Parser[Option[Char],False] = Or(Bind(Or(Exactly(a),Exactly
    34 / 39

    View Slide

  36. def stupid: Parser[Unit, False] = Pure(()).flatMap(_ => stupid)
    def woops: Parser[Unit, True] = Exactly('$').flatMap(_ => stupid)
    scala> eval(woops)("")
    res52: Result[Unit] = Left(EOF)
    scala> eval(woops)("$")
    java.lang.StackOverflowError
    at scalaz.std.EitherInstances$$anon$1.point(Either.scala:82)
    at scalaz.std.EitherInstances$$anon$1.point(Either.scala:67)
    at scalaz.Monad.$anonfun$map$1(Monad.scala:14)
    at scalaz.std.EitherInstances$$anon$1.bind(Either.scala:70)
    at scalaz.std.EitherInstances$$anon$1.bind(Either.scala:67)
    at scalaz.Monad.map(Monad.scala:14)
    at scalaz.Monad.map$(Monad.scala:14)
    at scalaz.std.EitherInstances$$anon$1.map(Either.scala:67)
    at scalaz.IndexedStateT.apply(StateT.scala:10)
    at scalaz.IndexedStateT.run(StateT.scala:13)
    at scalaz.IndexedStateT.$anonfun$mapsf$2(StateT.scala:98)
    at scalaz.IndexedStateT.$anonfun$flatMap$2(StateT.scala:65)
    at scalaz.IndexedStateT.$anonfun$flatMap$4(StateT.scala:67)
    at scalaz.std.EitherInstances$$anon$1.bind(Either.scala:70)
    at scalaz.std.EitherInstances$$anon$1.bind(Either.scala:67)
    at scalaz.IndexedStateT.$anonfun$flatMap$3(StateT.scala:67)
    at scalaz.std.EitherInstances$$anon$1.bind(Either.scala:70)
    at scalaz.std.EitherInstances$$anon$1.bind(Either.scala:67)
    at scalaz.IndexedStateT.$anonfun$flatMap$2(StateT.scala:65)
    at scalaz.IndexedStateT.$anonfun$flatMap$4(StateT.scala:67)
    at scalaz.std.EitherInstances$$anon$1.bind(Either.scala:70)
    35 / 39

    View Slide

  37. Cynicism: The hope that someday you will have known better all along.
    Nein. A Manifesto, Eric Jarosinski
    36 / 39

    View Slide

  38. One must imagine Sisyphus happy
    Existential Comics
    37 / 39

    View Slide

  39. Thank you

    View Slide

  40. Reading list
    Functional Programming in Scala, Paul Chiusano and Runar Bjarnason
    Type-Driven Development with Idris, Edwin Brady
    Monadic parsing in Haskell, Graham Hutton and Erik Meijer
    39 / 39

    View Slide