Bastien Louërat
February 28, 2018
160

# Parser combinators: a type-driven approach to input processiEOF

Parsing is a truly fundamental problem. How can we extract and refine intelligible information out of the chaotic and hostile world we operate in? The absurd nature of the task and the breadth of possible solutions can quickly turn into an obsession, watch out.
The composability of first-class functions paired with powerful types can offer an extremely elegant and satisfying approach to parsing. In this talk, we will build our own parser-combinators library from scratch using Scala. We will make the most of the language by using all kinds of types, data structures, and other tricks.
May contain traces of Idris, it depends.

## Bastien Louërat

February 28, 2018

## Transcript

1. Parser Combinators
A type-driven approach to input processiEOF

2. Bastien Louërat
Developer @ Habito
Scala background
Rambling @blouerat
ctrl+u github.com/blouerat

3. import validation._
for {
number <- validateNumber(form)
street <- validateStreet(form)
city <- validateCity(form)
postCode <- validatePostCode(form)
} yield Address(number, street, city, postCode)
2 / 39

4. Roses are Red
Violets are Blue
Unexpected '{'
on line 32.
3 / 39

5. EOF

6. sealed trait Parser[A]
case class Exactly(char: Char) extends Parser[Char]
val a: Parser[Char] = Exactly('a')
val b: Parser[Char] = Exactly('b')
def run[A](parser: Parser[A])(input: String): Option[A] =
parser match {
}
scala> run(a)("")
res1: Option[Char] = None
scala> run(a)("z")
res2: Option[Char] = None
scala> run(a)("a")
res3: Option[Char] = Some(a)
12 / 39

7. sealed trait Error
case object EOF extends Error
case class Unexpected(char: Char) extends Error
def run[A](parser: Parser[A])(input: String): Either[Error, A] =
parser match {
case Exactly(char) =>
Right(char)
else
}
}
scala> run(a)("")
res4: Either[Error,Char] = Left(EOF)
scala> run(a)("z")
res5: Either[Error,Char] = Left(Unexpected(z))
scala> run(a)("a")
res6: Either[Error,Char] = Right(a)
13 / 39

8. case class Or[A](parser1: Parser[A], parser2: Parser[A]) extends Parser[A]
val aOrB: Parser[Char] = Or(a, b)
def run[A](parser: Parser[A])(input: String): Either[Error, A] =
parser match {
case Exactly(char) =>
Right(char)
else
}
case Or(parser1, parser2) =>
run(parser1)(input) match {
case Left(error) => run(parser2)(input)
case Right(result) => Right(result)
}
}
scala> run(aOrB)("z")
res7: Either[Error,Char] = Left(Unexpected(z))
scala> run(aOrB)("a")
res8: Either[Error,Char] = Right(a)
scala> run(aOrB)("b")
res9: Either[Error,Char] = Right(b)
14 / 39

9. case class NEL[A](head: A, tail: List[A]) {
def map[B](f: A => B): NEL[B] = NEL(f(head), tail.map(f))
}
def oneOf[A](parsers: NEL[Parser[A]]): Parser[A] =
val allDigits: NEL[Char] = NEL('0', ('1' to '9').toList)
val digit: Parser[Char] = oneOf(allDigits.map(Exactly(_)))
scala> run(digit)("")
res11: Either[Error,Char] = Left(EOF)
scala> run(digit)("z")
res12: Either[Error,Char] = Left(Unexpected(z))
scala> run(digit)("0")
res13: Either[Error,Char] = Right(0)
scala> run(digit)("1")
res14: Either[Error,Char] = Right(1)
scala> run(digit)("2")
res15: Either[Error,Char] = Right(2)
scala> run(digit)("9")
res16: Either[Error,Char] = Right(9)
15 / 39

10. scala> run(a)("abcd")
res17: Either[Error,Char] = Right(a)
def run[A](parser: Parser[A])(input: String): Either[Error, (A, String)] =
parser match {
case Exactly(char) =>
Right((char, input.tail)) // is safe, trust me
else
}
case Or(parser1, parser2) =>
run(parser1)(input) match {
case Left(error) => run(parser2)(input)
case Right(result) => Right(result)
}
}
scala> run(a)("")
res18: Either[Error,(Char, String)] = Left(EOF)
scala> run(a)("z")
res19: Either[Error,(Char, String)] = Left(Unexpected(z))
scala> run(a)("abcd")
res20: Either[Error,(Char, String)] = Right((a,bcd))
16 / 39

11. case class And[A, B](
parserA: Parser[A],
parserB: Parser[B]
) extends Parser[(A, B)]
val aAndB: Parser[(Char, Char)] = And(a, b)
def run[A](parser: Parser[A])(input: String): Either[Error, (A, String)] =
parser match {
case Exactly(char) =>
Right((char, input.tail))
else
}
case Or(parser1, parser2) =>
run(parser1)(input) match {
case Left(error) => run(parser2)(input)
case Right(result) => Right(result)
}
case And(parserA, parserB) =>
for {
resultA <- run(parserA)(input)
resultB <- run(parserB)(resultA._2)
} yield ((resultA._1, resultB._1), resultB._2)
}
17 / 39

12. scala> run(aAndB)("")
res21: Either[Error,((Char, Char), String)] = Left(EOF)
scala> run(aAndB)("z")
res22: Either[Error,((Char, Char), String)] = Left(Unexpected(z))
scala> run(aAndB)("a")
res23: Either[Error,((Char, Char), String)] = Left(EOF)
scala> run(aAndB)("abcd")
res24: Either[Error,((Char, Char), String)] = Right(((a,b),cd))
18 / 39

13. import scalaz._
import Scalaz._
def run[F[_], A](
parser: Parser[A]
)(
): F[A] = ???
19 / 39

def apply[F[_], S, E](
def point[A](a: A): F[A] = MS.point(a)
def bind[A, B](fa: F[A])(f: A => F[B]): F[B] = MS.bind(fa)(f)
def handleError[A](fa: F[A])(f: E => F[A]): F[A] = ME.handleError(fa)(f)
def raiseError[A](e: E): F[A] = ME.raiseError(e)
def get: F[S] = MS.get
def init: F[S] = MS.init
def put(s: S): F[Unit] = MS.put(s)
}
}
20 / 39

15. def next[F[_]](
): F[Char] =
M.get.flatMap {
case "" => M.raiseError(EOF)
}
def run[F[_], A](
parser: Parser[A]
)(
): F[A] =
parser match {
case Exactly(char) =>
for {
case Or(parser1, parser2) =>
M.get.flatMap { input =>
M.handleError(run(parser1)) { _ =>
M.put(input) run(parser2)
}
}
case And(parserA, parserB) =>
for {
a <- run(parserA)
b <- run(parserB)
} yield (a, b)
}
21 / 39

16. type Result[A] = Either[Error, A]
type ResultST[A] = StateT[Result, String, A]
val toResultST: Parser ~> ResultST = new (Parser ~> ResultST) {
def apply[A](parser: Parser[A]): ResultST[A] =
}
def eval[A](parser: Parser[A])(input: String): Result[A] =
toResultST(parser).eval(input)
scala> val result = toResultST(aAndB)
result: ResultST[(Char, Char)] = [email protected]
scala> result.run("")
res28: Result[(String, (Char, Char))] = Left(EOF)
scala> result.run("z")
res29: Result[(String, (Char, Char))] = Left(Unexpected(z))
scala> result.run("abcd")
res30: Result[(String, (Char, Char))] = Right((cd,(a,b)))
scala> result.eval("abcd")
res31: Result[(Char, Char)] = Right((a,b))
22 / 39

17. sealed trait Parser[A]
case class Exactly(char: Char) extends Parser[Char]
case class Pure[A](value: A) extends Parser[A]
case class Or[A](
parser1: Parser[A],
parser2: Parser[A]
) extends Parser[A]
case class Bind[A, B](
parser: Parser[A],
f: A => Parser[B]
) extends Parser[B]
def point[A](a: A): Parser[A] = Pure(a)
def bind[A, B](fa: Parser[A])(f: A => Parser[B]): Parser[B] = Bind(fa, f)
}
implicit val parserPlus: Plus[Parser] = new Plus[Parser] {
def plus[A](a: Parser[A], b: => Parser[A]): Parser[A] = Or(a, b)
}
implicit def parserSemigroup[A]: Semigroup[Parser[A]] =
parserPlus.semigroup[A]
23 / 39

18. def and[A, B](parserA: Parser[A], parserB: Parser[B]): Parser[(A, B)] =
parserA.tuple(parserB)
val allDigits: NonEmptyList[Char] = NonEmptyList('0', ('1' to '9'): _*)
def charToInt(c: Char): Int = c.toString.toInt // ¯\_(ツ)_/¯
val digit: Parser[Int] = allDigits.foldMap1(d => Exactly(d).map(charToInt))
def maybe[A](parser: Parser[A]): Parser[Option[A]] =
Or(parser.map(Option(_)), Pure(Option.empty[A]))
object someMany {
def some[A](parser: Parser[A]): Parser[List[A]] =
for {
tail <- many(parser)
def many[A](parser: Parser[A]): Parser[List[A]] =
Or(some(parser), Pure(List.empty[A]))
}
import someMany._
24 / 39

19. def run[F[_], A](
parser: Parser[A]
)(
): F[A] =
parser match {
case Exactly(char) =>
for {
case Pure(value) => M.point(value)
case Or(parser1, parser2) =>
M.get.flatMap { input =>
M.handleError(run(parser1)) { _ =>
M.put(input) run(parser2)
}
}
case Bind(parser, f) => run(parser).flatMap(a => run(f(a)))
}
type Result[A] = Either[Error, A]
type ResultST[A] = StateT[Result, String, A]
val toResultST: Parser ~> ResultST = new (Parser ~> ResultST) {
def apply[A](parser: Parser[A]): ResultST[A] = run(parser)(MonadStateError(
}
def eval[A](parser: Parser[A])(input: String): Result[A] = toResultST(parser)
25 / 39

20. val lowercase: Parser[Char] =
NonEmptyList('a', ('b' to 'z'): _*).foldMap1[Parser[Char]](Exactly(_))
val foo: Parser[(Char, List[Int])] = and(lowercase, some(digit))
val result = toResultST(foo)
scala> result.run("")
res35: Result[(String, (Char, List[Int]))] = Left(EOF)
scala> result.run("A")
res36: Result[(String, (Char, List[Int]))] = Left(Unexpected(A))
scala> result.run("a")
res37: Result[(String, (Char, List[Int]))] = Left(EOF)
scala> result.run("b42az")
res38: Result[(String, (Char, List[Int]))] = Right((az,(b,List(4, 2))))
26 / 39

21. val protocol: Parser[String] =
for {
n <- digit
_ <- Exactly(':')
chars <- lowercase.replicateM(n)
} yield chars.mkString
scala> eval(protocol)("")
res39: Result[String] = Left(EOF)
scala> eval(protocol)("abcdefg42")
res40: Result[String] = Left(Unexpected(a))
scala> eval(protocol)("3abcdefg42")
res41: Result[String] = Left(Unexpected(a))
scala> eval(protocol)("3:abcdefg42")
res42: Result[String] = Right(abc)
27 / 39

22. Parser combinators in Idris
data Grammar : (tok : Type) -> (consumes : Bool) -> Type -> Type where
Empty : (val : ty) -> Grammar tok False ty
Terminal : (tok -> Maybe a) -> Grammar tok True a
NextIs : (tok -> Bool) -> Grammar tok False tok
EOF : Grammar tok False ()
Fail : String -> Grammar tok c ty
Commit : Grammar tok False ()
SeqEat : Grammar tok True a -> Inf (a -> Grammar tok c2 b) ->
Grammar tok True b
SeqEmpty : {c1, c2 : Bool} ->
Grammar tok c1 a -> (a -> Grammar tok c2 b) ->
Grammar tok (c1 || c2) b
Alt : {c1, c2 : Bool} ->
Grammar tok c1 ty -> Grammar tok c2 ty ->
Grammar tok (c1 && c2) ty
(>>=) : {c1 : Bool} ->
Grammar tok c1 a ->
inf c1 (a -> Grammar tok c2 b) ->
Grammar tok (c1 || c2) b
(>>=) {c1 = False} = SeqEmpty
(>>=) {c1 = True} = SeqEat
28 / 39

23. sealed trait Bool {
type If[T <: Out, F <: Out, Out] }
sealed trait True extends Bool {
type If[T <: Out, F <: Out, Out] = T
}
sealed trait False extends Bool {
type If[T <: Out, F <: Out, Out] = F
}
type &&[A <: Bool, B <: Bool] = A#If[B, False, Bool]
type ||[A <: Bool, B <: Bool] = A#If[True, B, Bool]
object Bool {
sealed trait Refl[A <: Bool, B <: Bool]
object Refl extends Refl0
trait Refl0 extends Refl1 {
implicit def reflAndTrue[A <: Bool]: Refl[A && True, A] = null
implicit def reflAndFalse[A <: Bool]: Refl[A && False, False] = null
}
trait Refl1 {
implicit def reflOrTrue[A <: Bool]: Refl[A || True, True] = null
implicit def reflOrFalse[A <: Bool]: Refl[A || False, A] = null
}
}
import Bool._
29 / 39

24. sealed trait Parser[A, X <: Bool]
case class Exactly(char: Char) extends Parser[Char, True]
case class Pure[A](value: A) extends Parser[A, False]
case class Or[A, X1 <: Bool, X2 <: Bool](
parser1: Parser[A, X1],
parser2: Parser[A, X2]
) extends Parser[A, X1 && X2]
case class Bind[A, X1 <: Bool, B, X2 <: Bool](
parser: Parser[A, X1],
f: A => Parser[B, X2]
) extends Parser[B, X1 || X2]
30 / 39

25. object Parser {
sealed trait Parser[A, X <: Bool] { self =>
def map[B](f: A => B): Parser[B, X] =
Bind(self, (a: A) => Pure(f(a))).refl
def flatMap[B, X2 <: Bool](f: A => Parser[B, X2]): Parser[B, X || X2] =
Bind(self, f)
def refl[X2 <: Bool](implicit refl: Refl[X, X2]): Parser[A, X2] =
this.asInstanceOf[Parser[A, X2]]
}
case class Exactly(char: Char) extends Parser[Char, True]
case class Pure[A](value: A) extends Parser[A, False]
case class Or[A, X1 <: Bool, X2 <: Bool](
parser1: Parser[A, X1],
parser2: Parser[A, X2]
) extends Parser[A, X1 && X2]
case class Bind[A, X1 <: Bool, B, X2 <: Bool](
parser: Parser[A, X1],
f: A => Parser[B, X2]
) extends Parser[B, X1 || X2]
}
import Parser._
31 / 39

26. val a: Parser[Char, True] = Exactly('a')
val b: Parser[Char, True] = Exactly('b')
val aOrB: Parser[Char, True] = Or(a, b)
def wrapped[A, X <: Bool](parser: Parser[A, X]): Parser[A, True] =
for {
_ <- Exactly('(')
a <- parser
_ <- Exactly(')')
} yield a
def maybe[A, X <: Bool](parser: Parser[A, X]): Parser[Option[A], False] =
Or(parser.map(Option(_)), Pure(Option.empty[A])).refl
object someMany {
def some[A, X <: Bool](parser: Parser[A, X]): Parser[List[A], X] =
parser.flatMap { a =>
many(parser).map(a :: _)
}.refl
def many[A, X <: Bool](parser: Parser[A, X]): Parser[List[A], False] =
Or(some(parser), Pure(List.empty[A])).refl
}
import someMany._
32 / 39

27. def runForget[F[_], A](
parser: Parser[A, _]
)(
): F[A] =
parser match {
case Exactly(char) =>
for {
case Pure(value) => M.point(value)
case Or(parser1, parser2) =>
M.get.flatMap { input =>
M.handleError(runForget(parser1)) { _ =>
M.put(input) runForget(parser2)
}
}
case Bind(parser, f) => runForget(parser).flatMap(a => runForget(f(a)))
}
type Result[A] = Either[Error, A]
type ResultST[A] = StateT[Result, String, A]
def toResultST[A](parser: Parser[A, True]): ResultST[A] = {
}
def eval[A](parser: Parser[A, True])(input: String): Result[A] = toResultST(p
33 / 39

28. scala> eval(aOrBs)("")
res48: Result[List[Char]] = Left(EOF)
scala> eval(aOrBs)("abcd")
res49: Result[List[Char]] = Left(Unexpected(a))
scala> eval(aOrBs)("(a)(b)(b)(a)cd")
res50: Result[List[Char]] = Right(List(a, b, b, a))
scala> eval(maybeAB)("")
<console>:43: error: type mismatch;
found : Parser.Parser[Option[Char],False]
required: Parser.Parser[?,True]
eval(maybeAB)("")
^
val aOrBs = some(wrapped(aOrB))
aOrBs: Parser.Parser[List[Char],True] = Bind(Bind(Exactly((),\$\$Lambda\$4605
val maybeAB = maybe(aOrB)
maybeAB: Parser.Parser[Option[Char],False] = Or(Bind(Or(Exactly(a),Exactly
34 / 39

29. def stupid: Parser[Unit, False] = Pure(()).flatMap(_ => stupid)
def woops: Parser[Unit, True] = Exactly('\$').flatMap(_ => stupid)
scala> eval(woops)("")
res52 Result[Unit] = Left(EOF)
scala> eval(woops)("\$")
java.lang.StackOverflowError
at scalaz.std.EitherInstances\$\$anon\$1.point(Either.scala:82)
at scalaz.std.EitherInstances\$\$anon\$1.point(Either.scala:67)
at scalaz.std.EitherInstances\$\$anon\$1.bind(Either.scala:70)
at scalaz.std.EitherInstances\$\$anon\$1.bind(Either.scala:67)
at scalaz.std.EitherInstances\$\$anon\$1.map(Either.scala:67)
at scalaz.IndexedStateT.apply(StateT.scala:10)
at scalaz.IndexedStateT.run(StateT.scala:13)
at scalaz.IndexedStateT.\$anonfun\$mapsf\$2(StateT.scala:98)
at scalaz.IndexedStateT.\$anonfun\$flatMap\$2(StateT.scala:65)
at scalaz.IndexedStateT.\$anonfun\$flatMap\$4(StateT.scala:67)
at scalaz.std.EitherInstances\$\$anon\$1.bind(Either.scala:70)
at scalaz.std.EitherInstances\$\$anon\$1.bind(Either.scala:67)
at scalaz.IndexedStateT.\$anonfun\$flatMap\$3(StateT.scala:67)
at scalaz.std.EitherInstances\$\$anon\$1.bind(Either.scala:70)
at scalaz.std.EitherInstances\$\$anon\$1.bind(Either.scala:67)
at scalaz.IndexedStateT.\$anonfun\$flatMap\$2(StateT.scala:65)
at scalaz.IndexedStateT.\$anonfun\$flatMap\$4(StateT.scala:67)
at scalaz.std.EitherInstances\$\$anon\$1.bind(Either.scala:70)
35 / 39

30. Cynicism: The hope that someday you will have known better all along.
Nein. A Manifesto, Eric Jarosinski
36 / 39

31. One must imagine Sisyphus happy
Existential Comics
37 / 39

32. Thank you