Slide 40
Slide 40 text
WORD COUNT
import apache_beam as beam, re
# Word-count pipeline: read lines from "input.txt", tokenize each line into
# words, count the occurrences of every distinct word, and write one
# "word: count" line per word via WriteToText to "output/stringcounts".
# The pipeline runs when the `with` block exits.
with beam.Pipeline() as p:
    (
        p
        | beam.io.textio.ReadFromText("input.txt")
        # findall(r"\w+") yields the same tokens as splitting on "\W+" but
        # never produces empty strings (blank lines, or lines that start or
        # end with punctuation), so "" is not counted as a word.
        | beam.FlatMap(lambda s: re.findall(r"\w+", s))
        | beam.combiners.Count.PerElement()
        # Count.PerElement emits (element, count) pairs. Python 3 removed
        # tuple-unpacking lambda parameters, so index into the pair instead.
        | beam.Map(lambda wc: "%s: %d" % (wc[0], wc[1]))
        | beam.io.textio.WriteToText("output/stringcounts")
    )