Slide 31
Slide 31 text
Scoobi
!
val lines = fromTextFile("hdfs://in/...")
!
val counts = lines.mapFlatten(_.split(" "))
.map(word => (word, 1))
.groupByKey
.combine(Sum.int)
!
counts.toTextFile("hdfs://out/...",
overwrite=true).persist(ScoobiConfiguration())
map, groupByKey, combine => Map/Reduce tasks