require 'wukong'

# Map step: converts each input line into a ['sum', <rounded number>] record.
class Mapper < Wukong::Streamer::LineStreamer
  # Parses the line as a float, rounds it to the nearest integer and emits it
  # under the constant key 'sum', so every emitted record shares one key.
  #
  # line - the raw input line.
  #
  # Yields a two-element Array: ['sum', Integer].
  def process(line)
    yield ['sum', line.to_f.round]
  end
end

# Reduce step: adds up everything accumulated in `values` for the current key.
class Reducer < Wukong::Streamer::ListReducer
  # Emits [key, total], where total is the integer sum of the accumulated values.
  #
  # _line - unused. It is optional so the method can be invoked with or without
  #         a positional argument.
  #         NOTE(review): Wukong's AccumulatingReducer appears to call
  #         `finalize(&block)` with no arguments, which a required parameter
  #         would reject with ArgumentError -- confirm against the installed
  #         Wukong version.
  #
  # Yields a two-element Array: [key, Integer].
  def finalize(_line = nil)
    total = values.inject(0) do |acc, record|
      # NOTE(review): ListReducer#accumulate appears to store the whole record
      # (key, value, ...) rather than the bare value -- confirm. Taking the last
      # field handles that case; a bare scalar is used as-is.
      value = record.is_a?(Array) ? record.last : record
      acc + value.to_i
    end
    yield [key, total]
  end
end

Wukong::Script.new(Mapper, Reducer).run
bin/round_and_sum --run=local numbers.txt output<br/>Test locally with numbers.txt. Usage: bin/round_and_sum --run=<local or hadoop> <input> <output><br/>Run on a 100-node cluster with 100 TB of input:<br/>bin/round_and_sum --run=hadoop \<br/>hdfs://datanode/numbers-*.txt \<br/>hdfs://datanode/output \<br/>--jobtracker=jobtracker<br/>