/** Constant count of 1 that is written alongside every token. */
private static final IntWritable one = new IntWritable(1);

/** Reusable output key; overwritten with each token just before it is written. */
private Text word = new Text();

/**
 * Splits the text of {@code value} into tokens and writes one
 * {@code (token, 1)} pair to {@code context} per token.
 *
 * @param key     input key; not read by this method
 * @param value   text that is split using {@link StringTokenizer}'s default delimiters
 * @param context receiver of every {@code (word, one)} pair
 * @throws IOException          propagated unchanged; nothing in this method catches it
 * @throws InterruptedException propagated unchanged; nothing in this method catches it
 */
public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  for (StringTokenizer tokens = new StringTokenizer(value.toString());
      tokens.hasMoreTokens(); ) {
    String token = tokens.nextToken();
    // The same Text instance is refilled for every token instead of allocating a new one.
    word.set(token);
    context.write(word, one);
  }
}
} // closes the enclosing class, whose header lies before this excerpt
= new IntWritable();

/**
 * Adds up every value supplied for {@code key} and writes the total once.
 *
 * <p>The total is stored in the {@code result} field before being written;
 * that field's declaration begins before this excerpt (presumably it is the
 * one completed by the initializer above — confirm against the full file).
 *
 * @param key     key that is written back together with the total
 * @param values  counts to add together; each is read with {@code get()}
 * @param context receiver of the single {@code (key, result)} pair
 * @throws IOException          propagated unchanged; nothing in this method catches it
 * @throws InterruptedException propagated unchanged; nothing in this method catches it
 */
public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  int total = 0;
  for (IntWritable count : values) {
    total = total + count.get();
  }
  result.set(total);
  context.write(key, result);
}
} // closes the enclosing class, whose header lies before this excerpt
GENERATE FLATTEN(TOKENIZE(line)) AS word; -- tail of a statement that begins before this excerpt; the flattened TOKENIZE(line) result is named word
-- Keep only the rows of words whose word satisfies MATCHES '\\w+'.
filtered_words = FILTER words BY word MATCHES '\\w+';
-- Group the surviving rows by their word value.
word_groups = GROUP filtered_words BY word;
-- For each group, emit the group key and the COUNT of its filtered_words bag.
word_count = FOREACH word_groups GENERATE group, COUNT(filtered_words);
-- Store the resulting relation under 'output'.
STORE word_count INTO 'output';
-ne 'puts $_.chomp.split("\t")[0,2].reverse.join("\t")' | \ sort -n -r 49.796916454164744! Igor Shafarevich 27.161954429544405! Pseudomathematics 27.161954429544405! Nelson Goodman 22.634962024620336! Johannes Kepler University of Linz 13.580977214772203! University of Maine at Farmington 13.580977214772203! Polish Academy of Learning 13.580977214772203! Michael Gove 13.580977214772203! Hao Wang (academic) 13.580977214772203! Farkas Bolyai 9.053984809848135 ! Stanisław Zaremba (mathematician) ...