-- Word count: emit one (word, count) row per distinct token in the input file.
-- The LOAD result must be bound to the alias `text`, which the next statement reads.
text = LOAD 'word_count_text.txt';
-- TOKENIZE turns the first field of each record into a bag of tokens;
-- FLATTEN un-nests that bag so every token becomes its own row.
words = FOREACH text GENERATE FLATTEN(TOKENIZE((chararray)$0)) AS word;
-- GROUP yields (group, words): the key plus a bag of every row carrying that key.
grouped_words = GROUP words BY word;
-- COUNT takes the per-group bag, which is named after the grouped relation `words`.
counts = FOREACH grouped_words GENERATE group AS word, COUNT(words) AS count;
STORE counts INTO 'wordcount';
-- Word count: emit one (word, count) row per distinct token in the input file.
-- The LOAD result must be bound to the alias `text`, which the next statement reads.
text = LOAD 'word_count_text.txt';
-- TOKENIZE turns the first field of each record into a bag of tokens;
-- FLATTEN un-nests that bag so every token becomes its own row.
words = FOREACH text GENERATE FLATTEN(TOKENIZE((chararray)$0)) AS word;
-- GROUP yields (group, words): the key plus a bag of every row carrying that key.
grouped_words = GROUP words BY word;
-- COUNT takes the per-group bag, which is named after the grouped relation `words`.
counts = FOREACH grouped_words GENERATE group AS word, COUNT(words) AS count;
STORE counts INTO 'wordcount';
-- Word Count in Pig
-- Emits one (word, count) row per distinct token in the input file.
-- The original fragment began at `= FOREACH text ...`: the `words` alias and the
-- LOAD that defines `text` were missing, so both are restored here.
text = LOAD 'word_count_text.txt';
-- TOKENIZE turns the first field of each record into a bag of tokens;
-- FLATTEN un-nests that bag so every token becomes its own row.
words = FOREACH text GENERATE FLATTEN(TOKENIZE((chararray)$0)) AS word;
-- GROUP yields (group, words): the key plus a bag of every row carrying that key.
grouped_words = GROUP words BY word;
-- COUNT takes the per-group bag, which is named after the grouped relation `words`.
counts = FOREACH grouped_words GENERATE group AS word, COUNT(words) AS count;
STORE counts INTO 'wordcount';
-- Word count: emit one (word, count) row per distinct token in the input file.
-- The LOAD result must be bound to the alias `text`, which the next statement reads.
text = LOAD 'word_count_text.txt';
-- TOKENIZE turns the first field of each record into a bag of tokens;
-- FLATTEN un-nests that bag so every token becomes its own row.
words = FOREACH text GENERATE FLATTEN(TOKENIZE((chararray)$0)) AS word;
-- GROUP yields (group, words): the key plus a bag of every row carrying that key.
grouped_words = GROUP words BY word;
-- COUNT takes the per-group bag, which is named after the grouped relation `words`.
counts = FOREACH grouped_words GENERATE group AS word, COUNT(words) AS count;
STORE counts INTO 'wordcount';
-- Word count: emit one (word, count) row per distinct token in the input file.
-- The LOAD result must be bound to the alias `text`, which the next statement reads.
text = LOAD 'word_count_text.txt';
-- TOKENIZE turns the first field of each record into a bag of tokens;
-- FLATTEN un-nests that bag so every token becomes its own row.
words = FOREACH text GENERATE FLATTEN(TOKENIZE((chararray)$0)) AS word;
-- GROUP yields (group, words): the key plus a bag of every row carrying that key.
grouped_words = GROUP words BY word;
-- COUNT takes the per-group bag, which is named after the grouped relation `words`.
counts = FOREACH grouped_words GENERATE group AS word, COUNT(words) AS count;
STORE counts INTO 'wordcount';
-- Word count: emit one (word, count) row per distinct token in the input file.
-- The LOAD result must be bound to the alias `text`, which the next statement reads.
text = LOAD 'word_count_text.txt';
-- TOKENIZE turns the first field of each record into a bag of tokens;
-- FLATTEN un-nests that bag so every token becomes its own row.
words = FOREACH text GENERATE FLATTEN(TOKENIZE((chararray)$0)) AS word;
-- GROUP yields (group, words): the key plus a bag of every row carrying that key.
grouped_words = GROUP words BY word;
-- COUNT takes the per-group bag, which is named after the grouped relation `words`.
counts = FOREACH grouped_words GENERATE group AS word, COUNT(words) AS count;
STORE counts INTO 'wordcount';
-- Word count: emit one (word, count) row per distinct token in the input file.
-- The LOAD result must be bound to the alias `text`, which the next statement reads.
text = LOAD 'word_count_text.txt';
-- TOKENIZE turns the first field of each record into a bag of tokens;
-- FLATTEN un-nests that bag so every token becomes its own row.
words = FOREACH text GENERATE FLATTEN(TOKENIZE((chararray)$0)) AS word;
-- GROUP yields (group, words): the key plus a bag of every row carrying that key.
grouped_words = GROUP words BY word;
-- COUNT takes the per-group bag, which is named after the grouped relation `words`.
counts = FOREACH grouped_words GENERATE group AS word, COUNT(words) AS count;
STORE counts INTO 'wordcount';
-- wordcount.pig
-- Counts how often each word occurs in the input file and writes the counts
-- ordered by descending frequency, with ties broken alphabetically.
text = LOAD 'word_count_text.txt';
-- TOKENIZE turns the first field of each record into a bag of tokens;
-- FLATTEN un-nests that bag so every token becomes its own row.
words = FOREACH text GENERATE FLATTEN(TOKENIZE((chararray)$0)) AS word;
-- GROUP yields (group, words): the key plus a bag of every row carrying that key.
grouped_words = GROUP words BY word;
-- COUNT takes the per-group bag, which is named after the grouped relation `words`.
counts = FOREACH grouped_words GENERATE group AS word, COUNT(words) AS count;
sorted_counts = ORDER counts BY count DESC, word ASC;
-- Store the ordered relation: storing `counts` left the ORDER above unused,
-- so the output was written unsorted. The output path is unchanged.
STORE sorted_counts INTO 'wordcount';
-- wordcount.pig
-- Counts how often each word occurs in the input file and writes the counts
-- ordered by descending frequency, with ties broken alphabetically.
text = LOAD 'word_count_text.txt';
-- TOKENIZE turns the first field of each record into a bag of tokens;
-- FLATTEN un-nests that bag so every token becomes its own row.
words = FOREACH text GENERATE FLATTEN(TOKENIZE((chararray)$0)) AS word;
-- GROUP yields (group, words): the key plus a bag of every row carrying that key.
grouped_words = GROUP words BY word;
-- COUNT takes the per-group bag, which is named after the grouped relation `words`.
counts = FOREACH grouped_words GENERATE group AS word, COUNT(words) AS count;
-- Most frequent words first; equal counts fall back to alphabetical order.
sorted_counts = ORDER counts BY count DESC, word ASC;
STORE sorted_counts INTO 'wordcount_sorted';
-- Word Count Script (wordcount.pig) -- 5 lines of code
-- Emits one (word, count) row per distinct token in the input file.
text = LOAD 'word_count_text.txt';
-- TOKENIZE turns the first field of each record into a bag of tokens;
-- FLATTEN un-nests that bag so every token becomes its own row.
words = FOREACH text GENERATE FLATTEN(TOKENIZE((chararray)$0)) AS word;
-- GROUP yields (group, words): the key plus a bag of every row carrying that key.
grouped_words = GROUP words BY word;
-- COUNT must be given the per-group bag `words`; `grouped_words` is the outer
-- relation being iterated, not a field of its own rows.
counts = FOREACH grouped_words GENERATE group AS word, COUNT(words) AS count;
STORE counts INTO 'wordcount';
-- Example; Introduction to Apache Tez; Pig (Language); Pig for dummies; Pig Latin (Language Game); Xplenty; Data Jujitsu: The art of turning data into product; Pig Cheat Sheet