Slide 20
Slide 20 text
-- Load the customer records JSON file through JsonLoader.
-- The schema string passed to JsonLoader is elided ('...') in this listing.
-- The path must be delimited by plain ASCII single quotes on both sides.
products = LOAD '/example/products/customer_records_map_reduce_input.json'
    USING JsonLoader('...');
-- Load the customer-category table with an explicit three-field schema.
-- No USING clause is given, so Pig falls back to its default load function.
-- NOTE(review): confirm the .db file is actually in the format the default loader expects.
categories = LOAD '/example/dimension/customer_categories.db'
AS (categoryId:int,age:chararray,gender:chararray);
-- Inner join: pair every product record with the category row whose
-- categoryId equals the record's customerCategoryId.
joinedRecords = JOIN categories BY categoryId, products BY customerCategoryId;
--for each group of users, show top five selling products
-- Project the session id and the category attributes, and unnest the
-- product entries so that each output row describes a single product.
-- NOTE(review): assumes `products` inside this FOREACH resolves to a bag field
-- of the loaded records (the JsonLoader schema is elided) -- confirm.
flattenedProducts = FOREACH joinedRecords GENERATE
sessionId AS sessionId,
categories::categoryId AS categoryId,
categories::age AS age,
categories::gender AS gender,
FLATTEN(products.(id, name, category, bought, price))
AS (id, name, category, bought, price);
-- Keep only the rows whose product was actually bought.
boughtProducts = FILTER flattenedProducts BY bought == true;
-- One group per (customer category attributes, product id, product name).
groupedProducts = GROUP boughtProducts BY (categoryId, age, gender, id, name);
-- Count the bought rows in each group; FLATTEN(group) expands the group key
-- tuple back into its individual fields next to the count.
countedProducts = FOREACH groupedProducts GENERATE
FLATTEN(group),
COUNT(boughtProducts) AS
counter;
-- Regroup the per-product counts by customer category attributes.
-- The selection of the top five products per group is not part of this excerpt.
groupTopFiveProducts = GROUP countedProducts BY (categoryId, age, gender);