service_id = 1 AND event_id = 2 AND bucket IN (0, 1, ...) AND dt IN ('2020-11-18', '2020-11-19', '2020-11-20') AND occurrence >= 1 ) AS condition_0 LEFT OUTER JOIN ( SELECT user_id FROM cassandra.main.event_counts WHERE service_id = 1 AND event_id = 3 AND bucket IN (0, 1, ...) AND dt IN ('2020-11-18', '2020-11-19', '2020-11-20') AND occurrence >= 1 ) AS condition_1 ON condition_0.user_id = condition_1.user_id WHERE condition_1.user_id IS NULL; -- 08_アイテム購入画面を 3 日以内に 1 回以上実行した -- 購入を 3 日以内に 1 回以上実行した -- AND NOT
event_id = 2 AND dt IN ('2020-11-18', '2020-11-19', '2020-11-20') AND bucket IN (0, 1, ..., 63) AND occurrence >= 1; SELECT DISTINCT service_id, event_id, dt, bucket FROM main.event_counts WHERE service_id = 1 AND event_id = 2 AND dt = '2020-11-18' AND bucket = 0; SELECT DISTINCT service_id, event_id, dt, bucket FROM main.event_counts WHERE service_id = 1 AND event_id = 2 AND dt = '2020-11-19' AND bucket = 0; ... 1 x 1 x 3 x 64 = 192 queries Presto SQL CQL NativeCassandraSession#getPartitions
bucket FROM main.event_counts WHERE service_id = 1 AND event_id = 2 AND dt IN ('2020-11-18', '2020-11-19', '2020-11-20') AND bucket IN (0, 1, ..., 63); CQL
bucket FROM main.event_counts WHERE service_id = 1 AND event_id = 2 AND dt IN ('2020-11-18', '2020-11-19', '2020-11-20') AND bucket IN (0, 1, ..., 63); CQL 3x faster in our use cases but not enough!!
Bootstrap actions # Execute the script in background and exit immediately # because Presto is not installed until bootstrap actions end # cf. https://forums.aws.amazon.com/thread.jspa?threadID=220183 if [ $(whoami) != "root" ]; then sudo "$0" "$@" & exit 0 fi # Commands to execute after installing Presto go here