# NOTE(review): the original fragment began mid-chain (".table(...)"); the
# "df = spark.readStream" head below is reconstructed from the later
# `df.groupBy` usage — confirm against the original notebook.
df = (
    spark.readStream
    .table("bronze_events")
    # Tolerate up to 30 minutes of late-arriving events before windows close.
    .withWatermark("event_time", "30 minutes")
)

# Windowed aggregation: total `amount` per user per 10-minute tumbling window.
# NOTE(review): assumes `window`, `col`, and the aggregate `sum` come from
# pyspark.sql.functions — the Python builtin `sum` would fail on a Column.
windowed = df.groupBy(
    window(col("event_time"), "10 minutes"),
    col("user_id"),
).agg(sum("amount").alias("total"))


def write_with_merge(batch_df, batch_id):
    """Upsert one micro-batch into `gold_summary`, keyed by (user_id, window).

    Registered via foreachBatch; `batch_id` is supplied by the engine and
    unused here (MERGE makes the write idempotent on retries of a batch).
    """
    batch_df.createOrReplaceTempView("updates")
    # Fix: use the micro-batch's own session rather than the outer global
    # `spark`. With Spark Connect / serverless execution, foreachBatch runs
    # against a separate session, and the temp view `updates` only exists in
    # `batch_df.sparkSession` — the global handle would not see it.
    batch_df.sparkSession.sql("""
        MERGE INTO gold_summary t
        USING updates s
        ON t.user_id = s.user_id AND t.window = s.window
        WHEN MATCHED THEN UPDATE SET total = s.total
        WHEN NOT MATCHED THEN INSERT *
    """)


(
    windowed.writeStream
    .foreachBatch(write_with_merge)
    .option("checkpointLocation", "/checkpoints/windowed")
    # "update" emits re-aggregated window rows as the watermark advances;
    # the MERGE above turns those updates into upserts on the gold table.
    .outputMode("update")
    .start()
)

# --- And in SDP (Spark Declarative Pipelines)? ---
# The entire pipeline above collapses to one declarative statement; the engine
# owns the checkpoint and performs the incremental upsert itself.
# NOTE(review): the watermark clause syntax varies across runtime versions —
# some releases expect `WATERMARK event_time DELAY OF INTERVAL 30 MINUTES`
# instead of the `WITH WATERMARK ON ... DELAY OF ...` form below; confirm
# against the deployed runtime before relying on it.
#
#   CREATE OR REFRESH MATERIALIZED VIEW gold_summary AS
#   SELECT
#     window(event_time, '10 minutes') AS window,
#     user_id,
#     SUM(amount) AS total
#   FROM STREAM(bronze_events)
#     WITH WATERMARK ON event_time DELAY OF 30 MINUTES
#   GROUP BY window(event_time, '10 minutes'), user_id;