Slide 28
Slide 28 text
# Weekly "commit" pipeline for client_xyz. The tasks run strictly in sequence:
# S3 ingest key check -> Athena partition registration -> Glue job submission
# -> Glue job run check -> S3 processed key check -> MS Teams notification.
#
# NOTE(review): every path/partition template below combines the *calendar*
# year (`execution_date.year`) with the *ISO* week number
# (`execution_date.isocalendar()[1]`). These disagree near year boundaries:
# 2021-01-03 renders as year=2021/week=53, although ISO week 53 belongs to
# ISO year 2020 (`isocalendar()[0]`). The templates are left unchanged here
# because the S3 layout is shared with whatever writes the files -- confirm
# the intended convention before switching to the ISO year.
with DAG("commit_dag", default_args=default_args, schedule_interval="@weekly") as dag:
    # Sensor on the ingest bucket for this run's file; the key is rendered
    # from the run's execution date via Jinja templating.
    s3_key_sensor = S3KeySensor(
        task_id="s3_key_sensor",
        bucket_name="dataservices-ingest",
        bucket_key=(
            "ingest/client_xyz/year={{ execution_date.year }}/"
            "week={{ execution_date.isocalendar()[1] }}/{{ ds_nodash }}.json.gz"
        ),
    )

    # Registers the (year, week) partition on raw_client_xyz; the statement
    # uses IF NOT EXISTS. The query is written as adjacent literals so the
    # rendered SQL carries no indentation from this source file.
    aws_athena_update_raw_partition = AwsAthenaQueryOperator(
        task_id="athena_update_raw_partition",
        query=(
            "\n"
            "ALTER TABLE raw_client_xyz\n"
            "ADD IF NOT EXISTS PARTITION (year='{{ execution_date.year }}', "
            "week='{{ execution_date.isocalendar()[1] }}');"
        ),
    )

    # Submits the "commit_job" Glue job for the same partition. Presumably the
    # operator pushes the run id to XCom under key 'aws_glue_job_run_id' (the
    # sensor below pulls that key) -- verify against the operator.
    aws_glue_job_schedule = AwsGlueScheduleJobOperator(
        task_id="aws_glue_job_schedule",
        job_name="commit_job",
        job_args={
            "--source_database": "dataservices_staging",
            "--source_table_name": "raw_client_xyz",
            "--partition_year": "{{ execution_date.year }}",
            "--partition_week": "{{ execution_date.isocalendar()[1] }}",
        },
    )

    # Sensor on the Glue run whose id is pulled from the scheduling task's XCom.
    aws_glue_job_sensor = AWSGlueJobSensor(
        task_id="aws_glue_job_sensor",
        job_name="commit_job",
        job_run_id=(
            "{{ task_instance.xcom_pull(task_ids='aws_glue_job_schedule', "
            "key='aws_glue_job_run_id') }}"
        ),
    )

    # Sensor on the processed bucket, using the same year/week/date key layout
    # as the ingest check.
    s3_processed_key_check = S3KeySensor(
        task_id="s3_processed_key_check",
        bucket_name="dataservices-processed",
        bucket_key=(
            "processed/client_xyz/year={{ execution_date.year }}/"
            "week={{ execution_date.isocalendar()[1] }}/{{ ds_nodash }}.json.gz"
        ),
    )

    # Final step: post a message through the MS Teams webhook operator.
    ms_teams_notify = MSTeamsWebhookOperator(
        task_id="ms_teams_notify",
        message="Data from client ✨ xyz ✨ has been processed",
    )

    # Linear dependency chain; parentheses replace the backslash continuation.
    (
        s3_key_sensor
        >> aws_athena_update_raw_partition
        >> aws_glue_job_schedule
        >> aws_glue_job_sensor
        >> s3_processed_key_check
        >> ms_teams_notify
    )