Slide 68
# Launch the EMR cluster (job flow); conn, bucket_name, target_date and
# install_step are prepared beforehand.
jobid = conn.run_jobflow(
    name='Create EMR and Exec hiveql [{}]'.format(target_date),
    log_uri='s3://{}/jobflow_logs/'.format(bucket_name),
    ec2_keyname='your_key',
    master_instance_type='m1.medium',
    slave_instance_type='m1.medium',
    num_instances=3,
    action_on_failure='TERMINATE_JOB_FLOW',
    keep_alive=True,
    enable_debugging=False,
    hadoop_version='2.4.0',
    steps=[install_step],
    bootstrap_actions=[],
    instance_groups=None,
    additional_info=None,
    ami_version='3.1.1',
    api_params=None,
    visible_to_all_users=True,
    job_flow_role=None)
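
The call above relies on conn, install_step, bucket_name, target_date and hive_version being defined earlier (presumably on previous slides). A minimal sketch of that setup with boto 2.x, using a placeholder region, bucket, date and Hive version:

import boto.emr
from boto.emr.step import InstallHiveStep

# Placeholder values; the real ones come from the surrounding presentation.
bucket_name = 'your-bucket'
target_date = '2014-10-01'
hive_version = '0.13.1'

# Connect to EMR (the region is an assumption) and build the Hive install
# step that run_jobflow() receives via steps=[install_step].
conn = boto.emr.connect_to_region('ap-northeast-1')
install_step = InstallHiveStep(hive_versions=hive_version)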
# Build one HiveStep per query file and add them to the running job flow.
query_files = ['sample01.hql', 'sample02.hql']
hql_steps = []
for query_file in query_files:
    hql_step = HiveStep(
        name='Executing Query [{}]'.format(query_file),
        hive_file='s3n://{0}/hive-script/{1}'.format(bucket_name, query_file),
        hive_versions=hive_version,
        hive_args=['-dTARGET_DATE={0}'.format(target_date),
                   '-dBUCKET_NAME={0}'.format(bucket_name)])
    hql_steps.append(hql_step)

conn.add_jobflow_steps(jobid, hql_steps)
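
Because keep_alive=True, the cluster stays up after the Hive steps finish. A hedged sketch of waiting for the steps and then shutting the job flow down (the polling interval and the set of states checked are assumptions, not part of the slide):

import time

# Poll the job flow until it is idle or finished; with keep_alive=True it
# settles into WAITING instead of terminating on its own.
while True:
    state = conn.describe_jobflow(jobid).state
    if state in ('WAITING', 'COMPLETED', 'FAILED', 'TERMINATED'):
        break
    time.sleep(60)

# Tear the cluster down once the queries are done.
conn.terminate_jobflow(jobid)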