Upgrade to Pro — share decks privately, control downloads, hide ads and more …

pyconjp 2016 ライトニング用(未確定)

pyconjp 2016 ライトニング用(未確定)

nameのまま

英 谷口

August 16, 2016
Tweet

More Decks by 英 谷口

Other Decks in Programming

Transcript

  # Slide 1: start an EC2 instance and wait until it is running.
  from boto3.session import Session


  def start_ec2():
      """Start the EC2 instance ``instance_id`` and report whether it is running.

      Credentials, region and the instance id are read from the module-level
      names ``aws_access_key_id``, ``aws_secret_access_key``, ``region_name``
      and ``instance_id``, which are not shown on the slide.

      Returns:
          True if the instance is already running or is running after the
          start request; False otherwise.
      """
      session = Session(aws_access_key_id,  # authenticate
                        aws_secret_access_key,
                        region_name=region_name)
      ec2 = session.resource('ec2')  # EC2 resource
      instance = ec2.Instance(instance_id)  # instance object
      if instance.state['Name'] == 'running':  # already started
          return True
      instance.start()  # request the start
      instance.wait_until_running()  # wait until the instance is up
      # Resource attributes are cached after first access; refresh them so the
      # confirmation below sees the current state instead of the stale one.
      instance.reload()
      return instance.state['Name'] == 'running'
  # Slide 2: list the keys in an S3 bucket that share a prefix.
  from boto3.session import Session


  def list_s3_keys():
      """Yield the key of every object in the bucket matching ``prefix``.

      The slide shows this code without an enclosing ``def``; a bare ``yield``
      at module level is a syntax error, so it is wrapped in a generator
      function here.

      Credentials, region, ``bucket_name`` and ``prefix`` are read from
      module-level names that are not shown on the slide.

      Yields:
          The ``key`` attribute of each matching object summary.
      """
      session = Session(aws_access_key_id,  # authenticate
                        aws_secret_access_key,
                        region_name=region_name)
      s3 = session.resource('s3')  # S3 resource
      bucket = s3.Bucket(bucket_name)  # bucket object
      # Items whose key starts with the given prefix.
      for target_item in bucket.objects.filter(Prefix=prefix):
          yield target_item.key
  # Slide 3: read an S3 object into an in-memory file object.
  try:
      from cStringIO import StringIO  # Python 2
  except ImportError:
      from io import BytesIO as StringIO  # Python 3: the body is bytes

  from boto3.session import Session


  def get_file_obj():
      """Return the content of an S3 object as an in-memory file object.

      Credentials, region, ``bucket_name`` and ``key`` are read from
      module-level names that are not shown on the slide.

      Returns:
          A file-like object wrapping the whole object body, which is read
          into memory in one call.
      """
      session = Session(aws_access_key_id,  # authenticate
                        aws_secret_access_key,
                        region_name=region_name)
      s3 = session.resource('s3')  # S3 resource
      obj = s3.Object(bucket_name, key)  # file object
      # Read the full body. Downloading it as a plain file is another option.
      return StringIO(obj.get()['Body'].read())
  # Slide 4: exchange a Google OAuth2 refresh token for an access token.
  import json
  from contextlib import closing

  try:
      from urllib import urlencode  # Python 2
      from urllib2 import Request, urlopen
  except ImportError:
      from urllib.parse import urlencode  # Python 3
      from urllib.request import Request, urlopen


  def get_access_token():
      """Request a new access token from Google's OAuth2 token endpoint.

      ``client_id``, ``client_secret`` and ``refresh_token`` are read from
      module-level names that are not shown on the slide.

      Returns:
          The ``access_token`` value of the JSON response.

      Raises:
          KeyError: if the response has no ``access_token`` field.
      """
      google_account_config = {  # connection settings
          'client_id': client_id,
          'client_secret': client_secret,
          'refresh_token': refresh_token,
          'grant_type': 'refresh_token',
      }
      request = Request(  # request settings
          'https://accounts.google.com/o/oauth2/token',
          # Python 3 requires the POST body to be bytes.
          data=urlencode(google_account_config).encode('ascii'),
          headers={
              'Content-Type': 'application/x-www-form-urlencoded',
              'Accept': 'application/json',
          },
      )
      # Close the HTTP response once the JSON has been read.
      with closing(urlopen(request)) as http_response:
          response = json.load(http_response)
      return response['access_token']  # token from the response
  # Slide 5: build a Google Cloud Storage API client.
  from oauth2client.client import AccessTokenCredentials
  from googleapiclient.discovery import build


  def get_sevice_object():
      """Return a service object for the ``storage`` API, version ``v1``.

      A fresh token is fetched with ``get_access_token()`` on every call and
      wrapped in ``AccessTokenCredentials`` together with the user-agent
      string ``'MyAgent/1.0'``.

      The misspelt name (``sevice``) is kept because other slides call it.
      """
      token = get_access_token()
      credentials = AccessTokenCredentials(token, 'MyAgent/1.0', None)
      service = build('storage', 'v1', credentials=credentials)
      return service
  # Slide 6: resumable upload of a local file to Google Cloud Storage.
  from googleapiclient.http import MediaFileUpload


  def upload_file():
      """Upload ``local_file_path`` to ``bucket_name`` as ``storage_file_path``.

      The upload is resumable and is driven chunk by chunk; a progress line
      is printed after each intermediate chunk. ``local_file_path``,
      ``bucket_name``, ``storage_file_path`` and ``base_name`` are read from
      module-level names that are not shown on the slide.
      """
      service = get_sevice_object()
      media = MediaFileUpload(local_file_path, resumable=True)
      request = service.objects().insert(bucket=bucket_name,
                                         name=storage_file_path,
                                         media_body=media)
      # next_chunk() returns (status, None) while the upload is in progress
      # and a non-None response once it has finished, so loop on the response.
      response = None
      while response is None:
          progress, response = request.next_chunk()
          if progress:
              # Single-argument print() behaves the same on Python 2 and 3.
              print('{0} is uploading {1}/100'.format(
                  base_name, int(100 * progress.progress())))
  # Slide 7: BigQuery schema of the table being loaded.
  def get_source_schema():
      """Return the field definitions used as the load job's schema.

      Returns:
          A new list of four field dicts (``id``, ``time_stamp``, ``num``,
          ``value``). ``num`` is the only field without an explicit ``mode``.
      """
      return [
          {'name': 'id', 'type': 'STRING', 'mode': 'REQUIRED'},
          {'name': 'time_stamp', 'type': 'TIMESTAMP', 'mode': 'REQUIRED'},
          {'name': 'num', 'type': 'INTEGER'},
          {'name': 'value', 'type': 'STRING', 'mode': 'REQUIRED'},
      ]
  # Slide 8: request body for a BigQuery load job.
  import uuid


  def create_load_data_body():
      """Build the body passed to ``jobs().insert`` for a load job.

      ``project_id``, ``storage_path``, ``data_set_id`` and ``table_id`` are
      read from module-level names that are not shown on the slide.

      Returns:
          A dict with a ``jobReference`` (project id plus a random UUID job
          id) and a ``configuration.load`` section naming the source URI,
          the schema from ``get_source_schema()`` and the destination table.
      """
      return {
          'jobReference': {
              'projectId': project_id,
              # The key must be 'jobId': execute_job() reads
              # job['jobReference']['jobId'] from the inserted job.
              'jobId': str(uuid.uuid4()),
          },
          'configuration': {
              'load': {
                  # NOTE(review): presumably storage_path starts with
                  # '//bucket/...' so this forms a gs:// URI - confirm.
                  'sourceUris': ['gs:' + storage_path],
                  'schema': {
                      'fields': get_source_schema(),
                  },
                  'destinationTable': {
                      'projectId': project_id,
                      'datasetId': data_set_id,
                      'tableId': table_id,
                  },
              },
          },
      }
  # Slide 9: build a BigQuery API client.
  from oauth2client.client import AccessTokenCredentials
  from googleapiclient.discovery import build


  def get_sevice_object():
      """Return a service object for the ``bigquery`` API, version ``v2``.

      A fresh token is fetched with ``get_access_token()`` on every call and
      wrapped in ``AccessTokenCredentials`` together with the user-agent
      string ``'MyAgent/1.0'``.

      The misspelt name (``sevice``) is kept because other slides call it.
      """
      token = get_access_token()
      credentials = AccessTokenCredentials(token, 'MyAgent/1.0', None)
      service = build('bigquery', 'v2', credentials=credentials)
      return service
  # Slide 10: submit the load job and poll until it is done.
  import time


  def execute_job():
      """Insert the load job and block until its state is ``DONE``.

      The job is polled every 5 seconds with no upper bound on the number of
      polls. ``project_id`` is read from a module-level name that is not
      shown on the slide.

      Returns:
          The job resource from the first poll whose ``status.state`` is
          ``'DONE'``. Only the state is checked here; whether the job
          succeeded is left to the caller.
      """
      body = create_load_data_body()
      service = get_sevice_object()
      job = service.jobs().insert(projectId=project_id,
                                  body=body).execute(num_retries=5)
      # Build the status request once and re-execute it on every poll.
      job_get = service.jobs().get(projectId=project_id,
                                   jobId=job['jobReference']['jobId'])
      while True:
          time.sleep(5)
          job_resource = job_get.execute(num_retries=5)
          if job_resource['status']['state'] == 'DONE':
              return job_resource
  # Slide 11: build a BigQuery API client (same code as the earlier slide).
  from oauth2client.client import AccessTokenCredentials
  from googleapiclient.discovery import build


  def get_sevice_object():
      """Return a service object for the ``bigquery`` API, version ``v2``.

      A fresh token is fetched with ``get_access_token()`` on every call and
      wrapped in ``AccessTokenCredentials`` together with the user-agent
      string ``'MyAgent/1.0'``.

      The misspelt name (``sevice``) is kept because other slides call it.
      """
      token = get_access_token()
      credentials = AccessTokenCredentials(token, 'MyAgent/1.0', None)
      service = build('bigquery', 'v2', credentials=credentials)
      return service
  # Slide 12: run a BigQuery query and stream the result rows.
  def query():
      """Run the query from ``get_query_data()`` and yield its rows.

      ``project_id`` is read from a module-level name that is not shown on
      the slide.

      Yields:
          For each result row, the list of its cell values (the ``v`` entry
          of every item in the row's ``f`` list). If a page reports
          ``totalRows == '0'``, the result of ``create_record_dict([])`` is
          yielded once and iteration stops.

      Raises:
          Exception: if a result page reports ``jobComplete`` as False.
      """
      service = get_sevice_object()
      query_data = get_query_data()
      response = service.jobs().query(projectId=project_id,
                                      body=query_data).execute(num_retries=5)
      # The job reference of the query response supplies the keyword
      # arguments for every getQueryResults call.
      for page in paging(service.jobs().getQueryResults,
                         num_retries=5,
                         **response['jobReference']):
          if page['jobComplete'] is False:
              raise Exception('job is not complete')
          if page['totalRows'] == '0':
              # NOTE(review): create_record_dict is not defined on any slide;
              # confirm what an empty result is expected to look like.
              yield create_record_dict([])
              return
          for row in page['rows']:
              yield [cell['v'] for cell in row['f']]
  # Slide 13: generic pager for list-style Google API calls.
  def paging(request_func, num_retries=5, **kwargs):
      """Yield every page of a paginated API call.

      Args:
          request_func: callable that takes ``**kwargs`` and returns a
              request object with an ``execute(num_retries=...)`` method.
          num_retries: forwarded to every ``execute`` call.
          **kwargs: arguments for ``request_func``; ``pageToken`` is added
              or updated from each response that carries one.

      Yields:
          Each response, in order. The last one yielded is the first
          response without a ``pageToken`` key.
      """
      has_next = True
      while has_next:
          response = request_func(**kwargs).execute(num_retries=num_retries)
          if 'pageToken' in response:
              # Carry the continuation token into the next request.
              kwargs['pageToken'] = response['pageToken']
          else:
              has_next = False
          yield response
  # Slide 14: request body for the BigQuery query call.
  def get_query_data():
      """Return the body passed to ``jobs().query``.

      ``query`` is read from a module-level name that is not shown on the
      slide.
      """
      return {
          'query': query,  # the query to run
          'timeoutMs': 1000,  # maximum wait time
          # What to do when the result data is large.
          # NOTE(review): confirm that the jobs.query endpoint accepts this
          # field.
          'allowLargeResults': True,
      }
  # Slide 15: look up an SQS queue by name.
  from boto3.session import Session


  def get_queue_object():
      """Return the SQS queue resource named ``queue_name``.

      Credentials, region and ``queue_name`` are read from module-level
      names that are not shown on the slide.
      """
      # Authenticate, then obtain the SQS resource from the session.
      sqs = Session(
          aws_access_key_id,
          aws_secret_access_key,
          region_name=region_name,
      ).resource('sqs')
      return sqs.get_queue_by_name(QueueName=queue_name)
  # Slide 16: receive messages from the SQS queue and delete them.
  def get_messages():
      """Receive up to three messages, delete them, and return their bodies.

      On the slide this function is also named ``get_queue_object``, so it
      calls itself forever and hides the helper that returns the queue. It
      is renamed here so the call below reaches that helper.

      Returns:
          A list of message bodies; empty when nothing was received.

      Raises:
          Exception: if the batch delete does not return HTTP 200 or
              reports failed entries.
      """
      queue = get_queue_object()  # queue object
      # boto3 actions accept keyword arguments only; this caps the batch size.
      messages = queue.receive_messages(MaxNumberOfMessages=3)
      if not messages:
          return []
      result_list = [message.body for message in messages]  # message bodies
      entries = [
          {
              'Id': message.message_id,  # message metadata
              'ReceiptHandle': message.receipt_handle,
          }
          for message in messages
      ]
      # Received messages must be deleted explicitly.
      response = queue.delete_messages(Entries=entries)
      if response['ResponseMetadata']['HTTPStatusCode'] != 200:
          raise Exception('delete messages is error')
      if response.get('Failed'):
          # A batch delete can return HTTP 200 with per-entry failures.
          raise Exception('delete messages is error')
      return result_list