Slide 37
Slide 37 text
Partition Projection
37
メタデータの登録 - AWS
-- Registers the Apache access-log table `apache_logs` in Athena with
-- partition projection enabled.
--
-- * Each log line is parsed by the Glue Grok SerDe using the pattern in
--   'input.format'; every captured field is exposed as a string column.
--   The pattern must stay on ONE line: a line break inside the literal
--   would become part of the pattern.
-- * The table is partitioned by `day`. Partition values and locations are
--   derived from the 'projection.*' table properties and
--   'storage.location.template', one partition per day across
--   'projection.day.range', in 'yyyy/MM/dd' form.
-- * NOTE(review): the bucket and prefix were empty in the original
--   ('s3:////'), presumably lost placeholders. Replace <bucket> and
--   <prefix> below with the real log location before running.
-- * All string literals use plain ASCII single quotes; typographic quotes
--   are not valid string delimiters.
CREATE EXTERNAL TABLE apache_logs (
    `client_ip` string,
    `client_id` string,
    `user_id` string,
    `request_received_time` string,
    `method` string,
    `client_request` string,
    `http_version` string,
    `server_status` string,
    `returned_obj_size` string,
    `http_referer` string,
    `user_agent` string
)
PARTITIONED BY (
    `day` string
)
ROW FORMAT SERDE
    'com.amazonaws.glue.serde.GrokSerDe'
WITH SERDEPROPERTIES (
    'input.format'='^%{IPORHOST:client_ip} %{USER:client_id} %{USER:user_id} \\[%{HTTPDATE:request_received_time}\\] \"%{WORD:method} %{NOTSPACE:client_request} HTTP/%{NUMBER:http_version}\" %{NUMBER:server_status} %{NUMBER:returned_obj_size} \"%{DATA:http_referer}\" \"%{DATA:user_agent}\"$'
)
STORED AS INPUTFORMAT
    'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
    'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
    's3://<bucket>/<prefix>/'
TBLPROPERTIES (
    'projection.day.format'='yyyy/MM/dd',
    'projection.day.interval'='1',
    'projection.day.interval.unit'='DAYS',
    'projection.day.range'='2025/01/20,2025/03/18',
    'projection.day.type'='date',
    'projection.enabled'='true',
    'storage.location.template'='s3://<bucket>/<prefix>/${day}'
)
パーティションの値と場所をテーブルのプロパティ設定を基に自動で算出
→ ALTER TABLE によるパーティションロードの必要なし
メモリ内で計算処理を行うため高速
→ GetPartitions を呼び出す必要なし
• TBLPROPERTIES でテーブルプロパティを設定
参考:
https://docs.aws.amazon.com/ja_jp/athena/latest/ug/partition-projection-supported-types.html
最終的なAthena クエリで
実行するDDL