Slide 30
Slide 30 text
3-3. 感情分析の結果をクエリしやすいようにフラット化してレイクデータベースに保存
# Flatten the nested sentiment-analysis result into one row per sentence so it
# is easy to query, then persist it as a table.
#
# explode() converts the elements of an array column into rows; here it is
# applied to sentiment.document.sentences.
#
# The chain is wrapped in parentheses so that it can span multiple lines
# without backslash continuations.
sentiment_flat_df = (
    sentiment_df
    # Step 1: keep the article-level columns, pull the document-level scores
    # out of the nested `sentiment` column, and explode the sentences array.
    .select(
        "url",
        "datetime",
        "title",
        "body",
        # NOTE(review): this carries the whole `sentiment` column under a new
        # name, not a single label -- confirm that is what is wanted.
        col("sentiment").alias("article_sentiment"),
        col("sentiment.document.id").alias("articleId"),
        col("sentiment.document.confidenceScores.positive").alias("article_positive"),
        col("sentiment.document.confidenceScores.neutral").alias("article_neutral"),
        col("sentiment.document.confidenceScores.negative").alias("article_negative"),
        explode("sentiment.document.sentences").alias("exploded"),
        "error",
    )
    # Step 2: expand the fields of each exploded element into top-level
    # columns via "exploded.*".
    .select(
        "url",
        "datetime",
        "title",
        "body",
        "article_sentiment",
        "articleId",
        "article_positive",
        "article_neutral",
        "article_negative",
        "exploded.*",
        "error",
    )
    # Step 3: choose the final column set and rename the per-sentence
    # confidence scores so they do not clash with the article-level ones.
    # Presumably text/confidenceScores/targets/assessments/offset/length are
    # the fields produced by "exploded.*" -- verify against the actual schema.
    .select(
        "url",
        "datetime",
        "title",
        "body",
        "article_sentiment",
        "articleId",
        "article_positive",
        "article_neutral",
        "article_negative",
        "text",
        col("confidenceScores.positive").alias("text_positive"),
        col("confidenceScores.neutral").alias("text_neutral"),
        col("confidenceScores.negative").alias("text_negative"),
        "targets",
        "assessments",
        "offset",
        "length",
        "error",
    )
)

# Preview the first 30 flattened rows in the notebook.
display(sentiment_flat_df.limit(30))

# Persist the flattened result, replacing any existing table of the same name.
sentiment_flat_df.write.mode('overwrite').saveAsTable("livedoornews03_sentimentflatten")