Upgrade to Pro — share decks privately, control downloads, hide ads and more …

地理空間データの機械学習への適用 / machine_learning_for_spatial_data

地理空間データの機械学習への適用 / machine_learning_for_spatial_data

2024年3月9日に開催された[日本統計学会第18回春季集会](https://jss2024spring.ywstat.jp/)(成城大学)における企画セッション AM-B「Rユーザのための機械学習チュートリアル」で用いたスライド資料です。

Rのソースコードを含んだGitHubリポジトリ: https://github.com/uribo/240309_jss18tutorial

第一部「Rの機械学習フレームワークの紹介〜tidymodelsを中心に〜
第二部「地理空間データの機械学習への適用

Uryu Shinya

March 11, 2024
Tweet

More Decks by Uryu Shinya

Other Decks in Research

Transcript

  1. Ψεڙڅঢ়گͷೋ஋෼ྨϞσϧͷߏங lp_supply_sf |> glimpse() #> Rows: 1,812 #> Columns: 9

    #> $ price <int> 115000, 61400, 57200, 51600, 177000, 208000, 57600, 68600… #> $ water <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRU… #> $ gas <fct> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TR… #> $ sewer <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRU… #> $ above_floor <int> 8, 0, 3, 3, 5, 12, 3, 3, 3, 2, 6, 2, 7, 2, 2, 3, 2, 3, 3,… #> $ under_floor <int> 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, … #> $ dist_from_st <int> 1300, 900, 270, 2100, 150, 0, 150, 850, 1200, 400, 640, 1… #> $ fire <chr> "๷Ր", "४๷", "४๷", "४๷", "๷Ր", "๷Ր", "४๷", "… #> $ geometry <POINT [°]> POINT (140.4655 36.37854), POINT (140.47 36.3659), …
  2. ֶशσʔλͷ஍Ҭ֎ʹϞσϧΛద༻͢Δ ࢛ࠃݝͷ౎ಓ෎ݝ஍ՁௐࠪσʔλʢMQ@TVQQMZ@TG@TIJLPLVʣΛ༻ҙ # rf_final_fit… ୈҰ෦Ͱ࠷ޙʹߏஙͨ͠ϞσϧʢRFɺϋΠύϥ୳ࡧɺಛ௃ྔΤϯδχΞϦϯάΛద༻ʣ extract_workflow(rf_final_fit) |> augment(new_data = lp_test)

    |> brier_class(truth = gas, .pred_FALSE) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> <chr> <chr> <dbl> #> 1 brier_class binary 0.0417 extract_workflow(rf_final_fit) |> # fire͕NAͷ΋ͷΛআ֎ augment(new_data = lp_supply_sf_shikoku |> dplyr::filter(!is.na(fire))) |> brier_class(truth = gas, .pred_FALSE) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> <chr> <chr> <dbl> #> 1 brier_class binary 0.121 Ϟσϧͷ༧ଌਫ਼౓͕௿Լ
  3. ۭؒґଘੑΛߟྀۭͨؒ͠ަࠩݕূ๏ set.seed(123) lpsp_split <- initial_split(lp_supply_sf, prop = 0.8, strata =

    gas) lpsp_train <- training(lpsp_split) lpsp_test <- testing(lpsp_split) library(spatialsample) set.seed(123) lpsp_test <- spatial_clustering_cv(lpsp_train, v = 10) spatial_block_cv(lpsp_train, v = 10)
  4. ۭؒґଘੑΛߟྀۭͨؒ͠ަࠩݕূ๏ fit_resamples(rf_wflow, lpsp_folds_cluster, control = lp_ctrl) |> collect_metrics() #> #

    A tibble: 2 × 6 #> .metric .estimator mean n std_err .config #> <chr> <chr> <dbl> <int> <dbl> <chr> #> 1 accuracy binary 0.869 10 0.0368 Preprocessor1_Model1 #> 2 roc_auc binary 0.798 10 0.0313 Preprocessor1_Model1 finalize_workflow(rf_tune_wflow, lp_tune_best_parameter) |> last_fit(lp_split) |> collect_metrics() #> # A tibble: 2 × 4 #> .metric .estimator .estimate .config #> <chr> <chr> <dbl> <chr> #> 1 accuracy binary 0.950 Preprocessor1_Model1 #> 2 roc_auc binary 0.909 Preprocessor1_Model1 ۭؒґଘੑΛߟྀ͠ͳ͍ަࠩݕূ๏ͰಘͨϦαϯϓϧσʔλ͔ΒಘͨϞσϧͷੑೳ աֶशʁ
  5. \NMSWFSTF^ͷ৔߹ rr_spcv_rf$aggregate(measures = msr("classif.acc")) #> classif.acc #> 0.8751354 rr_spcv_rf$aggregate(measures =

    msr("classif.sensitivity")) #> classif.sensitivity #> 0.993692 rr_spcv_rf$aggregate(measures = msr("classif.specificity")) #> classif.specificity #> 0.1686585 library(mlr3verse) library(mlr3spatiotempcv) lpsp_task <- as_task_classif_st(lpsp_train, target = "gas", positive = "TRUE") lpsp_folds_mlr <- rsmp("repeated_spcv_coords", folds = 5, repeats = 100) rr_spcv_rf <- resample(task = lpsp_task, learner = lrn("classif.ranger"), resampling = lpsp_folds_mlr)
  6. Ϟσϧͷద༻ൣғ "SFBPG"QQMJDBCJMJUZ"0"  importance <- vip::vi_permute( rf_fit, target = "gas",

    metric = "accuracy", pred_wrapper = function(object, newdata) { predict(object, new_data = newdata)$.pred_class }, train = lp_train |> select(gas, sewer, price, above_floor, dist_from_st)) importance #> # A tibble: 4 × 2 #> Variable Importance #> <chr> <dbl> #> 1 sewer 0.00621 #> 2 price 0.0242 #> 3 above_floor 0.00483 #> 4 dist_from_st 0.0152 ᶃม਺ॏཁ౓ΛٻΊΔ
  7. Ϟσϧͷద༻ൣғ "SFBPG"QQMJDBCJMJUZ"0"  ᶄ"0"ͷࢉग़ lp_aoa <- waywiser::ww_area_of_applicability( f, lp_train |>

    mutate(gas = as.numeric(gas) - 1, sewer = as.numeric(sewer), fire = as.numeric(as.factor(fire))), lp_test |> mutate(gas = as.numeric(gas) - 1, sewer = as.numeric(sewer), fire = as.numeric(as.factor(fire))), importance = importance ) lp_aoa #> # Predictors: #> 4 #> Area-of-applicability threshold: #> 0.0865665
  8. Ϟσϧͷద༻ൣғ "SFBPG"QQMJDBCJMJUZ"0"  ᶄ"0"ͷࢉग़ mapview::mapview(lp_supply_aoa |> filter(aoa == TRUE), col.regions

    = "gray") + mapview::mapview(lp_supply_aoa |> filter(aoa == FALSE), col.regions = "#FFA500") lp_supply_aoa <- bind_cols( lp_supply_sf |> mutate(gas = as.numeric(gas)-1, sewer = as.numeric(sewer)), predict(lp_aoa, lp_supply_sf |> mutate(gas = as.numeric(gas)-1, sewer = as.numeric(sewer)))) 用 AOA AOA