Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Hadley Ecosystem 2016

Hadley Ecosystem 2016

Tokyo.R#53 での発表資料

Uryu Shinya

April 30, 2016
Tweet

More Decks by Uryu Shinya

Other Decks in Programming

Transcript

  1. グラフの種類に応じた作図関数 plot(x, y, ...) barplot(height, ...) boxplot(x, ...) hist(x, ...)

    pie(x, labels = names(x), ...) # 関数の引数が 揃っておらず、 覚えにくい {ggplot2}パッケージ
  2. グラフ描画のための文法に基づくレイヤーの考えを導入 library(ggplot2) ggplot(data, aes(x, y)) + geom_*() + xlab() +

    ylab() グラフに用いる要素を指定 aes() エステティック審美的要素を定義する関数 x y z   …   …   … {ggplot2}パッケージ
  3. グラフ描画のための文法に基づくレイヤーの考えを導入 library(ggplot2) ggplot(data, aes(x, y)) + geom_*() + xlab() +

    ylab() 描画するグラフの種類を指定 ex) geom_point() …散布図、 geom_bar() …棒グラフ {ggplot2}パッケージ
  4. グラフ描画のための文法に基づくレイヤーの考えを導入 library(ggplot2) ggplot(data, aes(x, y)) + geom_*() + xlab() +

    ylab() 必要な要素をレイヤーとして追加 {ggplot2}パッケージ # 統一された記法と関数により、 グラフ作成が簡潔に
  5. R標準の文字列操作関数と{stringi}パッケージ paste("ほく", "のうむ") # [1] "ほく のうむ" stringi::stri_c("ほく", "のうむ") #

    [1] "ほくのうむ" {stringr}パッケージ # {stringr}はICUをサポートする{stringi}の ラッパーパッケージ
  6. str_ という関数名 library(stringr) ls("package:stringr") %>% grep("^str_", ., value = TRUE)

    stringr::str_c("ほく", "のうむ") # [1] "ほくのうむ" {stringr}パッケージ
  7. library(stringi) stri_locale_list() %>% length() # [1] 683 stri_datetime_symbols() %>% names()

    # [1] "Month" "Weekday" "Quarter" "AmPm" "Era" stri_datetime_symbols(locale="ja_JP_TRADITIONAL")$Era %>% tail() # [1] "元治" "慶応" "明治" "大正" "昭和" "平成" {stringr}パッケージ # ロケール、ユニコード正規化などは {stringi}パッケージの関数で対応！
  8. stri_trans_general("㈱ホクソエム", id = "nfkd") # [1] "(株)ホクソエム" stri_trans_general("东京", "Any-ch_FONIPA") #

    [1] "dōng jīng" stri_trans_general("Nippon", "es-ja") # [1] "ニッポン" stri_trans_general("东京", "Simplified-Traditional") # [1] "東京" {stringr}パッケージ
  9. Rで日付・時間オブジェクトを扱う関数 c("2016-04-30") %>% as.Date() # [1] "2016-04-30" Sys.Date() # [1]

    "2016-04-30" {lubridate}パッケージ # ユーザーフレンドリーでない関数名と 時系列データを扱うパッケージの群雄割拠…
  10. library(lubridate) today() # [1] "2016-04-30" ymd("2016年4月30日", tz = "Asia/Tokyo") %>%

    month() # [1] 4 {lubridate}パッケージ タイムゾーンやロケールの 変更が関数内で実行可能
  11. 日付・時間の加工と構文解析 today() + weeks(1) # [1] "2016-05-06" x <- dmy("30/04/2016")

    day(x) <- 26 x # [1] "2016-04-26" {lubridate}パッケージ # 直感的に理解 しやすい関数名。 日付・時間データの 扱いに便利な 補助関数を提供
  12. 便利な補助関数群 (time1 <- ymd_hms("2016-04-01 09:00:00", tz = "Asia/Tokyo")) # [1]

    "2016-04-01 09:00:00 JST" with_tz(time1, "America/New_York") # [1] "2016-03-31 20:00:00 EDT" {lubridate}パッケージ 指定可能なタイムゾーンはOlsonNames() で確認
  13. (chl <- interval(ymd("1989年11月27日", tz = "Asia/Tokyo"), mdy("April 30, 2016", tz

    = "Asia/Tokyo"))) # [1] 1989-11-27 JST--2016-04-30 JST time1 %within% chl # [1] TRUE {lubridate}パッケージ
  14. {readr}パッケージ 型 関数 省略時の表記 論理型 col_logical() l (logical) 整数型 col_integer()

    i (integer) 実数型 col_double() d (double) 文字列型 col_character c (character) 日付型（Y-m-d表記） col_date(format) D (date) 日時型（[ISO8601]） col_datetime(format, tz) T (time) 時間型 col_time(format) 数値型 col_number() n 要因型 col_factor(levels, ordered) 読み込み対象としない col_skip() _, - collector()
  15. read_csv("x,y,z\n1,2,a\n3,4,b", col_types = cols(x = "i", y = "c", z

    = "_")) {readr}パッケージ Source: local data frame [2 x 2] x y <int> <chr> 1 1 2 2 3 4 変数の型は入力の先頭行から 自動的に推測される #
  16. library(httr) GET(…) POST(…) library(rvest) xml2::read_html(x = …) %>% html_nodes(xpath =

    …) ウェブデータ取得 # httpメソッドに対応した関数群。 認証、httpヘッダー等のサポート html xmlどんと来い
  17. Rパッケージ開発補助 library(devtools) install_*(…) use_*(…) check(run_dont_test = FALSE, …) build(…) install(…)

    CRANリポジトリ外からのパッケージインストール 継続的インテグレーション、gitリポジトリ…インフラ整備 # {roxygen2} {testthat}パッケージ による自動ドキュメント生成、 テスト実行
  18. library(dplyr) data("Butterflies", package = "gpk") glimpse(Butterflies) {dplyr}パッケージ インド国内の５か所で記録された蝶類の分布データ Observations: 44

    Variables: 9 $ Serial_Number (int) 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, ... $ Area (fctr) Indian Subcontinent, Western Himalaya, Western Himalaya, Western Himalaya, Western Himalaya, Western Himalaya, Western Himalaya, Western Himal... $ Locality (fctr) Indian Subcontinent, Western Himalaya, Kangra Hills, Shimla Hills, Dehradun Valley, Mussoorie Hills, Mussoorie Town, Kumaon Hills, Central Him... $ Total_Species_count (int) 1439, 417, 228, 299, 148, 323, 146, 371, 623, 962, 853, 690, 262, 423, 321, 510, 596, 276, 167, 124, 151, 109, 77, 110, 78, 78, 59, 145, 164, 1... $ Skippers (int) 307, 63, 25, 41, 22, 54, 14, 52, 125, 211, 189, 159, 27, 67, 119, 98, 132, 59, 32, 18, 19, 15, 11, 12, 14, 11, 4, 22, 19, 23, 54, 60, 20, 37, 4... $ Swallow_tails (int) 94, 31, 23, 21, 11, 23, 10, 26, 43, 69, 62, 55, 29, 38, 18, 42, 49, 13, 10, 9, 10, 7, 4, 4, 6, 5, 5, 10, 10, 11, 16, 18, 9, 15, 15, 23, 12, 5, ... $ Whites_Yellows (int) 99, 42, 37, 34, 19, 32, 13, 37, 49, 57, 52, 51, 32, 30, 1, 36, 40, 26, 19, 12, 23, 19, 20, 29, 21, 19, 18, 25, 50, 19, 21, 31, 25, 26, 30, 35, ... $ Blues (int) 458, 129, 56, 88, 42, 88, 44, 109, 185, 284, 258, 162, 48, 110, 126, 128, 166, 77, 57, 46, 38, 33, 22, 31, 20, 22, 17, 46, 41, 35, 66, 80, 50, ... $ Brush_Footed (int) 482, 152, 87, 115, 54, 126, 65, 147, 221, 342, 292, 263, 126, 178, 57, 207, 209, 101, 49, 39, 61, 35, 20, 34, 17, 21, 15, 42, 44, 42, 66, 89, 3...
  19. Butterflies %>% filter(Total_Species_count >= 200 | Area %in% c("Indian Subcontinent",

    "North East India+ North Myanmar", "Other parts of India"), grepl("Hills", Locality), between(Serial_Number, 10, 20)) {dplyr}パッケージ
  20. Butterflies %>% filter(Total_Species_count >= 200 | Area %in% c("Indian Subcontinent",

    "North East India+ North Myanmar", "Other parts of India"), grepl("Hills", Locality), between(Serial_Number, 10, 20)) {dplyr}パッケージ 論理和 真偽値を返す関数 範囲を抽出…{dplyr}の補助関数 要素の集合
  21. 「プリキュア」シリーズのAPI @sue445 req.girls <- GET("https://rubicure.herokuapp.com/ girls.json") %>% content() req.girls %>% class()

    # [1] "list" req.girls %>% .[[1]] %>% names() # [1] "girl_name" "human_name" "precure_name" "cast_name" "created_date" "color" "birthday" "transform_message" # [9] "extra_names" "attack_messages" "transform_calls" {dplyr} {purrr} {httr}
  22. req.girls %>% map_df(., ~ .[c("color")]) %>% group_by(color) %>% tally(sort =

    TRUE) Source: local data frame [8 x 2] color n <chr> <int> 1 pink 10 2 blue 9 … 7 green 2 8 black 1 i.e. summarise(n = n()), arrange(-n) {dplyr} {purrr} {httr}
  23. 脳トレ演習 select helpers var_1 var_2 third_variable 4th_variable starts_with("var_") ends_with("variable") contains("_") matches("[[:punct:]]")

    num_range("var_", 1:2) one_of("third_variable") everything() Your Turn どの変数が選択されるか？
  24. select helpers var_1 var_2 third_variable 4th_variable starts_with("var_") ◦ ◦ ✖ ✖

    ends_with("variable") ✖ ✖ ◦ ◦ contains("_") ◦ ◦ ◦ ◦ matches("[[:punct:]]") ◦ ◦ ◦ ◦ num_range("var_", 1:2) ◦ ◦ ✖ ✖ one_of("third_variable") ✖ ✖ ◦ ✖ everything() ◦ ◦ ◦ ◦ 脳トレ演習
  25. point <- c(-2:2, NA) if_else(point < 0, "あし", "よし", "なし")

    soum <- c("そうむ", "ほうむ", "ポエム", "のうむ") na_if(soum, "ポエム") nth(soum, 2) ベクトルを対象にした操作 Your Turn 脳トレ演習
  26. point <- c(-2:2, NA) if_else(point < 0, "あし", "よし", "なし")

    # [1] "あし" "あし" "よし" "よし" "よし" "なし" soum <- c("そうむ", "ほうむ", "ポエム", "のうむ") na_if(soum, "ポエム") # [1] "そうむ" "ほうむ" NA "のうむ" nth(soum, 2) # [1] "ほうむ" ベクトルを対象にした操作 脳トレ演習
  27. library(magrittr); library(stringr) df.q1 <- data_frame( point = c("10/13/13/16", "9/12/8/15")) %$%

    point %>% str_split_fixed(., "/", 4) %>% as.data.frame() %>% set_colnames(str_c("var", 1:4)) 脳トレ演習
  28. library(tibble) df.q2 <- frame_data( ~fruit, ~sales, "apple/banana/orange", "1/2/4", "orange/melon/apple/pear", "1/3/5/1")

    %$% data_frame( fruit = fruit %>% str_split("/"), sales = sales %>% str_split("/")) 脳トレ演習
  29. df.q1 var1 var2 var3 var4 1 10 13 13 16

    2 9 12 8 15 df.q2 Source: local data frame [2 x 2] fruit sales <list> <list> 1 <chr [3]> <chr [3]> 2 <chr [4]> <chr [4]> library(tidyr) df.q3 <- df.q2 %>% unnest() 脳トレ演習
  30. df.q3 Source: local data frame [7 x 2] fruit sales

    <chr> <chr> 1 apple 1 2 banana 2 3 orange 4 4 orange 1 5 melon 3 6 apple 5 7 pear 1 脳トレ演習
  31. データが持つ階層性 A B C … … a … … a

    … … b … … c … … c … … c } } } data a data data c data data b data C result result a … … b … … c … …
  32. データが持つ階層性 A B C … … a … … a

    … … b … … c … … c … … c } } } data a data data c data data b data C result result a … … b … … c … … tidyr::nest() tidyr::unnest() purrr::map() dplyr::do()
  33. library(tidyr) by_area <- group_by(Butterflies, Area) %>% nest() {dplyr} {tidyr} Source:

    local data frame [8 x 2] Area data <fctr> <list> 1 Indian Subcontinent <tbl_df [1,8]> 2 Western Himalaya <tbl_df [7,8]> 3 Central Himalaya <tbl_df [1,8]> 4 North East India+ North Myanmar <tbl_df [9,8]> 5 Other parts of India <tbl_df [21,8]> 6 Sri Lanka <tbl_df [1,8]> 7 North Myanmar <tbl_df [3,8]> 8 South Myanmar <tbl_df [1,8]> # group_by() で指定 したグループごとに tbl_dfクラス オブジェクトとして 格納
  34. library(purrr) mod_res <- map(by_area$data, ~ glm(Total_Species_count ~ Skippers + Blues,

    data = ., family = poisson)) {dplyr} {purrr} # グループごとに関数を適用（mapping）
  35. mod_res[[4]] mod_res %>% class() # [1] "list" {dplyr} {purrr} #

    Call: glm(formula = Total_Species_count ~ Skippers + Blues, family = poisson, data = .) Coefficients: (Intercept) Skippers Blues 5.380924 0.003370 0.002889 Degrees of Freedom: 8 Total (i.e. Null); 6 Residual Null Deviance: 932.4 Residual Deviance: 100.3 AIC: 178.6 map() の返り値は リストオブジェクト map_df() では データフレーム
  36. library(broom) (do_mod_res <- group_by(Butterflies, Area) %>% do(mod = glm(Total_Species_count ~

    Skippers + Blues, data = ., family = poisson) %>% glance())) do_mod_res %>% class() # [1] "rowwise_df" "tbl_df" "tbl" "data.frame" {dplyr} {broom}
  37. {dplyr} {broom} do_mod_res %>% unnest() Source: local data frame [8

    x 8] Area null.deviance df.null logLik AIC BIC deviance df.residual <fctr> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <int> 1 Central Himalaya 0.0000 0 -4.136346 10.272691 8.272691 0.000000e+00 0 2 Indian Subcontinent 0.0000 0 -4.554848 11.109697 9.109697 -1.723066e-13 0 3 North East India+ North Myanmar 932.3645 8 -86.309184 178.618368 179.210041 1.002874e+02 6 4 North Myanmar 55.5590 2 -10.891251 27.782502 25.078339 2.664535e-14 0 5 Other parts of India 630.0408 20 -94.738568 195.477137 198.610704 4.746547e+01 18 6 South Myanmar 0.0000 0 -4.253793 10.507587 8.507587 5.240253e-14 0 7 Sri Lanka 0.0000 0 -3.663752 9.327503 7.327503 -4.884981e-15 0 8 Western Himalaya 255.5851 6 -33.529065 73.058130 72.895860 1.535052e+01 4 #階層構造を持つデータフレームを再結合