Uryu Shinya
April 30, 2016
17k

Tokyo.R#53 での発表資料

April 30, 2016

## Transcript

\$ !

11. ### グラフの種類に応じた作図関数 plot(x, y, ...) barplot(height, ...) boxplot(x, ...) hist(x, ...)

pie(x, labels = names(x), ...) # 関数の引数が 揃っておらず、 覚えにくい {ggplot2}パッケージ
12. ### グラフ描画のための文法に基づくレイヤーの考えを導入 library(ggplot2) ggplot(data, aes(x, y)) + geom_*() + xlab() +

ylab() {ggplot2}パッケージ
13. ### グラフ描画のための文法に基づくレイヤーの考えを導入 library(ggplot2) ggplot(data, aes(x, y)) + geom_*() + xlab() +

ylab() グラフに用いる要素を指定 aes() エステティック審美的要素を定義する関数 x y z   …   …   … {ggplot2}パッケージ
14. ### グラフ描画のための文法に基づくレイヤーの考えを導入 library(ggplot2) ggplot(data, aes(x, y)) + geom_*() + xlab() +

ylab() 描画するグラフの種類を指定 ex. geom_point() …散布図、 geom_bar() …棒グラフ {ggplot2}パッケージ

16. ### グラフ描画のための文法に基づくレイヤーの考えを導入 library(ggplot2) ggplot(data, aes(x, y)) + geom_*() + xlab() +

ylab() 必要な要素をレイヤーとして追加 {ggplot2}パッケージ # 統一された記法と関数により、 グラフ作成が簡潔に
17. ### 3ඪ४ͷจࣈྻૢ࡞ؔ਺ͱ\TUSJOHJ^ύοέʔδ paste("ほく", "のうむ") # [1] "ほく のうむ" stringi::stri_c("ほく", "のうむ") #

[1] "ほくのうむ" \TUSJOHS^ύοέʔδ # \TUSJOHS^͸*\$6Λαϙʔτ͢Δ\TUSJOHJ^ͷ ϥούʔύοέʔδ
18. ### str_*() という関数名 library(stringr) ls("package:stringr") %>% grep("^str_", ., value = TRUE)

stringr::str_c("ほく", "のうむ") # [1] "ほくのうむ" {stringr}パッケージ
19. ### library(stringi) stri_locale_list() %>% length() # [1] 683 stri_datetime_symbols() %>% names()

# [1] "Month" "Weekday" "Quarter" "AmPm" "Era" stri_datetime_symbols(locale="ja_JP_TRADITIONAL")\$Era %>% tail() # [1] "元治" "慶応" "明治" "大正" "昭和" "平成" {stringr}パッケージ # ロケール、ユニコード正規化などは {stringi}パッケージの関数で対応！
20. ### stri_trans_general("㈱ﾎｸｿｴﾑ", id = "nfkd") stri_trans_general("东京", "Any-ch_FONIPA") stri_trans_general("Nippon", "es-ja") stri_trans_general("东京", "Simplified-Traditional")

{stringr}パッケージ Your Turn
21. ### stri_trans_general("㈱ﾎｸｿｴﾑ", id = "nfkd") # [1] "(株)ホクソエム" stri_trans_general("东京", "Any-ch_FONIPA") #

[1] "dōng jīng" stri_trans_general("Nippon", "es-ja") # [1] "ニッポン" stri_trans_general("东京", "Simplified-Traditional") # [1] "東京" {stringr}パッケージ
22. ### 3Ͱ೔෇ɾ࣌ؒΦϒδΣΫτΛѻ͏ؔ਺ c("2016-04-30") %>% as.Date() # [1] "2016-04-30" Sys.Date() # [1]

"2016-04-30" \MVCSJEBUF^ύοέʔδ # ϢʔβʔϑϨϯυϦʔͰͳ͍ؔ਺໊ͱ ࣌ܥྻσʔλΛѻ͏ύοέʔδͷ܈༤ׂڌʜ
23. ### library(lubridate) today() # [1] "2016-04-30" ymd("2016年4月30日", tz = "Asia/Tokyo") %>%

month() # [1] 4 {lubridate}パッケージ タイムゾーンやロケールの 変更が関数内で実行可能
24. ### 日付・時間の加工と構文解析 today() + weeks(1) # [1] "2016-05-06" x <- dmy("30/04/2016")

day(x) <- 26 x # [1] "2016-04-26" {lubridate}パッケージ # 直感的に理解 しやすい関数名。 日付・時間データの 扱いに便利な 補助関数を提供
25. ### 便利な補助関数群 (time1 <- ymd_hms("2016-04-01 09:00:00", tz = "Asia/Tokyo")) # [1]

"2016-04-01 09:00:00 JST" with_tz(time1, "America/New_York") # [1] "2016-03-31 20:00:00 EDT" {lubridate}パッケージ 指定可能なタイムゾーンはOlsonNames() で確認
26. ### (chl <- interval(ymd("1989年11月27日", tz = "Asia/Tokyo"), mdy("April 30, 2016", tz

= "Asia/Tokyo"))) # [1] 1989-11-27 JST--2016-04-30 JST time1 %within% chl # [1] TRUE {lubridate}パッケージ
27. ### {readr}パッケージ 既存の表形式ファイル読み込み関数の改良 library(readr) read_csv(file, col_types = NULL, locale = default_locale())

# read.csv(colClasses) を基礎とした collector 関数の整備とロケール対応
28. ### {readr}パッケージ 型 関数 省略時の表記 論理型 col_logical() l (logical) 整数型 col_integer()

i (integer) 実数型 col_double() d (double) 文字列型 col_character c (character) 日付型（Ymd表記） col_date(format) D (date) 日時型（[ISO8601]） col_datetime(format, tz) T (time) 時間型 col_time(format) 数値型 col_number() n 要因型 col_factor(levels, ordered) 読み込み対象としない col_skip() _, - collector()
29. ### read_csv("x,y,z\n1,2,a\n3,4,b", col_types = cols(x = "i", y = "c", z

= "_")) {readr}パッケージ Source: local data frame [2 x 2] x y <int> <chr> 1 1 2 2 3 4 変数の型は入力の先頭行から 自動的に推測される #
30. ### library(httr) GET(…) POST(…) library(rvest) xml2::read_html(x = …) %>% html_nodes(xpath =

…) ウェブデータ取得 # httpメソッドに対応した関数群。 認証、httpヘッダー等のサポート html xmlどんと来い
31. ### Rパッケージ開発補助 library(devtools) install_*(…) use_*(…) check(run_dont_test = FALSE, …) build(…) install(…)

CRANリポジトリ外からのパッケージインストール 継続的インテグレーション、gitリポジトリ…インフラ整備 # {roxygen2} {testthat}パッケージ による自動ドキュメント生成、 テスト実行

35. ### library(dplyr) data("Butterflies", package = "gpk") glimpse(Butterflies) {dplyr}パッケージ インド国内の５か所で記録された蝶類の分布データ Observations: 44

Variables: 9 \$ Serial_Number (int) 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, ... \$ Area (fctr) Indian Subcontinent, Western Himalaya, Western Himalaya, Western Himalaya, Western Himalaya, Western Himalaya, Western Himalaya, Western Himal... \$ Locality (fctr) Indian Subcontinent, Western Himalaya, Kangra Hills, Shimla Hills, Dehradun Valley, Mussoorie Hills, Mussoorie Town, Kumaon Hills, Central Him... \$ Total_Species_count (int) 1439, 417, 228, 299, 148, 323, 146, 371, 623, 962, 853, 690, 262, 423, 321, 510, 596, 276, 167, 124, 151, 109, 77, 110, 78, 78, 59, 145, 164, 1... \$ Skippers (int) 307, 63, 25, 41, 22, 54, 14, 52, 125, 211, 189, 159, 27, 67, 119, 98, 132, 59, 32, 18, 19, 15, 11, 12, 14, 11, 4, 22, 19, 23, 54, 60, 20, 37, 4... \$ Swallow_tails (int) 94, 31, 23, 21, 11, 23, 10, 26, 43, 69, 62, 55, 29, 38, 18, 42, 49, 13, 10, 9, 10, 7, 4, 4, 6, 5, 5, 10, 10, 11, 16, 18, 9, 15, 15, 23, 12, 5, ... \$ Whites_Yellows (int) 99, 42, 37, 34, 19, 32, 13, 37, 49, 57, 52, 51, 32, 30, 1, 36, 40, 26, 19, 12, 23, 19, 20, 29, 21, 19, 18, 25, 50, 19, 21, 31, 25, 26, 30, 35, ... \$ Blues (int) 458, 129, 56, 88, 42, 88, 44, 109, 185, 284, 258, 162, 48, 110, 126, 128, 166, 77, 57, 46, 38, 33, 22, 31, 20, 22, 17, 46, 41, 35, 66, 80, 50, ... \$ Brush_Footed (int) 482, 152, 87, 115, 54, 126, 65, 147, 221, 342, 292, 263, 126, 178, 57, 207, 209, 101, 49, 39, 61, 35, 20, 34, 17, 21, 15, 42, 44, 42, 66, 89, 3...
36. ### Butterflies %>% filter(Total_Species_count >= 200 | Area %in% c("Indian Subcontinent",

"North East India+ North Myanmar", "Other parts of India"), grepl("Hills", Locality), between(Serial_Number, 10, 20)) {dplyr}パッケージ
37. ### Butterflies %>% filter(Total_Species_count >= 200 | Area %in% c("Indian Subcontinent",

"North East India+ North Myanmar", "Other parts of India"), grepl("Hills", Locality), between(Serial_Number, 10, 20)) {dplyr}パッケージ 論理和 真偽値を返す関数 範囲を抽出…{dplyr}の補助関数 要素の集合
38. ### 「プリキュア」シリーズのAPI @sue445 req.girls <- GET("https://rubicure.herokuapp.com/ girls.json") %>% content() req.girls %>% class()

# [1] "list" req.girls %>% .[[1]] %>% names() # [1] "girl_name" "human_name" "precure_name" "cast_name" "created_date" "color" "birthday" "transform_message" # [9] "extra_names" "attack_messages" "transform_calls" {dplyr} {purrr} {httr}
39. ### req.girls %>% map_df(., ~ .[c("color")]) %>% group_by(color) %>% tally(sort =

TRUE) Source: local data frame [8 x 2] color n <chr> <int> 1 pink 10 2 blue 9 … 7 green 2 8 black 1 i.e. summarise(n = n()) %>% arrange(-n) {dplyr} {purrr} {httr}

41. ### 脳トレ演習 select helpers var_1 var_2 third_variable 4th_variable start_with("var_") end_with("variable") contains("_") matches("[[:punct:]]")

num_range("var_", 1:2) one_of("third_variable") everything() Your Turn どの変数が選択されるか？
42. ### select helpers var_1 var_2 third_variable 4th_variable start_with("var_") ◦ ◦ ✖ ✖

end_with("variable") ✖ ✖ ◦ ◦ contains("_") ◦ ◦ ◦ ◦ matches("[[:punct:]]") ◦ ◦ ◦ ◦ num_range("var_", 1:2) ◦ ◦ ✖ ✖ one_of("third_variable") ✖ ✖ ◦ ✖ everything() ◦ ◦ ◦ ◦ 脳トレ演習
43. ### point <- c(-2:2, NA) if_else(point < 0, "あし", "よし", "なし")

soum <- c("そうむ", "ほうむ", "ポエム", "のうむ") na_if(soum, "ポエム") nth(soum, 2) ベクトルを対象にした操作 Your Turn 脳トレ演習
44. ### point <- c(-2:2, NA) if_else(point < 0, "あし", "よし", "なし")

# [1] "あし" "あし" "よし" "よし" "よし" "なし" soum <- c("そうむ", "ほうむ", "ポエム", "のうむ") na_if(soum, "ポエム") # [1] "そうむ" "ほうむ" NA "のうむ" nth(soum, 2) # [1] "ほうむ" ベクトルを対象にした操作 脳トレ演習
45. ### library(magrittr); library(stringr) df.q1 <- data_frame( point = c("10/13/13/16", "9/12/8/15")) %\$%

point %>% str_split_fixed(., "/", 4) %>% as.data.frame() %>% set_colnames(str_c("var", 1:4)) 脳トレ演習
46. ### library(tibble) df.q2 <- frame_data( ~fruit, ~sales, "apple/banana/orange", "1/2/4", "orange/melon/applef/pear", "1/3/5/1")

%\$% data_frame( fruit = fruit %>% str_split("/"), sales = sales %>% str_split("/")) 脳トレ演習
47. ### df.q1 var1 var2 var3 var4 1 10 13 13 16

2 9 12 8 15 df.q2 Source: local data frame [2 x 2] fruit sales <list> <list> 1 <chr [3]> <chr [3]> 2 <chr [4]> <chr [4]> library(tidyr) df.q3 <- df.q2 %>% unnest() 脳トレ演習
48. ### df.q3 Source: local data frame [7 x 2] fruit sales

<chr> <chr> 1 apple 1 2 banana 2 3 orange 4 4 orange 1 5 melon 3 6 apple 5 7 pear 1 脳トレ演習

50. ### データが持つ階層性 A B C … … a … … a

… … b … … c … … c … … c } } } data a data data c data data b data C result result a … … b … … c … …
51. ### データが持つ階層性 A B C … … a … … a

… … b … … c … … c … … c } } } data a data data c data data b data C result result a … … b … … c … … tidyr::nest tidyr::unnest purrr::map dplyr::do
52. ### library(tidyr) by_area <- group_by(Butterflies, Area) %>% nest() {dplyr} {tidyr} Source:

local data frame [8 x 2] Area data <fctr> <list> 1 Indian Subcontinent <tbl_df [1,8]> 2 Western Himalaya <tbl_df [7,8]> 3 Central Himalaya <tbl_df [1,8]> 4 North East India+ North Myanmar <tbl_df [9,8]> 5 Other parts of India <tbl_df [21,8]> 6 Sri Lanka <tbl_df [1,8]> 7 North Myanmar <tbl_df [3,8]> 8 South Myanmar <tbl_df [1,8]> # group_by() で指定 したグループごとに tbl_dfクラス オブジェクトとして 格納
53. ### library(purrr) mod_res <- map(by_area\$data, ~ glm(Total_Species_count ~ Skippers + Blues,

data = ., family = poisson)) {dplyr} {purrr} # グループごとに関数を適用（mapping）
54. ### mod_res[[4]] mod_res %>% class() # [1] "list" {dplyr} {purrr} #

Call: glm(formula = Total_Species_count ~ Skippers + Blues, family = poisson, data = .) Coefficients: (Intercept) Skippers Blues 5.380924 0.003370 0.002889 Degrees of Freedom: 8 Total (i.e. Null); 6 Residual Null Deviance: 932.4 Residual Deviance: 100.3 AIC: 178.6 map() の返り値は リストオブジェクト map_df() では データフレーム
55. ### library(broom) (do_mod_res <- group_by(Butterflies, Area) %>% do(mod = glm(Total_Species_count ~

Skippers + Blues, data = ., family = poisson) %>% glance())) do_mod_res %>% class() # [1] "rowwise_df" "tbl_df" "tbl" "data.frame" {dplyr} {broom}
56. ### {dplyr} {broom} do_mod_res %>% unnest() Source: local data frame [8

x 8] Area null.deviance df.null logLik AIC BIC deviance df.residual <fctr> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <int> 1 Central Himalaya 0.0000 0 -4.136346 10.272691 8.272691 0.000000e+00 0 2 Indian Subcontinent 0.0000 0 -4.554848 11.109697 9.109697 -1.723066e-13 0 3 North East India+ North Myanmar 932.3645 8 -86.309184 178.618368 179.210041 1.002874e+02 6 4 North Myanmar 55.5590 2 -10.891251 27.782502 25.078339 2.664535e-14 0 5 Other parts of India 630.0408 20 -94.738568 195.477137 198.610704 4.746547e+01 18 6 South Myanmar 0.0000 0 -4.253793 10.507587 8.507587 5.240253e-14 0 7 Sri Lanka 0.0000 0 -3.663752 9.327503 7.327503 -4.884981e-15 0 8 Western Himalaya 255.5851 6 -33.529065 73.058130 72.895860 1.535052e+01 4 #階層構造を持つデータフレームを再結合