kilometer
December 16, 2017
930

# 66th Tokyo.R Beginner session2

## kilometer

December 16, 2017

## Transcript

3. ### Who！？ 名前： @kilometer 職業： ポスドク(工学博士) 専門： 行動センサリング 　 神経イメージング 医用システム工学

R歴： 修士の頃から10年ぐらい。 流行:　和風ハンバーグ

11. ### 演算子− 肩慣らし − 代入演算子 A <- B A <<- B

# 代入演算子 # 永続代入演算子
12. ### 演算子− 肩慣らし − 代入演算子 ex_func <- function(){ x <- 600

x <<- 100 print(x) } # グローバル変数 # ローカル変数 「Rの演算子特集」y__mattu https://ymattu.github.io/JapanR2017/slide.html#/

14. ### 演算子− 肩慣らし − 代入演算子 ex_func() [1] 600 x [1] 100

ex_func <- function(){ x <- 600 x <<- 100 print(x) }
15. ### 演算子− 肩慣らし − ブール演算子 Boolean Algebra A == B A

!= B A | B A & B A %in% B # equal to # not equal to # or # and # is A in B? https://www.amazon.co.jp/dp/0486600289
16. ### パイプ演算子 X %>% f X %>% f(y) X %>% f

%>% g X %>% f(y, .) f(X) f(X, y) g(f(X)) f(y, X) %>% {magrittr} 「dplyr再入門（基本編）」yutannihilation https://speakerdeck.com/yutannihilation/dplyrzai-ru-men-ji-ben-bian

18. ### パイプ演算子%>% {magrittr} dat1 <- f1(dat0, var1) # それともこう書きますか？ dat2 <-

f2(dat1, var2) dat3 <- f3(dat2, var3) # こう書きますか？ dat <- f3(f2(f1(dat0, var1), var2), var3)
19. ### パイプ演算子%>% {magrittr} # え？こう書きます？ dat <- f3(f2(f1(dat0, var1), var2), var3)

ೖޱ ग़ޱ ᶃ ᶄ ᶅ ࢥߟͷྲྀΕ ߏ଄ͷରԠ
20. ### パイプ演算子%>% {magrittr} # あれれ、こう書くんですか？ dat <- f3(f2(f1(dat0, var1), var2), var3)

ೖޱ ग़ޱ ࢥߟͷྲྀΕ ղಡͷྲྀΕ
21. ### パイプ演算子%>% {magrittr} # 本当に、こう書きますか？ dat <- f6(f5(f4(f3(f2(f1(dat0, var1-1, var1-2), var2),

var3), var4-1, var4-2, var4-3), var5), var6) 入口 出口 思考の流れ 構造の対応
22. ### パイプ演算子%>% {magrittr} # マジで、こう書きますか？ dat <- f6(f5(f4(f3(f2(f1(dat0, var1-1, var1-2), var2),

var3), var4-1, var4-2, var4-3), var5), var6) 入口 出口 思考の流れ 解読の流れ
23. ### パイプ演算子%>% {magrittr} # こ、こう書きますか？ dat <- f6(f5(f4(var4-1, f3(f2(f1(dat0, var1-1, var1-2),

var2), var3-2), var4-2, var4-3), var5), var6)
24. ### パイプ演算子%>% {magrittr} # となると、こう書きますか？ 入口 出口 dat1 <- f1(dat0, var1)

dat2 <- f2(dat1, var2) dat3 <- f3(dat2, var3) ① ② ③ 入口 出口 入口 出口 思考の流れ 解読の流れ
25. ### パイプ演算子%>% {magrittr} # うーん、こう書きますか？ 真の入口 仮の出口 dat1 <- f1(dat0, var1)

dat2 <- f2(dat1, var2) dat3 <- f3(dat2, var3) ① ② ③ 仮の入口 仮の出口 仮の入口 真の出口 思考の流れ 解読の流れ
26. ### パイプ演算子%>% {magrittr} # げげげ、こう書きますか？ 真の入口 dat1 <- f1(dat0, var1-1, var1-2)

dat2 <- f2(dat1, var2) dat3 <- f3(dat2, var3) dat4 <- f4(var4-1, dat3, var4-2) dat5 <- f5(dat4, var5) dat6 <- f6(dat5, var6) 真の出口 思考の流れ 解読の流れ
27. ### パイプ演算子%>% {magrittr} # パイプ脳のヒトならこう書きます。 dat0 %>% f1(var1-1, var1-2) %>% f2(var2)

%>% f3(var3) %>% f4(var4-1, ., var4-2) %>% f5(var5) %>% f6(var6) -> dat 入口 出口
28. ### パイプ演算子%>% {magrittr} # こうやって書く事もできます。 dat <- dat0 %>% f1(var1-1, var1-2)

%>% f2(var2) %>% f3(var3) %>% f4(var4-1, ., var4-2) %>% f5(var5) %>% f6(var6) 入口 出口
29. ### パイプ演算子%>% {magrittr} # こうやって書く事もできます。 dat <- dat0 %>% f1(var1-1, var1-2)

%>% f2(var2) %>% f3(var3) %>% f4(var4-1, ., var4-2) %>% f5(var5) %>% f6(var6) 入口 出口 ドットがある
30. ### パイプ演算子 X %>% f X %>% f(y) X %>% f

%>% g X %>% f(y, .) f(X) f(X, y) g(f(X)) f(y, X) %>% {magrittr} これ
31. ### パイプ演算子%>% {magrittr} dat <- iris %>% .[, 1:3] %>% prcomp

iris %>% .[, 1:3] %>% prcomp -> dat “受動態”っぽい “能動態”っぽい BはAがFされたもの AをFするとBになる
32. ### パイプ演算子%>% {magrittr} library(magrittr) iris %>% str 'data.frame': 150 obs. of

5 variables: \$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 ... \$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 ... \$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 ... \$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 ... \$ Species : Factor w/ 3 levels "setosa", ... str(iris)
33. ### パイプ演算子%>% {magrittr} library(magrittr) iris %>% cbind(a = 1:150) %>% str

'data.frame': 150 obs. of 6 variables: \$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 ... \$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... \$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7... \$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 ... \$ Species : Factor w/ 3 levels "setosa", ... \$ a : int 1 2 3 4 5 6 7 8 9 10 ...
34. ### パイプ演算子%>% {magrittr} library(magrittr) iris %>% .[, 1:3] %>% prcomp %>%

str List of 5 \$ sdev : num [1:3] 1.921 0.491 0.244 \$ rotation: num [1:3, 1:3] 0.39 -0.091 ... ..- attr(*, "dimnames")=List of 2 .. ..\$ : chr [1:3] "Sepal.Length" "Sepal.Width" ... .. ..\$ : chr [1:3] "PC1" "PC2" "PC3" \$ center : Named num [1:3] 5.84 3.06 3.76 ..- attr(*, "names")= chr [1:3] "Sepal.Length" ... \$ scale : logi FALSE \$ x : num [1:150, 1:3] -2.49 -2.52 -2.71 -2.56 ...
35. ### パイプ演算子%>% {magrittr} library(magrittr) dat <- iris %>% .[, 1:3] %>%

prcomp %>% .\$x %>% data.frame %T>% plot dat <- iris[, 1:3] dat <- prcomp(dat) dat <- dat\$x dat <- data.frame(dat) plot(dat) tee演算子 「副作用を許しながらもchainしていく」dichika http://d.hatena.ne.jp/dichika/20140731/p1

38. ### mutate select filter arrange summarise join # カラムの追加 # カラムの選択

# 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群
39. ### It (dplyr) provides simple “verbs” to help you translate your

thoughts into code. functions that correspond to the most common data manipulation tasks Introduction to dplyr https://cran.r-project.org/web/packages/dplyr/vignettes/dplyr.html verbs {dplyr}
40. ### dplyrは、あなたの考えをコードに翻訳 するための【動詞】を提供する。 データ操作における基本のキ を、　　　シンプルに実行できる関数 (群) Introduction to dplyr https://cran.r-project.org/web/packages/dplyr/vignettes/dplyr.html verbs

{dplyr} ※ かなり意訳
41. ### verbs S V O C M 関数 オブジェクト 各種引数 それ以外の宣言

(分岐, 繰返, etc) ※ イメージです

※ まさに意訳
45. ### より多くの制約を課す事で、 魂の足枷から、より自由になる。 Igor Stravinsky И́горь Ф Страви́нский The more constraints

one imposes, the more one frees one's self of the chains that shackle the spirit. 1882 - 1971 ※ 割と意訳

47. ### mutate select filter arrange summarise join # カラムの追加 # カラムの選択

# 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群

49. ### mutate select filter arrange summarise join # カラムの追加 # カラムの選択

# 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群

51. ### library(dplyr) iris %>% mutate(a = 1:nrow(.)) %>% str 'data.frame': 150

obs. of 6 variables: \$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 ... \$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... \$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7... \$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 ... \$ Species : Factor w/ 3 levels "setosa", ... \$ a : int 1 2 3 4 5 6 7 8 9 10 ... verbs {dplyr}
52. ### library(dplyr) iris %>% mutate(a = 1:nrow(.), a = a *

5/3 %>% round) 'data.frame': 150 obs. of 6 variables: \$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 ... \$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... \$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7... \$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 ... \$ Species : Factor w/ 3 levels "setosa", ... \$ a : num 1.67 3.33 5 6.67 8.33 ... ... verbs {dplyr} 上書きされる

54. ### library(dplyr) iris %>% select(Sepal.Length, Sepal.Width) 'data.frame': 150 obs. of 2

variables: \$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 ... \$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... verbs {dplyr}
55. ### library(dplyr) iris %>% select(contains("Width")) 'data.frame': 150 obs. of 2 variables:

\$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... \$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 ... verbs {dplyr} Select help関数
56. ### verbs {dplyr} # Select help関数群 starts_with("s") ends_with("s") contains("se") matches("^.e") one_of(c("Sepal.Length",

"Species")) everything() https://kazutan.github.io/blog/2017/04/dplyr-select-memo/ 「dplyr::selectの活用例メモ」kazutan
57. ### mutate select filter arrange summarise join # カラムの追加 # カラムの選択

# 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群

59. ### library(dplyr) iris %>% filter(Species == "versicolor") verbs {dplyr} 'data.frame': 50

obs. of 5 variables: \$ Sepal.Length: num 7 6.4 6.9 5.5 6.5 5.7 6.3 ... \$ Sepal.Width : num 3.2 3.2 3.1 2.3 2.8 2.8 ... \$ Petal.Length: num 4.7 4.5 4.9 4 4.6 4.5 4.7 ... \$ Petal.Width : num 1.4 1.5 1.5 1.3 1.5 1.3 ... \$ Species : Factor w/ 3 levels "setosa","versicolor",..: 2 2 2 2 2 2 2 2 2 2 ...
60. ### library(dplyr) iris %>% filter(Species == "versicolor") verbs {dplyr} NSE (Non-Standard

Evaluation) 'data.frame': 50 obs. of 5 variables: \$ Sepal.Length: num 7 6.4 6.9 5.5 6.5 5.7 6.3 ... \$ Sepal.Width : num 3.2 3.2 3.1 2.3 2.8 2.8 ... \$ Petal.Length: num 4.7 4.5 4.9 4 4.6 4.5 4.7 ... \$ Petal.Width : num 1.4 1.5 1.5 1.3 1.5 1.3 ... \$ Species : Factor w/ 3 levels "setosa","versicolor",..: 2 2 2 2 2 2 2 2 2 2 ...
61. ### filter(df, x == "a", y == 1) NSEの話 NSE (Non-Standard

Evaluation) df[df\$x == "a" & df\$y == 1, ] SE (Standard Evaluation) http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr
62. ### filter(df, x == "a", y == 1) NSEの話 NSEを使うと、 ・dfの名前を何回も書かなくていいよ。

・SQLに翻訳する時に楽だよ。 http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr df[df\$x == "a" & df\$y == 1, ]
63. ### filter(df, x == "a", y == 1) NSEの話 NSEを使うと、 ・dfの名前を何回も書かなくていいよ。

・SQLに翻訳する時に楽だよ。 　　 http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr 色々あるけどスッキリしているのは正義 (私見) df[df\$x == "a" & df\$y == 1, ]
64. ### filter(df, x == "a", y == 1) NSEの話 NSEを使うと、 df[df\$x

== "a" & df\$y == 1, ] http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr 色々あるけどスッキリしているのは正義 (私見) 書きやすく、読みやすく。 思考と実装の距離を近く。 # 動詞的 # 名詞的
65. ### df <- data.frame(x = 1:3, y = 1:3) filter(df, x

== 1) NSEの話 NSEを採用しているので、 http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr my_var <- "x" filter(df, my_var == 1) これ　が動かない。 dfのmy_varカラムを探しに行く
66. ### NSEの話 my_var <- quo(x) filter(df, (!! my_var) == 1) ど〜〜〜してもやりたければ、

何故こうなるかは、 　「dplyr再入門（Tidyeval編）」を参照。 https://speakerdeck.com/yutannihilation/dplyrzai-ru-men-tidyevalbian 「dplyr再入門（Tidyeval編）」yutannihilation
67. ### NSEの話 my_var <- quo(x) filter(df, (!! my_var) == 1) ど〜〜〜してもやりたければ、

何故こうなるかは、 　「dplyr再入門（Tidyeval編）」を参照。 https://speakerdeck.com/yutannihilation/dplyrzai-ru-men-tidyevalbian 可読性が上がる？下がる？ それは、あなたと読み手次第。 「dplyr再入門（Tidyeval編）」yutannihilation
68. ### mutate select filter arrange summarise join # カラムの追加 # カラムの選択

# 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群
69. ### verbs {dplyr} join # 行列の結合 xxx_join関数群 left_join, right_join inner_join, semi_join

full_join anti_join
70. ### a <- data.frame(x1 = c(1,2,3), x2 = 10:12) b <-

data.frame(x1 = c(1,3,5), x3 = 100:102) verbs {dplyr} > left_join(a, b) > right_join(a, b) x1 x2 x3 1 10 100 2 11 NA 3 12 101 x1 x2 x3 1 10 100 3 12 101 5 NA 102 join # 行列の結合
71. ### verbs {dplyr} > inner_join(a, b) > semi_join(a, b) x1 x2

x3 1 10 100 3 12 101 x1 x2 1 10 3 12 join # 行列の結合 a <- data.frame(x1 = c(1,2,3), x2 = 10:12) b <- data.frame(x1 = c(1,3,5), x3 = 100:102)
72. ### verbs {dplyr} > anti_join(a, b) x1 x2 2 11 join

# 行列の結合 a <- data.frame(x1 = c(1,2,3), x2 = 10:12) b <- data.frame(x1 = c(1,3,5), x3 = 100:102) > full_join(a, b) x1 x2 x3 1 10 100 2 11 NA 3 12 101 5 NA 102

in R

77. None