Upgrade to Pro — share decks privately, control downloads, hide ads and more …

66th Tokyo.R Beginner session2

kilometer
December 16, 2017

66th Tokyo.R Beginner session2

発表資料です。

kilometer

December 16, 2017
Tweet

More Decks by kilometer

Other Decks in Technology

Transcript

  1. 演算子 − 肩慣らし − 代入演算子 A <- B A <<- B

    # 代入演算子 # 永続代入演算子
  2. 演算子 − 肩慣らし − 代入演算子 ex_func <- function(){ x <- 600

    x <<- 100 print(x) } # グローバル変数 # ローカル変数 「Rの演算子特集」y__mattu https://ymattu.github.io/JapanR2017/slide.html#/
  3. 演算子 − 肩慣らし − 代入演算子 ex_func() [1] 600 x [1] 100

    ex_func <- function(){ x <- 600 x <<- 100 print(x) }
  4. 演算子 − 肩慣らし − ブール演算子 Boolean Algebra A == B A

    != B A | B A & B A %in% B # equal to # not equal to # or # and # is A in B? https://www.amazon.co.jp/dp/0486600289
  5. パイプ演算子 X %>% f X %>% f(y) X %>% f

    %>% g X %>% f(y, .) f(X) f(X, y) g(f(X)) f(y, X) %>% {magrittr} 「dplyr再入門（基本編）」yutannihilation https://speakerdeck.com/yutannihilation/dplyrzai-ru-men-ji-ben-bian
  6. パイプ演算子 %>% {magrittr} dat1 <- f1(dat0, var1) # それともこう書きますか？ dat2 <-

    f2(dat1, var2) dat3 <- f3(dat2, var3) # こう書きますか？ dat <- f3(f2(f1(dat0, var1), var2), var3)
  7. パイプ演算子 %>% {magrittr} # 本当に、こう書きますか？ dat <- f6(f5(f4(f3(f2(f1(dat0, var1-1, var1-2), var2),

    var3), var4-1, var4-2, var4-3), var5), var6) 入口 出口 思考の流れ 構造の対応
  8. パイプ演算子 %>% {magrittr} # マジで、こう書きますか？ dat <- f6(f5(f4(f3(f2(f1(dat0, var1-1, var1-2), var2),

    var3), var4-1, var4-2, var4-3), var5), var6) 入口 出口 思考の流れ 解読の流れ
  9. パイプ演算子 %>% {magrittr} # となると、こう書きますか？ 入口 出口 dat1 <- f1(dat0, var1)

    dat2 <- f2(dat1, var2) dat3 <- f3(dat2, var3) ① ② ③ 入口 出口 入口 出口 思考の流れ 解読の流れ
  10. パイプ演算子 %>% {magrittr} # うーん、こう書きますか？ 真の入口 仮の出口 dat1 <- f1(dat0, var1)

    dat2 <- f2(dat1, var2) dat3 <- f3(dat2, var3) ① ② ③ 仮の入口 仮の出口 仮の入口 真の出口 思考の流れ 解読の流れ
  11. パイプ演算子 %>% {magrittr} # げげげ、こう書きますか？ 真の入口 dat1 <- f1(dat0, var1-1, var1-2)

    dat2 <- f2(dat1, var2) dat3 <- f3(dat2, var3) dat4 <- f4(var4-1, dat3, var4-2) dat5 <- f5(dat4, var5) dat6 <- f6(dat5, var6) 真の出口 思考の流れ 解読の流れ
  12. パイプ演算子 %>% {magrittr} # パイプ脳のヒトならこう書きます。 dat0 %>% f1(var1-1, var1-2) %>% f2(var2)

    %>% f3(var3) %>% f4(var4-1, ., var4-2) %>% f5(var5) %>% f6(var6) -> dat 入口 出口
  13. パイプ演算子 %>% {magrittr} # こうやって書く事もできます。 dat <- dat0 %>% f1(var1-1, var1-2)

    %>% f2(var2) %>% f3(var3) %>% f4(var4-1, ., var4-2) %>% f5(var5) %>% f6(var6) 入口 出口
  14. パイプ演算子 %>% {magrittr} # こうやって書く事もできます。 dat <- dat0 %>% f1(var1-1, var1-2)

    %>% f2(var2) %>% f3(var3) %>% f4(var4-1, ., var4-2) %>% f5(var5) %>% f6(var6) 入口 出口 ドットがある
  15. パイプ演算子 X %>% f X %>% f(y) X %>% f

    %>% g X %>% f(y, .) f(X) f(X, y) g(f(X)) f(y, X) %>% {magrittr} これ
  16. パイプ演算子 %>% {magrittr} dat <- iris %>% .[, 1:3] %>% prcomp

    iris %>% .[, 1:3] %>% prcomp -> dat “受動態”っぽい “能動態”っぽい BはAがFされたもの AをFするとBになる
  17. パイプ演算子 %>% {magrittr} library(magrittr) iris %>% str 'data.frame': 150 obs. of

    5 variables: $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 ... $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 ... $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 ... $ Species : Factor w/ 3 levels "setosa", ... str(iris)
  18. パイプ演算子 %>% {magrittr} library(magrittr) iris %>% cbind(a = 1:150) %>% str

    'data.frame': 150 obs. of 6 variables: $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7... $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 ... $ Species : Factor w/ 3 levels "setosa", ... $ a : int 1 2 3 4 5 6 7 8 9 10 ...
  19. パイプ演算子 %>% {magrittr} library(magrittr) iris %>% .[, 1:3] %>% prcomp %>%

    str List of 5 $ sdev : num [1:3] 1.921 0.491 0.244 $ rotation: num [1:3, 1:3] 0.39 -0.091 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:3] "Sepal.Length" "Sepal.Width" ... .. ..$ : chr [1:3] "PC1" "PC2" "PC3" $ center : Named num [1:3] 5.84 3.06 3.76 ..- attr(*, "names")= chr [1:3] "Sepal.Length" ... $ scale : logi FALSE $ x : num [1:150, 1:3] -2.49 -2.52 -2.71 -2.56 ...
  20. パイプ演算子 %>% {magrittr} library(magrittr) dat <- iris %>% .[, 1:3] %>%

    prcomp %>% .$x %>% data.frame %T>% plot dat <- iris[, 1:3] dat <- prcomp(dat) dat <- dat$x dat <- data.frame(dat) plot(dat) tee演算子 「副作用を許しながらもchainしていく」dichika http://d.hatena.ne.jp/dichika/20140731/p1
  21. mutate select filter arrange summarise join # カラムの追加 # カラムの選択

    # 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群
  22. It (dplyr) provides simple “verbs” to help you translate your

    thoughts into code. functions that correspond to the most common data manipulation tasks Introduction to dplyr https://cran.r-project.org/web/packages/dplyr/vignettes/dplyr.html verbs {dplyr}
  23. verbs {dplyr} By constraining your options, it helps you think

    about your data manipulation challenges. Introduction to dplyr https://cran.r-project.org/web/packages/dplyr/vignettes/dplyr.html
  24. より多くの制約を課す事で、 魂の足枷から、より自由になる。 Igor Stravinsky И́горь Ф Страви́нский The more constraints

    one imposes, the more one frees one's self of the chains that shackle the spirit. 1882 - 1971 ※ 割と意訳
  25. mutate select filter arrange summarise join # カラムの追加 # カラムの選択

    # 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群
  26. mutate select filter arrange summarise join # カラムの追加 # カラムの選択

    # 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群
  27. library(dplyr) iris %>% mutate(a = 1:nrow(.)) %>% str 'data.frame': 150

    obs. of 6 variables: $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7... $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 ... $ Species : Factor w/ 3 levels "setosa", ... $ a : int 1 2 3 4 5 6 7 8 9 10 ... verbs {dplyr}
  28. library(dplyr) iris %>% mutate(a = 1:nrow(.), a = a *

    5/3 %>% round) 'data.frame': 150 obs. of 6 variables: $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7... $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 ... $ Species : Factor w/ 3 levels "setosa", ... $ a : num 1.67 3.33 5 6.67 8.33 ... ... verbs {dplyr} 上書きされる
  29. library(dplyr) iris %>% select(Sepal.Length, Sepal.Width) 'data.frame': 150 obs. of 2

    variables: $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... verbs {dplyr}
  30. library(dplyr) iris %>% select(contains("Width")) 'data.frame': 150 obs. of 2 variables:

    $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 ... verbs {dplyr} Select help関数
  31. verbs {dplyr} # Select help関数群 starts_with("s") ends_with("s") contains("se") matches("^.e") one_of(c("Sepal.Length",

    "Species")) everything() https://kazutan.github.io/blog/2017/04/dplyr-select-memo/ 「dplyr::selectの活用例メモ」kazutan
  32. mutate select filter arrange summarise join # カラムの追加 # カラムの選択

    # 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群
  33. library(dplyr) iris %>% filter(Species == "versicolor") verbs {dplyr} 'data.frame': 50

    obs. of 5 variables: $ Sepal.Length: num 7 6.4 6.9 5.5 6.5 5.7 6.3 ... $ Sepal.Width : num 3.2 3.2 3.1 2.3 2.8 2.8 ... $ Petal.Length: num 4.7 4.5 4.9 4 4.6 4.5 4.7 ... $ Petal.Width : num 1.4 1.5 1.5 1.3 1.5 1.3 ... $ Species : Factor w/ 3 levels "setosa","versicolor",..: 2 2 2 2 2 2 2 2 2 2 ...
  34. library(dplyr) iris %>% filter(Species == "versicolor") verbs {dplyr} NSE (Non-Standard

    Evaluation) 'data.frame': 50 obs. of 5 variables: $ Sepal.Length: num 7 6.4 6.9 5.5 6.5 5.7 6.3 ... $ Sepal.Width : num 3.2 3.2 3.1 2.3 2.8 2.8 ... $ Petal.Length: num 4.7 4.5 4.9 4 4.6 4.5 4.7 ... $ Petal.Width : num 1.4 1.5 1.5 1.3 1.5 1.3 ... $ Species : Factor w/ 3 levels "setosa","versicolor",..: 2 2 2 2 2 2 2 2 2 2 ...
  35. filter(df, x == "a", y == 1) NSEの話 NSE (Non-Standard

    Evaluation) df[df$x == "a" & df$y == 1, ] SE (Standard Evaluation) http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr
  36. filter(df, x == "a", y == 1) NSEの話 NSEを使うと、 ・dfの名前を何回も書かなくていいよ。

    ・SQLに翻訳する時に楽だよ。 http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr df[df$x == "a" & df$y == 1, ]
  37. filter(df, x == "a", y == 1) NSEの話 NSEを使うと、 ・dfの名前を何回も書かなくていいよ。

    ・SQLに翻訳する時に楽だよ。 http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr 色々あるけどスッキリしているのは正義 (私見) df[df$x == "a" & df$y == 1, ]
  38. filter(df, x == "a", y == 1) NSEの話 NSEを使うと、 df[df$x

    == "a" & df$y == 1, ] http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr 色々あるけどスッキリしているのは正義 (私見) 書きやすく、読みやすく。 思考と実装の距離を近く。 # 動詞的 # 名詞的
  39. df <- data.frame(x = 1:3, y = 1:3) filter(df, x

    == 1) NSEの話 NSEを採用しているので、 http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr my_var <- "x" filter(df, my_var == 1) これが動かない。 dfのmy_varカラムを探しに行く
  40. NSEの話 my_var <- quo(x) filter(df, (!! my_var) == 1) ど〜〜〜してもやりたければ、

    何故こうなるかは、 「dplyr再入門（Tidyeval編）」を参照。 https://speakerdeck.com/yutannihilation/dplyrzai-ru-men-tidyevalbian 「dplyr再入門（Tidyeval編）」yutannihilation
  41. NSEの話 my_var <- quo(x) filter(df, (!! my_var) == 1) ど〜〜〜してもやりたければ、

    何故こうなるかは、 「dplyr再入門（Tidyeval編）」を参照。 https://speakerdeck.com/yutannihilation/dplyrzai-ru-men-tidyevalbian 可読性が上がる？下がる？ それは、あなたと読み手次第。 「dplyr再入門（Tidyeval編）」yutannihilation
  42. mutate select filter arrange summarise join # カラムの追加 # カラムの選択

    # 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群
  43. a <- data.frame(x1 = c(1,2,3), x2 = 10:12) b <-

    data.frame(x1 = c(1,3,5), x3 = 100:102) verbs {dplyr} > left_join(a, b) > right_join(a, b) x1 x2 x3 1 10 100 2 11 NA 3 12 101 x1 x2 x3 1 10 100 3 12 101 5 NA 102 join # 行列の結合
  44. verbs {dplyr} > inner_join(a, b) > semi_join(a, b) x1 x2

    x3 1 10 100 3 12 101 x1 x2 1 10 3 12 join # 行列の結合 a <- data.frame(x1 = c(1,2,3), x2 = 10:12) b <- data.frame(x1 = c(1,3,5), x3 = 100:102)
  45. verbs {dplyr} > anti_join(a, b) x1 x2 2 11 join

    # 行列の結合 a <- data.frame(x1 = c(1,2,3), x2 = 10:12) b <- data.frame(x1 = c(1,3,5), x3 = 100:102) > full_join(a, b) x1 x2 x3 1 10 100 2 11 NA 3 12 101 5 NA 102