Upgrade to Pro — share decks privately, control downloads, hide ads and more …

66th Tokyo.R Beginner session2

Avatar for kilometer kilometer
December 16, 2017

66th Tokyo.R Beginner session2

発表資料です。

Avatar for kilometer

kilometer

December 16, 2017
Tweet

More Decks by kilometer

Other Decks in Technology

Transcript

  1. 演算子− 肩慣らし − 代入演算子 A <- B A <<- B

    # 代入演算子 # 永続代入演算子
  2. 演算子− 肩慣らし − 代入演算子 ex_func <- function(){ x <- 600

    x <<- 100 print(x) } # グローバル変数 # ローカル変数 「Rの演算子特集」y__mattu https://ymattu.github.io/JapanR2017/slide.html#/
  3. 演算子− 肩慣らし − 代入演算子 ex_func() [1] 600 x [1] 100

    ex_func <- function(){ x <- 600 x <<- 100 print(x) }
  4. 演算子− 肩慣らし − ブール演算子 Boolean Algebra A == B A

    != B A | B A & B A %in% B # equal to # not equal to # or # and # is A in B? https://www.amazon.co.jp/dp/0486600289
  5. パイプ演算子 X %>% f X %>% f(y) X %>% f

    %>% g X %>% f(y, .) f(X) f(X, y) g(f(X)) f(y, X) %>% {magrittr} 「dplyr再入門（基本編）」yutannihilation https://speakerdeck.com/yutannihilation/dplyrzai-ru-men-ji-ben-bian
  6. パイプ演算子%>% {magrittr} dat1 <- f1(dat0, var1) # それともこう書きますか？ dat2 <-

    f2(dat1, var2) dat3 <- f3(dat2, var3) # こう書きますか？ dat <- f3(f2(f1(dat0, var1), var2), var3)
  7. パイプ演算子%>% {magrittr} # 本当に、こう書きますか？ dat <- f6(f5(f4(f3(f2(f1(dat0, var1-1, var1-2), var2),

    var3), var4-1, var4-2, var4-3), var5), var6) 入口 出口 思考の流れ 構造の対応
  8. パイプ演算子%>% {magrittr} # マジで、こう書きますか？ dat <- f6(f5(f4(f3(f2(f1(dat0, var1-1, var1-2), var2),

    var3), var4-1, var4-2, var4-3), var5), var6) 入口 出口 思考の流れ 解読の流れ
  9. パイプ演算子%>% {magrittr} # となると、こう書きますか？ 入口 出口 dat1 <- f1(dat0, var1)

    dat2 <- f2(dat1, var2) dat3 <- f3(dat2, var3) ① ② ③ 入口 出口 入口 出口 思考の流れ 解読の流れ
  10. パイプ演算子%>% {magrittr} # うーん、こう書きますか？ 真の入口 仮の出口 dat1 <- f1(dat0, var1)

    dat2 <- f2(dat1, var2) dat3 <- f3(dat2, var3) ① ② ③ 仮の入口 仮の出口 仮の入口 真の出口 思考の流れ 解読の流れ
  11. パイプ演算子%>% {magrittr} # げげげ、こう書きますか？ 真の入口 dat1 <- f1(dat0, var1-1, var1-2)

    dat2 <- f2(dat1, var2) dat3 <- f3(dat2, var3) dat4 <- f4(var4-1, dat3, var4-2) dat5 <- f5(dat4, var5) dat6 <- f6(dat5, var6) 真の出口 思考の流れ 解読の流れ
  12. パイプ演算子%>% {magrittr} # パイプ脳のヒトならこう書きます。 dat0 %>% f1(var1-1, var1-2) %>% f2(var2)

    %>% f3(var3) %>% f4(var4-1, ., var4-2) %>% f5(var5) %>% f6(var6) -> dat 入口 出口
  13. パイプ演算子%>% {magrittr} # こうやって書く事もできます。 dat <- dat0 %>% f1(var1-1, var1-2)

    %>% f2(var2) %>% f3(var3) %>% f4(var4-1, ., var4-2) %>% f5(var5) %>% f6(var6) 入口 出口
  14. パイプ演算子%>% {magrittr} # こうやって書く事もできます。 dat <- dat0 %>% f1(var1-1, var1-2)

    %>% f2(var2) %>% f3(var3) %>% f4(var4-1, ., var4-2) %>% f5(var5) %>% f6(var6) 入口 出口 ドットがある
  15. パイプ演算子 X %>% f X %>% f(y) X %>% f

    %>% g X %>% f(y, .) f(X) f(X, y) g(f(X)) f(y, X) %>% {magrittr} これ
  16. パイプ演算子%>% {magrittr} dat <- iris %>% .[, 1:3] %>% prcomp

    iris %>% .[, 1:3] %>% prcomp -> dat “受動態”っぽい “能動態”っぽい BはAがFされたもの AをFするとBになる
  17. パイプ演算子%>% {magrittr} library(magrittr) iris %>% str 'data.frame': 150 obs. of

    5 variables: $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 ... $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 ... $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 ... $ Species : Factor w/ 3 levels "setosa", ... str(iris)
  18. パイプ演算子%>% {magrittr} library(magrittr) iris %>% cbind(a = 1:150) %>% str

    'data.frame': 150 obs. of 6 variables: $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7... $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 ... $ Species : Factor w/ 3 levels "setosa", ... $ a : int 1 2 3 4 5 6 7 8 9 10 ...
  19. パイプ演算子%>% {magrittr} library(magrittr) iris %>% .[, 1:3] %>% prcomp %>%

    str List of 5 $ sdev : num [1:3] 1.921 0.491 0.244 $ rotation: num [1:3, 1:3] 0.39 -0.091 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:3] "Sepal.Length" "Sepal.Width" ... .. ..$ : chr [1:3] "PC1" "PC2" "PC3" $ center : Named num [1:3] 5.84 3.06 3.76 ..- attr(*, "names")= chr [1:3] "Sepal.Length" ... $ scale : logi FALSE $ x : num [1:150, 1:3] -2.49 -2.52 -2.71 -2.56 ...
  20. パイプ演算子%>% {magrittr} library(magrittr) dat <- iris %>% .[, 1:3] %>%

    prcomp %>% .$x %>% data.frame %T>% plot dat <- iris[, 1:3] dat <- prcomp(dat) dat <- dat$x dat <- data.frame(dat) plot(dat) tee演算子 「副作用を許しながらもchainしていく」dichika http://d.hatena.ne.jp/dichika/20140731/p1
  21. mutate select filter arrange summarise join # カラムの追加 # カラムの選択

    # 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群
  22. It (dplyr) provides simple “verbs” to help you translate your

    thoughts into code. functions that correspond to the most common data manipulation tasks Introduction to dplyr https://cran.r-project.org/web/packages/dplyr/vignettes/dplyr.html verbs {dplyr}
  23. verbs {dplyr} By constraining your options, it helps you think

    about your data manipulation challenges. Introduction to dplyr https://cran.r-project.org/web/packages/dplyr/vignettes/dplyr.html
  24. より多くの制約を課す事で、 魂の足枷から、より自由になる。 Igor Stravinsky И́горь Ф Страви́нский The more constraints

    one imposes, the more one frees one's self of the chains that shackle the spirit. 1882 - 1971 ※ 割と意訳
  25. mutate select filter arrange summarise join # カラムの追加 # カラムの選択

    # 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群
  26. mutate select filter arrange summarise join # カラムの追加 # カラムの選択

    # 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群
  27. library(dplyr) iris %>% mutate(a = 1:nrow(.)) %>% str 'data.frame': 150

    obs. of 6 variables: $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7... $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 ... $ Species : Factor w/ 3 levels "setosa", ... $ a : int 1 2 3 4 5 6 7 8 9 10 ... verbs {dplyr}
  28. library(dplyr) iris %>% mutate(a = 1:nrow(.), a = a *

    5/3 %>% round) 'data.frame': 150 obs. of 6 variables: $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7... $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 ... $ Species : Factor w/ 3 levels "setosa", ... $ a : num 1.67 3.33 5 6.67 8.33 ... ... verbs {dplyr} 上書きされる
  29. library(dplyr) iris %>% select(Sepal.Length, Sepal.Width) 'data.frame': 150 obs. of 2

    variables: $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 ... $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... verbs {dplyr}
  30. library(dplyr) iris %>% select(contains("Width")) 'data.frame': 150 obs. of 2 variables:

    $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 ... $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 ... verbs {dplyr} Select help関数
  31. verbs {dplyr} # Select help関数群 starts_with("s") ends_with("s") contains("se") matches("^.e") one_of(c("Sepal.Length",

    "Species")) everything() https://kazutan.github.io/blog/2017/04/dplyr-select-memo/ 「dplyr::selectの活用例メモ」kazutan
  32. mutate select filter arrange summarise join # カラムの追加 # カラムの選択

    # 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群
  33. library(dplyr) iris %>% filter(Species == "versicolor") verbs {dplyr} 'data.frame': 50

    obs. of 5 variables: $ Sepal.Length: num 7 6.4 6.9 5.5 6.5 5.7 6.3 ... $ Sepal.Width : num 3.2 3.2 3.1 2.3 2.8 2.8 ... $ Petal.Length: num 4.7 4.5 4.9 4 4.6 4.5 4.7 ... $ Petal.Width : num 1.4 1.5 1.5 1.3 1.5 1.3 ... $ Species : Factor w/ 3 levels "setosa","versicolor",..: 2 2 2 2 2 2 2 2 2 2 ...
  34. library(dplyr) iris %>% filter(Species == "versicolor") verbs {dplyr} NSE (Non-Standard

    Evaluation) 'data.frame': 50 obs. of 5 variables: $ Sepal.Length: num 7 6.4 6.9 5.5 6.5 5.7 6.3 ... $ Sepal.Width : num 3.2 3.2 3.1 2.3 2.8 2.8 ... $ Petal.Length: num 4.7 4.5 4.9 4 4.6 4.5 4.7 ... $ Petal.Width : num 1.4 1.5 1.5 1.3 1.5 1.3 ... $ Species : Factor w/ 3 levels "setosa","versicolor",..: 2 2 2 2 2 2 2 2 2 2 ...
  35. filter(df, x == "a", y == 1) NSEの話 NSE (Non-Standard

    Evaluation) df[df$x == "a" & df$y == 1, ] SE (Standard Evaluation) http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr
  36. filter(df, x == "a", y == 1) NSEの話 NSEを使うと、 ・dfの名前を何回も書かなくていいよ。

    ・SQLに翻訳する時に楽だよ。 http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr df[df$x == "a" & df$y == 1, ]
  37. filter(df, x == "a", y == 1) NSEの話 NSEを使うと、 ・dfの名前を何回も書かなくていいよ。

    ・SQLに翻訳する時に楽だよ。 　　 http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr 色々あるけどスッキリしているのは正義 (私見) df[df$x == "a" & df$y == 1, ]
  38. filter(df, x == "a", y == 1) NSEの話 NSEを使うと、 df[df$x

    == "a" & df$y == 1, ] http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr 色々あるけどスッキリしているのは正義 (私見) 書きやすく、読みやすく。 思考と実装の距離を近く。 # 動詞的 # 名詞的
  39. df <- data.frame(x = 1:3, y = 1:3) filter(df, x

    == 1) NSEの話 NSEを採用しているので、 http://dplyr.tidyverse.org/articles/programming.html Programming with dplyr my_var <- "x" filter(df, my_var == 1) これ　が動かない。 dfのmy_varカラムを探しに行く
  40. NSEの話 my_var <- quo(x) filter(df, (!! my_var) == 1) ど〜〜〜してもやりたければ、

    何故こうなるかは、 　「dplyr再入門（Tidyeval編）」を参照。 https://speakerdeck.com/yutannihilation/dplyrzai-ru-men-tidyevalbian 「dplyr再入門（Tidyeval編）」yutannihilation
  41. NSEの話 my_var <- quo(x) filter(df, (!! my_var) == 1) ど〜〜〜してもやりたければ、

    何故こうなるかは、 　「dplyr再入門（Tidyeval編）」を参照。 https://speakerdeck.com/yutannihilation/dplyrzai-ru-men-tidyevalbian 可読性が上がる？下がる？ それは、あなたと読み手次第。 「dplyr再入門（Tidyeval編）」yutannihilation
  42. mutate select filter arrange summarise join # カラムの追加 # カラムの選択

    # 行の絞り込み # 行の並び替え # 値の集約 # 行列の結合 {dplyr} verbs verb関数群
  43. a <- data.frame(x1 = c(1,2,3), x2 = 10:12) b <-

    data.frame(x1 = c(1,3,5), x3 = 100:102) verbs {dplyr} > left_join(a, b) > right_join(a, b) x1 x2 x3 1 10 100 2 11 NA 3 12 101 x1 x2 x3 1 10 100 3 12 101 5 NA 102 join # 行列の結合
  44. verbs {dplyr} > inner_join(a, b) > semi_join(a, b) x1 x2

    x3 1 10 100 3 12 101 x1 x2 1 10 3 12 join # 行列の結合 a <- data.frame(x1 = c(1,2,3), x2 = 10:12) b <- data.frame(x1 = c(1,3,5), x3 = 100:102)
  45. verbs {dplyr} > anti_join(a, b) x1 x2 2 11 join

    # 行列の結合 a <- data.frame(x1 = c(1,2,3), x2 = 10:12) b <- data.frame(x1 = c(1,3,5), x3 = 100:102) > full_join(a, b) x1 x2 x3 1 10 100 2 11 NA 3 12 101 5 NA 102