A workshop for beginners on the #tidyverse, focusing on data manipulation using #dplyr along with hands-on exercises.
Delivered at DataFest Tbilisi 2018.
INTRO TO THE TIDYVERSE: DATA MANIPULATION USING dplyr — OMAYMA SAID (OmaymaS)
View Slide
The TidyverseSource: https://imgur.com/a/l7fNwP1
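> minions   (dataframe/tbl)
id  | minion  | leader  | type   | age | missions_internal | missions_external
101 | (image) | (image) | yellow | 5   | 60                | 2
102 | (image) | (image) | yellow | 6   | 55                | 10
108 | (image) | (image) | purple | 10  | 48                | 3
120 | (image) | (image) | purple | 16  | 49                | 1
100 | (image) | (image) | yellow | 3   | 54                | 4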
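VARIABLES (columns) · OBSERVATIONS (rows)
id  | minion  | leader  | type   | age | missions_internal | missions_external
101 | (image) | (image) | yellow | 5   | 60                | 2
102 | (image) | (image) | yellow | 6   | 55                | 10
108 | (image) | (image) | purple | 10  | 48                | 3
120 | (image) | (image) | purple | 16  | 49                | 1
100 | (image) | (image) | yellow | 3   | 54                | 4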
kevin <-
kevin <- · kevin_new <- rotate(kevin, direction = "clockwise", angle = 90) · labels: object, function, arguments
kevin_new <- rotate(kevin, direction = "clockwise", angle = 90) · labels: object, function, arguments · What is the value of kevin_new? · kevin <-
kevin_new · kevin <- · kevin_new <- rotate(kevin, direction = "clockwise", angle = 90) · labels: object, function, arguments
A grammar of data manipulation
id minion leader type age missions_internalmissions_external101 yellow 5 60 2102 yellow 6 55 10108 purple 10 48 3120 purple 16 49 1100 yellow 3 54 4> minions
select()Return a subset of columns
select(minions, id, age) — dataframe: minions; columns to select: id, age
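id  | minion  | leader  | type   | age | missions_internal | missions_external
101 | (image) | (image) | yellow | 5   | 60                | 2
102 | (image) | (image) | yellow | 6   | 55                | 10
108 | (image) | (image) | purple | 10  | 48                | 3
120 | (image) | (image) | purple | 16  | 49                | 1
100 | (image) | (image) | yellow | 3   | 54                | 4

select(minions, id, age)   → new dataframe/tbl
id  | age
101 | 5
102 | 6
108 | 10
120 | 16
100 | 3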
select(minions, -missions_external)dataframe Column to exclude
id minion leader type age missions_internal101 yellow 5 60102 yellow 6 55108 purple 10 48120 purple 16 49100 yellow 3 54select(minions, -missions_external)
select(minions, id:leader) — dataframe: minions; range of columns to select: id:leader
id minion leader101102108120100select(minions, id:leader)
filter()Return a subset of rows
filter(minions, type == "yellow") — dataframe: minions; condition: type == "yellow"
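filter(minions, type == "yellow")
id  | minion  | leader  | type   | age | missions_internal | missions_external
101 | (image) | (image) | yellow | 5   | 60                | 2
102 | (image) | (image) | yellow | 6   | 55                | 10
100 | (image) | (image) | yellow | 3   | 54                | 4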
MORE CONDITIONS — == equal; > greater than; < less than; >= greater than or equal; <= less than or equal; != not equal. COMBINE WITH — & (AND); | (OR); or a comma (,)
filter(minions, type == "yellow", age > 3) — dataframe: minions; multiple conditions: type == "yellow", age > 3
filter(minions, type == "yellow", age > 3)
id  | minion  | leader  | type   | age | missions_internal | missions_external
101 | (image) | (image) | yellow | 5   | 60                | 2
102 | (image) | (image) | yellow | 6   | 55                | 10
mutate()add/modify columns
mutate(minions, missions = missions_internal + missions_external) — dataframe: minions; new column name: missions; expression: missions_internal + missions_external
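mutate(minions, missions = missions_internal + missions_external)
id  | minion  | leader  | type   | age | missions_internal | missions_external | missions
101 | (image) | (image) | yellow | 5   | 60                | 2                 | 62
102 | (image) | (image) | yellow | 6   | 55                | 10                | 65
108 | (image) | (image) | purple | 10  | 48                | 3                 | 51
120 | (image) | (image) | purple | 16  | 49                | 1                 | 50
100 | (image) | (image) | yellow | 3   | 54                | 4                 | 58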
summarize()Calculate aggregate measures for groups
summarize(minions, age_median = median(age)) — dataframe: minions; new column name: age_median; expression: median(age)
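summarize(minions, age_median = median(age))
age_median
6

id  | minion  | leader  | type   | age | missions_internal | missions_external
101 | (image) | (image) | yellow | 5   | 60                | 2
102 | (image) | (image) | yellow | 6   | 55                | 10
108 | (image) | (image) | purple | 10  | 48                | 3
120 | (image) | (image) | purple | 16  | 49                | 1
100 | (image) | (image) | yellow | 3   | 54                | 4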
summarize(minions,
          age_median = median(age),
          missions_internal_all = sum(missions_internal),
          missions_external_all = sum(missions_external))
Multiple expressions
group_by()Group by one or more variables
minions %>%
  group_by(leader) %>%
  summarize(missions_internal_all = sum(missions_internal),
            missions_external_all = sum(missions_external))
Labels: new column name, expression, dataframe, group
minions %>%
  group_by(leader) %>%
  summarize(missions_internal_all = sum(missions_internal),
            missions_external_all = sum(missions_external))

leader  | missions_internal_all | missions_external_all
(image) | 169                   | 16
(image) | 97                    | 4
arrange()Reorder rows based on variables
arrange(minions, missions_internal)dataframe Column name
id minion leader type age missions_internalmissions_external108 purple 10 48 3120 purple 16 49 1100 yellow 3 54 4102 yellow 6 55 10101 yellow 5 60 2arrange(minions, missions_internal)DEFAULTAscending
id minion leader type age missions_internalmissions_external101 yellow 5 60 2102 yellow 6 55 10100 yellow 3 54 4120 purple 16 49 1108 purple 10 48 3arrange(minions, desc(missions_internal))
%>%The Pipe
<- %>% rotate(“clockwise”, 90)object function<- rotate( , “clockwise”, 90)argumentsobjectfunction argumentspipe=
<- scale( , 0.25)1Successive commands
<- scale( , 0.25)12 <- rotate( , “clockwise”, 90)Successive commands
<- scale( , 0.25)<- rotate( , “clockwise”, 90)<- clone( , 1)123Successive commands
<- scale( , 0.25)12 <- rotate( , “clockwise”, 90)<- clone( , 1)3Successive commands
<- clone(rotate(scale( , 0.25), "clockwise", 90), 1) — One-line commands
k %>%scale(0.25) %>%rotate("clockwise", 90) %>%clone(1)<-Piped commands
MISSION ACCOMPLISHED