Slide 1

Slide 1 text

INTRO TO THE TIDYVERSE: DATA MANIPULATION USING dplyr — OMAYMA SAID (OmaymaS)

Slide 2

Slide 2 text

The Tidyverse Source: https://imgur.com/a/l7fNwP1

Slide 3

Slide 3 text

The Tidyverse Source: https://imgur.com/a/l7fNwP1

Slide 4

Slide 4 text

id minion leader type age missions_ internal missions_ external 101 yellow 5 60 2 102 yellow 6 55 10 108 purple 10 48 3 120 purple 16 49 1 100 yellow 3 54 4 > minions dataframe/tbl

Slide 5

Slide 5 text

id minion leader type age missions_ internal missions_ external 101 yellow 5 60 2 102 yellow 6 55 10 108 purple 10 48 3 120 purple 16 49 1 100 yellow 3 54 4 VARIABLES OBSERVATIONS

Slide 6

Slide 6 text

kevin <-

Slide 7

Slide 7 text

kevin <- kevin_new <- rotate(kevin, direction = "clockwise", angle = 90) object function arguments

Slide 8

Slide 8 text

kevin_new <- rotate(kevin, direction = "clockwise", angle = 90) object function arguments What is the value of kevin_new? kevin <-

Slide 9

Slide 9 text

kevin_new kevin <- kevin_new <- rotate(kevin, direction = "clockwise", angle = 90) object function arguments

Slide 10

Slide 10 text

A grammar of data manipulation

Slide 11

Slide 11 text

id minion leader type age missions_ internal missions_ external 101 yellow 5 60 2 102 yellow 6 55 10 108 purple 10 48 3 120 purple 16 49 1 100 yellow 3 54 4 > minions

Slide 12

Slide 12 text

select() Return a subset of columns

Slide 13

Slide 13 text

select(minions, id, age) dataframe Columns to select

Slide 14

Slide 14 text

id minion leader type age missions_ internal missions_ external 101 yellow 5 60 2 102 yellow 6 55 10 108 purple 10 48 3 120 purple 16 49 1 100 yellow 3 54 4 id age 101 5 102 6 108 10 120 16 100 3 select(minions, id, age) New dataframe/tbl

Slide 15

Slide 15 text

select(minions, -missions_external) dataframe Column to exclude

Slide 16

Slide 16 text

id minion leader type age missions_ internal 101 yellow 5 60 102 yellow 6 55 108 purple 10 48 120 purple 16 49 100 yellow 3 54 select(minions, -missions_external)

Slide 17

Slide 17 text

select(minions, id:leader) dataframe Range of columns to select

Slide 18

Slide 18 text

id minion leader 101 102 108 120 100 select(minions, id:leader)

Slide 19

Slide 19 text

filter() Return a subset of rows

Slide 20

Slide 20 text

filter(minions, type == "yellow") dataframe Condition

Slide 21

Slide 21 text

id minion leader type age missions_internal missions_external 101 yellow 5 60 2 102 yellow 6 55 10 100 yellow 3 54 4 filter(minions, type == "yellow")

Slide 22

Slide 22 text

MORE CONDITIONS: == equal; > greater than; < less than; >= greater than or equal; <= less than or equal; != not equal. COMBINE WITH: & (AND), | (OR), or a comma (,)

Slide 23

Slide 23 text

filter(minions, type == "yellow", age > 3) dataframe Multiple Conditions

Slide 24

Slide 24 text

id minion leader type age missions_internal missions_external 101 yellow 5 60 2 102 yellow 6 55 10 filter(minions, type == "yellow", age > 3)

Slide 25

Slide 25 text

mutate() add/modify columns

Slide 26

Slide 26 text

mutate(minions, missions = missions_internal + missions_external) dataframe expression New column name

Slide 27

Slide 27 text

id minion leader type age missions_internal missions_external missions 101 yellow 5 60 2 62 102 yellow 6 55 10 65 108 purple 10 48 3 51 120 purple 16 49 1 50 100 yellow 3 54 4 58 mutate(minions, missions = missions_internal + missions_external)

Slide 28

Slide 28 text

summarize() Calculate aggregate measures for groups

Slide 29

Slide 29 text

summarize(minions, age_median = median(age)) expression New column name dataframe

Slide 30

Slide 30 text

summarize(minions, age_median = median(age)) age_median 6 id minion leader type age missions_ internal missions_ external 101 yellow 5 60 2 102 yellow 6 55 10 108 purple 10 48 3 120 purple 16 49 1 100 yellow 3 54 4

Slide 31

Slide 31 text

summarize(minions, age_median = median(age), missions_internal_all = sum(missions_internal), missions_external_all = sum(missions_external)) Multiple expressions

Slide 32

Slide 32 text

group_by() Group by one or more variables

Slide 33

Slide 33 text

minions %>% group_by(leader) %>% summarize(missions_internal_all = sum(missions_internal), missions_external_all = sum(missions_external)) New column name Expression dataframe group

Slide 34

Slide 34 text

minions %>% group_by(leader) %>% summarize(missions_internal_all = sum(missions_internal), missions_external_all = sum(missions_external)) leader missions_internal_all missions_external_all 169 16 97 4

Slide 35

Slide 35 text

arrange() Reorder rows based on variables

Slide 36

Slide 36 text

arrange(minions, missions_internal) dataframe Column name

Slide 37

Slide 37 text

id minion leader type age missions_ internal missions_ external 108 purple 10 48 3 120 purple 16 49 1 100 yellow 3 54 4 102 yellow 6 55 10 101 yellow 5 60 2 arrange(minions, missions_internal) DEFAULT Ascending

Slide 38

Slide 38 text

id minion leader type age missions_ internal missions_ external 101 yellow 5 60 2 102 yellow 6 55 10 100 yellow 3 54 4 120 purple 16 49 1 108 purple 10 48 3 arrange(minions, desc(missions_internal))

Slide 39

Slide 39 text

%>% The Pipe

Slide 40

Slide 40 text

<- %>% rotate("clockwise", 90) object function <- rotate( , "clockwise", 90) arguments object function arguments pipe =

Slide 41

Slide 41 text

<- scale( , 0.25) 1 Successive commands

Slide 42

Slide 42 text

<- scale( , 0.25) 1 2 <- rotate( , "clockwise", 90) Successive commands

Slide 43

Slide 43 text

<- scale( , 0.25) <- rotate( , "clockwise", 90) <- clone( , 1) 1 2 3 Successive commands

Slide 44

Slide 44 text

<- scale( , 0.25) 1 2 <- rotate( , "clockwise", 90) <- clone( , 1) 3 Successive commands

Slide 45

Slide 45 text

<- clone(rotate(scale( , 0.25), "clockwise", 90), 1) One-line commands

Slide 46

Slide 46 text

k %>% scale(0.25) %>% rotate("clockwise", 90) %>% clone(1) <- Piped commands

Slide 47

Slide 47 text

MISSION ACCOMPLISHED