Slide 1

Slide 1 text

data_wrangling() && ("manipulation" %in% R) %>% %>% %>% > day[3] Ruan van Mazijk

Slide 2

Slide 2 text

Notes & slides will go up here: tinyurl.com/r-with-ruan (But I encourage you to make your own notes!)

Slide 3

Slide 3 text

> workshop$outline[1:3] DAY 1 Tidy data principles & tidyr DAY 2 Manipulating data & an intro to dplyr DAY 3 Extending your data with mutate(), summarise() & friends

Slide 4

Slide 4 text

> workshop$outline[2:3] DAY 2 Manipulating data & an intro to dplyr DAY 3 Extending your data with mutate(), summarise() & friends

Slide 5

Slide 5 text

dplyr:: # Verbs to manipulate your data select() # operates on columns filter() # operates on rows

Slide 6

Slide 6 text

data %>%

Slide 7

Slide 7 text

data %>% gather(key = veg_type, value = fix) %>%

Slide 8

Slide 8 text

data %>% gather(key = veg_type, value = fix) %>% separate(fix, into = c("lon", "lat")) %>%

Slide 9

Slide 9 text

data %>% gather(key = veg_type, value = fix) %>% separate(fix, into = c("lon", "lat")) %>% select(veg_type, lon, lat, soil, plant_height) %>%

Slide 10

Slide 10 text

data %>% gather(key = veg_type, value = fix) %>% separate(fix, into = c("lon", "lat")) %>% select(veg_type, lon, lat, soil, plant_height) %>% filter(plant_height %>% between(0.5, 10),

Slide 11

Slide 11 text

data %>% gather(key = veg_type, value = fix) %>% separate(fix, into = c("lon", "lat")) %>% select(veg_type, lon, lat, soil, plant_height) %>% filter(plant_height %>% between(0.5, 10), veg_type %in% c("fynbos", "strandveld", "renosterveld"))

Slide 12

Slide 12 text

data %>% gather(key = veg_type, value = fix) %>% separate(fix, into = c("lon", "lat")) %>% select(veg_type, lon, lat, soil, plant_height) %>% filter(plant_height %>% between(0.5, 10), veg_type %in% c("fynbos", "strandveld", "renosterveld")) Summary statistics for each vegetation type?

Slide 13

Slide 13 text

data %>% gather(key = veg_type, value = fix) %>% separate(fix, into = c("lon", "lat")) %>% select(veg_type, lon, lat, soil, plant_height) %>% filter(plant_height %>% between(0.5, 10), veg_type %in% c("fynbos", "strandveld", "renosterveld")) %>% ???() Summary statistics for each vegetation type?

Slide 14

Slide 14 text

dplyr:: # Verbs to manipulate your data select() # operates on columns filter() # operates on rows

Slide 15

Slide 15 text

dplyr:: # Verbs to extend your data mutate() # operates on columns group_by() # operates on rows summarise() # rows & columns

Slide 16

Slide 16 text

data %>% mutate(...) CC BY SA RStudio https://www.rstudio.com/resources/cheatsheets/

Slide 17

Slide 17 text

data %>% mutate(...)

Slide 18

Slide 18 text

data %>% mutate(...) data %>% mutate(BMI = weight / height^2)

Slide 19

Slide 19 text

data %>% mutate(...) data %>% mutate(BMI = weight / height^2) data %>% mutate(BMI = weight / height^2, BMI_std = scale(BMI))

Slide 20

Slide 20 text

data %>% mutate_all(...) CC BY SA RStudio https://www.rstudio.com/resources/cheatsheets/

Slide 21

Slide 21 text

data %>% mutate_all(.funs, ...) data %>% mutate_all(scale) data %>% mutate_all(list(log, log1p))

Slide 22

Slide 22 text

data %>% mutate_if(.predicate, .funs) CC BY SA RStudio https://www.rstudio.com/resources/cheatsheets/

Slide 23

Slide 23 text

data %>% mutate_if(.predicate, .funs, ...) data %>% mutate_if(is.numeric, scale) data %>% mutate_if(is.numeric, list(log, log1p))

Slide 24

Slide 24 text

dplyr:: # Verbs to extend your data mutate() # operates on columns group_by() # operates on rows summarise() # rows & columns

Slide 25

Slide 25 text

dplyr:: # Verbs to extend your data mutate() # operates on columns group_by() # operates on rows summarise() # rows & columns

Slide 26

Slide 26 text

CC BY SA RStudio https://www.rstudio.com/resources/cheatsheets/ data

Slide 27

Slide 27 text

CC BY SA RStudio https://www.rstudio.com/resources/cheatsheets/ data %>% group_by(veg_type)

Slide 28

Slide 28 text

CC BY SA RStudio https://www.rstudio.com/resources/cheatsheets/ data %>% group_by(veg_type) %>% summarise(mean_plant_height = mean(plant_height))

Slide 29

Slide 29 text

data %>% group_by(veg_type) %>% summarise(mean_plant_height = mean(plant_height),

Slide 30

Slide 30 text

data %>% group_by(veg_type) %>% summarise(mean_plant_height = mean(plant_height), st_plant_height = sd(plant_height))

Slide 31

Slide 31 text

data %>% group_by(veg_type) %>% summarise(mean_plant_height = mean(plant_height), st_plant_height = sd(plant_height)) data %>% group_by(veg_type) %>% summarise_if(is.numeric, mean)

Slide 32

Slide 32 text

data %>% group_by(veg_type) %>% summarise(mean_plant_height = mean(plant_height), st_plant_height = sd(plant_height)) data %>% group_by(veg_type) %>% summarise_if(is.numeric, mean) data %>% group_by(veg_type) %>% summarise_if(is.numeric, mean, na.rm = TRUE)

Slide 33

Slide 33 text

data %>% group_by(veg_type) %>% summarise(mean_plant_height = mean(plant_height), st_plant_height = sd(plant_height)) data %>% group_by(veg_type) %>% summarise_if(is.numeric, mean) data %>% group_by(veg_type) %>% summarise_if(is.numeric, mean, na.rm = TRUE) data %>% group_by(veg_type) %>% summarise_if(is.numeric, list(mean, sd))

Slide 34

Slide 34 text

> demo()