Davis Vaughan
October 08, 2019
# slide-almanac.pdf

A tour of two new R packages, {slide} and {almanac}.

{slide} empowers you to perform arbitrary rolling computations, like cumulative functions, rolling averages, and rolling regressions.

{almanac} provides the tools to construct recurrence rules and schedules, and then adjust dates relative to them. This allows you to shift dates by "3 business days".

Together, you can perform computations that are highly relevant in a business setting, such as "a rolling average over the past 20 business days".

## Transcript

1. Moving Averages and Calendars
Davis Vaughan
Software Engineer, RStudio
October 2019

2. Window Functions
Schedules

3. Window What?

function applied
=
mean()

function applied
=
mean()
function applied
=
sd()

7. Function applied
is arbitrary

8. Function applied
is arbitrary
rolling regression = lm()

9. Types of windows:
1) Sliding
2) Expanding

Moving averages, rolling regressions…
Cumulative sums, expanding window regression…
14. In R?

So many attempts:
- zoo::rollapply()
- tibbletime::rollify()
- tsibble::slide() / stretch()
- data.table::frollapply() (2019-10-03)

16. {slide}

slide(1:4, ~.x, .before = 2)
3
2
1
[[3]]
2
1
[[2]]
4
3
2
[[4]]
1
[[1]]

3
2
1
[[3]]
NULL
[[2]]
4
3
2
[[4]]
NULL
[[1]]
slide(1:4, ~.x, .before = 2, .complete = TRUE)
Ignore partial results

slide(1:4, ~.x, .before = 1, .after = 1)
4
3
2
[[3]]
4
3
[[4]]
3
2
1
[[2]]
2
1
[[1]]
Center alignment

slide(1:4, ~.x, .before = Inf)
3
2
1
[[3]]
2
1
[[2]]
4
3
2
1
[[4]]
1
[[1]]
Cumulative sliding

slide()
slide_dbl()
slide_int()
...
Type Stability

sales_vec
[1] 2 4 6 2
slide_dbl(sales_vec, mean, .before = 2)
[1] 2 3 4 4

sales_vec
[1] 2 4 6 2
slide_dbl(sales_vec, mean, .before = 2)
[1] 2 3 4 4

sales_vec
[1] 2 4 6 2
slide_dbl(sales_vec, mean, .before = 2)
[1] 2 3 4 4

sales_vec
[1] 2 4 6 2
slide_dbl(sales_vec, mean, .before = 2)
[1] 2 3 4 4

sales_vec
[1] 2 4 6 2
slide_dbl(sales_vec, mean, .before = 2)
[1] 2 3 4 4

index_vec <- as.Date("2019-08-29") + c(0, 1, 5, 6)
wday_vec <- wday(index_vec, label = TRUE)
company <- tibble(
sales = sales_vec,
index = index_vec,
wday = wday_vec
)
  index      sales wday
1 2019-08-29     2 Thu
2 2019-08-30     4 Fri
3 2019-09-03     6 Tue
4 2019-09-04     2 Wed

“3 day rolling average?”
4 2 Wed
2019-09-04
1
3
2
2019-09-03 Tue
6
2019-08-30 Fri
4
2 Thu
2019-08-29
sales wday
index

“3 day rolling average?”

3
6
4
roll_day
2
4 2 Wed
2019-09-04
1
3
2
2019-09-03 Tue
6
2019-08-30 Fri
4
2 Thu
2019-08-29
sales wday
index

company <- company %>%
mutate(
roll_row = slide_dbl(sales, mean, .before = 2)
)
roll_row

3
4
4
2

3
6
4
roll_day
2
4 2 Wed
2019-09-04
1
3
2
2019-09-03 Tue
6
2019-08-30 Fri
4
2 Thu
2019-08-29
sales wday
index

# Construct a regular index
full_index <- expand(
company,
index = full_seq(index, 1)
)
# Join with original data
company_full_raw <- left_join(
full_index,
company
)
# Slide over this, then filter back down
company_three_day <- company_full_raw %>%
mutate(
roll_day = slide_dbl(
sales,
mean,
na.rm = TRUE,
.before = 2
)
) %>%
filter(
index %in% company$index
)
Solution?
7 2019-09-04
2
6 6 2019-09-03
2019-09-02
5 NA
4 NA 2019-09-01
1
3
2
2019-08-31
NA
2019-08-30
4
2 2019-08-29
sales index

3
6
4
roll_day
2
4 2 2019-09-04
1
3
2
2019-09-03
6
2019-08-30
4
2 2019-08-29
sales index

company_full_raw
company_three_day

# Construct a regular index
full_index <- expand(
company,
index = full_seq(index, 1)
)
# Join with original data
company_full_raw <- left_join(
full_index,
company
)
# Slide over this, then filter back down
company_three_day <- company_full_raw %>%
mutate(
roll_day = slide_dbl(
sales,
mean,
na.rm = TRUE,
.before = 2
)
) %>%
filter(
index %in% company$index
)
Solution?
7 2019-09-04
2
6 6 2019-09-03
2019-09-02
5 NA
4 NA 2019-09-01
1
3
2
2019-08-31
NA
2019-08-30
4
2 2019-08-29
sales index

3
6
4
roll_day
2
4 2 2019-09-04
1
3
2
2019-09-03
6
2019-08-30
4
2 2019-08-29
sales index

company_full_raw
company_three_day
I
JUST
WANT
A
3
DAY
AVERAGE

37. slide(.x, .f, …)
slide_index(.x, .i, .f, …)

slide_index(
.x = wday_vec,
.i = index_vec,
.f = ~.x,
.before = days(2)
)
Tue
[[3]]
Fri
Thu
[[2]]
Wed
Tue
[[4]]
Thu
[[1]]
slide(
.x = wday_vec,
.f = ~.x,
.before = 2
)
Tue
Fri
Thu
[[3]]
Fri
Thu
[[2]]
Wed
Tue
Fri
[[4]]
Thu
[[1]]

company <- company %>%
mutate(
roll_day = slide_index_dbl(sales, index, mean, .before = days(2))
)

3
6
4
roll_day
2
4 2 Wed
2019-09-04
1
3
2
2019-09-03 Tue
6
2019-08-30 Fri
4
2 Thu
2019-08-29
sales wday
index

“3 day rolling average?”

“3 day rolling average?”

roll_day

3
4
6
2

3
5
4
roll_bday
2
4 2 Wed
2019-09-04
1
3
2
2019-09-03 Tue
6
2019-08-30 Fri
4
2 Thu
2019-08-29
sales wday
index

3 bday = [Fri, Mon, Tue]
3 day = [Sun, Mon, Tue]

calendar <- (weekends + holidays)
company <- company %>%
mutate(
roll_day = slide_index_dbl(sales, index, mean, .before = days(2))
)
company <- company %>%
mutate(
roll_bday = slide_index_dbl(sales, index, mean, .before = bdays(2, calendar))
)
Ideally

calendar <- (weekends + holidays)
company <- company %>%
mutate(
roll_day = slide_index_dbl(sales, index, mean, .before = days(2))
)
company <- company %>%
mutate(
roll_bday = slide_index_dbl(sales, index, mean, .before = bdays(2, calendar))
)
Ideally
“knows” about custom holidays and weekends

calendar <- (weekends + holidays)
company <- company %>%
mutate(
roll_day = slide_index_dbl(sales, index, mean, .before = days(2))
)
company <- company %>%
mutate(
roll_bday = slide_index_dbl(sales, index, mean, .before = bdays(2, calendar))
)
Ideally
“knows” about custom holidays and weekends
“adjusts” dates relative to the calendar

47. {almanac}

48. Recurrence rule:
A set of conditions that define a
recurring event, such as a weekend
or holiday.

on_labor_day <- yearly() %>%
recur_on_ymonth("September") %>%
recur_on_wday("Monday", nth = 1)

50. speakerdeck.com/davisvaughan/slide-almanac
on_labor_day <- yearly() %>%
recur_on_ymonth("September") %>%
recur_on_wday("Monday", nth = 1)
Base frequency of the event

on_labor_day <- yearly() %>%
recur_on_ymonth("September") %>%
recur_on_wday("Monday", nth = 1)
Base frequency of the event
Recurrence conditions

on_labor_day <- yearly() %>%
recur_on_ymonth("September") %>%
recur_on_wday("Monday", nth = 1)
Base frequency of the event
Recurrence conditions
sch_in(c("2019-09-02", "2019-09-03"), on_labor_day)
#> [1] TRUE FALSE

on_labor_day <- yearly() %>%
recur_on_ymonth("September") %>%
recur_on_wday("Monday", nth = 1)
Base frequency of the event
Recurrence conditions
sch_in(c("2019-09-02", "2019-09-03"), on_labor_day)
#> [1] TRUE FALSE
sch_seq("2017-01-01", "2019-12-31", on_labor_day)
#> [1] "2017-09-04" "2018-09-03" "2019-09-02"

54. Schedule:
A collection of recurrence rules,
required dates, and exclusion
dates.

on_labor_day <- yearly() %>%
recur_on_ymonth("September") %>%
recur_on_wday("Monday", nth = 1)
on_christmas <- yearly() %>%
recur_on_ymonth("December") %>%
recur_on_mday(25)
on_weekends <- weekly() %>%
recur_on_weekends()
on_weekends_or_holidays <- schedule() %>%

sch_seq("2019-09-01", "2019-12-31", on_weekends_or_holidays)
#> [1] "2019-09-01" "2019-09-02" "2019-09-07" "2019-09-08" "2019-09-14"
#> [6] "2019-09-15" "2019-09-21" "2019-09-22" "2019-09-28" "2019-09-29"
#> ...
#> [31] "2019-12-14" "2019-12-15" "2019-12-21" "2019-12-22" "2019-12-25"
#> [36] "2019-12-28" "2019-12-29"

* These will probably move to their own package
hldy_christmas()
hldy_easter()
hldy_thanksgiving()
...
calendar_us_federal()
calendar_us_nyse()
Prebuilt holidays and calendars

* These will probably move to their own package
hldy_christmas()
hldy_easter()
hldy_thanksgiving()
...
calendar_us_federal()
calendar_us_nyse()
Prebuilt holidays and calendars
Particularly
challenging!

# A Monday
labor_day <- "2019-09-02"
# Find the next business day?
# - Sees labor day, adjust by 1 day
# - Lands on 2019-09-03, done!
#> [1] "2019-09-03"
# - Sees labor day, adjust by -1 day
# - Lands on 2019-09-01, a Sunday, adjust by -1 day
# - Lands on 2019-08-31, a Saturday, adjust by -1 day
# - Lands on 2019-08-30, done!
#> [1] "2019-08-30"

# A Monday
labor_day <- "2019-09-02"
# Find the next business day?
# - Sees labor day, adjust by 1 day
# - Lands on 2019-09-03, done!
#> [1] "2019-09-03"
# - Sees labor day, adjust by -1 day
# - Lands on 2019-09-01, a Sunday, adjust by -1 day
# - Lands on 2019-08-31, a Saturday, adjust by -1 day
# - Lands on 2019-08-30, done!
#> [1] "2019-08-30"

# A Monday
labor_day <- "2019-09-02"
# Find the next business day?
# - Sees labor day, adjust by 1 day
# - Lands on 2019-09-03, done!
#> [1] "2019-09-03"
# - Sees labor day, adjust by -1 day
# - Lands on 2019-09-01, a Sunday, adjust by -1 day
# - Lands on 2019-08-31, a Saturday, adjust by -1 day
# - Lands on 2019-08-30, done!
#> [1] "2019-08-30"

# A Monday
labor_day <- "2019-09-02"
# Find the next business day?
# - Sees labor day, adjust by 1 day
# - Lands on 2019-09-03, done!
#> [1] "2019-09-03"
# - Sees labor day, adjust by -1 day
# - Lands on 2019-09-01, a Sunday, adjust by -1 day
# - Lands on 2019-08-31, a Saturday, adjust by -1 day
# - Lands on 2019-08-30, done!
#> [1] "2019-08-30"
This can also be a function

63. Modified following:
The first business day after x is
chosen, unless it falls in a
different month, in which case the
first business day before x is
chosen.

on_15th_and_last <- monthly() %>%
recur_on_mday(c(15, -1))
payments <- tibble(
dates = sch_seq("2019-09-01", "2019-12-31", on_15th_and_last),
wday = wday(dates, label = TRUE)
)
2019-12-31 2019-12-31 Tue
8 Tue
2019-12-15
7 Sun Mon
2019-12-16
6 2019-11-30 Sat Fri
2019-11-29
2019-11-15 Fri 2019-11-15 Fri
5

Mon
Thu
Tue
Mon

2019-09-30
2019-10-15
2019-10-31
2019-09-16
4 Thu
2019-10-31
1
3
2
2019-10-15 Tue
2019-09-30 Mon
Sun
2019-09-15
wday
dates

2019-12-31 2019-12-31 Tue
8 Tue
2019-12-15
7 Sun Mon
2019-12-16
6 2019-11-30 Sat Fri
2019-11-29
2019-11-15 Fri 2019-11-15 Fri
5

Mon
Thu
Tue
Mon

2019-09-30
2019-10-15
2019-10-31
2019-09-16
4 Thu
2019-10-31
1
3
2
2019-10-15 Tue
2019-09-30 Mon
Sun
2019-09-15
wday
dates

on_weekends <- weekly() %>% recur_on_weekends()
payments %>%
mutate(
)

friday_before_labor_day <- "2019-08-30"
# Move forward two business days?
# - Steps forward 1 day to Saturday 2019-08-31
# - Steps forward 1 day to Wednesday 2019-09-04
sch_step(
friday_before_labor_day,
n = 2,
schedule = on_weekends_or_holidays
)
#> [1] "2019-09-04"

friday_before_labor_day <- "2019-08-30"
# Move forward two business days?
# - Steps forward 1 day to Saturday 2019-08-31
# - Steps forward 1 day to Wednesday 2019-09-04
sch_step(
friday_before_labor_day,
n = 2,
schedule = on_weekends_or_holidays
)
#> [1] "2019-09-04"

68. {slide} + {almanac}

calendar <- (weekends + holidays)
company <- company %>%
mutate(
roll_day = slide_index_dbl(sales, index, mean, .before = days(2))
)
company <- company %>%
mutate(
roll_bday = slide_index_dbl(sales, index, mean, .before = bdays(2, calendar))
)
Ideally

calendar <- (weekends + holidays)
company <- company %>%
mutate(
roll_day = slide_index_dbl(sales, index, mean, .before = days(2))
)
company <- company %>%
mutate(
roll_bday = slide_index_dbl(sales, index, mean, .before = bdays(2, calendar))
)
Ideally
We can make this with {almanac}

calendar <- (weekends + holidays)
company <- company %>%
mutate(
roll_day = slide_index_dbl(sales, index, mean, .before = days(2))
)
company <- company %>%
mutate(
roll_bday = slide_index_dbl(sales, index, mean, .before = bdays(2, calendar))
)
Ideally
We can make this with {almanac}
This doesn’t exist yet, but would use sch_step()

73. slide_index(.x, .i, .f, …)
slide_between(.x, .i, .starts, .stops, .f, …)

company <- company %>%
mutate(
roll_day = slide_index_dbl(sales, index, mean, .before = days(2))
)
company <- company %>%
mutate(
starts = index - days(2),
stops = index,
roll_day = slide_between_dbl(sales, index, mean, .starts = starts, .stops = stops)
)

3
6
4
roll_day
2
4 2 Wed
2019-09-04
1
3
2
2019-09-03 Tue
6
2019-08-30 Fri
4
2 Thu
2019-08-29
sales wday
index

company <- company %>%
mutate(
roll_day = slide_index_dbl(sales, index, mean, .before = days(2))
)
company <- company %>%
mutate(
starts = index - days(2),
stops = index,
roll_day = slide_between_dbl(sales, index, mean, .starts = starts, .stops = stops)
)

3
6
4
roll_day
2
4 2 Wed
2019-09-04
1
3
2
2019-09-03 Tue
6
2019-08-30 Fri
4
2 Thu
2019-08-29
sales wday
index

This is where we solve our problem

company <- company %>%
mutate(
starts = sch_step(index, n = -2, schedule = on_weekends),
stops = index,
roll_bday = slide_between_dbl(sales, index, mean, .starts = starts, .stops = stops)
)
  index      sales wday  roll_day roll_bday
1 2019-08-29     2 Thu          2         2
2 2019-08-30     4 Fri          3         3
3 2019-09-03     6 Tue          6         5
4 2019-09-04     2 Wed          4         4

79. In conclusion…

80. {slide} for window functions
slide_index() to roll relative to an index

81. {slide} for window functions
slide_index() to roll relative to an index
{almanac} to build schedules and adjust dates

82. {slide} for window functions
slide_index() to roll relative to an index
{slide} + {almanac}
=
Flexible rolling computations!
{almanac} to build schedules and adjust dates

83. Special Thanks
JavaScript: rrule
https://github.com/jakubroztocil/rrule
James Laird-Smith: gs
https://github.com/jameslairdsmith/gs
Jeroen Ooms: V8
https://github.com/jeroen/V8

84. Questions?
{almanac}
GitHub
https://github.com/DavisVaughan/almanac
Website
https://davisvaughan.github.io/almanac
{slide}
GitHub
https://github.com/DavisVaughan/slide
Website
https://davisvaughan.github.io/slide