July 23, 2022
7.7k

# 回帰分析ではlm()ではなくestimatr::lm_robust()を使おう / TokyoR100

2022年7月23日に行われた、第100回R勉強会@東京（#TokyoR）での発表資料です。
https://tokyor.connpass.com/event/253867/

https://github.com/dropout009/TokyoR100

July 23, 2022

## Transcript

1. lm()
estimatr::lm_robust()
2022/07/23
100 R @ #TokyoR
@dropout009

2. TVISION INSIGHTS
Speaker Deck: dropout009
Blog: https://dropout009.hatenablog.com/

3. R
lm()
estimatr::lm_robust()

4. lm()

5. lm()
𝑌 = 𝑋 + 𝑈
𝑋 ∼ Uniform −2, 2
𝑈 ∼ 𝒩(0, 1)
N <- 50
df <- tibble(
x = runif(N, -2, 2),
u = rnorm(N, 0, 1),
y = x + e
)
𝑈 𝑋

6. 𝑌!
= 𝛼 + 𝛽𝑋!
+ 𝜖!
lm()
p
lm()
> df %>%
+ lm(y ~ x, data = .) %>%
+ tidy()
# A tibble: 2 × 5
term estimate std.error statistic p.value

1 (Intercept) -0.0542 0.146 -0.371 7.12e- 1
2 x 0.990 0.115 8.58 2.93e-11
"
𝛽

7. N <- 50
M <- 5000
df <- tibble(
m = rep(seq_len(M), each = N),
x = runif(N * M, -2, 2),
u = rnorm(N * M, 0, 1),
y = x + u
)
df_result <- df %>%
nest(data = c(x, u, y)) %>%
mutate(result = map(data, ~ tidy(lm(y ~ x, data = .)))) %>%
select(m, result) %>%
unnest(c(m, result)) %>%
filter(term == "x")
𝑌!
" , 𝑋!
"
!#\$
%
\$
𝛼("), "
𝛽(")
𝑌 = 𝑋 + 𝑈
𝑋 ∼ Uniform −2, 2
𝑈 ∼ 𝒩(0, 1)
"
𝛽(")

8. 6
𝛽(")

• SD &
𝛽 ! , … , &
𝛽(#)
• *
SE &
𝛽 %
&
𝛽(%)

9. lm()

*
SE &
𝛽 %

10. lm()

11. 𝑈 𝑋
𝑌 = 𝑋 + 𝑈
𝑋 ∼ Uniform −2, 2
𝑈 ∼ 𝒩(0, 𝑋,)
N <- 50
df_heterogeneous <- tibble(
x = runif(N, -2, 2),
u = rnorm(N, 0, abs(x)),
y = x + u
)
𝑈 𝑋
𝑋

12. N <- 50
M <- 5000
df_simulation_heterogeneous <- tibble(
m = rep(seq_len(M), each = N),
x = runif(N * M, -2, 2),
u = rnorm(N * M, 0, abs(x)),
y = x + u
)
df_result_heterogeneous <- df_simulation_heterogeneous %>%
nest(data = c(x, u, y)) %>%
mutate(result = map(data, ~ tidy(lm(y ~ x, data = .)))) %>%
select(m, result) %>%
unnest(c(m, result)) %>%
filter(term == "x")

13. lm()

*
SE &
𝛽 %

14. lm()
lm()

15. lm()

16. lm()
𝑌 = 𝑿;𝜷 + 𝑈
𝑿 𝜷 𝐾×1
𝑿 = 𝑋<, … , 𝑋=, … , 𝑋>
;
𝜷 = (𝛽<, … , 𝛽=, … , 𝛽>)′
𝔼 𝑈 ∣ 𝑿 = 0 𝜎?(𝑿)
𝜎? 𝑿 = Var 𝑈 ∣ 𝑿 = 𝔼 𝑈? ∣ 𝑿
𝔼 𝑌? < ∞, 𝔼 𝑿 < ∞, 𝔼 𝑿𝑿′ > 0

17. 𝑌!, 𝑿! , … , 𝑌&, 𝑿& , … , 𝑌', 𝑿'
/
𝜷 = argmin
𝒃
8
&)!
'
𝑌& − 𝑿&
*𝒃 + = 8
&)!
'
𝑿&𝑿&
*
,!
8
&)!
'
𝑿&𝑌&
/
𝜷
/
𝜷
@
𝜷 = B
!"#
\$
𝑿!
𝑿!
%

B
!"#
\$
𝑿!
𝑌!
= B
!"#
\$
𝑿!
𝑿!
%

B
!"#
\$
𝑋!
𝑿!
%𝜷 + 𝑈!
= 𝜷 + B
!"#
\$
𝑿!
𝑿!
%

B
!"#
\$
𝑿!
𝑈!

18. 𝔼 /
𝜷 ∣ 𝑿𝟏, … , 𝑿𝑵 = 𝔼 𝛽 + 8
&)!
'
𝑿&𝑿&
*
,!
8
&)!
'
𝑿&𝑈& ∣ 𝑿𝟏, … , 𝑿𝑵
= 𝜷 + 8
&)!
'
𝑿&𝑿&
*
,!
8
&)!
'
𝑿&𝔼 𝑈& ∣ 𝑿&
= 𝜷

19. Var @
𝜷 ∣ 𝑿#
, … , 𝑿\$
= Var 𝜷 + B
!"#
\$
𝑿!
𝑿!
%

B
!"#
\$
𝑿!
𝑈!
∣ 𝑿#
, … , 𝑿\$
= B
!"#
\$
𝑿!
𝑿!
%

B
!"#
\$
𝑿!
𝑿!
%Var 𝑈!
∣ 𝑿!
B
!"#
\$
𝑿!
𝑿!
%

= B
!"#
\$
𝑿!
𝑿!
%

B
!"#
\$
𝑿!
𝑿!
%𝔼 𝑈!
' ∣ 𝑿!
B
!"#
\$
𝑿!
𝑿!
%

= B
!"#
\$
𝑿!
𝑿!
%

B
!"#
\$
𝑿!
𝑿!
%𝜎!
' B
!"#
\$
𝑿!
𝑿!
%

𝜎&
+ = 𝜎+ 𝑿& = 𝔼 𝑈&
+ ∣ 𝑿&
𝑖 ≠ 𝑗
Cov 𝑈!
, 𝑈"
∣ 𝑿#
, … , 𝑿\$
= 𝔼 𝑈!
𝑈"
∣ 𝑿#
, … , 𝑿\$
= 𝔼 𝑈!
∣ 𝑿!
𝔼 𝑈"
∣ 𝑿"
= 0

20. 𝑁 𝜎&
+ 𝜎&
+ = 𝜎+
Var @
𝜷 ∣ 𝑿#
, … , 𝑿\$
= B
!"#
\$
𝑿!
𝑿!
%

B
!"#
\$
𝑿!
𝑿!
%𝜎!
' B
!"#
\$
𝑿!
𝑿!
%

≈ B
!"#
\$
𝑿!
𝑿!
%

B
!"#
\$
𝑿!
𝑿!
%𝜎' B
!"#
\$
𝑿!
𝑿!
%

= 𝜎' B
!"#
\$
𝑿!
𝑿!
%

𝜎+
𝑠' =
1
𝑁 − 𝐾
B
!"#
\$
@
𝑈!
' =
1
𝑁 − 𝐾
B
!"#
\$
𝑌!
− 𝑿!
% @
𝜷 '
N
Var @
𝜷 ∣ 𝑿#
, … , 𝑿\$
= 𝑠' B
!"#
\$
𝑿!
𝑿!
%

lm()

21. 𝑌 = 𝛼 + 𝛽𝑋 + 𝑈
C
𝛼 = D
𝑌 − &
𝛽𝑋
&
𝛽 =
∑&)!
' 𝑋& − D
𝑋 𝑌& − D
𝑌
∑&)!
' 𝑋& − D
𝑋 +
&
𝛽
Var &
𝛽 ∣ 𝑋!, … , 𝑋' =
∑&)!
' 𝑋& − D
𝑋 +𝜎&
+
∑&)!
' 𝑋& − D
𝑋 + +
=
1
∑&)!
' 𝑋& − D
𝑋 +
∑&)!
' 𝑋& − D
𝑋 +𝜎&
+
∑&)!
' 𝑋& − D
𝑋 +
Var &
𝛽 ∣ 𝑋!, … , 𝑋' ≈
1
∑&)!
' 𝑋& − D
𝑋 +
𝜎+
𝜎&
+ 𝜎+

𝑋!
− 1
𝑋 %

22. 3
𝑌 = 𝑋 + 𝑈
𝑋 ∼ Uniform {1, 2, 3}
𝑈 ∼ 𝒩 0, 𝜎? 𝑋
𝜎& 𝑋 = &
16& if 𝑋 = 1
16& if 𝑋 = 2
16& if 𝑋 = 3
𝜎& 𝑋 = &
1& if 𝑋 = 1
31& if 𝑋 = 2
16& if 𝑋 = 3
𝜎& 𝑋 = &
31& if 𝑋 = 1
1& if 𝑋 = 2
16& if 𝑋 = 3
𝑈 𝑋

23. 話 ⾒
話 ⾒

24. lm()
lm()
0.70 0.70
0.52 0.70
0.84 0.69
lm()

25. Robust Standard Errors

26. Var @
𝜷 ∣ 𝑿#
, … , 𝑿\$
= B
!"#
\$
𝑿!
𝑿!
%

B
!"#
\$
𝑿!
𝑿!
%𝔼 𝑈!
' ∣ 𝑿!
B
!"#
\$
𝑿!
𝑿!
%

⾒ 𝑠+
𝑠' =
1
𝑁 − 𝐾
B
!"#
\$
@
𝑈!
'
𝔼 𝑈&
+ ∣ 𝑿& 𝑠+ /
𝑈&
+
N
Var @
𝜷 ∣ 𝑿#
, … , 𝑿\$
= B
!"#
\$
𝑿!
𝑿!
%

B
!"#
\$
𝑿!
𝑿!
% @
𝑈!
' B
!"#
\$
𝑿!
𝑿!
%

Heteroskedasticity-Consistent Standard Errors Robust Standard Errors
Robust SE HC0
'
𝑈!
" 1
𝑁
#
\$
∑!%#
\$ 𝑿!𝑿!
& '
𝑈!
"

27. R Robust SE
estimatr::lm_robust()

28. 𝑌 = 𝑋 + 𝑈
𝑋 ∼ Uniform −2, 2
𝑈 ∼ 𝒩(0, 𝑋,)
N <- 50
df_heterogeneous <- tibble(
x = runif(N, -2, 2),
u = rnorm(N, 0, abs(x)),
y = x + u
)
𝑈 𝑋
𝑋

29. estimatr::lm_robust() se_type Robust SE
Robust SE
estimatr::lm_robust()
> df_heterogeneous %>%
+ estimatr::lm_robust(y ~ x, data = ., se_type = "classical") %>%
+ tidy()
term estimate std.error statistic p.value conf.low conf.high df outcome
1 (Intercept) 0.3456885 0.1594783 2.167621 3.517795e-02 0.02503593 0.666341 48 y
2 x 0.8999665 0.1531487 5.876423 3.869962e-07 0.59204038 1.207893 48 y
> df_heterogeneous %>%
+ estimatr::lm_robust(y ~ x, data = ., se_type = "HC0") %>%
+ tidy()
term estimate std.error statistic p.value conf.low conf.high df outcome
1 (Intercept) 0.3456885 0.1563591 2.210862 0.031843538 0.03130734 0.6600696 48 y
2 x 0.8999665 0.2389852 3.765784 0.000453233 0.41945455 1.3804784 48 y
lm()
Robust

30. Robust SE
• classical Robust SE HC0
• Robust SE 50
*
SE &
𝛽 %

31. • HC0
• HC0 - HC3 ⾒
HC2 HC3 HC0 < HC2 < HC3 HC3
• estimatr::lm_robust() HC2 Stata HC1
ℎ!!
= 𝑿!
& 3
!'#
\$
𝑿!
𝑿!
&
(#
𝑿!
leverage 𝑋!
0 ≤ ℎ!!
≤ 1 HC0Hansen(2022)
HC0 3
!'#
\$
𝑿!
𝑿!
&
(#
3
!'#
\$
𝑿!
𝑿!
& 6
𝑈!
% 3
!'#
\$
𝑿!
𝑿!
&
(#
HC1 𝑁
𝑁 − 𝐾
3
!'#
\$
𝑿!
𝑿!
&
(#
3
!'#
\$
𝑿!
𝑿!
& 6
𝑈!
% 3
!'#
\$
𝑿!
𝑿!
&
(#
HC2 3
!'#
\$
𝑿!
𝑿!
&
(#
3
!'#
\$
𝑿!
𝑿!
&
6
𝑈!
%
1 − ℎ!!
3
!'#
\$
𝑿!
𝑿!
&
(#
HC3 3
!'#
\$
𝑿!
𝑿!
&
(#
3
!'#
\$
𝑿!
𝑿!
&
6
𝑈!
%
1 − ℎ!!
%
3
!'#
\$
𝑿!
𝑿!
&
(#

32. Robust SE
• HC0-HC3
• HC1-HC3 HC0
HC3
*
SE &
𝛽 %

33. • R lm() lm()
• Robust SE
Robust
SE
• R estimatr lm_robust() Robust SE
sandwich vcovHC() lm_robust() lm()
• Robust SE HC0, HC1, HC2, HC3
⾒ HC2 HC3

34. • Hansen, Bruce E. "Econometrics." (2022). https://www.ssc.wisc.edu/~bhansen/econometrics/.
• Blair G, Cooper J, Coppock A, Humphreys M, Sonnet L. (2022). estimatr: Fast Estimators for Design-
Based Inference. https://declaredesign.org/r/estimatr/. https://github.com/DeclareDesign/estimatr.

35. R