Illustration showing different flavors of robust standard errors. Load the libraries and dataset, then recode. Dummy coding is not strictly necessary, but it makes constructing the X matrix easier. Using the High School & Beyond (hsb) dataset.
library(mlmRev) #has the hsb dataset
## Loading required package: lme4
## Loading required package: Matrix
library(summarytools) #for descriptives
library(jtools) #for output
library(dplyr) #for pipes and selecting
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(sandwich) #robust SEs
library(lmtest) #for coeftest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Copy the Hsb82 data (from mlmRev) and lower-case the column names for
# convenience; 7185 students nested in 160 schools.
hsb <- Hsb82
names(hsb) <- tolower(names(hsb))
dim(hsb)
## [1] 7185 8
head(hsb)
## school minrty sx ses mach meanses sector cses
## 1 1224 No Female -1.528 5.876 -0.434383 Public -1.09361702
## 2 1224 No Female -0.588 19.708 -0.434383 Public -0.15361702
## 3 1224 No Male -0.528 20.349 -0.434383 Public -0.09361702
## 4 1224 No Male -0.668 8.781 -0.434383 Public -0.23361702
## 5 1224 No Male -0.158 17.898 -0.434383 Public 0.27638298
## 6 1224 No Male 0.022 4.583 -0.434383 Public 0.45638298
# Dummy-code the two factors: private = 1 for Catholic-sector schools,
# female = 1 for female students.
hsb$private <- ifelse(hsb$sector == "Public", 0, 1)
hsb$female <- ifelse(hsb$sx == "Male", 0, 1)
freq(hsb$private) #summarytools frequency table: roughly a 50/50 split
## Frequencies
## hsb$private
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 0 3642 50.69 50.69 50.69 50.69
## 1 3543 49.31 100.00 49.31 100.00
## <NA> 0 0.00 100.00
## Total 7185 100.00 100.00 100.00 100.00
Run an OLS regression predicting math achievement. NOTE: private is a level 2, school-level variable.
# Baseline OLS fit. NOTE: private varies only at the school (level-2) level,
# so its conventional OLS standard error ignores within-school clustering --
# this is the SE that the cluster-robust versions below will inflate.
m1 <- lm(mach ~ ses + female + private, data = hsb)
summ(m1, digits = 3) #jtools output with conventional (OLS) standard errors
## MODEL INFO:
## Observations: 7185
## Dependent Variable: mach
## Type: OLS linear regression
##
## MODEL FIT:
## F(3,7181) = 454.392, p = 0.000
## R² = 0.160
## Adj. R² = 0.159
##
## Standard errors: OLS
## Est. S.E. t val. p
## (Intercept) 12.521 0.131 95.691 0.000 ***
## ses 2.884 0.097 29.586 0.000 ***
## female -1.404 0.149 -9.393 0.000 ***
## private 1.963 0.152 12.949 0.000 ***
###
Reproduce results using matrix algebra:
\(B = (X^TX)^{-1}X^TY\)
# Pull the design matrix from the fitted model (intercept + 3 predictors),
# then reproduce the OLS coefficients via the normal equations.
X <- model.matrix(m1)
head(X)
## (Intercept) ses female private
## 1 1 -1.528 1 0
## 2 1 -0.588 1 0
## 3 1 -0.528 0 0
## 4 1 -0.668 0 0
## 5 1 -0.158 0 0
## 6 1 0.022 0 0
#regression coefficients
###
# B = (X'X)^{-1} X'y -- matches coef(m1) exactly.
Bs <- solve(t(X) %*% X) %*% t(X) %*% hsb$mach
Bs
## [,1]
## (Intercept) 12.520715
## ses 2.884130
## female -1.403538
## private 1.963150
Now for the standard errors: method 1
\(\hat{\sigma}^2 = \frac{u^Tu}{n - k}\), where \(k\) is the number of estimated coefficients including the intercept (the model rank), matching the code's denominator of `nobs(m1) - m1$rank`.
where \(u\) is the residual (or \(y - XB\)).
# Method 1: classical residual variance, sigma^2 = u'u / (n - k).
u <- resid(m1)
num <- t(u) %*% u #numerator: sum of squared residuals (1 x 1 matrix)
den <- nobs(m1) - m1$rank #denominator: residual degrees of freedom (7181)
(num/den) %>% sqrt #residual standard error (manually)
## [,1]
## [1,] 6.307042
summary(m1)$sigma #residual standard error (computed using lm)
## [1] 6.307042
var2 <- as.numeric(num/den) #from matrix, convert to numeric (scalar sigma^2)
To generate the variance covariance matrix: \(Var(B) = \sigma^2(X^TX)^{-1}\). The square root of the diagonal are the standard errors.
# Classical variance-covariance matrix of the coefficients:
# Var(B) = sigma^2 (X'X)^{-1}; SEs are the square roots of its diagonal.
vce <- var2 * solve(t(X) %*% X)
ses <- sqrt(diag(vce)) #standard errors
cbind(B = Bs, ses, t = round(Bs/ses, 3)) #matches the lm() summary below
## ses
## (Intercept) 12.520715 0.13084584 95.691
## ses 2.884130 0.09748345 29.586
## female -1.403538 0.14942396 -9.393
## private 1.963150 0.15160531 12.949
summary(m1)
##
## Call:
## lm(formula = mach ~ ses + female + private, data = hsb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.9037 -4.6864 0.2104 4.8645 17.1868
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.52071 0.13085 95.691 <2e-16 ***
## ses 2.88413 0.09748 29.586 <2e-16 ***
## female -1.40354 0.14942 -9.393 <2e-16 ***
## private 1.96315 0.15161 12.949 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.307 on 7181 degrees of freedom
## Multiple R-squared: 0.1595, Adjusted R-squared: 0.1592
## F-statistic: 454.4 on 3 and 7181 DF, p-value: < 2.2e-16
Reproducing the same matrix using the general sandwich form, Method 2:
\((X^TX)^{-1} X^T \hat{\Omega} X (X^TX)^{-1}\). Under homoskedasticity, \(\hat{\Omega}\) is an \(n \times n\) diagonal matrix with a constant \(\sigma^2\) on the diagonal.
######### correct::: doing this using a general form::: METHOD 2
### bread * meat * bread
#constant variance on the diagonal
# With Omega = sigma^2 * I, the sandwich collapses to the classical formula;
# building it explicitly sets up the heteroskedastic versions that follow.
n <- nobs(m1)
mm <- diag(var2, nrow = n) #creating a matrix with the constant variance on the diagonal
mm[1:5, 1:5] #just taking a peek at the first five rows/columns
## [,1] [,2] [,3] [,4] [,5]
## [1,] 39.77878 0.00000 0.00000 0.00000 0.00000
## [2,] 0.00000 39.77878 0.00000 0.00000 0.00000
## [3,] 0.00000 0.00000 39.77878 0.00000 0.00000
## [4,] 0.00000 0.00000 0.00000 39.77878 0.00000
## [5,] 0.00000 0.00000 0.00000 0.00000 39.77878
mt <- t(X) %*% mm %*% X #the meat
br <- solve(crossprod(X)) #this is the same as solve(t(X) %*% X)
(vce3 <- br %*% mt %*% br) #bread %*% meat %*% bread
## (Intercept) ses female private
## (Intercept) 0.0171206345 0.0008451577 -0.0115724586 -0.0110969152
## ses 0.0008451577 0.0095030234 0.0010249165 -0.0028145087
## female -0.0115724586 0.0010249165 0.0223275203 -0.0004476095
## private -0.0110969152 -0.0028145087 -0.0004476095 0.0229841695
vce #the classical matrix from Method 1 -- identical
## (Intercept) ses female private
## (Intercept) 0.0171206345 0.0008451577 -0.0115724586 -0.0110969152
## ses 0.0008451577 0.0095030234 0.0010249165 -0.0028145087
## female -0.0115724586 0.0010249165 0.0223275203 -0.0004476095
## private -0.0110969152 -0.0028145087 -0.0004476095 0.0229841695
vcov(m1) #all the same :: this is automatically computed using the vcov function
## (Intercept) ses female private
## (Intercept) 0.0171206345 0.0008451577 -0.0115724586 -0.0110969152
## ses 0.0008451577 0.0095030234 0.0010249165 -0.0028145087
## female -0.0115724586 0.0010249165 0.0223275203 -0.0004476095
## private -0.0110969152 -0.0028145087 -0.0004476095 0.0229841695
In the case where homoskedasticity is not a reasonable assumption, we can create a heteroskedasticity-consistent covariance matrix (HCCM).
########### Heteroscedasticity Consistent SEs
u <- resid(m1) #just recreating
# The diagonal of u u' is simply the vector of squared residuals, so build the
# diagonal matrix directly from u^2. The original formed the full n x n outer
# product u %*% t(u) (~7185^2 doubles, roughly 400 MB) only to extract its
# diagonal -- same result, vastly less memory.
uu <- diag(as.vector(u)^2) #squared residuals (differing variances) on the diagonal
uu[1:5, 1:5] #taking a peek, differing variances on the diagonal
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.6959333 0.000 0.00000 0.000000 0.00000
## [2,] 0.0000000 105.816 0.00000 0.000000 0.00000
## [3,] 0.0000000 0.000 87.44319 0.000000 0.00000
## [4,] 0.0000000 0.000 0.00000 3.287388 0.00000
## [5,] 0.0000000 0.000 0.00000 0.000000 34.02363
Unlike the first matrix with constant variance, the variance is allowed to vary per observation. That new matrix is used in the meat matrix.
mt <- t(X) %*% uu %*% X #HC meat: X' diag(u^2) X
br <- solve(crossprod(X)) #bread: (X'X)^{-1}
# HC1 applies the degrees-of-freedom adjustment n / (n - k). Use the model's
# own n and rank rather than the hard-coded 7185 / 7181 so the line remains
# correct if the model or data change (values are identical here).
(vce4 <- br %*% mt %*% br) * (n / (n - m1$rank)) #n/(n - k) adjustment
## (Intercept) ses female private
## (Intercept) 0.01941891 0.001236800 -0.012945247 -0.012716403
## ses 0.00123680 0.008964747 0.001322174 -0.003971380
## female -0.01294525 0.001322174 0.022554135 0.000554395
## private -0.01271640 -0.003971380 0.000554395 0.023658248
vcovHC(m1, type = 'HC1') #same, automated using the sandwich package
## (Intercept) ses female private
## (Intercept) 0.01941891 0.001236800 -0.012945247 -0.012716403
## ses 0.00123680 0.008964747 0.001322174 -0.003971380
## female -0.01294525 0.001322174 0.022554135 0.000554395
## private -0.01271640 -0.003971380 0.000554395 0.023658248
There are different types of robust standard errors. What was shown was using what was called ‘HC1’. The simplest version is HC0 with no degrees of freedom adjustment.
# HC0: the same sandwich (mt and br carried over from the HC computation
# above) with no degrees-of-freedom adjustment at all.
(vc.hc0 <- br %*% mt %*% br) #simplest version, no df adjustment
## (Intercept) ses female private
## (Intercept) 0.019408094 0.001236111 -0.0129380402 -0.0127093240
## ses 0.001236111 0.008959756 0.0013214379 -0.0039691692
## female -0.012938040 0.001321438 0.0225415788 0.0005540864
## private -0.012709324 -0.003969169 0.0005540864 0.0236450776
vcovHC(m1, type = 'HC0') #identical
## (Intercept) ses female private
## (Intercept) 0.019408094 0.001236111 -0.0129380402 -0.0127093240
## ses 0.001236111 0.008959756 0.0013214379 -0.0039691692
## female -0.012938040 0.001321438 0.0225415788 0.0005540864
## private -0.012709324 -0.003969169 0.0005540864 0.0236450776
Other types, HC2 uses the hat values \(X(X'X)^{-1}X'\).
# Hat (leverage) values are the diagonal of H = X (X'X)^{-1} X'. Computing
# rowSums((X %*% (X'X)^{-1}) * X) yields exactly that diagonal without ever
# materializing the full n x n hat matrix (the original allocated ~400 MB of
# intermediate storage just to call diag() on it).
ht <- rowSums((X %*% solve(crossprod(X))) * X)
tmp <- hatvalues(m1) #same, just comparing
#compare
ht[1:10]
## 1 2 3 4 5
## 0.0008239518 0.0004371588 0.0004745605 0.0005086124 0.0004296461
## 6 7 8 9 10
## 0.0004314467 0.0004429814 0.0006259305 0.0005147352 0.0004610464
tmp[1:10]
## 1 2 3 4 5
## 0.0008239518 0.0004371588 0.0004745605 0.0005086124 0.0004296461
## 6 7 8 9 10
## 0.0004314467 0.0004429814 0.0006259305 0.0005147352 0.0004610464
### for HC2, divided by 1-ht
# HC2 rescales each squared residual by its leverage: u^2 / (1 - h_i).
newu <- u^2/(1-ht)
newu[1:6]
## 1 2 3 4 5 6
## 0.6965072 105.8623008 87.4847037 3.2890610 34.0382573 64.0462798
dd <- diag(newu, nrow = n) #leverage-adjusted variances on the diagonal
(vcov.HC2 <- br %*% (t(X) %*% dd %*% X) %*% br)
## (Intercept) ses female private
## (Intercept) 0.019419000 0.001236993 -0.0129453531 -0.0127163888
## ses 0.001236993 0.008966698 0.0013225142 -0.0039723682
## female -0.012945353 0.001322514 0.0225540732 0.0005542377
## private -0.012716389 -0.003972368 0.0005542377 0.0236585058
vcovHC(m1, type = 'HC2') #same
## (Intercept) ses female private
## (Intercept) 0.019419000 0.001236993 -0.0129453531 -0.0127163888
## ses 0.001236993 0.008966698 0.0013225142 -0.0039723682
## female -0.012945353 0.001322514 0.0225540732 0.0005542377
## private -0.012716389 -0.003972368 0.0005542377 0.0236585058
The preferred version, which is also the default of the sandwich function, is HC3 where diagonal is: \(\frac{u^{2}}{(1 - h)^2}\).
### for HC3, divided by (1-ht)^2
# HC3 (the sandwich default) squares the leverage correction, approximating
# a jackknife estimator: u^2 / (1 - h_i)^2.
newu <- u^2/(1-ht)^2
dd <- diag(newu, nrow = n)
(vcov.HC3 <- br %*% (t(X) %*% dd %*% X) %*% br)
## (Intercept) ses female private
## (Intercept) 0.019429912 0.001237875 -0.012952671 -0.012723458
## ses 0.001237875 0.008973645 0.001323592 -0.003975570
## female -0.012952671 0.001323592 0.022566575 0.000554389
## private -0.012723458 -0.003975570 0.000554389 0.023671943
vcovHC(m1, type = 'HC3') #same
## (Intercept) ses female private
## (Intercept) 0.019429912 0.001237875 -0.012952671 -0.012723458
## ses 0.001237875 0.008973645 0.001323592 -0.003975570
## female -0.012952671 0.001323592 0.022566575 0.000554389
## private -0.012723458 -0.003975570 0.000554389 0.023671943
These are shown on p. 4 of this sandwich package manual https://cran.r-project.org/web/packages/sandwich/vignettes/sandwich.pdf.
Cluster Robust
Now, another flavor is to have a cluster robust variance covariance matrix.
Cluster VCV: \(\widehat{Var}(\hat{B}) = (X^TX)^{-1} \hat{\Omega} (X^TX)^{-1}\), where
\(\hat{\Omega} = \sum_{g=1}^G X^T_g \hat{u}_g \hat{u}_g^T X_g\)
head(hsb)
## school minrty sx ses mach meanses sector cses private
## 1 1224 No Female -1.528 5.876 -0.434383 Public -1.09361702 0
## 2 1224 No Female -0.588 19.708 -0.434383 Public -0.15361702 0
## 3 1224 No Male -0.528 20.349 -0.434383 Public -0.09361702 0
## 4 1224 No Male -0.668 8.781 -0.434383 Public -0.23361702 0
## 5 1224 No Male -0.158 17.898 -0.434383 Public 0.27638298 0
## 6 1224 No Male 0.022 4.583 -0.434383 Public 0.45638298 0
## female
## 1 1
## 2 1
## 3 0
## 4 0
## 5 0
## 6 0
# Pair each residual with its school (cluster) id, in the same row order
# as the model frame / design matrix.
cdata <- data.frame(cluster = hsb$school, r = resid(m1))
(m <- length(table(cdata$cluster))) #number of clusters
## [1] 160
n <- nobs(m1)
k <- m1$rank
# Small-sample adjustment (m/(m-1)) * ((n-1)/(n-k)) -- the same factor used
# by Stata and by sandwich::vcovCL's default.
dfa <- (m/(m-1)) * ((n-1)/(n-k))
gs <- names(table(cdata$cluster)) #cluster labels, in table order
u <- matrix(NA, nrow = m, ncol = k) #clusters x rank:: creating a new u matrix
The u matrix is now a \(m \times k\) matrix. It is the transpose of the residuals multiplied by the design matrix PER cluster.
### do this per cluster
# For each school g, u[g, ] = t(r_g) %*% X_g: the cluster-level score vector.
# seq_len(m) is safer than 1:m (behaves correctly when m == 0), and
# cdata$cluster is used for BOTH subscripts so residuals and design rows
# always come from the same row mask (the original mixed cdata$cluster and
# hsb$school -- identical here, but fragile if either is reordered).
for(i in seq_len(m)){
  u[i,] <- t(cdata$r[cdata$cluster == gs[i]]) %*% X[cdata$cluster == gs[i], 1:k]
} #this is now a 160 x 4 matrix
#4 x 160 multiplied by 160 x 4 = 4 by 4 matrix
(mt <- crossprod(u)) #which is the same as t(u) %*% u
## [,1] [,2] [,3] [,4]
## [1,] 1127414.04 -32276.29 524452.13 599712.7
## [2,] -32276.29 261299.92 -42889.74 10432.7
## [3,] 524452.13 -42889.74 404206.13 249132.6
## [4,] 599712.74 10432.70 249132.57 599712.7
# Assemble the cluster-robust sandwich and compare against sandwich::vcovCL,
# lmtest::coeftest, and jtools::summ -- all four agree.
(clvc <- br %*% mt %*% br * dfa) #same:: manually computed
## (Intercept) ses female private
## (Intercept) 0.055118621 0.003473873 -0.027093581 -0.03596669
## ses 0.003473873 0.015449763 0.001932605 -0.01272128
## female -0.027093581 0.001932605 0.052099373 -0.01256699
## private -0.035966689 -0.012721285 -0.012566991 0.09446861
vcovCL(m1, cluster = hsb$school) #same:: using the sandwich package
## (Intercept) ses female private
## (Intercept) 0.055118621 0.003473873 -0.027093581 -0.03596669
## ses 0.003473873 0.015449763 0.001932605 -0.01272128
## female -0.027093581 0.001932605 0.052099373 -0.01256699
## private -0.035966689 -0.012721285 -0.012566991 0.09446861
coeftest(m1, vcovCL(m1, cluster = hsb$school)) #coefficient table with cluster-robust SEs
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.52071 0.23477 53.3310 < 2.2e-16 ***
## ses 2.88413 0.12430 23.2035 < 2.2e-16 ***
## female -1.40354 0.22825 -6.1490 8.213e-10 ***
## private 1.96315 0.30736 6.3872 1.795e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(m1, clvc) #same table using the manually built matrix
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.52071 0.23477 53.3310 < 2.2e-16 ***
## ses 2.88413 0.12430 23.2035 < 2.2e-16 ***
## female -1.40354 0.22825 -6.1490 8.213e-10 ***
## private 1.96315 0.30736 6.3872 1.795e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summ(m1, cluster = 'school', robust= "HC1", digits = 5) #using jtools
## MODEL INFO:
## Observations: 7185
## Dependent Variable: mach
## Type: OLS linear regression
##
## MODEL FIT:
## F(3,7181) = 454.39243, p = 0.00000
## R² = 0.15954
## Adj. R² = 0.15919
##
## Standard errors: Cluster-robust, type = HC1
## Est. S.E. t val. p
## (Intercept) 12.52071 0.23477 53.33103 0.00000 ***
## ses 2.88413 0.12430 23.20352 0.00000 ***
## female -1.40354 0.22825 -6.14905 0.00000 ***
## private 1.96315 0.30736 6.38719 0.00000 ***
Compare the standard errors of the cluster robust version with the standard version below for the private coefficient (school level). This is .15 vs .30.
# Conventional OLS SEs for comparison: note private's SE roughly doubles
# (0.15 -> 0.30) once school-level clustering is accounted for.
summ(m1)
## MODEL INFO:
## Observations: 7185
## Dependent Variable: mach
## Type: OLS linear regression
##
## MODEL FIT:
## F(3,7181) = 454.39, p = 0.00
## R² = 0.16
## Adj. R² = 0.16
##
## Standard errors: OLS
## Est. S.E. t val. p
## (Intercept) 12.52 0.13 95.69 0.00 ***
## ses 2.88 0.10 29.59 0.00 ***
## female -1.40 0.15 -9.39 0.00 ***
## private 1.96 0.15 12.95 0.00 ***
What about multiway clustering? Cameron, Gelbach, and Miller (2011) provide a simple way of computing the multiway vcv matrix. Given two sets of clusters (e.g., firm by year clusters), compute the vcv using firm as the cluster (A). Then compute the vcv with year as the cluster (B) and the vcv with an interaction of firm x year as the cluster (C). Finally, the multiway vcv is A + B - C.
# The petersen panel: 500 firms x 10 years, the canonical test dataset for
# clustered standard errors.
library(multiwayvcov)
#data(package = 'multiwayvcov')
data("petersen") #this is in the multiwayvcov package-- this has been deprecated already in favor of the sandwich package
dim(petersen)
## [1] 5000 4
summary(petersen)
## firmid year x y
## Min. : 1.0 Min. : 1.0 Min. :-3.253332 Min. :-7.84325
## 1st Qu.:125.8 1st Qu.: 3.0 1st Qu.:-0.671632 1st Qu.:-1.48863
## Median :250.5 Median : 5.5 Median : 0.026331 Median : 0.05298
## Mean :250.5 Mean : 5.5 Mean : 0.005371 Mean : 0.03524
## 3rd Qu.:375.2 3rd Qu.: 8.0 3rd Qu.: 0.671361 3rd Qu.: 1.52572
## Max. :500.0 Max. :10.0 Max. : 3.552810 Max. : 8.63729
length(table(petersen$firmid)) #500 firms
## [1] 500
length(table(petersen$year)) #10 years
## [1] 10
head(petersen)
## firmid year x y
## 1 1 1 -1.1139730 2.2515350
## 2 1 2 -0.0808538 1.2423460
## 3 1 3 -0.2376072 -1.4263760
## 4 1 4 -0.1524857 -1.1093940
## 5 1 5 -0.0014262 0.9146864
## 6 1 6 -1.2127370 -1.4246860
test <- petersen #working copy
library(ggplot2)
# Spaghetti plot: one faint line per firm across years.
ggplot(petersen, aes(x = year, y = y, group = firmid)) + geom_point(alpha = .05) + geom_line(alpha = .05)
# Simple OLS fit; the SEs below will be re-estimated with different
# clustering choices.
fm <- lm(y ~ x, data=test)
summary(fm)
##
## Call:
## lm(formula = y ~ x, data = test)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.7611 -1.3680 -0.0166 1.3387 8.6779
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02968 0.02836 1.047 0.295
## x 1.03483 0.02858 36.204 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.005 on 4998 degrees of freedom
## Multiple R-squared: 0.2078, Adjusted R-squared: 0.2076
## F-statistic: 1311 on 1 and 4998 DF, p-value: < 2.2e-16
coeftest(fm, vcovCL(fm, cluster = test$firmid)) #A
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.029680 0.067013 0.4429 0.6579
## x 1.034833 0.050596 20.4530 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(fm, vcovCL(fm, cluster = test$year)) #B
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.029680 0.023387 1.2691 0.2045
## x 1.034833 0.033389 30.9933 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#doing this using multiway clustering
# Passing both id columns makes vcovCL compute the A + B - C combination
# internally.
coeftest(fm, vcovCL(fm, cluster = test[,c('firmid','year')]))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.029680 0.065064 0.4562 0.6483
## x 1.034833 0.053558 19.3217 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#automatic
# lfe::felm supports two-way clustering directly in its formula interface.
lfe::felm(y ~ x | 0 | 0 | year + firmid, data = test) %>% summary #same result
##
## Call:
## lfe::felm(formula = y ~ x | 0 | 0 | year + firmid, data = test)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.7611 -1.3680 -0.0166 1.3387 8.6779
##
## Coefficients:
## Estimate Cluster s.e. t value Pr(>|t|)
## (Intercept) 0.02968 0.06506 0.456 0.648
## x 1.03483 0.05356 19.322 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.005 on 4998 degrees of freedom
## Multiple R-squared(full model): 0.2078 Adjusted R-squared: 0.2076
## Multiple R-squared(proj model): 0.2078 Adjusted R-squared: 0.2076
## F-statistic(full model, *iid*): 1311 on 1 and 4998 DF, p-value: < 2.2e-16
## F-statistic(proj model): 373.3 on 1 and 9 DF, p-value: 1.231e-08
#### doing this multiway manually
#test$int <- paste0(test$firmid, test$year) #cross of firm and year
# Cameron, Gelbach & Miller (2011): multiway vcv = A (firm) + B (year) -
# C (firm x year intersection). Here each firm-year cell is a single
# observation, so the intersection has 5000 singleton clusters.
test$int <- interaction(test$firmid, test$year)
length(table(test$int))
## [1] 5000
M1 <- vcovCL(fm, cluster = test$firmid)
M2 <- vcovCL(fm, cluster = test$year)
M3 <- vcovCL(fm, cluster = test$int)
M1 + M2 - M3 #add first 2, subtract the third: p 336, Cameron
## (Intercept) x
## (Intercept) 4.233313e-03 -2.845339e-05
## x -2.845339e-05 2.868462e-03
vcovCL(fm, cluster = test[,c('year','firmid')]) #same results!
## (Intercept) x
## (Intercept) 4.233313e-03 -2.845339e-05
## x -2.845339e-05 2.868462e-03