## RDD Example
## Francis Huang
## 2018.09.19
library(rdrobust) #for rdplot
library(jtools) #for nicer reg output
library(ggplot2) #for basic graphs
library(dplyr) #for data munging
#1 Cleanup
# NOTE(review): rm(list = ls()) in a script is discouraged (restart R for a
# clean session instead); kept here to preserve the original workflow.
rm(list=ls())
# Read the example dataset from the web. Columns used below:
# avar (running variable, cutoff at 0), treat (assignment), takeup
# (actual compliance), abc (outcome), female/white/black (covariates).
dat <- read.csv("http://faculty.missouri.edu/huangf/data/eval/rddex.csv")
head(dat)
#2 Are birthdays really random?
# Can discuss
# Histogram of the running variable with the cutoff marked at zero
ggplot(data = dat, mapping = aes(x = avar)) +
  geom_histogram(bins = 30) +
  geom_vline(xintercept = 0)
# McCrary sorting (density) test, from the rdd package:
# want a nonsignificant p value (no bunching of units at the cutoff)
rdd::DCdensity(dat$avar, 0)
#seems fine
#3 Do we think there is a discontinuity?
#What about the covariates? are the groups really equal?
# Balance checks: compare covariate means across the two assignment groups
t.test(female ~ treat, data = dat)
t.test(white ~ treat, data = dat)
t.test(black ~ treat, data = dat) #more Black in the control group?
#A quicker and nicer way to do this...
library(tableone)
t1 <- CreateTableOne(data = dat,
                     vars = c('black', 'white', 'female'),
                     strata = 'treat')
# use TRUE, not T (T is an ordinary variable and can be reassigned)
print(t1, smd = TRUE)
# should we be worried about differences? possibly but can include as
# covariates and SMD is not huge (i.e., d < 0.20)
#another way to show
aggregate(cbind(female, white, black) ~ treat, data = dat, mean)
#just to visualize covariate smoothness through the cutoff
rdplot(dat$female, dat$avar, p = 1, nbins = 60, y.lab = '% female')
rdplot(dat$white, dat$avar, p = 1, nbins = 60, y.lab = '% White')
#4 What about assignment to treatment?
library(descr)
# Cross-tabulate assignment (treat) against actual take-up;
# TRUE/FALSE instead of T/F (T and F are reassignable)
crosstab(dat$treat, dat$takeup, prop.t = TRUE, plot = FALSE)
#treat is their actual assignment based on their birthday
#takeup is if they actually followed the assignment or not (redshirted
#or greenshirted).
#2 were supposed to be in control but they took the treatment
#5 were supposed to be in the treatment but went to the control
#imperfect compliance; small number though < 1%
#if < 5%, just ignore, use treatment and treat as a 'sharp' RDD
#This winds up being an ITT
#If you want to recover the full treatment effect, can use an
#instrumental variable and then results in a 'fuzzy' RDD
#5 model: simple, just doing without covariates
# Local linear model with separate slopes on each side of the cutoff;
# avar * treat expands to avar + treat + avar:treat (same fit, shorter)
l.mod <- lm(abc ~ avar * treat, data = dat)
summary(l.mod)
summ(l.mod, robust = 'HC2', digits = 2, confint = TRUE) #more info
# the higher order polynomial involves ^2 or ^3 the avar
q.mod <- lm(abc ~ avar * treat + I(avar^2) * treat, data = dat)
summary(q.mod)
summ(q.mod, robust = 'HC2', digits = 2, confint = TRUE) #more info
# cubic
c.mod <- update(q.mod, . ~ . + I(avar^3) * treat, data = dat)
summary(c.mod)
summ(c.mod, robust = 'HC2', digits = 2, confint = TRUE) #more info
# Side-by-side table of the three specifications with robust SEs
export_summs(l.mod, q.mod, c.mod, robust = TRUE, digits = 2)
### ANOTHER WAY
library(sandwich)
library(lmtest)
coeftest(l.mod, vcov = vcovHC(l.mod))
coeftest(q.mod, vcov = vcovHC(q.mod))
coeftest(c.mod, vcov = vcovHC(c.mod))
## if doing this as a fuzzy RDD
# NOTE(review): ivpack has been archived on CRAN; AER::ivreg() with
# sandwich-based SEs is the maintained alternative -- confirm availability
library(ivpack)
# 2SLS: actual take-up is instrumented by the birthday-based assignment
# (second formula gives the instruments)
iv.mod <- ivreg(abc ~ avar*takeup, ~avar*treat, data = dat)
summary(iv.mod) #naive SE
robust.se(iv.mod) #robust SE
## Lastly,
# rdrobust picks the bandwidth and kernel automatically (local polynomial)
res1 <- rdrobust(dat$abc, x = dat$avar)
summary(res1)
## Hard to tell what is going on. a lot is going on
## with bandwidth selection and kernel type w/c maybe for now
## is more complicated than necessary. THIS is using
## a local polynomial regression. It is NOT using the whole
## n = 1000 which is fine.
### using different bandwidths
# narrow window around the cutoff
test2 <- dplyr::filter(dat, between(avar, -106, 106))
freq(test2$treat)
ll.mod <- lm(abc ~ avar + treat, data = test2)
summ(ll.mod, robust = 'HC2', digits = 2, confint = TRUE)
# wider window
test2 <- dplyr::filter(dat, between(avar, -250, 250))
#optimal bandwidth: Imbens & Kalyanaraman, 2012
freq(test2$treat)
# avar * treat already includes both main effects plus the interaction,
# so the redundant 'avar + treat +' terms are dropped (identical fit)
ll.mod <- lm(abc ~ avar * treat, data = test2)
summ(ll.mod, robust = 'HC2', digits = 2, confint = TRUE)
### regardless of method used, shows a positive effect
### for the treatment
### can turn to the shiny rddapp and try this too.
### compare results
### https://rddapp.shinyapps.io/shinyrdd/