1 National Health Interview Survey
This reproduces the analyses in Table 1.1 of Angrist and Pischke (2014), which compares people with and without health insurance in the 2009 National Health Interview Survey (NHIS).
The code is derived from NHIS2009_hicompare.do.
Load the prerequisite packages.
library("tidyverse")
library("magrittr")
library("haven")
Load the data (originally from http://masteringmetrics.com/wp-content/uploads/2015/01/Data.zip), and adjust a few of the columns to account for differences in how Stata and R store data.
# Load the NHIS2009 data set shipped with the masteringmetrics package
# into the current environment.
data("NHIS2009", package = "masteringmetrics")
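Keep married adults with non-zero person weights, remove observations with missing values, and keep only households that contain exactly one woman.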
# Initial cleaning of the NHIS2009 sample, done household by household
# (`serial`).
NHIS2009 <- NHIS2009 %>%
  # Keep rows flagged as married adults that carry a non-zero person weight.
  filter(marradult, perweight != 0) %>%
  group_by(serial) %>%
  # Household-level mean of hi_hsb1, ignoring missing values. If every
  # value in a household is missing the mean is NaN, which is.na() below
  # also treats as missing.
  mutate(hi_hsb = mean(hi_hsb1, na.rm = TRUE)) %>%
  filter(!is.na(hi_hsb), !is.na(hi)) %>%
  # Keep households with exactly one row where `fml` is TRUE.
  mutate(female = sum(fml)) %>%
  filter(female == 1) %>%
  select(-female) %>%
  # Drop the grouping so later steps do not silently operate per household.
  ungroup()
For the sample, include only married adults aged 26 to 59 with `adltempl >= 1`; single-person households are removed in the following step.
# Sample restriction: married adults aged 26 to 59 (inclusive) with
# adltempl of at least 1.
NHIS2009 <- filter(
  NHIS2009,
  between(age, 26, 59),
  marradult,
  adltempl >= 1
)
Remove single-person households by keeping only households (`serial`) with more than one person remaining in the sample.
# Keep only households (`serial`) that still contribute more than one row.
NHIS2009 <- NHIS2009 %>%
  group_by(serial) %>%
  filter(n() > 1L) %>%
  ungroup()
Tables of wives and husbands by health insurance status.
The weighting follows the “analytic” weights in the original .do
file, which weights observations by perweight
and normalizes the weights so that the sub-samples of males and females have the same number as the original sample.
# Weighted counts and within-group shares by `fml` and insurance status
# (`hi`).
NHIS2009 %>%
  # Rescale the person weights within each `fml` group so that they sum to
  # that group's number of observations.
  group_by(fml) %>%
  mutate(wt_norm = perweight / sum(perweight) * n()) %>%
  # Weighted number of people in each fml-by-hi cell.
  group_by(fml, hi) %>%
  summarise(n_wt = sum(wt_norm)) %>%
  # Share of each insurance status within its `fml` group.
  group_by(fml) %>%
  mutate(prop = n_wt / sum(n_wt))
#> # A tibble: 4 x 4
#> # Groups: fml [2]
#> fml hi n_wt prop
#> <lgl> <dbl> <dbl> <dbl>
#> 1 FALSE 0. 1281. 0.136
#> 2 FALSE 1. 8114. 0.864
#> 3 TRUE 0. 1131. 0.120
#> 4 TRUE 1. 8264. 0.880
Compare sample statistics of men and women, with and without health insurance.
# Covariates whose summary statistics are compared across insurance status.
varlist <- c("hlth", "nwhite", "age", "yedu", "famsize", "empl", "inc")

# Mean and standard deviation of each covariate by `fml` and insurance
# status (`hi`), reshaped to one row per `fml` and variable, plus the
# difference in means between hi == 1 and hi == 0.
NHIS2009_diff <- NHIS2009 %>%
  # Strip all attributes from every column; this avoids a warning from
  # gather() about differing attributes. Base R's `attributes<-` is used
  # because rlang::set_attrs() has been removed from rlang.
  map_dfc(~ `attributes<-`(.x, NULL)) %>%
  select(fml, hi, one_of(varlist)) %>%
  gather(variable, value, -fml, -hi) %>%
  group_by(fml, hi, variable) %>%
  summarise(mean = mean(value, na.rm = TRUE), sd = sd(value, na.rm = TRUE)) %>%
  # Drop the leftover grouping so the reshaping below does not depend on
  # how grouped data frames handle `hi` being merged into another column.
  ungroup() %>%
  gather(stat, value, -fml, -hi, -variable) %>%
  # Combine statistic name and insurance status into column names such as
  # mean_0 / mean_1 / sd_0 / sd_1.
  unite(stat_hi, stat, hi) %>%
  spread(stat_hi, value) %>%
  mutate(diff = mean_1 - mean_0)

knitr::kable(NHIS2009_diff, digits = 3)
fml | variable | mean_0 | mean_1 | sd_0 | sd_1 | diff |
---|---|---|---|---|---|---|
FALSE | age | 4.13e+01 | 4.42e+01 | 8.40e+00 | 8.61e+00 | 2.893 |
FALSE | empl | 8.52e-01 | 9.22e-01 | 3.55e-01 | 2.68e-01 | 0.070 |
FALSE | famsize | 4.06e+00 | 3.55e+00 | 1.54e+00 | 1.32e+00 | -0.506 |
FALSE | hlth | 3.70e+00 | 3.98e+00 | 1.01e+00 | 9.34e-01 | 0.278 |
FALSE | inc | 4.36e+04 | 1.04e+05 | 3.57e+04 | 5.48e+04 | 60366.415 |
FALSE | nwhite | 1.88e-01 | 2.00e-01 | 3.91e-01 | 4.00e-01 | 0.011 |
FALSE | yedu | 1.12e+01 | 1.41e+01 | 3.47e+00 | 2.68e+00 | 2.919 |
TRUE | age | 3.95e+01 | 4.22e+01 | 8.26e+00 | 8.65e+00 | 2.631 |
TRUE | empl | 5.41e-01 | 7.58e-01 | 4.98e-01 | 4.29e-01 | 0.216 |
TRUE | famsize | 4.07e+00 | 3.55e+00 | 1.54e+00 | 1.32e+00 | -0.520 |
TRUE | hlth | 3.61e+00 | 3.99e+00 | 1.02e+00 | 9.28e-01 | 0.382 |
TRUE | inc | 4.36e+04 | 1.03e+05 | 3.52e+04 | 5.51e+04 | 59722.242 |
TRUE | nwhite | 1.83e-01 | 2.02e-01 | 3.87e-01 | 4.01e-01 | 0.018 |
TRUE | yedu | 1.14e+01 | 1.43e+01 | 3.50e+00 | 2.60e+00 | 2.913 |