1 National Health Interview Survey

This reproduces the analyses in Table 1.1 of Angrist and Pischke (2014), which compares people with and without health insurance in the 2009 National Health Interview Survey (NHIS).

The code is derived from NHIS2009_hicompare.do.

Load the prerequisite packages.

library("tidyverse")
library("magrittr")
library("haven")

Load the data (originally from http://masteringmetrics.com/wp-content/uploads/2015/01/Data.zip), and adjust a few of the columns to account for differences in how Stata and R store data.

# Load the NHIS2009 data set shipped with the masteringmetrics package
# into the workspace.
data(list = "NHIS2009", package = "masteringmetrics")

Remove missing values.

# Sample cleaning:
#   * keep rows flagged `marradult` that have a non-zero person weight;
#   * within each `serial` group, replace `hi_hsb` by the group mean of
#     `hi_hsb1` (NaN, which is.na() also catches, when every value is missing);
#   * drop rows where `hi_hsb` or `hi` is missing;
#   * keep only `serial` groups with exactly one row where `fml` is TRUE.
NHIS2009 <- NHIS2009 %>%
  filter(marradult, perweight != 0) %>%
  group_by(serial) %>%
  mutate(hi_hsb = mean(hi_hsb1, na.rm = TRUE)) %>%
  filter(!is.na(hi_hsb), !is.na(hi)) %>%
  # The count of women is taken over the rows that survived the filters above.
  filter(sum(fml) == 1) %>%
  # Drop the grouping so it does not leak into later steps.
  ungroup()

For the sample, only include married adults between 26 and 59 years of age, and remove single-person households.

# Keep rows with age in [26, 59] (inclusive), `marradult` TRUE and
# `adltempl` of at least one. Rows where a condition is NA are dropped.
NHIS2009 <- filter(
  NHIS2009,
  age >= 26,
  age <= 59,
  marradult,
  adltempl >= 1
)

Drop single-person households: keep only households that still have more than one person in the sample.

# Keep only `serial` groups that contain more than one row, then drop the
# grouping again.
NHIS2009 <- NHIS2009 %>%
  group_by(serial) %>%
  filter(n() > 1L) %>%
  ungroup()

Tables of wives and husbands by health insurance status. The weighting follows the “analytic” weights in the original .do file, which weights observations by perweight and normalizes the weights so that the sub-samples of males and females have the same number of observations as the original sample.

NHIS2009 %>%
  # Rescale the person weights within each `fml` group so that they sum
  # to the number of rows in that group.
  group_by(fml) %>%
  mutate(perweight = perweight / sum(perweight) * length(perweight)) %>%
  # Weighted count for every fml x hi cell.
  group_by(fml, hi) %>%
  summarise(n_wt = sum(perweight)) %>%
  # summarise() peels off the `hi` level, so the result is still grouped
  # by `fml`; the proportions are therefore computed within `fml`.
  mutate(prop = n_wt / sum(n_wt))
#> # A tibble: 4 x 4
#> # Groups:   fml [2]
#>   fml      hi  n_wt  prop
#>   <lgl> <dbl> <dbl> <dbl>
#> 1 FALSE    0. 1281. 0.136
#> 2 FALSE    1. 8114. 0.864
#> 3 TRUE     0. 1131. 0.120
#> 4 TRUE     1. 8264. 0.880

Compare sample statistics of men and women, with and without health insurance.

# Variables whose means and standard deviations are compared.
varlist <- c("hlth", "nwhite", "age", "yedu", "famsize", "empl", "inc")

# For each fml x variable combination: mean and sd of the variable for
# hi == 0 and hi == 1, plus the difference in means (hi == 1 minus hi == 0).
NHIS2009_diff <- NHIS2009 %>%
  select(fml, hi, one_of(varlist)) %>%
  # Strip all attributes from the selected columns; this avoids a warning
  # from gather() about differing attributes. Base `attributes<-` is used
  # because rlang::set_attrs() is deprecated and not available in current
  # rlang releases.
  map_dfc(function(col) {
    attributes(col) <- NULL
    col
  }) %>%
  gather(variable, value, -fml, -hi) %>%
  group_by(fml, hi, variable) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE)
  ) %>%
  # Reshape on an ungrouped table so the result carries no leftover grouping.
  ungroup() %>%
  gather(stat, value, -fml, -hi, -variable) %>%
  # Column names become <stat>_<hi>, e.g. mean_0, mean_1, sd_0, sd_1.
  unite(stat_hi, stat, hi) %>%
  spread(stat_hi, value) %>%
  mutate(diff = mean_1 - mean_0)
knitr::kable(NHIS2009_diff, digits = 3)
fml variable mean_0 mean_1 sd_0 sd_1 diff
FALSE age 4.13e+01 4.42e+01 8.40e+00 8.61e+00 2.893
FALSE empl 8.52e-01 9.22e-01 3.55e-01 2.68e-01 0.070
FALSE famsize 4.06e+00 3.55e+00 1.54e+00 1.32e+00 -0.506
FALSE hlth 3.70e+00 3.98e+00 1.01e+00 9.34e-01 0.278
FALSE inc 4.36e+04 1.04e+05 3.57e+04 5.48e+04 60366.415
FALSE nwhite 1.88e-01 2.00e-01 3.91e-01 4.00e-01 0.011
FALSE yedu 1.12e+01 1.41e+01 3.47e+00 2.68e+00 2.919
TRUE age 3.95e+01 4.22e+01 8.26e+00 8.65e+00 2.631
TRUE empl 5.41e-01 7.58e-01 4.98e-01 4.29e-01 0.216
TRUE famsize 4.07e+00 3.55e+00 1.54e+00 1.32e+00 -0.520
TRUE hlth 3.61e+00 3.99e+00 1.02e+00 9.28e-01 0.382
TRUE inc 4.36e+04 1.03e+05 3.52e+04 5.51e+04 59722.242
TRUE nwhite 1.83e-01 2.02e-01 3.87e-01 4.01e-01 0.018
TRUE yedu 1.14e+01 1.43e+01 3.50e+00 2.60e+00 2.913