1 National Health Interview Survey

This reproduces the analyses in Table 1.1 of Angrist and Pischke (2014), which compares people with and without health insurance in the 2009 National Health Interview Survey (NHIS).

The code is derived from NHIS2009_hicompare.do.

Load the prerequisite packages.

library("tidyverse")
library("magrittr")
library("haven")

Load the data (originally from http://masteringmetrics.com/wp-content/uploads/2015/01/Data.zip), and adjust a few of the columns to account for differences in how Stata and R store data.

# Load the NHIS2009 dataset bundled with the masteringmetrics package.
data("NHIS2009", package = "masteringmetrics")

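Restrict the data to married adults with non-zero person weights, remove observations with missing health insurance information, and keep only households (`serial`) that contain exactly one woman.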

# Household-level cleaning. Rows are grouped by `serial` (presumably the
# household identifier -- confirm against the codebook) and the result is
# left grouped by `serial`.
NHIS2009 <- NHIS2009 %>%
  # married adults with a non-zero person weight
  filter(marradult & perweight != 0) %>%
  group_by(serial) %>%
  # group-level mean of hi_hsb1; NaN when every value in the group is missing
  mutate(hi_hsb = mean(hi_hsb1, na.rm = TRUE)) %>%
  # drop rows where either insurance variable is missing
  filter(!(is.na(hi_hsb) | is.na(hi))) %>%
  # keep groups that contain exactly one row with fml == TRUE
  mutate(female = sum(fml)) %>%
  filter(female == 1) %>%
  select(-female)

For the sample, include only married adults between 26 and 59 years of age for whom `adltempl` is at least 1. Single-person households are removed in the next step.

# Sample restrictions, one condition per step: married adults, aged 26 to 59
# (inclusive), with adltempl of at least 1.
NHIS2009 <- NHIS2009 %>%
  filter(marradult) %>%
  filter(between(age, 26, 59)) %>%
  filter(adltempl >= 1)

Remove single-person households: keep only households (`serial`) that still have more than one member in the sample.

# Keep only `serial` groups with more than one remaining row, then drop the
# grouping so later steps start from an ungrouped data frame.
NHIS2009 <- NHIS2009 %>%
  group_by(serial) %>%
  filter(n() > 1L) %>%
  ungroup()

Tables of wives and husbands by health insurance status. The weighting follows the “analytic” weights in the original .do file, which weight observations by `perweight` and normalize the weights so that, within each of the male and female sub-samples, they sum to that sub-sample's number of observations.

# Weighted counts and shares of insurance status (hi) within each fml group.
NHIS2009 %>%
  group_by(fml) %>%
  # rescale person weights so that, within each fml group, they sum to the
  # number of rows in that group
  mutate(wt_scaled = perweight / sum(perweight) * n()) %>%
  group_by(fml, hi) %>%
  summarise(n_wt = sum(wt_scaled)) %>%
  group_by(fml) %>%
  # share of each insurance status within its fml group
  mutate(prop = prop.table(n_wt))
#> # A tibble: 4 x 4
#> # Groups:   fml [2]
#>   fml      hi  n_wt  prop
#>   <lgl> <dbl> <dbl> <dbl>
#> 1 FALSE    0. 1281. 0.136
#> 2 FALSE    1. 8114. 0.864
#> 3 TRUE     0. 1131. 0.120
#> 4 TRUE     1. 8264. 0.880

Compare sample statistics of men and women, with and without health insurance.

# Variables whose means and standard deviations are compared between rows
# with hi == 1 and rows with hi == 0, separately for each value of fml.
varlist <- c("hlth", "nwhite", "age", "yedu", "famsize", "empl", "inc")
NHIS2009_diff <- NHIS2009 %>%
  # Strip all attributes from every column; this avoids a warning from
  # gather() about differing attributes. Base R `attributes<-` is used
  # because rlang::set_attrs() is no longer available in current rlang.
  map_dfc(function(col) {
    attributes(col) <- NULL
    col
  }) %>%
  select(fml, hi, one_of(varlist)) %>%
  # long format: one row per (fml, hi, variable, value)
  gather(variable, value, -fml, -hi) %>%
  group_by(fml, hi, variable) %>%
  summarise(mean = mean(value, na.rm = TRUE), sd = sd(value, na.rm = TRUE)) %>%
  # summarise() leaves the result grouped; drop the grouping before reshaping
  ungroup() %>%
  # reshape to one row per (fml, variable) with columns mean_0, mean_1,
  # sd_0 and sd_1, where the suffix is the value of hi
  gather(stat, value, -fml, -hi, -variable) %>%
  unite(stat_hi, stat, hi) %>%
  spread(stat_hi, value) %>%
  # difference in means: hi == 1 minus hi == 0
  mutate(diff = mean_1 - mean_0)

knitr::kable(NHIS2009_diff, digits = 3)

fml	variable	mean_0	mean_1	sd_0	sd_1	diff
FALSE	age	4.13e+01	4.42e+01	8.40e+00	8.61e+00	2.893
FALSE	empl	8.52e-01	9.22e-01	3.55e-01	2.68e-01	0.070
FALSE	famsize	4.06e+00	3.55e+00	1.54e+00	1.32e+00	-0.506
FALSE	hlth	3.70e+00	3.98e+00	1.01e+00	9.34e-01	0.278
FALSE	inc	4.36e+04	1.04e+05	3.57e+04	5.48e+04	60366.415
FALSE	nwhite	1.88e-01	2.00e-01	3.91e-01	4.00e-01	0.011
FALSE	yedu	1.12e+01	1.41e+01	3.47e+00	2.68e+00	2.919
TRUE	age	3.95e+01	4.22e+01	8.26e+00	8.65e+00	2.631
TRUE	empl	5.41e-01	7.58e-01	4.98e-01	4.29e-01	0.216
TRUE	famsize	4.07e+00	3.55e+00	1.54e+00	1.32e+00	-0.520
TRUE	hlth	3.61e+00	3.99e+00	1.02e+00	9.28e-01	0.382
TRUE	inc	4.36e+04	1.03e+05	3.52e+04	5.51e+04	59722.242
TRUE	nwhite	1.83e-01	2.02e-01	3.87e-01	4.01e-01	0.018
TRUE	yedu	1.14e+01	1.43e+01	3.50e+00	2.60e+00	2.913

R Code for Mastering ’Metrics

1 National Health Interview Survey

1.1 References