5  Models for Paper

Work From Home: Who Can and Who Does?

Code
library(scales)
library(reldist)
library(pollster)
library(labelled)
library(weights)
library(tigris)
library(ipumsr)
library(srvyr)
library(survey)
library(tidyverse)
library(naniar)
library(gmodels)
library(gtsummary)
library(quarto)
library(huxtable) # for summ() and regression output formatting
library(jtools)
library(modelsummary)
library(car)

knitr::opts_chunk$set(warning=FALSE, message=FALSE)

load("./data/WFH.RData")
# hist(joined$INCEARN)



joined$race_cat <- factor(joined$race_cat, levels = c("White", "Asian", "Black", "Other") )
joined$CanWorkFromHome <- factor(joined$CanWorkFromHome, levels = c("No WFH", "Some WFH", "Can WFH") )
table(joined$CanWorkFromHome)

  No WFH Some WFH  Can WFH 
   65082    15901    41080 
Code
joined <- joined %>% mutate(tenKdollars = INCEARN / 10000)
joined$CIHISPEED <- factor(joined$CIHISPEED, labels = c("10" = "Has Access", "20" = "Lacks Access") )

# hist(joined$INCEARN)

6 Regression Models

Regression for 2019 using survey object dstrata2019 & regression for 2021 using survey object dstrata2021.

6.1 Subset: Gender & Household Dynamics

Model supporting statements made in “Gender and Household Dynamics” section of Working From Home in Illinois paper.

6.1.1 Logit Model

Model created with a subset of survey data. Only uses workers who had Management type occupations, Could feasibly WFH, and were under the ages of 45.

Code
joined$occ_2dig_labels <- as.factor(joined$occ_2dig_labels)
joined$CanWorkFromHome <- as.factor(joined$CanWorkFromHome)

#gmodels::CrossTable(joined$CanWorkFromHome, joined$occ_2dig_labels, chisq = TRUE, prop.r = FALSE, prop.c = FALSE, prop.t = FALSE, prop.chisq = FALSE )

dstrata <- survey::svydesign(id = ~CLUSTER, strata = ~STRATA, weights = ~PERWT, data = joined) %>% 
  as_survey() %>%
  mutate(decile = ntile(INCEARN, 10))



# 2019 data turned into survey item
dstrata2019 <- joined %>% filter(YEAR==2019) 
dstrata2019 <- survey::svydesign(id = ~CLUSTER, strata = ~STRATA, 
                                 weights = ~PERWT, data = dstrata2019) %>% 
  as_survey() %>% 
  mutate(decile = ntile(INCEARN, 10))


dstrata2021 <- joined %>% filter(YEAR==2021) 

dstrata2021 <- survey::svydesign(id = ~CLUSTER, strata = ~STRATA, weights = ~PERWT, data = dstrata2021) %>% as_survey() %>%
  mutate(decile = ntile(INCEARN, 10))

A note on the p-value: the p-value is a test of significance for the null hypothesis H0 that

  • there is no difference in the log-odds of the outcome between the reference group (captured by the intercept) and the explanatory variable (or one of its categories), or that the difference between the two groups equals zero: H0:b1=0 and Ha:b1≠0

If p<0.5, we reject H0 as we have evidence to suggest that the difference between the two groups does not equal zero.

Log-odds are not the most intuitive to interpret. Instead of discussing the change in the log-odds, we can calculate the odds ratio for a given variable by exponentiating the coefficient.

Odds ratio is read “have x times the odds of the outcome of interest compared to those in the reference group”.

Reference group of the outcome variable: by default, R creates uses the lowest coded group as the reference. The reference category can be changed by using the ‘relevel()’.

Relationship between Odds and Probabilities:

Odds=P/(1-P)

P=odds/(1+odds)

Odds=exp(log-odds)

P=exp(log-odds)/(1+exp(log-odds))

6.1.1.1 Separate models for each year

Be careful when subsetting survey data. Messes up standard errors if filter() is used to select the subset instead using the subset = “” option for survey objects.

Code
m <- list(
  "2019 Logged Odds" = svyglm(did_wfh~ sex_cat + age_cat  + NCHILD + NCHLT5 + CIHISPEED + county_pop_type + race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    family = quasibinomial(),
                    design = dstrata2019),
  
   "2021 Logged Odds" =  svyglm(did_wfh~ sex_cat+ age_cat  + NCHILD + NCHLT5 + CIHISPEED + county_pop_type +race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    family = quasibinomial(),
                    design = dstrata2021),
  
   "2019 Odds Ratio" = svyglm(did_wfh~ sex_cat + age_cat  + NCHILD + NCHLT5 + CIHISPEED + county_pop_type + race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    family = quasibinomial(),
                    design = dstrata2019),
  
   "2021 Odds Ratio" =   svyglm(did_wfh~ sex_cat+ age_cat  + NCHILD + NCHLT5 + CIHISPEED + county_pop_type +race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    family = quasibinomial(),
                    design = dstrata2021)
) 

# 
# modelsummary(m, exponentiate = TRUE, # does standard and exponentiated models together
#              statistic = c("conf.int",
#                            "s.e. = {std.error}",
#                            "p = {p.value}"),
#              stars = TRUE, shape = term ~ model + statistic,
#              notes = list('Subset of ACS Survey Data for 2019 and 2021',
#                           'Odds Ratios Shown in Table'),
#              title = 'Predictions for WFH in 2019 vs 2021',
#              output = "table.docx")

modelsummary(m, 
             exponentiate = c(FALSE,FALSE,TRUE,TRUE), # does standard and exponentiated models together 
             statistic = c("s.e. = {std.error}", 
                           "p = {p.value}"),
             stars = TRUE, shape = term ~ model + statistic,
             notes = list('Subset of ACS Survey Data for 2019 and 2021',
                          'Odds Ratios Shown in Table'),
             title = 'Predictions for WFH in 2019 vs 2021')
Predictions for WFH in 2019 vs 2021
2019 Logged Odds
2021 Logged Odds
2019 Odds Ratio
2021 Odds Ratio
Est. s.e. = S.E. p = p Est. s.e. = S.E. p = p Est. s.e. = S.E. p = p Est. s.e. = S.E. p = p
(Intercept) −4.083*** s.e. = 0.375 p = <0.001 −2.201*** s.e. = 0.174 p = <0.001 0.017*** s.e. = 0.006 p = <0.001 0.111*** s.e. = 0.019 p = <0.001
sex_catMale 0.316** s.e. = 0.113 p = 0.005 0.044 s.e. = 0.062 p = 0.476 1.372** s.e. = 0.155 p = 0.005 1.045 s.e. = 0.065 p = 0.476
age_cat25to34 1.389*** s.e. = 0.338 p = <0.001 0.542*** s.e. = 0.140 p = <0.001 4.011*** s.e. = 1.355 p = <0.001 1.719*** s.e. = 0.240 p = <0.001
age_cat35to44 1.846*** s.e. = 0.340 p = <0.001 0.749*** s.e. = 0.147 p = <0.001 6.336*** s.e. = 2.151 p = <0.001 2.115*** s.e. = 0.310 p = <0.001
NCHILD −0.166* s.e. = 0.074 p = 0.024 −0.226*** s.e. = 0.040 p = <0.001 0.847* s.e. = 0.062 p = 0.024 0.798*** s.e. = 0.032 p = <0.001
NCHLT5 0.167 s.e. = 0.107 p = 0.121 0.251*** s.e. = 0.066 p = <0.001 1.181 s.e. = 0.127 p = 0.121 1.285*** s.e. = 0.085 p = <0.001
CIHISPEEDLacks Access −0.335 s.e. = 0.329 p = 0.308 −0.609*** s.e. = 0.153 p = <0.001 0.715 s.e. = 0.235 p = 0.308 0.544*** s.e. = 0.083 p = <0.001
county_pop_typeUrban Counties 0.018 s.e. = 0.245 p = 0.940 1.384*** s.e. = 0.127 p = <0.001 1.019 s.e. = 0.249 p = 0.940 3.989*** s.e. = 0.507 p = <0.001
race_catAsian −0.220 s.e. = 0.199 p = 0.270 0.362*** s.e. = 0.102 p = <0.001 0.803 s.e. = 0.160 p = 0.270 1.436*** s.e. = 0.147 p = <0.001
race_catBlack −0.238 s.e. = 0.291 p = 0.414 −0.330* s.e. = 0.139 p = 0.018 0.788 s.e. = 0.230 p = 0.414 0.719* s.e. = 0.100 p = 0.018
race_catOther −0.365 s.e. = 0.300 p = 0.224 −0.478*** s.e. = 0.103 p = <0.001 0.694 s.e. = 0.208 p = 0.224 0.620*** s.e. = 0.064 p = <0.001
Num.Obs. 7344 7389 7344 7389
R2 0.024 0.049 0.024 0.049
R2 Adj. −0.155 −0.114 −0.155 −0.114
AIC 3783.8 9246.9 3783.8 9246.9
BIC 3851.7 9319.8 3851.7 9319.8
Log.Lik. −1872.464 −4606.447 −1872.464 −4606.447
RMSE 0.25 0.47 0.25 0.47
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
Subset of ACS Survey Data for 2019 and 2021
Odds Ratios Shown in Table

6.1.2 OLS Model

Code
m <- list(
  "2019 OLS" = svyglm(did_wfh~ sex_cat + age_cat + NCHLT5 + CIHISPEED + county_pop_type +race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    design = dstrata2019),
  
  "2021 OLS" =  svyglm(did_wfh ~ sex_cat+ age_cat + NCHLT5 + CIHISPEED + county_pop_type +race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    design = dstrata2021)
) 

# 
# modelsummary(m, exponentiate = TRUE, # does standard and exponentiated models together
#              statistic = c("conf.int",
#                            "s.e. = {std.error}",
#                            "p = {p.value}"),
#              stars = TRUE, shape = term ~ model + statistic,
#              notes = list('Subset of ACS Survey Data for 2019 and 2021',
#                           'Odds Ratios Shown in Table'),
#              title = 'Predictions for WFH in 2019 vs 2021',
#              output = "table.docx")

modelsummary(m,# exponentiate = TRUE, # does standard and exponentiated models together 
             statistic = c(
                           "s.e. = {std.error}", 
                           "p = {p.value}"),
             stars = TRUE, shape = term ~ model + statistic,
             notes = list('Subset of ACS Survey Data for 2019 and 2021'),
             title = 'OLS - Separate Models for WFH in 2019 vs 2021')
OLS - Separate Models for WFH in 2019 vs 2021
2019 OLS
2021 OLS
Est. s.e. = S.E. p = p Est. s.e. = S.E. p = p
(Intercept) 0.010 s.e. = 0.014 p = 0.465 0.052+ s.e. = 0.028 p = 0.060
sex_catMale 0.023** s.e. = 0.008 p = 0.003 0.014 s.e. = 0.014 p = 0.323
age_cat25to34 0.048*** s.e. = 0.009 p = <0.001 0.106*** s.e. = 0.027 p = <0.001
age_cat35to44 0.071*** s.e. = 0.009 p = <0.001 0.109*** s.e. = 0.027 p = <0.001
NCHLT5 0.001 s.e. = 0.007 p = 0.845 0.009 s.e. = 0.012 p = 0.462
CIHISPEEDLacks Access −0.019 s.e. = 0.016 p = 0.227 −0.115*** s.e. = 0.026 p = <0.001
county_pop_typeUrban Counties 0.004 s.e. = 0.015 p = 0.778 0.263*** s.e. = 0.017 p = <0.001
race_catAsian −0.014 s.e. = 0.013 p = 0.283 0.092*** s.e. = 0.025 p = <0.001
race_catBlack −0.015 s.e. = 0.016 p = 0.358 −0.078** s.e. = 0.030 p = 0.010
race_catOther −0.021 s.e. = 0.015 p = 0.172 −0.106*** s.e. = 0.021 p = <0.001
Num.Obs. 7344 7389
R2 0.010 0.052
R2 Adj. −0.172 −0.111
AIC 15087.7 43997.0
BIC 197538.9 55704.4
Log.Lik. −98720.466 −27803.205
RMSE 0.25 0.47
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
Subset of ACS Survey Data for 2019 and 2021
Code
#|column: page
m4_2019 <- svyglm(did_wfh ~ sex_cat+ age_cat + NCHLT5 + CIHISPEED + county_pop_type+ tenKdollars +race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    design = dstrata2019)
  
m4_2021 <- svyglm(did_wfh ~ sex_cat+ age_cat + NCHLT5 + CIHISPEED + county_pop_type+tenKdollars +race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    design = dstrata2021)


# export_summs(m4_2019, m4_2021, model.names = c("Subset 2019 OLS", " Subset 2021 OLS"))


t1 <- m4_2019 %>%
  tbl_regression(intercept = T,
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)

                 ) %>%
    add_significance_stars(hide_ci = TRUE)%>%

  add_global_p(keep = T, test.statistic = "F")

t2 <- m4_2021 %>%
  tbl_regression(intercept = T,     # No intercept in OR table
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 # Use style_sigfig to keep 2 significant digits (e.g., 1.3)
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)
                 ) %>%
    add_significance_stars(hide_ci = TRUE)%>%

  add_global_p(keep = T, test.statistic = "F")


tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**OLS 2019 Subset**", "**OLS 2021 Subset**")
)
Characteristic OLS 2019 Subset OLS 2021 Subset
Beta1 SE2 Beta1 SE2
(Intercept) 0.01 0.014 0.06* 0.028
sex_cat NA NA
    Female
    Male 0.02** 0.008 -0.01 0.014
age_cat NA NA
    16to24
    25to34 0.05*** 0.009 0.07** 0.027
    35to44 0.07*** 0.010 0.06* 0.028
Number of own children under age 5 in household 0.00 0.007 0.00 0.012
CIHISPEED NA NA
    Has Access
    Lacks Access -0.02 0.016 -0.10*** 0.025
county_pop_type NA NA
    Rural Counties
    Urban Counties 0.00 0.016 0.24*** 0.017
tenKdollars 0.00 0.001 0.01*** 0.001
race_cat NA NA
    White
    Asian -0.01 0.013 0.09*** 0.025
    Black -0.02 0.016 -0.06* 0.030
    Other -0.02 0.015 -0.09*** 0.021
1 *p<0.05; **p<0.01; ***p<0.001
2 SE = Standard Error

6.2 Full Sample - Models with all Workers

6.2.1 OLS

Code
both <- svyglm(did_wfh~ CanWorkFromHome+county_pop_type+ NCHILD + NCHLT5 +
                 tenKdollars +race_cat+ sex_cat + age_cat + CIHISPEED + 
                 occ_2dig_labels + factor(YEAR), design = dstrata)



ols2019 <- svyglm(did_wfh~ CanWorkFromHome+county_pop_type+ NCHILD + NCHLT5 +
                    tenKdollars +race_cat+ sex_cat + age_cat + CIHISPEED + 
                    occ_2dig_labels, design = dstrata2019)


ols2021 <- svyglm(did_wfh~ CanWorkFromHome+county_pop_type+ NCHILD + NCHLT5 +
                    tenKdollars +race_cat+ sex_cat + age_cat + CIHISPEED + 
                    occ_2dig_labels, design = dstrata2021)

#summary(ols2021)

# export_summs(both, ols2021, model.names = c("Both Years OLS", " Only 2021 OLS"))

t1 <- ols2019 %>%
  tbl_regression(intercept = T,
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)

                 ) %>%
  add_significance_stars(hide_ci = TRUE) %>%
  add_global_p(keep = T, test.statistic = "F")

t2 <- ols2021 %>%
  tbl_regression(intercept = T,     # No intercept in OR table
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 # Use style_sigfig to keep 2 significant digits (e.g., 1.3)
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)
                 ) %>%
  add_significance_stars(hide_ci = TRUE)%>%
  add_global_p(keep = T, test.statistic = "F")


tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**OLS 2019 Full Model**", "**OLS 2021 Full Model**")
)
Characteristic OLS 2019 Full Model OLS 2021 Full Model
Beta1 SE2 Beta1 SE2
(Intercept) 0.01 0.005 0.03*** 0.009
CanWorkFromHome NA NA
    No WFH
    Some WFH 0.04*** 0.005 0.17*** 0.008
    Can WFH 0.03*** 0.004 0.17*** 0.006
county_pop_type NA NA
    Rural Counties
    Urban Counties 0.01* 0.003 0.08*** 0.005
Number of own children in the household 0.00 0.001 -0.01*** 0.002
Number of own children under age 5 in household 0.01* 0.004 0.02** 0.007
tenKdollars 0.00 0.000 0.00*** 0.000
race_cat NA NA
    White
    Asian -0.01* 0.005 0.03** 0.010
    Black -0.02*** 0.004 -0.02 0.008
    Other -0.02*** 0.004 -0.03*** 0.006
sex_cat NA NA
    Female
    Male 0.00 0.003 -0.01*** 0.004
age_cat NA NA
    16to24
    25to34 0.02*** 0.004 0.04*** 0.008
    35to44 0.03*** 0.004 0.05*** 0.008
    45to54 0.04*** 0.004 0.03*** 0.008
    55to64 0.04*** 0.004 0.01 0.008
    65+ 0.06*** 0.006 0.03** 0.010
CIHISPEED NA NA
    Has Access
    Lacks Access -0.01*** 0.003 -0.04*** 0.006
occ_2dig_labels NA NA
    Management, Business, Science, Arts
    Military -0.04*** 0.005 -0.05 0.037
    Natural Resources, Construction -0.01* 0.006 -0.06*** 0.008
    Production, Transportation -0.02*** 0.004 -0.06*** 0.007
    Sales & Office Jobs 0.00 0.005 -0.02*** 0.007
    Service Occupations 0.00 0.005 -0.06*** 0.007
1 *p<0.05; **p<0.01; ***p<0.001
2 SE = Standard Error

6.2.2 Logit

6.2.3 OLS - Drop WFH Feasibility

Code
ols2019 <- svyglm(did_wfh~ occ_2dig_labels+county_pop_type + NCHLT5 + spouse +
                    tenKdollars +race_cat+ sex_cat + age_cat + CIHISPEED, design = dstrata2019)


ols2021 <- svyglm(did_wfh~ occ_2dig_labels+county_pop_type + NCHLT5 + spouse +
                    tenKdollars +race_cat+ sex_cat + age_cat + CIHISPEED, design = dstrata2021)

#summary(ols2021)

# export_summs(both, ols2021, model.names = c("Both Years OLS", " Only 2021 OLS"))

t1 <- ols2019 %>%
  tbl_regression(intercept = T,
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)

                 ) %>%
  add_significance_stars(hide_ci = TRUE) %>%
  add_global_p(keep = T, test.statistic = "F")

t2 <- ols2021 %>%
  tbl_regression(intercept = T,     # No intercept in OR table
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 # Use style_sigfig to keep 2 significant digits (e.g., 1.3)
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)
                 ) %>%
  add_significance_stars(hide_ci = TRUE)%>%
  add_global_p(keep = T, test.statistic = "F")


tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**OLS 2019 Full Model**", "**OLS 2021 Full Model**")
)
Characteristic OLS 2019 Full Model OLS 2021 Full Model
Beta1 SE2 Beta1 SE2
(Intercept) 0.03*** 0.005 0.14*** 0.008
occ_2dig_labels NA NA
    Management, Business, Science, Arts
    Military -0.06*** 0.004 -0.19*** 0.037
    Natural Resources, Construction -0.04*** 0.005 -0.19*** 0.007
    Production, Transportation -0.05*** 0.004 -0.19*** 0.006
    Sales & Office Jobs 0.00 0.005 -0.06*** 0.007
    Service Occupations -0.02*** 0.004 -0.17*** 0.006
county_pop_type NA NA
    Rural Counties
    Urban Counties 0.01** 0.003 0.09*** 0.005
Number of own children under age 5 in household 0.01* 0.003 0.01 0.006
tenKdollars 0.00 0.000 0.01*** 0.000
race_cat NA NA
    White
    Asian -0.01* 0.005 0.02 0.011
    Black -0.02*** 0.004 -0.02* 0.008
    Other -0.02*** 0.004 -0.04*** 0.006
sex_cat NA NA
    Female
    Male 0.00 0.003 -0.01** 0.004
age_cat NA NA
    16to24
    25to34 0.02*** 0.004 0.06*** 0.008
    35to44 0.03*** 0.004 0.06*** 0.008
    45to54 0.04*** 0.004 0.03*** 0.007
    55to64 0.04*** 0.004 0.02** 0.007
    65+ 0.06*** 0.006 0.04*** 0.010
CIHISPEED NA NA
    Has Access
    Lacks Access -0.02*** 0.003 -0.04*** 0.006
1 *p<0.05; **p<0.01; ***p<0.001
2 SE = Standard Error

6.2.4 OLS - Drop Occupations

Code
ols2019 <- svyglm(did_wfh ~ CanWorkFromHome + county_pop_type+ NCHILD + NCHLT5 +
                    tenKdollars + race_cat + sex_cat + age_cat + CIHISPEED, design = dstrata2019)


ols2021 <- svyglm(did_wfh ~ CanWorkFromHome + county_pop_type + NCHILD + NCHLT5 +
                    tenKdollars + race_cat + sex_cat + age_cat + CIHISPEED, design = dstrata2021)

#summary(ols2021)

# export_summs(both, ols2021, model.names = c("Both Years OLS", " Only 2021 OLS"))

t1 <- ols2019 %>%
  tbl_regression(intercept = T,
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)

                 ) %>%
  add_significance_stars(hide_ci = TRUE) %>%
  add_global_p(keep = T, test.statistic = "F")

t2 <- ols2021 %>%
  tbl_regression(intercept = T,     # No intercept in OR table
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 # Use style_sigfig to keep 2 significant digits (e.g., 1.3)
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)
                 ) %>%
  add_significance_stars(hide_ci = TRUE)%>%
  add_global_p(keep = T, test.statistic = "F")


tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**OLS 2019 Full Model**", "**OLS 2021 Full Model**")
)
Characteristic OLS 2019 Full Model OLS 2021 Full Model
Beta1 SE2 Beta1 SE2
(Intercept) 0.01 0.004 -0.01 0.007
CanWorkFromHome NA NA
    No WFH
    Some WFH 0.05*** 0.005 0.20*** 0.008
    Can WFH 0.04*** 0.003 0.20*** 0.005
county_pop_type NA NA
    Rural Counties
    Urban Counties 0.01* 0.003 0.08*** 0.005
Number of own children in the household 0.00 0.001 -0.01*** 0.002
Number of own children under age 5 in household 0.01* 0.004 0.02*** 0.007
tenKdollars 0.00 0.000 0.01*** 0.000
race_cat NA NA
    White
    Asian -0.01 0.005 0.03** 0.010
    Black -0.02*** 0.004 -0.02* 0.008
    Other -0.02*** 0.004 -0.03*** 0.006
sex_cat NA NA
    Female
    Male 0.00 0.003 -0.02*** 0.004
age_cat NA NA
    16to24
    25to34 0.02*** 0.004 0.05*** 0.008
    35to44 0.03*** 0.004 0.06*** 0.008
    45to54 0.03*** 0.004 0.03*** 0.008
    55to64 0.04*** 0.004 0.01 0.007
    65+ 0.06*** 0.006 0.03** 0.010
CIHISPEED NA NA
    Has Access
    Lacks Access -0.02*** 0.003 -0.04*** 0.006
1 *p<0.05; **p<0.01; ***p<0.001
2 SE = Standard Error

6.3 Additional Models

Code
#svyglm(did_wfh~ INCEARN + race_cat+ SEX+ CIHISPEED+CINETHH + occ_2dig_labels+CanWorkFromHome+county_pop_type, design = dstrata2021) %>% summary()



### All variations of family = "" had same results 
# m1_2019 <- svyglm(did_wfh~ INCEARN +race_cat+ SEX+ AGE + CIHISPEED*CINETHH + CanWorkFromHome+county_pop_type+NCHILD+MARST + NCHLT5, 
#       family = "binomial", design = dstrata2019)
# 
# m1_2021<- svyglm(did_wfh~ INCEARN +race_cat+ SEX+ AGE + CIHISPEED*CINETHH + CanWorkFromHome+county_pop_type+NCHILD+MARST + NCHLT5, 
#       family = "binomial", design = dstrata2021)
#   
# export_summs(m1_2019, m1_2021, 
#              model.names = c("2019 Logit", "2021 Logit"), 
#             robust = "HC3", statistics = c(N = "nobs", R2 = "r.squared", adjR2 = "adj.r.squared"))
# 
# 
# m1_2019 <- svyglm(did_wfh~ INCEARN +race_cat+ SEX+ AGE + CIHISPEED*CINETHH + CanWorkFromHome+county_pop_type+NCHILD+MARST + NCHLT5, 
#        family = quasibinomial(), design = dstrata2019)
# 
# m1_2021<- svyglm(did_wfh~ INCEARN +race_cat+ SEX+ AGE + CIHISPEED*CINETHH + CanWorkFromHome+county_pop_type+NCHILD+MARST + NCHLT5, 
#     family=quasibinomial(),   design = dstrata2021)
#   
# 
# export_summs(m1_2019, m1_2021, 
#              model.names = c("2019 QuasiBi", "2021 QuasiBi"), 
#             robust = "HC3", statistics = c(N = "nobs", R2 = "r.squared", adjR2 = "adj.r.squared"))


m1_2019 <- svyglm(did_wfh ~ CanWorkFromHome + county_pop_type + NCHILD + NCHLT5  + CIHISPEED + INCEARN + race_cat + sex_cat + age_cat, 
        family = quasibinomial(),design = dstrata2019)

m1_2021<- svyglm(did_wfh ~ CanWorkFromHome + county_pop_type + NCHILD + NCHLT5  + CIHISPEED + tenKdollars + race_cat + sex_cat+ age_cat, 
      family = quasibinomial(), design = dstrata2021)
  
# export_summs(m1_2019, m1_2021, 
#              model.names = c("2019 Logit", "2021 Logit"), 
#             robust = "HC3", statistics = c(N = "nobs", R2 = "r.squared", adjR2 = "adj.r.squared"))
Code
OR.CI_2019 <- cbind("AOR" = exp(   coef(m1_2019)),
                       exp(confint(m1_2019,
                           df.resid=degf(m1_2019$survey.design))))[-1,]

OR.CI_2021 <- cbind("AOR" = exp(   coef(m1_2021)),
                       exp(confint(m1_2021,
                           df.resid=degf(m1_2021$survey.design))))[-1,]

library(gtsummary)
library(car)

t1 <- m1_2019 %>%
  tbl_regression(intercept = T,
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
) %>%
  modify_column_hide(p.value) %>%
      add_significance_stars(hide_ci = TRUE)%>%

  modify_caption("2019  vs 2021 ACS Data - Logit Models")

t2 <- m1_2019 %>%
  tbl_regression(intercept = F,     # No intercept in OR table
                 exponentiate = T,  # OR = exp(B)
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 # Use style_sigfig to keep 2 significant digits (e.g., 1.3)
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3),
) %>%
      add_significance_stars(hide_ci = TRUE)%>%

  add_global_p(keep = T, test.statistic = "F")

# tbl_merge(
#   tbls = list(t1, t2),
#   tab_spanner = c("**2019 Regression Coefficients**", "** 2019 Adjusted Odds Ratio**")
# )


t3 <- m1_2021 %>%
  tbl_regression(intercept = T,
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
) %>%
      add_significance_stars(hide_ci = TRUE)%>%

  modify_column_hide(p.value) %>%
  modify_caption("2021 ACS")

t4 <- m1_2021 %>%
  tbl_regression(intercept = F,     # No intercept in OR table
                 exponentiate = T,  # OR = exp(B)
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 # Use style_sigfig to keep 2 significant digits (e.g., 1.3)
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3) ) %>%
      add_significance_stars(hide_ci = TRUE)%>%

  add_global_p(keep = T, test.statistic = "F")

# tbl_merge(
#   tbls = list(t3, t4),
#   tab_spanner = c("**Regression Coefficients**", "**Adjusted Odds Ratio**")
# )


tbl_merge(
  tbls = list(t1, t2, t3, t4),
  tab_spanner = c("**2019 Logged Odds**", "**2019 Odds Ratio**", "**2021 Logged Odds**", "**2021 Odds Ratio**")
)
2019 vs 2021 ACS Data - Logit Models
Characteristic 2019 Logged Odds 2019 Odds Ratio 2021 Logged Odds 2021 Odds Ratio
log(OR)1,2 SE2 OR1,2 SE2 log(OR)1,2 SE2 OR1,2 SE2
(Intercept) -4.43*** 0.165 -3.41*** 0.091
CanWorkFromHome NA NA
    No WFH
    Some WFH 0.91*** 0.079 2.47*** 0.079 1.47*** 0.046 4.33*** 0.046
    Can WFH 0.81*** 0.063 2.25*** 0.063 1.48*** 0.038 4.40*** 0.038
county_pop_type NA NA
    Rural Counties
    Urban Counties 0.17* 0.077 1.19* 0.077 0.78*** 0.055 2.19*** 0.055
Number of own children in the household 0.01 0.029 1.01 0.029 -0.10*** 0.018 0.91*** 0.018
Number of own children under age 5 in household 0.13* 0.064 1.14* 0.064 0.15*** 0.043 1.16*** 0.043
CIHISPEED NA NA
    Has Access
    Lacks Access -0.39*** 0.093 0.68*** 0.093 -0.40*** 0.058 0.67*** 0.058
Total personal earned income 0.00 0.000 1.00 0.000
race_cat NA NA
    White
    Asian -0.17 0.105 0.84 0.105 0.17** 0.057 1.19** 0.057
    Black -0.53*** 0.118 0.59*** 0.118 -0.14* 0.059 0.87* 0.059
    Other -0.61*** 0.143 0.54*** 0.143 -0.29*** 0.052 0.75*** 0.052
sex_cat NA NA
    Female
    Male 0.00 0.052 1.00 0.052 -0.16*** 0.030 0.85*** 0.030
age_cat NA NA
    16to24
    25to34 0.82*** 0.169 2.27*** 0.169 0.55*** 0.080 1.73*** 0.080
    35to44 1.07*** 0.166 2.91*** 0.166 0.60*** 0.082 1.82*** 0.082
    45to54 1.19*** 0.164 3.28*** 0.164 0.43*** 0.081 1.53*** 0.081
    55to64 1.26*** 0.162 3.53*** 0.162 0.31*** 0.081 1.37*** 0.081
    65+ 1.56*** 0.168 4.78*** 0.168 0.44*** 0.092 1.55*** 0.092
tenKdollars 0.03*** 0.002 1.03*** 0.002
1 *p<0.05; **p<0.01; ***p<0.001
2 OR = Odds Ratio, SE = Standard Error

6.3.1 Prediction

Interpreation examples

What is the predicted probability (and 95% CI) that someone worked from home in 2019 and male, and between 25 and 34 years old?

Code
# Always include the intercept for prediction.
# Specify a 1 for the intercept, a # for each continuous predictor
# and a 1 for each non-reference level of a categorical variable.
# If a predictor is at its reference level, specify a 0 or exclude it.
#install.packages("faraway")
library(faraway)

ilogit(svycontrast(m1_2019, c("(Intercept)" = 1,
                              "age_cat25to34" =1                             )))
         contrast     SE
contrast 0.026317 0.1151

Recode marital status and add back into regression