5 Models for Paper

Work From Home: Who Can and Who Does?

Code

library(scales)
library(reldist)
library(pollster)
library(labelled)
library(weights)
library(tigris)
library(ipumsr)
library(srvyr)
library(survey)
library(tidyverse)
library(naniar)
library(gmodels)
library(gtsummary)
library(quarto)
library(huxtable) # for summ() and regression output formatting
library(jtools)
library(modelsummary)
library(car)

knitr::opts_chunk$set(warning=FALSE, message=FALSE)

load("./data/WFH.RData")
# hist(joined$INCEARN)



joined$race_cat <- factor(joined$race_cat, levels = c("White", "Asian", "Black", "Other") )
joined$CanWorkFromHome <- factor(joined$CanWorkFromHome, levels = c("No WFH", "Some WFH", "Can WFH") )
table(joined$CanWorkFromHome)


  No WFH Some WFH  Can WFH 
   65082    15901    41080

Code

joined <- joined %>% mutate(tenKdollars = INCEARN / 10000)
joined$CIHISPEED <- factor(joined$CIHISPEED, labels = c("10" = "Has Access", "20" = "Lacks Access") )

# hist(joined$INCEARN)

6 Regression Models

Regression for 2019 using survey object dstrata2019 & regression for 2021 using survey object dstrata2021.

6.1 Subset: Gender & Household Dynamics

Model supporting statements made in “Gender and Household Dynamics” section of Working From Home in Illinois paper.

6.1.1 Logit Model

Model created with a subset of survey data. Only uses workers who had Management type occupations, Could feasibly WFH, and were under the ages of 45.

Code

joined$occ_2dig_labels <- as.factor(joined$occ_2dig_labels)
joined$CanWorkFromHome <- as.factor(joined$CanWorkFromHome)

#gmodels::CrossTable(joined$CanWorkFromHome, joined$occ_2dig_labels, chisq = TRUE, prop.r = FALSE, prop.c = FALSE, prop.t = FALSE, prop.chisq = FALSE )

dstrata <- survey::svydesign(id = ~CLUSTER, strata = ~STRATA, weights = ~PERWT, data = joined) %>% 
  as_survey() %>%
  mutate(decile = ntile(INCEARN, 10))



# 2019 data turned into survey item
dstrata2019 <- joined %>% filter(YEAR==2019) 
dstrata2019 <- survey::svydesign(id = ~CLUSTER, strata = ~STRATA, 
                                 weights = ~PERWT, data = dstrata2019) %>% 
  as_survey() %>% 
  mutate(decile = ntile(INCEARN, 10))


dstrata2021 <- joined %>% filter(YEAR==2021) 

dstrata2021 <- survey::svydesign(id = ~CLUSTER, strata = ~STRATA, weights = ~PERWT, data = dstrata2021) %>% as_survey() %>%
  mutate(decile = ntile(INCEARN, 10))

A note on the p-value: the p-value is a test of significance for the null hypothesis H0 that

there is no difference in the log-odds of the outcome between the reference group (captured by the intercept) and the explanatory variable (or one of its categories), or that the difference between the two groups equals zero: H0:b1=0 and Ha:b1≠0

If p<0.5, we reject H0 as we have evidence to suggest that the difference between the two groups does not equal zero.

Log-odds are not the most intuitive to interpret. Instead of discussing the change in the log-odds, we can calculate the odds ratio for a given variable by exponentiating the coefficient.

Odds ratio is read “have x times the odds of the outcome of interest compared to those in the reference group”.

Reference group of the outcome variable: by default, R creates uses the lowest coded group as the reference. The reference category can be changed by using the ‘relevel()’.

Relationship between Odds and Probabilities:

Odds=P/(1-P)

P=odds/(1+odds)

Odds=exp(log-odds)

P=exp(log-odds)/(1+exp(log-odds))

6.1.1.1 Separate models for each year

Be careful when subsetting survey data. Messes up standard errors if filter() is used to select the subset instead using the subset = “” option for survey objects.

Code

m <- list(
  "2019 Logged Odds" = svyglm(did_wfh~ sex_cat + age_cat  + NCHILD + NCHLT5 + CIHISPEED + county_pop_type + race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    family = quasibinomial(),
                    design = dstrata2019),
  
   "2021 Logged Odds" =  svyglm(did_wfh~ sex_cat+ age_cat  + NCHILD + NCHLT5 + CIHISPEED + county_pop_type +race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    family = quasibinomial(),
                    design = dstrata2021),
  
   "2019 Odds Ratio" = svyglm(did_wfh~ sex_cat + age_cat  + NCHILD + NCHLT5 + CIHISPEED + county_pop_type + race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    family = quasibinomial(),
                    design = dstrata2019),
  
   "2021 Odds Ratio" =   svyglm(did_wfh~ sex_cat+ age_cat  + NCHILD + NCHLT5 + CIHISPEED + county_pop_type +race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    family = quasibinomial(),
                    design = dstrata2021)
) 

# 
# modelsummary(m, exponentiate = TRUE, # does standard and exponentiated models together
#              statistic = c("conf.int",
#                            "s.e. = {std.error}",
#                            "p = {p.value}"),
#              stars = TRUE, shape = term ~ model + statistic,
#              notes = list('Subset of ACS Survey Data for 2019 and 2021',
#                           'Odds Ratios Shown in Table'),
#              title = 'Predictions for WFH in 2019 vs 2021',
#              output = "table.docx")

modelsummary(m, 
             exponentiate = c(FALSE,FALSE,TRUE,TRUE), # does standard and exponentiated models together 
             statistic = c("s.e. = {std.error}", 
                           "p = {p.value}"),
             stars = TRUE, shape = term ~ model + statistic,
             notes = list('Subset of ACS Survey Data for 2019 and 2021',
                          'Odds Ratios Shown in Table'),
             title = 'Predictions for WFH in 2019 vs 2021')

Predictions for WFH in 2019 vs 2021
	2019 Logged Odds			2021 Logged Odds			2019 Odds Ratio			2021 Odds Ratio
	Est.	s.e. = S.E.	p = p	Est.	s.e. = S.E.	p = p	Est.	s.e. = S.E.	p = p	Est.	s.e. = S.E.	p = p
(Intercept)	−4.083***	s.e. = 0.375	p = <0.001	−2.201***	s.e. = 0.174	p = <0.001	0.017***	s.e. = 0.006	p = <0.001	0.111***	s.e. = 0.019	p = <0.001
sex_catMale	0.316**	s.e. = 0.113	p = 0.005	0.044	s.e. = 0.062	p = 0.476	1.372**	s.e. = 0.155	p = 0.005	1.045	s.e. = 0.065	p = 0.476
age_cat25to34	1.389***	s.e. = 0.338	p = <0.001	0.542***	s.e. = 0.140	p = <0.001	4.011***	s.e. = 1.355	p = <0.001	1.719***	s.e. = 0.240	p = <0.001
age_cat35to44	1.846***	s.e. = 0.340	p = <0.001	0.749***	s.e. = 0.147	p = <0.001	6.336***	s.e. = 2.151	p = <0.001	2.115***	s.e. = 0.310	p = <0.001
NCHILD	−0.166*	s.e. = 0.074	p = 0.024	−0.226***	s.e. = 0.040	p = <0.001	0.847*	s.e. = 0.062	p = 0.024	0.798***	s.e. = 0.032	p = <0.001
NCHLT5	0.167	s.e. = 0.107	p = 0.121	0.251***	s.e. = 0.066	p = <0.001	1.181	s.e. = 0.127	p = 0.121	1.285***	s.e. = 0.085	p = <0.001
CIHISPEEDLacks Access	−0.335	s.e. = 0.329	p = 0.308	−0.609***	s.e. = 0.153	p = <0.001	0.715	s.e. = 0.235	p = 0.308	0.544***	s.e. = 0.083	p = <0.001
county_pop_typeUrban Counties	0.018	s.e. = 0.245	p = 0.940	1.384***	s.e. = 0.127	p = <0.001	1.019	s.e. = 0.249	p = 0.940	3.989***	s.e. = 0.507	p = <0.001
race_catAsian	−0.220	s.e. = 0.199	p = 0.270	0.362***	s.e. = 0.102	p = <0.001	0.803	s.e. = 0.160	p = 0.270	1.436***	s.e. = 0.147	p = <0.001
race_catBlack	−0.238	s.e. = 0.291	p = 0.414	−0.330*	s.e. = 0.139	p = 0.018	0.788	s.e. = 0.230	p = 0.414	0.719*	s.e. = 0.100	p = 0.018
race_catOther	−0.365	s.e. = 0.300	p = 0.224	−0.478***	s.e. = 0.103	p = <0.001	0.694	s.e. = 0.208	p = 0.224	0.620***	s.e. = 0.064	p = <0.001
Num.Obs.	7344			7389			7344			7389
R2	0.024			0.049			0.024			0.049
R2 Adj.	−0.155			−0.114			−0.155			−0.114
AIC	3783.8			9246.9			3783.8			9246.9
BIC	3851.7			9319.8			3851.7			9319.8
Log.Lik.	−1872.464			−4606.447			−1872.464			−4606.447
RMSE	0.25			0.47			0.25			0.47
+ p < 0.1, * p < 0.05, p < 0.01, * p < 0.001
Subset of ACS Survey Data for 2019 and 2021
Odds Ratios Shown in Table

6.1.2 OLS Model

Code

m <- list(
  "2019 OLS" = svyglm(did_wfh~ sex_cat + age_cat + NCHLT5 + CIHISPEED + county_pop_type +race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    design = dstrata2019),
  
  "2021 OLS" =  svyglm(did_wfh ~ sex_cat+ age_cat + NCHLT5 + CIHISPEED + county_pop_type +race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    design = dstrata2021)
) 

# 
# modelsummary(m, exponentiate = TRUE, # does standard and exponentiated models together
#              statistic = c("conf.int",
#                            "s.e. = {std.error}",
#                            "p = {p.value}"),
#              stars = TRUE, shape = term ~ model + statistic,
#              notes = list('Subset of ACS Survey Data for 2019 and 2021',
#                           'Odds Ratios Shown in Table'),
#              title = 'Predictions for WFH in 2019 vs 2021',
#              output = "table.docx")

modelsummary(m,# exponentiate = TRUE, # does standard and exponentiated models together 
             statistic = c(
                           "s.e. = {std.error}", 
                           "p = {p.value}"),
             stars = TRUE, shape = term ~ model + statistic,
             notes = list('Subset of ACS Survey Data for 2019 and 2021'),
             title = 'OLS - Separate Models for WFH in 2019 vs 2021')

OLS - Separate Models for WFH in 2019 vs 2021
	2019 OLS			2021 OLS
	Est.	s.e. = S.E.	p = p	Est.	s.e. = S.E.	p = p
(Intercept)	0.010	s.e. = 0.014	p = 0.465	0.052+	s.e. = 0.028	p = 0.060
sex_catMale	0.023**	s.e. = 0.008	p = 0.003	0.014	s.e. = 0.014	p = 0.323
age_cat25to34	0.048***	s.e. = 0.009	p = <0.001	0.106***	s.e. = 0.027	p = <0.001
age_cat35to44	0.071***	s.e. = 0.009	p = <0.001	0.109***	s.e. = 0.027	p = <0.001
NCHLT5	0.001	s.e. = 0.007	p = 0.845	0.009	s.e. = 0.012	p = 0.462
CIHISPEEDLacks Access	−0.019	s.e. = 0.016	p = 0.227	−0.115***	s.e. = 0.026	p = <0.001
county_pop_typeUrban Counties	0.004	s.e. = 0.015	p = 0.778	0.263***	s.e. = 0.017	p = <0.001
race_catAsian	−0.014	s.e. = 0.013	p = 0.283	0.092***	s.e. = 0.025	p = <0.001
race_catBlack	−0.015	s.e. = 0.016	p = 0.358	−0.078**	s.e. = 0.030	p = 0.010
race_catOther	−0.021	s.e. = 0.015	p = 0.172	−0.106***	s.e. = 0.021	p = <0.001
Num.Obs.	7344			7389
R2	0.010			0.052
R2 Adj.	−0.172			−0.111
AIC	15087.7			43997.0
BIC	197538.9			55704.4
Log.Lik.	−98720.466			−27803.205
RMSE	0.25			0.47
+ p < 0.1, * p < 0.05, p < 0.01, * p < 0.001
Subset of ACS Survey Data for 2019 and 2021

Code

#|column: page
m4_2019 <- svyglm(did_wfh ~ sex_cat+ age_cat + NCHLT5 + CIHISPEED + county_pop_type+ tenKdollars +race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    design = dstrata2019)
  
m4_2021 <- svyglm(did_wfh ~ sex_cat+ age_cat + NCHLT5 + CIHISPEED + county_pop_type+tenKdollars +race_cat,
                    subset = AGE < 45 & CanWorkFromHome == "Can WFH" & occ_2dig_labels == "Management, Business, Science, Arts",
                    design = dstrata2021)


# export_summs(m4_2019, m4_2021, model.names = c("Subset 2019 OLS", " Subset 2021 OLS"))


t1 <- m4_2019 %>%
  tbl_regression(intercept = T,
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)

                 ) %>%
    add_significance_stars(hide_ci = TRUE)%>%

  add_global_p(keep = T, test.statistic = "F")

t2 <- m4_2021 %>%
  tbl_regression(intercept = T,     # No intercept in OR table
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 # Use style_sigfig to keep 2 significant digits (e.g., 1.3)
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)
                 ) %>%
    add_significance_stars(hide_ci = TRUE)%>%

  add_global_p(keep = T, test.statistic = "F")


tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**OLS 2019 Subset**", "**OLS 2021 Subset**")
)

Characteristic	OLS 2019 Subset		OLS 2021 Subset
Characteristic	Beta¹	SE²	Beta¹	SE²
(Intercept)	0.01	0.014	0.06*	0.028
sex_cat	NA		NA
Female	—	—	—	—
Male	0.02**	0.008	-0.01	0.014
age_cat	NA		NA
16to24	—	—	—	—
25to34	0.05***	0.009	0.07**	0.027
35to44	0.07***	0.010	0.06*	0.028
Number of own children under age 5 in household	0.00	0.007	0.00	0.012
CIHISPEED	NA		NA
Has Access	—	—	—	—
Lacks Access	-0.02	0.016	-0.10***	0.025
county_pop_type	NA		NA
Rural Counties	—	—	—	—
Urban Counties	0.00	0.016	0.24***	0.017
tenKdollars	0.00	0.001	0.01***	0.001
race_cat	NA		NA
White	—	—	—	—
Asian	-0.01	0.013	0.09***	0.025
Black	-0.02	0.016	-0.06*	0.030
Other	-0.02	0.015	-0.09***	0.021
¹ p<0.05; p<0.01; **p<0.001
² SE = Standard Error

6.2 Full Sample - Models with all Workers

6.2.1 OLS

Code

both <- svyglm(did_wfh~ CanWorkFromHome+county_pop_type+ NCHILD + NCHLT5 +
                 tenKdollars +race_cat+ sex_cat + age_cat + CIHISPEED + 
                 occ_2dig_labels + factor(YEAR), design = dstrata)



ols2019 <- svyglm(did_wfh~ CanWorkFromHome+county_pop_type+ NCHILD + NCHLT5 +
                    tenKdollars +race_cat+ sex_cat + age_cat + CIHISPEED + 
                    occ_2dig_labels, design = dstrata2019)


ols2021 <- svyglm(did_wfh~ CanWorkFromHome+county_pop_type+ NCHILD + NCHLT5 +
                    tenKdollars +race_cat+ sex_cat + age_cat + CIHISPEED + 
                    occ_2dig_labels, design = dstrata2021)

#summary(ols2021)

# export_summs(both, ols2021, model.names = c("Both Years OLS", " Only 2021 OLS"))

t1 <- ols2019 %>%
  tbl_regression(intercept = T,
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)

                 ) %>%
  add_significance_stars(hide_ci = TRUE) %>%
  add_global_p(keep = T, test.statistic = "F")

t2 <- ols2021 %>%
  tbl_regression(intercept = T,     # No intercept in OR table
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 # Use style_sigfig to keep 2 significant digits (e.g., 1.3)
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)
                 ) %>%
  add_significance_stars(hide_ci = TRUE)%>%
  add_global_p(keep = T, test.statistic = "F")


tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**OLS 2019 Full Model**", "**OLS 2021 Full Model**")
)

Characteristic	OLS 2019 Full Model		OLS 2021 Full Model
Characteristic	Beta¹	SE²	Beta¹	SE²
(Intercept)	0.01	0.005	0.03***	0.009
CanWorkFromHome	NA		NA
No WFH	—	—	—	—
Some WFH	0.04***	0.005	0.17***	0.008
Can WFH	0.03***	0.004	0.17***	0.006
county_pop_type	NA		NA
Rural Counties	—	—	—	—
Urban Counties	0.01*	0.003	0.08***	0.005
Number of own children in the household	0.00	0.001	-0.01***	0.002
Number of own children under age 5 in household	0.01*	0.004	0.02**	0.007
tenKdollars	0.00	0.000	0.00***	0.000
race_cat	NA		NA
White	—	—	—	—
Asian	-0.01*	0.005	0.03**	0.010
Black	-0.02***	0.004	-0.02	0.008
Other	-0.02***	0.004	-0.03***	0.006
sex_cat	NA		NA
Female	—	—	—	—
Male	0.00	0.003	-0.01***	0.004
age_cat	NA		NA
16to24	—	—	—	—
25to34	0.02***	0.004	0.04***	0.008
35to44	0.03***	0.004	0.05***	0.008
45to54	0.04***	0.004	0.03***	0.008
55to64	0.04***	0.004	0.01	0.008
65+	0.06***	0.006	0.03**	0.010
CIHISPEED	NA		NA
Has Access	—	—	—	—
Lacks Access	-0.01***	0.003	-0.04***	0.006
occ_2dig_labels	NA		NA
Management, Business, Science, Arts	—	—	—	—
Military	-0.04***	0.005	-0.05	0.037
Natural Resources, Construction	-0.01*	0.006	-0.06***	0.008
Production, Transportation	-0.02***	0.004	-0.06***	0.007
Sales & Office Jobs	0.00	0.005	-0.02***	0.007
Service Occupations	0.00	0.005	-0.06***	0.007
¹ p<0.05; p<0.01; **p<0.001
² SE = Standard Error

6.2.2 Logit

6.2.3 OLS - Drop WFH Feasibility

Code

ols2019 <- svyglm(did_wfh~ occ_2dig_labels+county_pop_type + NCHLT5 + spouse +
                    tenKdollars +race_cat+ sex_cat + age_cat + CIHISPEED, design = dstrata2019)


ols2021 <- svyglm(did_wfh~ occ_2dig_labels+county_pop_type + NCHLT5 + spouse +
                    tenKdollars +race_cat+ sex_cat + age_cat + CIHISPEED, design = dstrata2021)

#summary(ols2021)

# export_summs(both, ols2021, model.names = c("Both Years OLS", " Only 2021 OLS"))

t1 <- ols2019 %>%
  tbl_regression(intercept = T,
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)

                 ) %>%
  add_significance_stars(hide_ci = TRUE) %>%
  add_global_p(keep = T, test.statistic = "F")

t2 <- ols2021 %>%
  tbl_regression(intercept = T,     # No intercept in OR table
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 # Use style_sigfig to keep 2 significant digits (e.g., 1.3)
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)
                 ) %>%
  add_significance_stars(hide_ci = TRUE)%>%
  add_global_p(keep = T, test.statistic = "F")


tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**OLS 2019 Full Model**", "**OLS 2021 Full Model**")
)

Characteristic	OLS 2019 Full Model		OLS 2021 Full Model
Characteristic	Beta¹	SE²	Beta¹	SE²
(Intercept)	0.03***	0.005	0.14***	0.008
occ_2dig_labels	NA		NA
Management, Business, Science, Arts	—	—	—	—
Military	-0.06***	0.004	-0.19***	0.037
Natural Resources, Construction	-0.04***	0.005	-0.19***	0.007
Production, Transportation	-0.05***	0.004	-0.19***	0.006
Sales & Office Jobs	0.00	0.005	-0.06***	0.007
Service Occupations	-0.02***	0.004	-0.17***	0.006
county_pop_type	NA		NA
Rural Counties	—	—	—	—
Urban Counties	0.01**	0.003	0.09***	0.005
Number of own children under age 5 in household	0.01*	0.003	0.01	0.006
tenKdollars	0.00	0.000	0.01***	0.000
race_cat	NA		NA
White	—	—	—	—
Asian	-0.01*	0.005	0.02	0.011
Black	-0.02***	0.004	-0.02*	0.008
Other	-0.02***	0.004	-0.04***	0.006
sex_cat	NA		NA
Female	—	—	—	—
Male	0.00	0.003	-0.01**	0.004
age_cat	NA		NA
16to24	—	—	—	—
25to34	0.02***	0.004	0.06***	0.008
35to44	0.03***	0.004	0.06***	0.008
45to54	0.04***	0.004	0.03***	0.007
55to64	0.04***	0.004	0.02**	0.007
65+	0.06***	0.006	0.04***	0.010
CIHISPEED	NA		NA
Has Access	—	—	—	—
Lacks Access	-0.02***	0.003	-0.04***	0.006
¹ p<0.05; p<0.01; **p<0.001
² SE = Standard Error

6.2.4 OLS - Drop Occupations

Code

ols2019 <- svyglm(did_wfh ~ CanWorkFromHome + county_pop_type+ NCHILD + NCHLT5 +
                    tenKdollars + race_cat + sex_cat + age_cat + CIHISPEED, design = dstrata2019)


ols2021 <- svyglm(did_wfh ~ CanWorkFromHome + county_pop_type + NCHILD + NCHLT5 +
                    tenKdollars + race_cat + sex_cat + age_cat + CIHISPEED, design = dstrata2021)

#summary(ols2021)

# export_summs(both, ols2021, model.names = c("Both Years OLS", " Only 2021 OLS"))

t1 <- ols2019 %>%
  tbl_regression(intercept = T,
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)

                 ) %>%
  add_significance_stars(hide_ci = TRUE) %>%
  add_global_p(keep = T, test.statistic = "F")

t2 <- ols2021 %>%
  tbl_regression(intercept = T,     # No intercept in OR table
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 # Use style_sigfig to keep 2 significant digits (e.g., 1.3)
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3)
                 ) %>%
  add_significance_stars(hide_ci = TRUE)%>%
  add_global_p(keep = T, test.statistic = "F")


tbl_merge(
  tbls = list(t1, t2),
  tab_spanner = c("**OLS 2019 Full Model**", "**OLS 2021 Full Model**")
)

Characteristic	OLS 2019 Full Model		OLS 2021 Full Model
Characteristic	Beta¹	SE²	Beta¹	SE²
(Intercept)	0.01	0.004	-0.01	0.007
CanWorkFromHome	NA		NA
No WFH	—	—	—	—
Some WFH	0.05***	0.005	0.20***	0.008
Can WFH	0.04***	0.003	0.20***	0.005
county_pop_type	NA		NA
Rural Counties	—	—	—	—
Urban Counties	0.01*	0.003	0.08***	0.005
Number of own children in the household	0.00	0.001	-0.01***	0.002
Number of own children under age 5 in household	0.01*	0.004	0.02***	0.007
tenKdollars	0.00	0.000	0.01***	0.000
race_cat	NA		NA
White	—	—	—	—
Asian	-0.01	0.005	0.03**	0.010
Black	-0.02***	0.004	-0.02*	0.008
Other	-0.02***	0.004	-0.03***	0.006
sex_cat	NA		NA
Female	—	—	—	—
Male	0.00	0.003	-0.02***	0.004
age_cat	NA		NA
16to24	—	—	—	—
25to34	0.02***	0.004	0.05***	0.008
35to44	0.03***	0.004	0.06***	0.008
45to54	0.03***	0.004	0.03***	0.008
55to64	0.04***	0.004	0.01	0.007
65+	0.06***	0.006	0.03**	0.010
CIHISPEED	NA		NA
Has Access	—	—	—	—
Lacks Access	-0.02***	0.003	-0.04***	0.006
¹ p<0.05; p<0.01; **p<0.001
² SE = Standard Error

6.3 Additional Models

Code

#svyglm(did_wfh~ INCEARN + race_cat+ SEX+ CIHISPEED+CINETHH + occ_2dig_labels+CanWorkFromHome+county_pop_type, design = dstrata2021) %>% summary()



### All variations of family = "" had same results 
# m1_2019 <- svyglm(did_wfh~ INCEARN +race_cat+ SEX+ AGE + CIHISPEED*CINETHH + CanWorkFromHome+county_pop_type+NCHILD+MARST + NCHLT5, 
#       family = "binomial", design = dstrata2019)
# 
# m1_2021<- svyglm(did_wfh~ INCEARN +race_cat+ SEX+ AGE + CIHISPEED*CINETHH + CanWorkFromHome+county_pop_type+NCHILD+MARST + NCHLT5, 
#       family = "binomial", design = dstrata2021)
#   
# export_summs(m1_2019, m1_2021, 
#              model.names = c("2019 Logit", "2021 Logit"), 
#             robust = "HC3", statistics = c(N = "nobs", R2 = "r.squared", adjR2 = "adj.r.squared"))
# 
# 
# m1_2019 <- svyglm(did_wfh~ INCEARN +race_cat+ SEX+ AGE + CIHISPEED*CINETHH + CanWorkFromHome+county_pop_type+NCHILD+MARST + NCHLT5, 
#        family = quasibinomial(), design = dstrata2019)
# 
# m1_2021<- svyglm(did_wfh~ INCEARN +race_cat+ SEX+ AGE + CIHISPEED*CINETHH + CanWorkFromHome+county_pop_type+NCHILD+MARST + NCHLT5, 
#     family=quasibinomial(),   design = dstrata2021)
#   
# 
# export_summs(m1_2019, m1_2021, 
#              model.names = c("2019 QuasiBi", "2021 QuasiBi"), 
#             robust = "HC3", statistics = c(N = "nobs", R2 = "r.squared", adjR2 = "adj.r.squared"))


m1_2019 <- svyglm(did_wfh ~ CanWorkFromHome + county_pop_type + NCHILD + NCHLT5  + CIHISPEED + INCEARN + race_cat + sex_cat + age_cat, 
        family = quasibinomial(),design = dstrata2019)

m1_2021<- svyglm(did_wfh ~ CanWorkFromHome + county_pop_type + NCHILD + NCHLT5  + CIHISPEED + tenKdollars + race_cat + sex_cat+ age_cat, 
      family = quasibinomial(), design = dstrata2021)
  
# export_summs(m1_2019, m1_2021, 
#              model.names = c("2019 Logit", "2021 Logit"), 
#             robust = "HC3", statistics = c(N = "nobs", R2 = "r.squared", adjR2 = "adj.r.squared"))

Code

OR.CI_2019 <- cbind("AOR" = exp(   coef(m1_2019)),
                       exp(confint(m1_2019,
                           df.resid=degf(m1_2019$survey.design))))[-1,]

OR.CI_2021 <- cbind("AOR" = exp(   coef(m1_2021)),
                       exp(confint(m1_2021,
                           df.resid=degf(m1_2021$survey.design))))[-1,]

library(gtsummary)
library(car)

t1 <- m1_2019 %>%
  tbl_regression(intercept = T,
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
) %>%
  modify_column_hide(p.value) %>%
      add_significance_stars(hide_ci = TRUE)%>%

  modify_caption("2019  vs 2021 ACS Data - Logit Models")

t2 <- m1_2019 %>%
  tbl_regression(intercept = F,     # No intercept in OR table
                 exponentiate = T,  # OR = exp(B)
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 # Use style_sigfig to keep 2 significant digits (e.g., 1.3)
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3),
) %>%
      add_significance_stars(hide_ci = TRUE)%>%

  add_global_p(keep = T, test.statistic = "F")

# tbl_merge(
#   tbls = list(t1, t2),
#   tab_spanner = c("**2019 Regression Coefficients**", "** 2019 Adjusted Odds Ratio**")
# )


t3 <- m1_2021 %>%
  tbl_regression(intercept = T,
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
) %>%
      add_significance_stars(hide_ci = TRUE)%>%

  modify_column_hide(p.value) %>%
  modify_caption("2021 ACS")

t4 <- m1_2021 %>%
  tbl_regression(intercept = F,     # No intercept in OR table
                 exponentiate = T,  # OR = exp(B)
                 # Use style_number to round to 2 digits (e.g., 1.27)
                 estimate_fun = function(x) style_number(x, digits = 2),
                 # Use style_sigfig to keep 2 significant digits (e.g., 1.3)
                 pvalue_fun   = function(x) style_pvalue(x, digits = 3) ) %>%
      add_significance_stars(hide_ci = TRUE)%>%

  add_global_p(keep = T, test.statistic = "F")

# tbl_merge(
#   tbls = list(t3, t4),
#   tab_spanner = c("**Regression Coefficients**", "**Adjusted Odds Ratio**")
# )


tbl_merge(
  tbls = list(t1, t2, t3, t4),
  tab_spanner = c("**2019 Logged Odds**", "**2019 Odds Ratio**", "**2021 Logged Odds**", "**2021 Odds Ratio**")
)

2019 vs 2021 ACS Data - Logit Models
Characteristic	2019 Logged Odds		2019 Odds Ratio		2021 Logged Odds		2021 Odds Ratio
Characteristic	log(OR)^1,2	SE²	OR^1,2	SE²	log(OR)^1,2	SE²	OR^1,2	SE²
(Intercept)	-4.43***	0.165			-3.41***	0.091
CanWorkFromHome			NA				NA
No WFH	—	—	—	—	—	—	—	—
Some WFH	0.91***	0.079	2.47***	0.079	1.47***	0.046	4.33***	0.046
Can WFH	0.81***	0.063	2.25***	0.063	1.48***	0.038	4.40***	0.038
county_pop_type			NA				NA
Rural Counties	—	—	—	—	—	—	—	—
Urban Counties	0.17*	0.077	1.19*	0.077	0.78***	0.055	2.19***	0.055
Number of own children in the household	0.01	0.029	1.01	0.029	-0.10***	0.018	0.91***	0.018
Number of own children under age 5 in household	0.13*	0.064	1.14*	0.064	0.15***	0.043	1.16***	0.043
CIHISPEED			NA				NA
Has Access	—	—	—	—	—	—	—	—
Lacks Access	-0.39***	0.093	0.68***	0.093	-0.40***	0.058	0.67***	0.058
Total personal earned income	0.00	0.000	1.00	0.000
race_cat			NA				NA
White	—	—	—	—	—	—	—	—
Asian	-0.17	0.105	0.84	0.105	0.17**	0.057	1.19**	0.057
Black	-0.53***	0.118	0.59***	0.118	-0.14*	0.059	0.87*	0.059
Other	-0.61***	0.143	0.54***	0.143	-0.29***	0.052	0.75***	0.052
sex_cat			NA				NA
Female	—	—	—	—	—	—	—	—
Male	0.00	0.052	1.00	0.052	-0.16***	0.030	0.85***	0.030
age_cat			NA				NA
16to24	—	—	—	—	—	—	—	—
25to34	0.82***	0.169	2.27***	0.169	0.55***	0.080	1.73***	0.080
35to44	1.07***	0.166	2.91***	0.166	0.60***	0.082	1.82***	0.082
45to54	1.19***	0.164	3.28***	0.164	0.43***	0.081	1.53***	0.081
55to64	1.26***	0.162	3.53***	0.162	0.31***	0.081	1.37***	0.081
65+	1.56***	0.168	4.78***	0.168	0.44***	0.092	1.55***	0.092
tenKdollars					0.03***	0.002	1.03***	0.002
¹ p<0.05; p<0.01; **p<0.001
² OR = Odds Ratio, SE = Standard Error

6.3.1 Prediction

Interpreation examples

What is the predicted probability (and 95% CI) that someone worked from home in 2019 and male, and between 25 and 34 years old?

Code

# Always include the intercept for prediction.
# Specify a 1 for the intercept, a # for each continuous predictor
# and a 1 for each non-reference level of a categorical variable.
# If a predictor is at its reference level, specify a 0 or exclude it.
#install.packages("faraway")
library(faraway)

ilogit(svycontrast(m1_2019, c("(Intercept)" = 1,
                              "age_cat25to34" =1                             )))

         contrast     SE
contrast 0.026317 0.1151

Recode marital status and add back into regression