mosaic_2020/logistic_regression.R

97 lines
2.8 KiB
R
Executable file

#!/usr/bin/Rscript
# Show the current working directory, switch to the project checkout (the
# source() call further down uses a path relative to it), then show the
# working directory again to confirm the change.
# NOTE(review): the project path is hard-coded to one user's home layout.
getwd()
setwd("~/git/mosaic_2020/")
getwd()
########################################################################
# TASK: Run regression analysis
# npa
########################################################################
#=================================================================================
# TO DO:
# Simple stats between obese and non-obese groups, to decide what to include in the regression analysis
# Include the statistically significant NPA parameters
# Rerun graphs and plots without asthma?
#=================================================================================
#====================
# Input: source data
#====================
# Run the clinical data preparation script; the objects used below
# (`reg_data`, `fp_adults`, `metadata_all`) are presumably created by it.
source("data_extraction_formatting_clinical.R")
# Remove the objects that this analysis does not use.
rm(list = c("fp_adults", "metadata_all"))
########################################################################
# Work under a separate name so later edits do not modify `reg_data`.
my_data <- reg_data
#########################################################################
# Inspect the class of every column and convert all character columns to
# factors. Intermediate results are printed for a visual check.

# Column classes before conversion
lapply(my_data, class)

# is.character()/is.factor() return exactly one TRUE/FALSE per column, also
# for columns that carry more than one class (e.g. ordered factors), which a
# `class(x) == "..."` comparison does not handle reliably.
character_vars <- vapply(my_data, is.character, logical(1))
character_vars
table(character_vars)

factor_vars <- vapply(my_data, is.factor, logical(1))
table(factor_vars)

# List-style indexing (no comma) always yields a data frame, even when only
# one column is selected; `my_data[, character_vars]` would drop to a bare
# vector in that case and lapply() would then iterate over its elements.
# Skip the assignment entirely when there is nothing to convert.
if (any(character_vars)) {
  my_data[character_vars] <- lapply(my_data[character_vars], as.factor)
}

# Recompute after the conversion
factor_vars <- vapply(my_data, is.factor, logical(1))
factor_vars
table(factor_vars)

# check again
lapply(my_data, class)

# ethnicity is converted explicitly: the step above only picks it up when it
# is stored as character.
table(my_data$ethnicity)
my_data$ethnicity <- as.factor(my_data$ethnicity)
class(my_data$ethnicity)
colnames(my_data)
# Names of the candidate predictors that are each modelled on their own
# against `death`. Commented-out names are excluded but kept for reference.
reg_param <- c(
  "age",
  "age_bins",
  # "death",  # outcome
  "asthma",
  "obesity",
  "gender",
  "los",
  "o2_sat_admis",
  # "logistic_outcome",
  # "steroid_ics",
  "ethnicity",
  "smoking",
  "sfluv",
  "h1n1v",
  "ia_cxr",
  "max_resp_score",
  "T1_resp_score",
  "com_noasthma",
  "onset_initial_bin"
)
# Univariate screen: fit one logistic model of `death` per candidate
# predictor and print the model summary followed by the odds ratios with
# their confidence intervals.
for (i in reg_param) {
  # Build the formula `death ~ <predictor>` from the predictor name.
  p_form <- reformulate(i, response = "death")
  model_reg <- glm(p_form, family = binomial, data = my_data)
  print(summary(model_reg))

  # Exponentiate coefficients and interval bounds to the odds-ratio scale.
  odds_ratios <- exp(cbind(OR = coef(model_reg), confint(model_reg)))
  print(odds_ratios)
  # print(PseudoR2(model_reg))
  cat("=================================================================================\n")
}
# Multivariable logistic regression of `death` on the selected predictors.
# Commented-out terms are left out of the model but kept for reference.
# The formula must not end with a dangling `+` before the `family` argument:
# that is a parse error ("unexpected ','").
# NOTE(review): `o2_sat_bin` is used here while the univariate list uses
# `o2_sat_admis` - confirm that this column exists in `my_data`.
full_mod <- glm(
  death ~ asthma +
    gender +
    age_bins +
    los +
    # ethnicity +
    onset_initial_bin +
    o2_sat_bin +
    com_noasthma +
    obesity +
    # ia_cxr +
    smoking +
    # sfluv +
    # h1n1v +
    max_resp_score +
    T1_resp_score,
  family = "binomial",
  data = my_data
)
summary(full_mod)