added clinical data extraction and logistic regression script
This commit is contained in:
parent
05f1f716f1
commit
4766d6d450
2 changed files with 420 additions and 0 deletions
97
logistic_regression.R
Executable file
97
logistic_regression.R
Executable file
|
@ -0,0 +1,97 @@
|
|||
#!/usr/bin/Rscript
# Report the launch directory, move into the project checkout so that the
# relative path given to source() further down resolves, then report the
# directory again to confirm the switch.
getwd()
setwd("~/git/mosaic_2020/")
getwd()
|
||||
########################################################################
|
||||
# TASK: Run regression analysis
|
||||
# npa
|
||||
########################################################################
|
||||
#=================================================================================
|
||||
# TO DO:
|
||||
# Simple stats between obese and non-obese groups, to decide what to include in the regression analysis
|
||||
# Include the statistically significant NPA parameters
|
||||
# Rerun graphs and plots without asthma?
|
||||
#=================================================================================
|
||||
|
||||
#====================
|
||||
# Input: source data
|
||||
#====================
|
||||
# Run the clinical extraction/formatting script in this session; the objects
# referenced below (fp_adults, metadata_all, reg_data) are expected to be
# created by it.
source("data_extraction_formatting_clinical.R")

# Remove the objects this script does not use.
rm(list = c("fp_adults", "metadata_all"))

########################################################################
# Everything below works on my_data.
my_data <- reg_data
|
||||
#########################################################################
|
||||
# Inspect the class of every column, convert character columns to factors,
# then inspect again. The diagnostics stay as bare top-level expressions so
# they auto-print when the script is run.
lapply(my_data, class)

# One TRUE/FALSE per column. vapply() with a type predicate always yields a
# named logical vector, including for columns that carry more than one class
# (e.g. ordered factors), where comparing class() against a single string is
# unreliable.
character_vars <- vapply(my_data, is.character, logical(1))
character_vars
table(character_vars)

factor_vars <- vapply(my_data, is.factor, logical(1))
table(factor_vars)

# List-style indexing (no comma) always returns a data frame. With
# my_data[, character_vars] a single matching column collapses to a plain
# vector, lapply() then iterates over its elements, and the assignment breaks.
# The guard makes the no-character-column case an explicit no-op.
if (any(character_vars)) {
  my_data[character_vars] <- lapply(my_data[character_vars], as.factor)
}
factor_vars <- vapply(my_data, is.factor, logical(1))
factor_vars
table(factor_vars)

# check again
lapply(my_data, class)

# ethnicity is converted explicitly; presumably it is not stored as character
# and so is not caught by the conversion above -- TODO confirm.
table(my_data$ethnicity)
my_data$ethnicity <- as.factor(my_data$ethnicity)
class(my_data$ethnicity)

colnames(my_data)
|
||||
# Names of the candidate predictors, one string per column of interest.
# Entries that are commented out are not part of the vector.
reg_param <- c(
  "age",
  "age_bins",
  # "death",  # outcome
  "asthma",
  "obesity",
  "gender",
  "los",
  "o2_sat_admis",
  # "logistic_outcome",
  # "steroid_ics",
  "ethnicity",
  "smoking",
  "sfluv",
  "h1n1v",
  "ia_cxr",
  "max_resp_score",
  "T1_resp_score",
  "com_noasthma",
  "onset_initial_bin"
)
|
||||
|
||||
# Single-predictor screen: for every name in reg_param, fit death ~ <name> as
# a binomial GLM on my_data, print the model summary, then print the
# exponentiated coefficients (column OR) next to the exponentiated confint()
# bounds. A separator line is written after each model.
for (i in reg_param) {
  # print (i)
  # Build the one-term formula from the predictor name.
  p_form <- reformulate(i, response = "death")
  model_reg <- glm(p_form, family = binomial, data = my_data)
  print(summary(model_reg))
  print(exp(cbind(
    OR = coef(model_reg),
    confint(model_reg)
  )))
  # print (PseudoR2(model_reg))
  cat("=================================================================================\n")
}
|
||||
|
||||
|
||||
# Multivariable logistic regression of death on the covariates listed below,
# fitted on my_data, followed by its summary.
# The formula previously ended with a dangling `+` directly before the
# `family` argument, which is a parse error; it now ends at T1_resp_score.
# Commented-out terms are kept for reference and are not part of the model.
# NOTE(review): o2_sat_bin is used here while the single-predictor list uses
# o2_sat_admis -- confirm that this difference is intended.
full_mod <- glm(
  death ~ asthma +
    gender +
    age_bins +
    los +
    # ethnicity +
    onset_initial_bin +
    o2_sat_bin +
    com_noasthma +
    obesity +
    # ia_cxr +
    smoking +
    # sfluv +
    # h1n1v +
    max_resp_score +
    T1_resp_score,
  family = "binomial",
  data = my_data
)

summary(full_mod)
|
Loading…
Add table
Add a link
Reference in a new issue