added clinical data extraction and logistic regression script

2020-11-17 17:30:05 +00:00 · 2020-11-17 17:30:05 +00:00 · 4766d6d450
commit 4766d6d450
parent 05f1f716f1
2 changed files with 420 additions and 0 deletions
--- a/logistic_regression.R
+++ b/logistic_regression.R
@@ -0,0 +1,97 @@
+#!/usr/bin/Rscript
+getwd() # show the directory the script was started from
+setwd("~/git/mosaic_2020/") # relative paths used later resolve against this directory
+getwd() # show the directory again after the switch
+########################################################################
+# TASK: Run regression analysis
+# npa
+########################################################################
+#=================================================================================
+# TO DO:
+# Simple stats between obese and non-obese groups, to decide what to include in the regression analysis
+# Include the statistically significant NPA parameters
+# Rerun graphs and plots without asthma?
+#=================================================================================
+
+#====================
+# Input: run the clinical data extraction/formatting script
+#====================
+source("data_extraction_formatting_clinical.R")
+# remove two objects not used below (presumably left behind by the sourced script)
+rm(list = c("fp_adults", "metadata_all"))
+
+########################################################################
+my_data <- reg_data
+#########################################################################
+# Work on a copy of reg_data: list the class of every column first
+lapply(my_data, class)
+# is.character() gives exactly one TRUE/FALSE per column, however many classes it has
+character_vars <- vapply(my_data, is.character, logical(1))
+character_vars
+table(character_vars)
+
+factor_vars <- vapply(my_data, is.factor, logical(1))
+table(factor_vars)
+# list-style [ ] keeps a data frame even when a single column is selected
+my_data[character_vars] <- lapply(my_data[character_vars], as.factor)
+factor_vars <- vapply(my_data, is.factor, logical(1))
+factor_vars
+table(factor_vars)
+
+# check again: character columns should now be reported as factor
+lapply(my_data, class)
+
+table(my_data$ethnicity)
+my_data$ethnicity <- as.factor(my_data$ethnicity)
+class(my_data$ethnicity)
+
+colnames(my_data)
+reg_param <- c("age",
+               "age_bins",
+               # "death",  # outcome
+               "asthma",
+               "obesity",
+               "gender",
+               "los",
+               "o2_sat_admis",
+               # "logistic_outcome",
+               # "steroid_ics",
+               "ethnicity",
+               "smoking",
+               "sfluv",
+               "h1n1v",
+               "ia_cxr",
+               "max_resp_score",
+               "T1_resp_score",
+               "com_noasthma",
+               "onset_initial_bin")
+
+for (i in reg_param) {
+  # print (i)
+  p_form <- as.formula(paste0("death ~ ", i)) # one single-predictor formula per pass
+  model_reg <- glm(p_form, family = binomial, data = my_data)
+  print(summary(model_reg))
+  print(exp(cbind(OR = coef(model_reg), confint(model_reg)))) # exponentiated coefficients and interval bounds
+  #print (PseudoR2(model_reg))
+  cat("=================================================================================\n")
+}
+
+
+# Multivariable binomial GLM: death against all retained predictors together
+full_mod <- glm(death ~ asthma +
+                  gender +
+                  age_bins +
+                  los +
+                  # ethnicity +
+                  onset_initial_bin +
+                  o2_sat_bin +
+                  com_noasthma +
+                  obesity +
+                  # ia_cxr +
+                  smoking +
+                  # sfluv + h1n1v +
+                  max_resp_score +
+                  T1_resp_score, # final term: a dangling "+" before the comma is a parse error
+                family = "binomial", data = my_data)
+
+summary(full_mod)