mosaic_2020/extra/logistic_regression_v2.R

182 lines
5.2 KiB
R

#!/usr/bin/Rscript
getwd()
setwd('~/git/mosaic_2020/')
getwd()
########################################################################
# TASK: Run regression analysis
# npa
########################################################################
#=================================================================================
# TO DO:
# Simple stats b/w obesity and non-obesity to consider including in reg analysis
# Include NPA statistically sign params
# Rerun graphs and plots without asthma?
#=================================================================================
#====================
# Input: source data
#====================
source("data_extraction_formatting_clinical.R")
# quick sanity checks
table(clinical_df_ics$ia_exac_copd==1 & clinical_df_ics$asthma == 1)
table(fp_adults$ia_exac_copd==1 & fp_adults$asthma == 1)
table(fp_adults_na$ia_exac_copd==1 & fp_adults_na$asthma == 1)
table(clinical_df_ics$asthma)
#--------------------
# Data reassignment
#--------------------
my_data = clinical_df_ics
my_data_na = clinical_df_ics_na
table(my_data$ia_exac_copd==1 & my_data$asthma == 1)
table(my_data_na$ia_exac_copd==1 & my_data_na$asthma == 1)
# clear variables
rm(fp_adults, fp_adults_na)
#########################################################################
if ( names(which(lapply(my_data, class) == "character")) == "mosaic" ){
cat("Character class for 1 column only:", "mosaic")
}else{
cat("More than one character class detected: Resolve!")
quit()
}
#============================
# Identifying column types: Reg data
#===========================
cols_to_omit = c("mosaic", "flustat", "onset_2_initial", "ia_exac_copd")
my_reg_data = my_data[!colnames(my_data)%in%cols_to_omit]
my_vars = colnames(my_reg_data)
my_vars
lapply(my_reg_data, class)
numerical_vars = c("age"
, "vl_pfu_ul_npa1"
, "los"
, "onset2final"
, "onsfindeath"
, "o2_sat_admis")
my_reg_data[numerical_vars] <- lapply(my_reg_data[numerical_vars], as.numeric)
my_reg_params = my_vars
na_count = sapply(my_reg_data, function(x) sum(is.na(x)));na_count
names(na_count)[na_count>0]
vars_to_factor = my_vars[!my_vars%in%numerical_vars]
# convert to factor
lapply(my_reg_data, class)
my_reg_data[vars_to_factor] <- lapply(my_reg_data[vars_to_factor], as.factor)
factor_vars <- colnames(my_reg_data)[lapply(my_reg_data, class) == "factor"]
table(factor_vars)
# check again
lapply(my_reg_data, class)
# all parasm for reg
my_reg_params = c("age"
, "vl_pfu_ul_npa1"
, "los"
, "onset2final"
#, "onsfindeath"
#, "o2_sat_admis"
#, "death"
#, "obesity"
, "sfluv"
, "h1n1v"
, "gender"
, "asthma"
, "ethnicity"
, "smoking"
, "ia_cxr"
, "max_resp_score"
, "T1_resp_score"
, "com_noasthma"
, "T2_resp_score"
, "inresp_sev"
, "steroid"
, "age_bins"
, "o2_sat_bin"
, "onset_initial_bin"
, "steroid_ics"
, "t1_resp_recoded")
################################################################
#=================
# reg data prepare
#=================
pv1 = "death"
pv2 = "t1_resp_recoded"
#reg_params1 = factor_vars[!factor_vars%in%pv1]
#reg_params_mixed = my_vars[!my_vars%in%pv1]
#-----------------------------
# outcome: death
# data: fp adults
#-----------------------------
for(i in my_reg_params) {
#print (i)
p_form = as.formula(paste("death ~ obesity + ", i ,sep = ""))
print(p_form)
model_reg = glm(p_form , family = binomial, data = my_reg_data)
print(summary(model_reg))
print(exp(cbind(OR = coef(model_reg), confint(model_reg))))
#print (PseudoR2(model_reg))
print(nobs(model_reg))
cat("=================================================================================\n")
}
#-----------------------------
# outcome: t1_resp_recoded
# data: fp adults
#-----------------------------
for(i in my_reg_params) {
#print (i)
p_form = as.formula(paste("t1_resp_recoded ~ obesity + ", i ,sep = ""))
print(p_form)
model_reg = glm(p_form , family = binomial, data = my_reg_data)
print(summary(model_reg))
print(exp(cbind(OR = coef(model_reg), confint(model_reg))))
#print (PseudoR2(model_reg))
print(nobs(model_reg))
cat("=================================================================================\n")
}
##################################################
full_mod = glm(death ~ obesity +
gender +
age_bins +
los +
#ethnicity +
onset_initial_bin +
o2_sat_bin +
com_noasthma +
obesity +
#ia_cxr +
smoking +
#sfluv +
#h1n1v
max_resp_score +
T1_resp_score +
, family = "binomial", data = my_reg_data)
summary(full_mod)