#!/usr/bin/Rscript getwd() setwd('~/git/mosaic_2020/') getwd() ######################################################################## # TASK: Run regression analysis # npa ######################################################################## #================================================================================= # TO DO: # Simple stats b/w obesity and non-obesity to consider including in reg analysis # Include NPA statistically sign params # Rerun graphs and plots without asthma? #================================================================================= #==================== # Input: source data #==================== source("data_extraction_formatting_clinical.R") # quick sanity checks table(clinical_df_ics$ia_exac_copd==1 & clinical_df_ics$asthma == 1) table(fp_adults$ia_exac_copd==1 & fp_adults$asthma == 1) table(fp_adults_na$ia_exac_copd==1 & fp_adults_na$asthma == 1) table(clinical_df_ics$asthma) #-------------------- # Data reassignment #-------------------- my_data = clinical_df_ics my_data_na = clinical_df_ics_na table(my_data$ia_exac_copd==1 & my_data$asthma == 1) table(my_data_na$ia_exac_copd==1 & my_data_na$asthma == 1) # clear variables #rm(fp_adults, fp_adults_na) ######################################################################### if ( names(which(lapply(my_data, class) == "character")) == "mosaic" ){ cat("Character class for 1 column only:", "mosaic") }else{ cat("More than one character class detected: Resolve!") quit() } #============================ # Identifying column types: Reg data #=========================== cols_to_omit = c("mosaic", "flustat", "onset_2_initial", "ia_exac_copd") my_reg_data = my_data[!colnames(my_data)%in%cols_to_omit] my_vars = colnames(my_reg_data) my_vars lapply(my_reg_data, class) numerical_vars = c("age" , "vl_pfu_ul_npa1" , "los" , "onset2final" , "onsfindeath" , "o2_sat_admis") my_reg_data[numerical_vars] <- lapply(my_reg_data[numerical_vars], as.numeric) my_reg_params = my_vars na_count = sapply(my_reg_data, function(x) sum(is.na(x)));na_count names(na_count)[na_count>0] vars_to_factor = my_vars[!my_vars%in%numerical_vars] # convert to factor lapply(my_reg_data, class) my_reg_data[vars_to_factor] <- lapply(my_reg_data[vars_to_factor], as.factor) factor_vars <- colnames(my_reg_data)[lapply(my_reg_data, class) == "factor"] table(factor_vars) # check again lapply(my_reg_data, class) # all parasm for reg my_reg_params = c("age" , "vl_pfu_ul_npa1" , "los" , "onset2final" #, "onsfindeath" #, "o2_sat_admis" , "death" , "obesity" , "sfluv" , "h1n1v" , "gender" , "asthma" , "ethnicity" , "smoking" , "ia_cxr" , "max_resp_score" , "T1_resp_score" , "com_noasthma" , "T2_resp_score" , "inresp_sev" , "steroid" , "age_bins" , "o2_sat_bin" , "onset_initial_bin" , "steroid_ics" , "t1_resp_recoded") #================= # reg data prepare #================= pv1 = "death" pv2 = "t1_resp_recoded" #reg_params1 = factor_vars[!factor_vars%in%pv1] #reg_params_mixed = my_vars[!my_vars%in%pv1] ######################################################################## #================= # outcome2 #================= #----------------------------- # outcome: death + obesity # data: fp adults #----------------------------- my_reg_params1 = my_reg_params[!my_reg_params%in%c("death", "obesity")] for(i in my_reg_params1) { #print (i) p_form = as.formula(paste("death ~ obesity + ", i ,sep = "")) print(p_form) model_reg = glm(p_form , family = binomial, data = my_reg_data) print(summary(model_reg)) print(exp(cbind(OR = coef(model_reg), confint(model_reg)))) #print (PseudoR2(model_reg)) print(nobs(model_reg)) cat("=================================================================================\n") } #----------------------------- # outcome: death # data: fp adults #----------------------------- my_reg_params1v2 = my_reg_params[!my_reg_params%in%c("death")] for(i in my_reg_params1v2) { #print (i) p_form = as.formula(paste("death ~ ", i ,sep = "")) print(p_form) model_reg = glm(p_form , family = binomial, data = my_reg_data) print(summary(model_reg)) print(exp(cbind(OR = coef(model_reg), confint(model_reg)))) #print (PseudoR2(model_reg)) print(nobs(model_reg)) cat("=================================================================================\n") } ######################################################################## #================= # outcome2 #================= #----------------------------- # outcome: t1_resp_recoded + obesity # data: fp adults #----------------------------- my_reg_params2 = my_reg_params[!my_reg_params%in%c("death" , "obesity" , "t1_resp_recoded" , "T1_resp_score")] for(i in my_reg_params2) { #print (i) p_form = as.formula(paste("t1_resp_recoded ~ obesity + ", i ,sep = "")) print(p_form) model_reg = glm(p_form , family = binomial, data = my_reg_data) print(summary(model_reg)) print(exp(cbind(OR = coef(model_reg), confint(model_reg)))) #print (PseudoR2(model_reg)) print(nobs(model_reg)) cat("=================================================================================\n") } #----------------------------- # outcome: t1_resp_recoded # data: fp adults #----------------------------- my_reg_params2v2 = my_reg_params[!my_reg_params%in%c("death" #, "obesity" , "t1_resp_recoded" , "T1_resp_score")] for(i in my_reg_params2v2) { #print (i) p_form = as.formula(paste("t1_resp_recoded ~ ", i ,sep = "")) print(p_form) model_reg = glm(p_form , family = binomial, data = my_reg_data) print(summary(model_reg)) print(exp(cbind(OR = coef(model_reg), confint(model_reg)))) #print (PseudoR2(model_reg)) print(nobs(model_reg)) cat("=================================================================================\n") } ######################################################################## # Full model ######################################################################## full_mod = glm(death ~ obesity + age + #age_bins + obesity + asthma + t1_resp_recoded + #ia_cxr , family = "binomial", data = my_reg_data) summary(full_mod) ######################################################################## # mediators ######################################################################## sig_npa_cols = c("mosaic", sig_npa_cols) my_med_sig = fp_adults[, sig_npa_cols] my_reg_data_med = merge(clinical_df_ics, my_med_sig , by = intersect(names(clinical_df_ics), names(my_med_sig)) ) #----------------------------- # outcome: death + obesity # data: fp adults #----------------------------- #my_reg_params_meds = c(my_reg_params, sig_npa_cols) my_reg_params_meds = colnames(my_reg_data_med) my_reg_params_meds1 = my_reg_params_meds[!my_reg_params_meds%in%c("mosaic", "flustat" , "onset_2_initial" , "onsfindeath" , "ia_exac_copd" , "death" , "obesity")] for(i in my_reg_params_meds1) { #print (i) p_form = as.formula(paste("death ~ obesity + ", i ,sep = "")) print(p_form) model_reg = glm(p_form , family = binomial, data = my_reg_data_med) print(summary(model_reg)) print(exp(cbind(OR = coef(model_reg), confint(model_reg)))) #print (PseudoR2(model_reg)) print(nobs(model_reg)) cat("=================================================================================\n") } #----------------------------- # outcome: t1_resp_recoded ~ obesity + asthma # data: fp adults #----------------------------- my_reg_params_meds2v2 = my_reg_params_meds[!my_reg_params_meds%in%c("mosaic" , "flustat" , "onset_2_initial" , "onsfindeath" , "ia_exac_copd" , "death" , "t1_resp_recoded" , "T1_resp_score" , "asthma")] for(i in my_reg_params_meds2v2) { #print (i) p_form = as.formula(paste("t1_resp_recoded ~ obesity + asthma + ", i ,sep = "")) print(p_form) model_reg = glm(p_form , family = binomial, data = my_reg_data_med) print(summary(model_reg)) print(exp(cbind(OR = coef(model_reg), confint(model_reg)))) #print (PseudoR2(model_reg)) print(nobs(model_reg)) cat("=================================================================================\n") }