From a53243a19c6300195e16c5ed94fb508fe971d6ae Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 23 Nov 2020 18:17:08 +0000 Subject: [PATCH] moved older regression files to extra --- extra/logistic_regression_v1.R | 159 ++++++++++++++++++++++++++++ extra/logistic_regression_v2.R | 182 +++++++++++++++++++++++++++++++++ 2 files changed, 341 insertions(+) create mode 100755 extra/logistic_regression_v1.R create mode 100644 extra/logistic_regression_v2.R diff --git a/extra/logistic_regression_v1.R b/extra/logistic_regression_v1.R new file mode 100755 index 0000000..cd67250 --- /dev/null +++ b/extra/logistic_regression_v1.R @@ -0,0 +1,159 @@ +#!/usr/bin/Rscript +getwd() +setwd('~/git/mosaic_2020/') +getwd() +######################################################################## +# TASK: Run regression analysis +# npa +######################################################################## +#================================================================================= +# TO DO: +# Simple stats b/w obesity and non-obesity to consider including in reg analysis +# Include NPA statistically sign params +# Rerun graphs and plots without asthma? +#================================================================================= + +#==================== +# Input: source data +#==================== +source("data_extraction_formatting_clinical.R") + +# quick sanity checks +table(clinical_df_ics$ia_exac_copd==1 & clinical_df_ics$asthma == 1) +table(fp_adults$ia_exac_copd==1 & fp_adults$asthma == 1) +table(fp_adults_na$ia_exac_copd==1 & fp_adults_na$asthma == 1) + +table(clinical_df_ics$asthma) + +#-------------------- +# Data reassignment +#-------------------- +my_data = clinical_df_ics +my_data_na = clinical_df_ics_na + +table(my_data$ia_exac_copd==1 & my_data$asthma == 1) +table(my_data_na$ia_exac_copd==1 & my_data_na$asthma == 1) + +# clear variables +rm(fp_adults, fp_adults_na) + +######################################################################### + +if ( names(which(lapply(my_data, class) == "character")) == "mosaic" ){ + cat("Character class for 1 column only:", "mosaic") +}else{ + cat("More than one character class detected: Resolve!") + quit() +} + +#============================ +# Identifying column types: Reg data +#=========================== +cols_to_omit = c("mosaic", "flustat", "onset_2_initial", "ia_exac_copd") + +my_reg_data = my_data[!colnames(my_data)%in%cols_to_omit] + +my_vars = colnames(my_reg_data) + +lapply(my_reg_data, class) + +numerical_vars = c("age" + , "vl_pfu_ul_npa1" + , "los" + #, "onset2final" + #, "onsfindeath" + #, "o2_sat_admis") +) + +na_count = sapply(my_reg_data, function(x) sum(is.na(x)));na_count + +vars_to_factor = my_vars[!my_vars%in%numerical_vars] + +#int_vars <- lapply(my_reg_data, class) == "integer" +#int_num_vars <- lapply(my_reg_data, class)%in%c("integer", "numeric") +#int_num_vars +#table(int_num_vars) + +# convert to factor +lapply(my_reg_data, class) +#my_reg_data[, vars_to_factor] <- lapply(my_reg_data[, vars_to_factor], as.factor) +my_reg_data[vars_to_factor] <- lapply(my_reg_data[vars_to_factor], as.factor) +factor_vars <- colnames(my_reg_data)[lapply(my_reg_data, class) == "factor"] +table(factor_vars) + +# check again +lapply(my_reg_data, class) + +#factor_vars_keep = names(which(lapply(my_data, nlevels) >2)) +#factor_vars_keep + +################################################################ +#================= +# reg data prepare +#================= + +pv1 = "death" +pv2 = "t1_resp_recoded" + +reg_params1 = factor_vars[!factor_vars%in%pv1] +reg_params1 = my_vars[!my_vars%in%pv1] + +#reg_params2 = my_vars[!my_vars%in%c(pv1, pv2)] + +for(i in reg_params1) { + #print (i) + #p_form = as.formula(paste("death ~ ", i ,sep = "")) + p_form = as.formula(paste("death ~ obesity + ", i ,sep = "")) + #p_form = as.formula(paste0( pv1, "~ ", i)) + print(p_form) + model_reg = glm(p_form , family = binomial, data = my_reg_data) + print(summary(model_reg)) + print(exp(cbind(OR = coef(model_reg), confint(model_reg)))) + #print (PseudoR2(model_reg)) + print(nobs(model_reg)) + cat("=================================================================================\n") +} + + + + + + + +for(i in numerical_vars) { + #print (i) + p_form = as.formula(paste("death ~ ", i ,sep = "")) + #p_form = as.formula(paste0( pv1, "~ ", i)) + print(p_form) + model_reg = glm(p_form , family = binomial, data = my_reg_data) + print(summary(model_reg)) + print(exp(cbind(OR = coef(model_reg), confint(model_reg)))) + #print (PseudoR2(model_reg)) + print(nobs(model_reg)) + cat("=================================================================================\n") +} + + + + + +################################################## + +full_mod = glm(death ~ obesity + + gender + + age_bins + + los + + #ethnicity + + onset_initial_bin + + o2_sat_bin + + com_noasthma + + obesity + + #ia_cxr + + smoking + + #sfluv + + #h1n1v + max_resp_score + + T1_resp_score + + , family = "binomial", data = my_reg_data) + +summary(full_mod) diff --git a/extra/logistic_regression_v2.R b/extra/logistic_regression_v2.R new file mode 100644 index 0000000..b444cb5 --- /dev/null +++ b/extra/logistic_regression_v2.R @@ -0,0 +1,182 @@ +#!/usr/bin/Rscript +getwd() +setwd('~/git/mosaic_2020/') +getwd() +######################################################################## +# TASK: Run regression analysis +# npa +######################################################################## +#================================================================================= +# TO DO: +# Simple stats b/w obesity and non-obesity to consider including in reg analysis +# Include NPA statistically sign params +# Rerun graphs and plots without asthma? +#================================================================================= + +#==================== +# Input: source data +#==================== +source("data_extraction_formatting_clinical.R") + +# quick sanity checks +table(clinical_df_ics$ia_exac_copd==1 & clinical_df_ics$asthma == 1) +table(fp_adults$ia_exac_copd==1 & fp_adults$asthma == 1) +table(fp_adults_na$ia_exac_copd==1 & fp_adults_na$asthma == 1) + +table(clinical_df_ics$asthma) + +#-------------------- +# Data reassignment +#-------------------- +my_data = clinical_df_ics +my_data_na = clinical_df_ics_na + +table(my_data$ia_exac_copd==1 & my_data$asthma == 1) +table(my_data_na$ia_exac_copd==1 & my_data_na$asthma == 1) + +# clear variables +rm(fp_adults, fp_adults_na) + +######################################################################### + +if ( names(which(lapply(my_data, class) == "character")) == "mosaic" ){ + cat("Character class for 1 column only:", "mosaic") +}else{ + cat("More than one character class detected: Resolve!") + quit() +} + +#============================ +# Identifying column types: Reg data +#=========================== +cols_to_omit = c("mosaic", "flustat", "onset_2_initial", "ia_exac_copd") + +my_reg_data = my_data[!colnames(my_data)%in%cols_to_omit] + +my_vars = colnames(my_reg_data) +my_vars + +lapply(my_reg_data, class) +numerical_vars = c("age" + , "vl_pfu_ul_npa1" + , "los" + , "onset2final" + , "onsfindeath" + , "o2_sat_admis") + +my_reg_data[numerical_vars] <- lapply(my_reg_data[numerical_vars], as.numeric) + +my_reg_params = my_vars + +na_count = sapply(my_reg_data, function(x) sum(is.na(x)));na_count +names(na_count)[na_count>0] + +vars_to_factor = my_vars[!my_vars%in%numerical_vars] + +# convert to factor +lapply(my_reg_data, class) +my_reg_data[vars_to_factor] <- lapply(my_reg_data[vars_to_factor], as.factor) +factor_vars <- colnames(my_reg_data)[lapply(my_reg_data, class) == "factor"] +table(factor_vars) + +# check again +lapply(my_reg_data, class) + + +# all parasm for reg + my_reg_params = c("age" + , "vl_pfu_ul_npa1" + , "los" + , "onset2final" + #, "onsfindeath" + #, "o2_sat_admis" + #, "death" + #, "obesity" + , "sfluv" + , "h1n1v" + , "gender" + , "asthma" + , "ethnicity" + , "smoking" + , "ia_cxr" + , "max_resp_score" + , "T1_resp_score" + , "com_noasthma" + , "T2_resp_score" + , "inresp_sev" + , "steroid" + , "age_bins" + , "o2_sat_bin" + , "onset_initial_bin" + , "steroid_ics" + , "t1_resp_recoded") + +################################################################ +#================= +# reg data prepare +#================= + +pv1 = "death" +pv2 = "t1_resp_recoded" + +#reg_params1 = factor_vars[!factor_vars%in%pv1] +#reg_params_mixed = my_vars[!my_vars%in%pv1] + + +#----------------------------- +# outcome: death +# data: fp adults +#----------------------------- + +for(i in my_reg_params) { + #print (i) + p_form = as.formula(paste("death ~ obesity + ", i ,sep = "")) + print(p_form) + model_reg = glm(p_form , family = binomial, data = my_reg_data) + print(summary(model_reg)) + print(exp(cbind(OR = coef(model_reg), confint(model_reg)))) + #print (PseudoR2(model_reg)) + print(nobs(model_reg)) + cat("=================================================================================\n") +} + + +#----------------------------- +# outcome: t1_resp_recoded +# data: fp adults +#----------------------------- +for(i in my_reg_params) { + #print (i) + p_form = as.formula(paste("t1_resp_recoded ~ obesity + ", i ,sep = "")) + print(p_form) + model_reg = glm(p_form , family = binomial, data = my_reg_data) + print(summary(model_reg)) + print(exp(cbind(OR = coef(model_reg), confint(model_reg)))) + #print (PseudoR2(model_reg)) + print(nobs(model_reg)) + cat("=================================================================================\n") +} + + + + +################################################## + +full_mod = glm(death ~ obesity + + gender + + age_bins + + los + + #ethnicity + + onset_initial_bin + + o2_sat_bin + + com_noasthma + + obesity + + #ia_cxr + + smoking + + #sfluv + + #h1n1v + max_resp_score + + T1_resp_score + + , family = "binomial", data = my_reg_data) + +summary(full_mod)