moved older regression files to extra
This commit is contained in:
parent
e3259f2f17
commit
a53243a19c
2 changed files with 341 additions and 0 deletions
159
extra/logistic_regression_v1.R
Executable file
159
extra/logistic_regression_v1.R
Executable file
|
@ -0,0 +1,159 @@
|
|||
#!/usr/bin/Rscript
|
||||
getwd()
|
||||
setwd('~/git/mosaic_2020/')
|
||||
getwd()
|
||||
########################################################################
|
||||
# TASK: Run regression analysis
|
||||
# npa
|
||||
########################################################################
|
||||
#=================================================================================
|
||||
# TO DO:
|
||||
# Simple stats b/w obesity and non-obesity to consider including in reg analysis
|
||||
# Include NPA statistically sign params
|
||||
# Rerun graphs and plots without asthma?
|
||||
#=================================================================================
|
||||
|
||||
#====================
|
||||
# Input: source data
|
||||
#====================
|
||||
source("data_extraction_formatting_clinical.R")
|
||||
|
||||
# quick sanity checks
|
||||
table(clinical_df_ics$ia_exac_copd==1 & clinical_df_ics$asthma == 1)
|
||||
table(fp_adults$ia_exac_copd==1 & fp_adults$asthma == 1)
|
||||
table(fp_adults_na$ia_exac_copd==1 & fp_adults_na$asthma == 1)
|
||||
|
||||
table(clinical_df_ics$asthma)
|
||||
|
||||
#--------------------
|
||||
# Data reassignment
|
||||
#--------------------
|
||||
my_data = clinical_df_ics
|
||||
my_data_na = clinical_df_ics_na
|
||||
|
||||
table(my_data$ia_exac_copd==1 & my_data$asthma == 1)
|
||||
table(my_data_na$ia_exac_copd==1 & my_data_na$asthma == 1)
|
||||
|
||||
# clear variables
|
||||
rm(fp_adults, fp_adults_na)
|
||||
|
||||
#########################################################################
|
||||
|
||||
if ( names(which(lapply(my_data, class) == "character")) == "mosaic" ){
|
||||
cat("Character class for 1 column only:", "mosaic")
|
||||
}else{
|
||||
cat("More than one character class detected: Resolve!")
|
||||
quit()
|
||||
}
|
||||
|
||||
#============================
|
||||
# Identifying column types: Reg data
|
||||
#===========================
|
||||
cols_to_omit = c("mosaic", "flustat", "onset_2_initial", "ia_exac_copd")
|
||||
|
||||
my_reg_data = my_data[!colnames(my_data)%in%cols_to_omit]
|
||||
|
||||
my_vars = colnames(my_reg_data)
|
||||
|
||||
lapply(my_reg_data, class)
|
||||
|
||||
numerical_vars = c("age"
|
||||
, "vl_pfu_ul_npa1"
|
||||
, "los"
|
||||
#, "onset2final"
|
||||
#, "onsfindeath"
|
||||
#, "o2_sat_admis")
|
||||
)
|
||||
|
||||
na_count = sapply(my_reg_data, function(x) sum(is.na(x)));na_count
|
||||
|
||||
vars_to_factor = my_vars[!my_vars%in%numerical_vars]
|
||||
|
||||
#int_vars <- lapply(my_reg_data, class) == "integer"
|
||||
#int_num_vars <- lapply(my_reg_data, class)%in%c("integer", "numeric")
|
||||
#int_num_vars
|
||||
#table(int_num_vars)
|
||||
|
||||
# convert to factor
|
||||
lapply(my_reg_data, class)
|
||||
#my_reg_data[, vars_to_factor] <- lapply(my_reg_data[, vars_to_factor], as.factor)
|
||||
my_reg_data[vars_to_factor] <- lapply(my_reg_data[vars_to_factor], as.factor)
|
||||
factor_vars <- colnames(my_reg_data)[lapply(my_reg_data, class) == "factor"]
|
||||
table(factor_vars)
|
||||
|
||||
# check again
|
||||
lapply(my_reg_data, class)
|
||||
|
||||
#factor_vars_keep = names(which(lapply(my_data, nlevels) >2))
|
||||
#factor_vars_keep
|
||||
|
||||
################################################################
|
||||
#=================
|
||||
# reg data prepare
|
||||
#=================
|
||||
|
||||
pv1 = "death"
|
||||
pv2 = "t1_resp_recoded"
|
||||
|
||||
reg_params1 = factor_vars[!factor_vars%in%pv1]
|
||||
reg_params1 = my_vars[!my_vars%in%pv1]
|
||||
|
||||
#reg_params2 = my_vars[!my_vars%in%c(pv1, pv2)]
|
||||
|
||||
for(i in reg_params1) {
|
||||
#print (i)
|
||||
#p_form = as.formula(paste("death ~ ", i ,sep = ""))
|
||||
p_form = as.formula(paste("death ~ obesity + ", i ,sep = ""))
|
||||
#p_form = as.formula(paste0( pv1, "~ ", i))
|
||||
print(p_form)
|
||||
model_reg = glm(p_form , family = binomial, data = my_reg_data)
|
||||
print(summary(model_reg))
|
||||
print(exp(cbind(OR = coef(model_reg), confint(model_reg))))
|
||||
#print (PseudoR2(model_reg))
|
||||
print(nobs(model_reg))
|
||||
cat("=================================================================================\n")
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
for(i in numerical_vars) {
|
||||
#print (i)
|
||||
p_form = as.formula(paste("death ~ ", i ,sep = ""))
|
||||
#p_form = as.formula(paste0( pv1, "~ ", i))
|
||||
print(p_form)
|
||||
model_reg = glm(p_form , family = binomial, data = my_reg_data)
|
||||
print(summary(model_reg))
|
||||
print(exp(cbind(OR = coef(model_reg), confint(model_reg))))
|
||||
#print (PseudoR2(model_reg))
|
||||
print(nobs(model_reg))
|
||||
cat("=================================================================================\n")
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
##################################################
|
||||
|
||||
full_mod = glm(death ~ obesity +
|
||||
gender +
|
||||
age_bins +
|
||||
los +
|
||||
#ethnicity +
|
||||
onset_initial_bin +
|
||||
o2_sat_bin +
|
||||
com_noasthma +
|
||||
obesity +
|
||||
#ia_cxr +
|
||||
smoking +
|
||||
#sfluv +
|
||||
#h1n1v
|
||||
max_resp_score +
|
||||
T1_resp_score +
|
||||
, family = "binomial", data = my_reg_data)
|
||||
|
||||
summary(full_mod)
|
182
extra/logistic_regression_v2.R
Normal file
182
extra/logistic_regression_v2.R
Normal file
|
@ -0,0 +1,182 @@
|
|||
#!/usr/bin/Rscript
|
||||
getwd()
|
||||
setwd('~/git/mosaic_2020/')
|
||||
getwd()
|
||||
########################################################################
|
||||
# TASK: Run regression analysis
|
||||
# npa
|
||||
########################################################################
|
||||
#=================================================================================
|
||||
# TO DO:
|
||||
# Simple stats b/w obesity and non-obesity to consider including in reg analysis
|
||||
# Include NPA statistically sign params
|
||||
# Rerun graphs and plots without asthma?
|
||||
#=================================================================================
|
||||
|
||||
#====================
|
||||
# Input: source data
|
||||
#====================
|
||||
source("data_extraction_formatting_clinical.R")
|
||||
|
||||
# quick sanity checks
|
||||
table(clinical_df_ics$ia_exac_copd==1 & clinical_df_ics$asthma == 1)
|
||||
table(fp_adults$ia_exac_copd==1 & fp_adults$asthma == 1)
|
||||
table(fp_adults_na$ia_exac_copd==1 & fp_adults_na$asthma == 1)
|
||||
|
||||
table(clinical_df_ics$asthma)
|
||||
|
||||
#--------------------
|
||||
# Data reassignment
|
||||
#--------------------
|
||||
my_data = clinical_df_ics
|
||||
my_data_na = clinical_df_ics_na
|
||||
|
||||
table(my_data$ia_exac_copd==1 & my_data$asthma == 1)
|
||||
table(my_data_na$ia_exac_copd==1 & my_data_na$asthma == 1)
|
||||
|
||||
# clear variables
|
||||
rm(fp_adults, fp_adults_na)
|
||||
|
||||
#########################################################################
|
||||
|
||||
if ( names(which(lapply(my_data, class) == "character")) == "mosaic" ){
|
||||
cat("Character class for 1 column only:", "mosaic")
|
||||
}else{
|
||||
cat("More than one character class detected: Resolve!")
|
||||
quit()
|
||||
}
|
||||
|
||||
#============================
|
||||
# Identifying column types: Reg data
|
||||
#===========================
|
||||
cols_to_omit = c("mosaic", "flustat", "onset_2_initial", "ia_exac_copd")
|
||||
|
||||
my_reg_data = my_data[!colnames(my_data)%in%cols_to_omit]
|
||||
|
||||
my_vars = colnames(my_reg_data)
|
||||
my_vars
|
||||
|
||||
lapply(my_reg_data, class)
|
||||
numerical_vars = c("age"
|
||||
, "vl_pfu_ul_npa1"
|
||||
, "los"
|
||||
, "onset2final"
|
||||
, "onsfindeath"
|
||||
, "o2_sat_admis")
|
||||
|
||||
my_reg_data[numerical_vars] <- lapply(my_reg_data[numerical_vars], as.numeric)
|
||||
|
||||
my_reg_params = my_vars
|
||||
|
||||
na_count = sapply(my_reg_data, function(x) sum(is.na(x)));na_count
|
||||
names(na_count)[na_count>0]
|
||||
|
||||
vars_to_factor = my_vars[!my_vars%in%numerical_vars]
|
||||
|
||||
# convert to factor
|
||||
lapply(my_reg_data, class)
|
||||
my_reg_data[vars_to_factor] <- lapply(my_reg_data[vars_to_factor], as.factor)
|
||||
factor_vars <- colnames(my_reg_data)[lapply(my_reg_data, class) == "factor"]
|
||||
table(factor_vars)
|
||||
|
||||
# check again
|
||||
lapply(my_reg_data, class)
|
||||
|
||||
|
||||
# all parasm for reg
|
||||
my_reg_params = c("age"
|
||||
, "vl_pfu_ul_npa1"
|
||||
, "los"
|
||||
, "onset2final"
|
||||
#, "onsfindeath"
|
||||
#, "o2_sat_admis"
|
||||
#, "death"
|
||||
#, "obesity"
|
||||
, "sfluv"
|
||||
, "h1n1v"
|
||||
, "gender"
|
||||
, "asthma"
|
||||
, "ethnicity"
|
||||
, "smoking"
|
||||
, "ia_cxr"
|
||||
, "max_resp_score"
|
||||
, "T1_resp_score"
|
||||
, "com_noasthma"
|
||||
, "T2_resp_score"
|
||||
, "inresp_sev"
|
||||
, "steroid"
|
||||
, "age_bins"
|
||||
, "o2_sat_bin"
|
||||
, "onset_initial_bin"
|
||||
, "steroid_ics"
|
||||
, "t1_resp_recoded")
|
||||
|
||||
################################################################
|
||||
#=================
|
||||
# reg data prepare
|
||||
#=================
|
||||
|
||||
pv1 = "death"
|
||||
pv2 = "t1_resp_recoded"
|
||||
|
||||
#reg_params1 = factor_vars[!factor_vars%in%pv1]
|
||||
#reg_params_mixed = my_vars[!my_vars%in%pv1]
|
||||
|
||||
|
||||
#-----------------------------
|
||||
# outcome: death
|
||||
# data: fp adults
|
||||
#-----------------------------
|
||||
|
||||
for(i in my_reg_params) {
|
||||
#print (i)
|
||||
p_form = as.formula(paste("death ~ obesity + ", i ,sep = ""))
|
||||
print(p_form)
|
||||
model_reg = glm(p_form , family = binomial, data = my_reg_data)
|
||||
print(summary(model_reg))
|
||||
print(exp(cbind(OR = coef(model_reg), confint(model_reg))))
|
||||
#print (PseudoR2(model_reg))
|
||||
print(nobs(model_reg))
|
||||
cat("=================================================================================\n")
|
||||
}
|
||||
|
||||
|
||||
#-----------------------------
|
||||
# outcome: t1_resp_recoded
|
||||
# data: fp adults
|
||||
#-----------------------------
|
||||
for(i in my_reg_params) {
|
||||
#print (i)
|
||||
p_form = as.formula(paste("t1_resp_recoded ~ obesity + ", i ,sep = ""))
|
||||
print(p_form)
|
||||
model_reg = glm(p_form , family = binomial, data = my_reg_data)
|
||||
print(summary(model_reg))
|
||||
print(exp(cbind(OR = coef(model_reg), confint(model_reg))))
|
||||
#print (PseudoR2(model_reg))
|
||||
print(nobs(model_reg))
|
||||
cat("=================================================================================\n")
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
##################################################
|
||||
|
||||
full_mod = glm(death ~ obesity +
|
||||
gender +
|
||||
age_bins +
|
||||
los +
|
||||
#ethnicity +
|
||||
onset_initial_bin +
|
||||
o2_sat_bin +
|
||||
com_noasthma +
|
||||
obesity +
|
||||
#ia_cxr +
|
||||
smoking +
|
||||
#sfluv +
|
||||
#h1n1v
|
||||
max_resp_score +
|
||||
T1_resp_score +
|
||||
, family = "binomial", data = my_reg_data)
|
||||
|
||||
summary(full_mod)
|
Loading…
Add table
Add a link
Reference in a new issue