perfomed LR analysis and tidyed up clinical formatting code
This commit is contained in:
parent
08e01abfb5
commit
f0c0fd72d1
5 changed files with 296 additions and 301 deletions
|
@ -13,21 +13,25 @@ getwd()
|
|||
source("data_extraction_formatting_clinical.R")
|
||||
|
||||
# quick sanity checks
|
||||
table(clinical_df_ics$ia_exac_copd==1 & clinical_df_ics$asthma == 1)
|
||||
table(fp_adults$ia_exac_copd==1 & fp_adults$asthma == 1)
|
||||
table(fp_adults_na$ia_exac_copd==1 & fp_adults_na$asthma == 1)
|
||||
table(fp_adults_ics$ia_exac_copd==1 & fp_adults_ics$asthma == 1)
|
||||
table(fp_adults_ics$ia_exac_copd==1 & fp_adults_ics$asthma == 1)
|
||||
table(fp_adults_ics_na$ia_exac_copd==1 & fp_adults_ics_na$asthma == 1)
|
||||
|
||||
table(clinical_df_ics$asthma)
|
||||
table(fp_adults_ics$asthma)
|
||||
|
||||
if ( length(cols_to_extract) == length(clinical_cols) + length(sig_npa_cols) ){
|
||||
cat("PASS: extracting clinical and sign npa cols")
|
||||
cat("PASS: extracting clinical and sign npa cols for regression")
|
||||
} else{
|
||||
cat("FAIL: could not find cols to extract")
|
||||
cat("FAIL: could not find cols to extract for regression")
|
||||
quit()
|
||||
}
|
||||
|
||||
fp_adults_reg = fp_adults[, cols_to_extract]
|
||||
fp_adults_reg_na = fp_adults_na[, cols_to_extract]
|
||||
|
||||
#fp_adults_reg = fp_adults_ics[, cols_to_extract]
|
||||
fp_adults_reg = fp_adults_ics[, colnames(fp_adults_ics)%in%cols_to_extract]
|
||||
cols_to_extract[!cols_to_extract%in%colnames(fp_adults_reg)]
|
||||
|
||||
fp_adults_reg_na = fp_adults_ics_na[, colnames(fp_adults_ics_na)%in%cols_to_extract]
|
||||
|
||||
#--------------------
|
||||
# Data reassignment
|
||||
|
@ -38,26 +42,23 @@ my_data_na = fp_adults_reg_na
|
|||
table(my_data$ia_exac_copd==1 & my_data$asthma == 1)
|
||||
table(my_data_na$ia_exac_copd==1 & my_data_na$asthma == 1)
|
||||
|
||||
# clear variables
|
||||
rm(fp_adults, fp_adults_na, clinical_df_ics, clinical_df_ics_na)
|
||||
|
||||
cols_to_omit = c("mosaic", "flustat", "onset_2_initial", "ia_exac_copd", "onsfindeath", "o2_sat_admis")
|
||||
my_clinical_cols = clinical_cols[!clinical_cols%in%cols_to_omit]
|
||||
my_clinical_cols
|
||||
|
||||
#########################################################################
|
||||
#============================
|
||||
# Identifying column types: Reg data
|
||||
#===========================
|
||||
cols_to_omit = c("mosaic", "flustat", "onset_2_initial", "ia_exac_copd")
|
||||
my_vars = colnames(my_data)
|
||||
|
||||
my_reg_data = my_data[!colnames(my_data)%in%cols_to_omit]
|
||||
lapply(my_data, class)
|
||||
|
||||
my_vars = colnames(my_reg_data)
|
||||
my_vars
|
||||
|
||||
lapply(my_reg_data, class)
|
||||
|
||||
check_int_vars = my_vars[lapply(my_reg_data, class)%in%c("integer")]
|
||||
check_num_vars = my_vars[lapply(my_reg_data, class)%in%c("numeric")]
|
||||
check_charac_vars = my_vars[lapply(my_reg_data, class)%in%c("character")]
|
||||
check_factor_vars = my_vars[lapply(my_reg_data, class)%in%c("factor")]
|
||||
check_int_vars = my_vars[lapply(my_data, class)%in%c("integer")]
|
||||
check_num_vars = my_vars[lapply(my_data, class)%in%c("numeric")]
|
||||
check_charac_vars = my_vars[lapply(my_data, class)%in%c("character")]
|
||||
check_factor_vars = my_vars[lapply(my_data, class)%in%c("factor")]
|
||||
|
||||
cat("\nNo. of int cols:", length(check_int_vars)
|
||||
, "\nNo. of num cols:", length(check_num_vars)
|
||||
|
@ -65,44 +66,56 @@ cat("\nNo. of int cols:", length(check_int_vars)
|
|||
, "\nNo. of factor cols:", length(check_factor_vars)
|
||||
)
|
||||
|
||||
# convert char vals to int as these should be int
|
||||
my_reg_data[,check_charac_vars] = lapply(my_reg_data[,check_charac_vars], as.integer)
|
||||
str(my_reg_data$sfluv)
|
||||
|
||||
|
||||
numerical_vars = c("age"
|
||||
#=======================================
|
||||
# changing dtypes in cols
|
||||
#=======================================
|
||||
# what I need to be numerical explicitly
|
||||
my_numerical_vars = c("age"
|
||||
, "vl_pfu_ul_npa1"
|
||||
, "los"
|
||||
, "onset2final"
|
||||
, "onsfindeath"
|
||||
, "o2_sat_admis")
|
||||
|
||||
my_data[my_numerical_vars] <- lapply(my_data[my_numerical_vars], as.numeric)
|
||||
|
||||
num_vars = my_vars[lapply(my_data, class)%in%c("numeric")]
|
||||
num_vars
|
||||
|
||||
# what I need to be integer explicitly
|
||||
not_int_vars = c(my_numerical_vars, sig_npa_cols)
|
||||
|
||||
my_int_vars = my_vars[!my_vars%in%not_int_vars]
|
||||
my_int_vars
|
||||
|
||||
#my_int_vars = my_clinical_cols[int_vars_extract%in%check_int_vars]
|
||||
#my_int_vars
|
||||
|
||||
my_reg_data[numerical_vars] <- lapply(my_reg_data[numerical_vars], as.numeric)
|
||||
# convert int cols to factor
|
||||
my_data[my_int_vars] <- lapply(my_data[my_int_vars], as.factor)
|
||||
|
||||
my_reg_params = my_vars
|
||||
factor_vars = my_vars[lapply(my_data, class)%in%c("factor")]
|
||||
factor_vars
|
||||
check_factor_vars
|
||||
|
||||
# check again
|
||||
lapply(my_data, class)
|
||||
#======================================================================
|
||||
my_reg_data = my_data[!colnames(my_data)%in%cols_to_omit]
|
||||
|
||||
my_reg_params = colnames(my_reg_data)
|
||||
#length(factor_vars) + length(num_vars); ncol(my_reg_data)
|
||||
#my_reg_params = c(factor_vars, num_vars)
|
||||
|
||||
na_count = sapply(my_reg_data, function(x) sum(is.na(x)));na_count
|
||||
names(na_count)[na_count>0]
|
||||
|
||||
vars_to_factor = my_vars[!my_vars%in%numerical_vars]
|
||||
|
||||
# convert to factor
|
||||
lapply(my_reg_data, class)
|
||||
my_reg_data[vars_to_factor] <- lapply(my_reg_data[vars_to_factor], as.factor)
|
||||
factor_vars <- colnames(my_reg_data)[lapply(my_reg_data, class) == "factor"]
|
||||
table(factor_vars)
|
||||
|
||||
# check again
|
||||
lapply(my_reg_data, class)
|
||||
|
||||
|
||||
# all parasm for reg
|
||||
my_reg_params = c("age"
|
||||
my_reg_params_DEL = c("age"
|
||||
, "age_bins"
|
||||
, "vl_pfu_ul_npa1"
|
||||
, "los"
|
||||
, "onset2final"
|
||||
|
@ -123,11 +136,12 @@ my_reg_params = c("age"
|
|||
, "T2_resp_score"
|
||||
, "inresp_sev"
|
||||
, "steroid"
|
||||
, "age_bins"
|
||||
|
||||
, "o2_sat_bin"
|
||||
, "onset_initial_bin"
|
||||
, "steroid_ics"
|
||||
, "t1_resp_recoded")
|
||||
, "t1_resp_recoded"
|
||||
, sig_npa_cols)
|
||||
|
||||
#=================
|
||||
# reg data prepare
|
||||
|
@ -139,16 +153,16 @@ pv2 = "t1_resp_recoded"
|
|||
#reg_params_mixed = my_vars[!my_vars%in%pv1]
|
||||
|
||||
########################################################################
|
||||
#=================
|
||||
# outcome2
|
||||
#=================
|
||||
# outcome: death
|
||||
########################################################################
|
||||
#-----------------------------
|
||||
# outcome: death + obesity
|
||||
# data: fp adults
|
||||
#-----------------------------
|
||||
my_reg_params1 = my_reg_params[!my_reg_params%in%c("death", "obesity")]
|
||||
|
||||
for(i in my_reg_params1) {
|
||||
sink(file = "reg_output_out1.txt", append = T)
|
||||
for(i in my_reg_params1) {
|
||||
#print (i)
|
||||
p_form = as.formula(paste("death ~ obesity + ", i ,sep = ""))
|
||||
print(p_form)
|
||||
|
@ -159,16 +173,20 @@ for(i in my_reg_params1) {
|
|||
print(nobs(model_reg))
|
||||
cat("=================================================================================\n")
|
||||
}
|
||||
sink()
|
||||
|
||||
#-----------------------------
|
||||
# outcome: death
|
||||
# outcome: death + obesity
|
||||
# data: fp adults
|
||||
#-----------------------------
|
||||
my_reg_params1v2 = my_reg_params[!my_reg_params%in%c("death")]
|
||||
my_reg_params2 = my_reg_params[!my_reg_params%in%c( "death"
|
||||
, "obesity"
|
||||
, "asthma")]
|
||||
|
||||
for(i in my_reg_params1v2) {
|
||||
sink(file = "reg_output_out1_ob_as.txt", append = T)
|
||||
for(i in my_reg_params2) {
|
||||
#print (i)
|
||||
p_form = as.formula(paste("death ~ ", i ,sep = ""))
|
||||
p_form = as.formula(paste("death ~ obesity + asthma +", i ,sep = ""))
|
||||
print(p_form)
|
||||
model_reg = glm(p_form , family = binomial, data = my_reg_data)
|
||||
print(summary(model_reg))
|
||||
|
@ -177,20 +195,45 @@ for(i in my_reg_params1v2) {
|
|||
print(nobs(model_reg))
|
||||
cat("=================================================================================\n")
|
||||
}
|
||||
sink()
|
||||
|
||||
|
||||
#-------------------
|
||||
# Full model
|
||||
#-------------------
|
||||
full_mod = glm(death ~ obesity +
|
||||
asthma +
|
||||
#age +
|
||||
age_bins +
|
||||
#t1_resp_recoded +
|
||||
com_noasthma +
|
||||
eotaxin3_npa1 +
|
||||
il1_npa1+
|
||||
il8_2_npa1+
|
||||
il12p70_npa1
|
||||
, family = "binomial"
|
||||
, data = my_reg_data)
|
||||
|
||||
print(summary(full_mod))
|
||||
print(exp(cbind(OR = coef(full_mod), confint(full_mod))))
|
||||
print(nobs(full_mod))
|
||||
|
||||
cat("=================================================================================\n")
|
||||
|
||||
########################################################################
|
||||
# outcome: t1_resp_recoded
|
||||
########################################################################
|
||||
#=================
|
||||
# outcome2
|
||||
#=================
|
||||
#-----------------------------
|
||||
# outcome: t1_resp_recoded + obesity
|
||||
# data: fp adults
|
||||
#-----------------------------
|
||||
my_reg_params2 = my_reg_params[!my_reg_params%in%c("death"
|
||||
, "obesity"
|
||||
, "t1_resp_recoded"
|
||||
, "T1_resp_score")]
|
||||
my_reg_params3 = my_reg_params[!my_reg_params%in%c("t1_resp_recoded"
|
||||
, "T1_resp_score"
|
||||
, "death"
|
||||
, "obesity")]
|
||||
|
||||
for(i in my_reg_params2) {
|
||||
sink(file = "reg_output_out2.txt", append = T)
|
||||
for(i in my_reg_params3) {
|
||||
#print (i)
|
||||
p_form = as.formula(paste("t1_resp_recoded ~ obesity + ", i ,sep = ""))
|
||||
print(p_form)
|
||||
|
@ -201,20 +244,22 @@ for(i in my_reg_params2) {
|
|||
print(nobs(model_reg))
|
||||
cat("=================================================================================\n")
|
||||
}
|
||||
|
||||
sink()
|
||||
|
||||
#-----------------------------
|
||||
# outcome: t1_resp_recoded
|
||||
# outcome: t1_resp_recoded + obesity
|
||||
# data: fp adults
|
||||
#-----------------------------
|
||||
my_reg_params2v2 = my_reg_params[!my_reg_params%in%c("death"
|
||||
#, "obesity"
|
||||
, "t1_resp_recoded"
|
||||
, "T1_resp_score")]
|
||||
my_reg_params4 = my_reg_params[!my_reg_params%in%c( "t1_resp_recoded"
|
||||
, "T1_resp_score"
|
||||
, "obesity"
|
||||
, "death"
|
||||
, "asthma")]
|
||||
|
||||
for(i in my_reg_params2v2) {
|
||||
sink(file = "reg_output_out2_ob_as.txt", append = T)
|
||||
for(i in my_reg_params4) {
|
||||
#print (i)
|
||||
p_form = as.formula(paste("t1_resp_recoded ~ ", i ,sep = ""))
|
||||
p_form = as.formula(paste("t1_resp_recoded ~ obesity + asthma +", i ,sep = ""))
|
||||
print(p_form)
|
||||
model_reg = glm(p_form , family = binomial, data = my_reg_data)
|
||||
print(summary(model_reg))
|
||||
|
@ -223,86 +268,23 @@ for(i in my_reg_params2v2) {
|
|||
print(nobs(model_reg))
|
||||
cat("=================================================================================\n")
|
||||
}
|
||||
sink()
|
||||
|
||||
########################################################################
|
||||
#-------------------
|
||||
# Full model
|
||||
########################################################################
|
||||
#-------------------
|
||||
#full_mod2 = glm(death ~ obesity +
|
||||
# asthma +
|
||||
# #age +
|
||||
# #age_bins +
|
||||
#
|
||||
#
|
||||
# , family = "binomial"
|
||||
# , data = my_reg_data)
|
||||
#
|
||||
#print(summary(full_mod2))
|
||||
#print(exp(cbind(OR = coef(full_mod2), confint(full_mod2))))
|
||||
#print(nobs(full_mod2))
|
||||
|
||||
full_mod = glm(death ~ obesity +
|
||||
age +
|
||||
#age_bins +
|
||||
obesity +
|
||||
asthma +
|
||||
t1_resp_recoded +
|
||||
#ia_cxr
|
||||
, family = "binomial", data = my_reg_data)
|
||||
cat("=================================================================================\n")
|
||||
|
||||
summary(full_mod)
|
||||
|
||||
|
||||
|
||||
########################################################################
|
||||
# mediators
|
||||
########################################################################
|
||||
sig_npa_cols = c("mosaic", sig_npa_cols)
|
||||
|
||||
my_med_sig = fp_adults[, sig_npa_cols]
|
||||
|
||||
my_reg_data_med = merge(clinical_df_ics, my_med_sig
|
||||
, by = intersect(names(clinical_df_ics), names(my_med_sig))
|
||||
)
|
||||
|
||||
#-----------------------------
|
||||
# outcome: death + obesity
|
||||
# data: fp adults
|
||||
#-----------------------------
|
||||
#my_reg_params_meds = c(my_reg_params, sig_npa_cols)
|
||||
my_reg_params_meds = colnames(my_reg_data_med)
|
||||
my_reg_params_meds1 = my_reg_params_meds[!my_reg_params_meds%in%c("mosaic", "flustat"
|
||||
, "onset_2_initial"
|
||||
, "onsfindeath"
|
||||
, "ia_exac_copd"
|
||||
, "death"
|
||||
, "obesity")]
|
||||
|
||||
|
||||
|
||||
for(i in my_reg_params_meds1) {
|
||||
#print (i)
|
||||
p_form = as.formula(paste("death ~ obesity + ", i ,sep = ""))
|
||||
print(p_form)
|
||||
model_reg = glm(p_form , family = binomial, data = my_reg_data_med)
|
||||
print(summary(model_reg))
|
||||
print(exp(cbind(OR = coef(model_reg), confint(model_reg))))
|
||||
#print (PseudoR2(model_reg))
|
||||
print(nobs(model_reg))
|
||||
cat("=================================================================================\n")
|
||||
}
|
||||
|
||||
#-----------------------------
|
||||
# outcome: t1_resp_recoded ~ obesity + asthma
|
||||
# data: fp adults
|
||||
#-----------------------------
|
||||
my_reg_params_meds2v2 = my_reg_params_meds[!my_reg_params_meds%in%c("mosaic"
|
||||
, "flustat"
|
||||
, "onset_2_initial"
|
||||
, "onsfindeath"
|
||||
, "ia_exac_copd"
|
||||
, "death"
|
||||
, "t1_resp_recoded"
|
||||
, "T1_resp_score"
|
||||
, "asthma")]
|
||||
|
||||
|
||||
|
||||
for(i in my_reg_params_meds2v2) {
|
||||
#print (i)
|
||||
p_form = as.formula(paste("t1_resp_recoded ~ obesity + asthma + ", i ,sep = ""))
|
||||
print(p_form)
|
||||
model_reg = glm(p_form , family = binomial, data = my_reg_data_med)
|
||||
print(summary(model_reg))
|
||||
print(exp(cbind(OR = coef(model_reg), confint(model_reg))))
|
||||
#print (PseudoR2(model_reg))
|
||||
print(nobs(model_reg))
|
||||
cat("=================================================================================\n")
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue