logistic regression for outcome and meds

This commit is contained in:
Tanushree Tunstall 2020-11-23 18:16:19 +00:00
parent 25fb702e2e
commit e3259f2f17
10 changed files with 265 additions and 1904 deletions

View file

@ -172,6 +172,12 @@ table(clinical_df$asthma, clinical_df$age_bins)
#1 11 8 12 9
# sanity check: TRUE only when the asthma x age_bins cross-tab accounts for
# every row, i.e. no row has a missing asthma or age_bins value
# (table() drops NAs by default)
sum(table(clinical_df$asthma, clinical_df$age_bins)) == nrow(clinical_df)
# inspect age_int, then drop that column from clinical_df
table(clinical_df$age_int)
clinical_df = subset(clinical_df, select = -c(age_int))
# tabulate again after the drop (presumably to confirm the column is gone)
table(clinical_df$age_int)
# inspect the class and the values of the binned age column
class(clinical_df$age_bins)
clinical_df$age_bins
#===========================
# O2 saturation binning
@ -214,7 +220,6 @@ tot_onset2ini
# bin onset_2_initial using the breaks min_in, 4 and max_in; with cut()
# defaults the intervals are right-closed: (min_in, 4] and (4, max_in]
# NOTE(review): cut() excludes the lowest break by default, so if min_in equals
# the smallest observed value those rows are binned as NA — confirm how min_in
# is defined, or whether include.lowest = TRUE is wanted
onset_initial_bin = cut(clinical_df$onset_2_initial, c(min_in, 4, max_in))
# store the bins as a new column of clinical_df
clinical_df$onset_initial_bin = onset_initial_bin
# sanity check: number of binned (non-NA) values should equal tot_onset2ini
sum(table(clinical_df$onset_initial_bin)) == tot_onset2ini
#=======================
@ -357,10 +362,6 @@ clinical_df_ics = merge(clinical_df, clinical_ics, by = merging_cols, all = T);
# list the column names of the merged data frame
colnames(clinical_df_ics)
# rename the column "logistic_outcome" to "t1_resp_recoded";
# if no column has that name, c1 is empty and the rename is a no-op
c1 = which(colnames(clinical_df_ics) == "logistic_outcome")
colnames(clinical_df_ics)[c1] <- "t1_resp_recoded"
# row-count check on the merged data frame
# NOTE(review): `==` binds tighter than `&`, so the right-hand operand is just
# nrow(clinical_ics) treated as TRUE when non-zero; it is not compared with the
# other row counts. Confirm whether
# nrow(clinical_df_ics) == nrow(clinical_ics) was intended.
if (nrow(clinical_df_ics) == nrow(clinical_df) & nrow(clinical_ics)){
cat("\nPASS: No. of rows match, nrow =", nrow(clinical_df_ics)
, "\nChecking ncols...")
@ -376,14 +377,33 @@ if (nrow(clinical_df_ics) == nrow(clinical_df) & nrow(clinical_ics)){
, "\nExpected nrows:", nrow(fp_adults))
}
# change the factor vars to integers (each factor becomes its integer level code)
str(clinical_df_ics)
# logical mask with one entry per column; is.factor() also catches ordered
# factors, whose class vector has two entries and never equals "factor"
factor_vars = vapply(clinical_df_ics, is.factor, logical(1))
table(factor_vars)
# index by column list (no comma) so the selection stays a data frame even when
# exactly one column is a factor; with `[, factor_vars]` a single column
# collapses to a vector and lapply() would iterate over its elements instead
# of over columns
clinical_df_ics[factor_vars] <- lapply(clinical_df_ics[factor_vars], as.integer)
table(factor_vars)
#=========================
# add binary outcome for T1 resp score
#=========================
# inspect the score distribution and the column types before recoding
table(clinical_df_ics$T1_resp_score)
str(clinical_df_ics)
# recode: scores below 3 become 0, all other scores become 1; NA scores stay NA.
# This creates t1_resp_recoded, or overwrites it if the column already exists.
# NOTE(review): assumes T1_resp_score is numeric at this point; `<` on an
# unordered factor yields NA — confirm
clinical_df_ics$t1_resp_recoded = ifelse(clinical_df_ics$T1_resp_score <3, 0, 1)
# distribution of the recoded outcome
table(clinical_df_ics$t1_resp_recoded)
#table(clinical_df_ics$steroid)
table(clinical_df_ics$steroid_ics)
#=========================
# change the factor vars to integers
# (disabled: every statement in this section is commented out)
#=========================
#str(clinical_df_ics)
#factor_vars = lapply(clinical_df_ics, class) == "factor"
#table(factor_vars)
#clinical_df_ics[, factor_vars] <- lapply(clinical_df_ics[, factor_vars], as.integer)
#table(factor_vars)
#str(clinical_df_ics)
#=========================
# remove cols
#=========================
# drop the onset_2_initial column from clinical_df_ics
clinical_df_ics = subset(clinical_df_ics, select = -c(onset_2_initial))
#======================
# writing output file
@ -392,10 +412,17 @@ outfile_name_reg = "clinical_df_recoded.csv"
# build the output path by plain concatenation; paste0() inserts no separator,
# so outdir must already end with one
outfile_reg = paste0(outdir, outfile_name_reg)
cat("\nWriting clinical file for regression:", outfile_reg)
# NOTE: the write below is commented out, so the message above is printed but
# no file is written
#write.csv(clinical_df_ics, file = outfile_reg)
#=========================
# clinical_df_ics: without asthma
#=========================
# keep only the rows where asthma == 0. which() turns the comparison into row
# positions and so skips rows where asthma is NA; indexing with the raw logical
# vector would instead return an all-NA row for each of them
clinical_df_ics_na = clinical_df_ics[which(clinical_df_ics$asthma == 0),]
################################################################
# remove intermediate objects from the workspace; each name is listed once,
# because rm() warns "object not found" when asked to remove a name twice
rm(age_bins, max_age_interval, max_in, min_in
, o2_sat_bin, onset_initial_bin, tot_o2
, n_text_code, n1, n2, tot_onset2ini, infile_ics
, meta_data_cols
, clinical_df, clinical_ics)
################################################################