correcting dtype for sfluv and h1n1v for data formatting clinical
This commit is contained in:
parent
7529549bfc
commit
08e01abfb5
4 changed files with 52 additions and 461 deletions
|
@ -225,36 +225,35 @@ sum(table(clinical_df$onset_initial_bin)) == tot_onset2ini
|
||||||
#=======================
|
#=======================
|
||||||
# seasonal flu: sfluv
|
# seasonal flu: sfluv
|
||||||
#=======================
|
#=======================
|
||||||
if (! is.factor(clinical_df$sfluv)){
|
# reassign as 0 and 1
|
||||||
clinical_df$sfluv = as.factor(clinical_df$sfluv)
|
|
||||||
}
|
|
||||||
class(clinical_df$sfluv)
|
|
||||||
levels(clinical_df$sfluv)
|
|
||||||
|
|
||||||
table(clinical_df$sfluv)
|
table(clinical_df$sfluv)
|
||||||
table(clinical_df$asthma, clinical_df$sfluv)
|
table(clinical_df$asthma, clinical_df$sfluv)
|
||||||
|
clinical_df$sfluv = ifelse(clinical_df$sfluv == "yes", 1, 0)
|
||||||
# reassign
|
table(clinical_df$sfluv)
|
||||||
levels(clinical_df$sfluv) <- c("0", "0", "1")
|
|
||||||
table(clinical_df$asthma, clinical_df$sfluv)
|
table(clinical_df$asthma, clinical_df$sfluv)
|
||||||
|
|
||||||
|
# convert to integer
|
||||||
|
str(clinical_df$sfluv)
|
||||||
|
clinical_df$sfluv = as.integer(clinical_df$sfluv)
|
||||||
|
str(clinical_df$sfluv)
|
||||||
|
|
||||||
#=======================
|
#=======================
|
||||||
# h1n1v
|
# h1n1v
|
||||||
#=======================
|
#=======================
|
||||||
if (! is.factor(clinical_df$h1n1v)){
|
# reassign as 0 and 1
|
||||||
clinical_df$h1n1v = as.factor(clinical_df$h1n1v)
|
|
||||||
}
|
|
||||||
|
|
||||||
class(clinical_df$h1n1v)
|
|
||||||
levels(clinical_df$h1n1v)
|
|
||||||
|
|
||||||
table(clinical_df$h1n1v)
|
table(clinical_df$h1n1v)
|
||||||
table(clinical_df$asthma, clinical_df$h1n1v)
|
table(clinical_df$asthma, clinical_df$h1n1v)
|
||||||
|
clinical_df$h1n1v = ifelse(clinical_df$h1n1v == "yes", 1, 0)
|
||||||
# reassign
|
table(clinical_df$h1n1v)
|
||||||
levels(clinical_df$h1n1v) <- c("0", "0", "1")
|
|
||||||
table(clinical_df$asthma, clinical_df$h1n1v)
|
table(clinical_df$asthma, clinical_df$h1n1v)
|
||||||
|
|
||||||
|
# convert to integer
|
||||||
|
str(clinical_df$h1n1v)
|
||||||
|
clinical_df$h1n1v = as.integer(clinical_df$h1n1v)
|
||||||
|
str(clinical_df$h1n1v)
|
||||||
|
|
||||||
#=======================
|
#=======================
|
||||||
# ethnicity
|
# ethnicity
|
||||||
#=======================
|
#=======================
|
||||||
|
|
|
@ -1,329 +0,0 @@
|
||||||
#!/usr/bin/Rscript
|
|
||||||
getwd()
|
|
||||||
setwd('~/git/mosaic_2020/')
|
|
||||||
getwd()
|
|
||||||
########################################################################
|
|
||||||
# TASK: Extract relevant columns from mosaic adults data
|
|
||||||
# npa
|
|
||||||
########################################################################
|
|
||||||
#====================
|
|
||||||
# Input: source data for clinical
|
|
||||||
#====================
|
|
||||||
source("data_extraction_formatting_clinical.R")
|
|
||||||
#source("colnames_clinical_meds.R")
|
|
||||||
#=======================================
|
|
||||||
# Data for mediator to include in regression
|
|
||||||
#=======================================
|
|
||||||
cat("Extracting", length(sig_npa_cols), "mediator cols from fp_adults")
|
|
||||||
|
|
||||||
med_df = fp_adults[, c("mosaic", sig_npa_cols)]
|
|
||||||
|
|
||||||
# sanity checks
|
|
||||||
if ( sum(table(clinical_df$obesity)) & sum(table(clinical_df$age>=18)) & sum(table(clinical_df$death)) & sum(table(clinical_df$asthma)) == nrow(clinical_df) ){
|
|
||||||
cat("PASS: binary data obs are complete, n =", nrow(clinical_df))
|
|
||||||
}else{
|
|
||||||
cat("FAIL: Incomplete data for binary outcomes. Please check and decide!")
|
|
||||||
quit()
|
|
||||||
}
|
|
||||||
|
|
||||||
table(clinical_df$ia_exac_copd)
|
|
||||||
|
|
||||||
########################################################################
|
|
||||||
# Data extraction for regression
|
|
||||||
########################################################################
|
|
||||||
|
|
||||||
common_cols = names(clinical_df)[names(clinical_df)%in%names(med_df)]
|
|
||||||
|
|
||||||
cat("\nMerging clinical and mediator data for regression"
|
|
||||||
,"\nMerging on column:", common_cols)
|
|
||||||
|
|
||||||
reg_data = merge(clinical_df, med_df
|
|
||||||
, by = common_cols)
|
|
||||||
|
|
||||||
if (nrow(reg_data) == nrow(clinical_df) & nrow(med_df)){
|
|
||||||
cat("\nNo. of rows match, nrow =", nrow(clinical_df)
|
|
||||||
, "\nChecking ncols...")
|
|
||||||
if ( ncol(reg_data) == ncol(clinical_df) + ncol(med_df) - length(common_cols) ){
|
|
||||||
cat("\nNo. of cols match, ncol =", ncol(reg_data))
|
|
||||||
} else {
|
|
||||||
cat("FAIL: ncols mismatch"
|
|
||||||
, "Expected ncols:", ncol(clinical_df) + ncol(med_df) - length(common_cols)
|
|
||||||
, "\nGot:", ncol(reg_data))
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
cat("FAIL: nrows mismatch"
|
|
||||||
, "\nExpected nrows:", nrow(fp_adults))
|
|
||||||
}
|
|
||||||
|
|
||||||
########################################################################
|
|
||||||
# Reassign the copd and asthma status and do some checks
|
|
||||||
table(reg_data$ia_exac_copd); sum(is.na(reg_data$ia_exac_copd))
|
|
||||||
|
|
||||||
reg_data$ia_exac_copd[reg_data$ia_exac_copd< 1]<- 0
|
|
||||||
reg_data$ia_exac_copd[is.na(reg_data$ia_exac_copd)] <- 0
|
|
||||||
|
|
||||||
table(reg_data$ia_exac_copd); sum(is.na(reg_data$ia_exac_copd))
|
|
||||||
|
|
||||||
# check copd and asthma status
|
|
||||||
table(reg_data$ia_exac_copd, reg_data$asthma)
|
|
||||||
check_copd_and_asthma_1<- subset(reg_data, ia_exac_copd ==1 & asthma == 1) # check this is 3
|
|
||||||
|
|
||||||
# reassign these 3 so these are treated as non-asthmatics as copd with asthma is NOT TRUE asthma
|
|
||||||
reg_data$asthma[reg_data$ia_exac_copd == 1 & reg_data$asthma == 1]= 0
|
|
||||||
table(reg_data$ia_exac_copd, reg_data$asthma)
|
|
||||||
|
|
||||||
foo<- subset(reg_data, asthma==1 & ia_exac_copd ==1) # check that its 0
|
|
||||||
|
|
||||||
rm(check_copd_and_asthma_1, foo)
|
|
||||||
#=====================================================================
|
|
||||||
# count the resp scores
|
|
||||||
max_resp_score_table<- table(reg_data$max_resp_score)
|
|
||||||
max_resp_score_table
|
|
||||||
|
|
||||||
T1_resp_score_table<- table(reg_data$T1_resp_score)
|
|
||||||
T1_resp_score_table
|
|
||||||
|
|
||||||
T2_resp_score_table<- table(reg_data$T2_resp_score)
|
|
||||||
T2_resp_score_table
|
|
||||||
|
|
||||||
Inresp_sev<- table(reg_data$inresp_sev)
|
|
||||||
Inresp_sev
|
|
||||||
|
|
||||||
# Reassign the resp score so all 4 are replace by 3
|
|
||||||
reg_data$max_resp_score[reg_data$max_resp_score ==4 ] <- 3
|
|
||||||
revised_resp_score_table<- table(reg_data$max_resp_score)
|
|
||||||
revised_resp_score_table
|
|
||||||
|
|
||||||
reg_data$T1_resp_score[reg_data$T1_resp_score ==4 ] <- 3
|
|
||||||
revised_T1_resp_score_table<- table(reg_data$T1_resp_score)
|
|
||||||
revised_T1_resp_score_table
|
|
||||||
|
|
||||||
reg_data$T2_resp_score[reg_data$T2_resp_score == 4]<- 3
|
|
||||||
revised_T2_resp_score_table<- table(reg_data$T2_resp_score)
|
|
||||||
revised_T2_resp_score_table
|
|
||||||
|
|
||||||
reg_data$inresp_sev[reg_data$inresp_sev == 4]<- 3
|
|
||||||
revised_Inresp_sev<- table(reg_data$inresp_sev)
|
|
||||||
revised_Inresp_sev
|
|
||||||
#=====================================================================
|
|
||||||
# Remove these after checking
|
|
||||||
rm(max_resp_score_table, T1_resp_score_table, T2_resp_score_table, Inresp_sev
|
|
||||||
, revised_resp_score_table, revised_T1_resp_score_table, revised_T2_resp_score_table, revised_Inresp_sev)
|
|
||||||
#=====================================================================
|
|
||||||
# Binning
|
|
||||||
# "(": not inclusive
|
|
||||||
# "]": inclusive
|
|
||||||
|
|
||||||
#========
|
|
||||||
# age
|
|
||||||
#========
|
|
||||||
# Create categories of variables
|
|
||||||
reg_data$age = round(reg_data$age, digits = 0)
|
|
||||||
table(reg_data$age)
|
|
||||||
table(reg_data$asthma, reg_data$age)
|
|
||||||
min(reg_data$age); max(reg_data$age)
|
|
||||||
|
|
||||||
max_age_interval = round_any(max(reg_data$age), 10, f = ceiling)
|
|
||||||
max_age_interval
|
|
||||||
min_age = min(reg_data$age); min_age #19
|
|
||||||
min_age_interval = min_age - 1; min_age_interval
|
|
||||||
|
|
||||||
#age_bins = cut(reg_data$age, c(0,18,30,40,50,60,70,80,90))
|
|
||||||
age_bins = cut(reg_data$age, c(min_age_interval, 30, 40, 50, 60, 70, max_age_interval))
|
|
||||||
reg_data$age_bins = age_bins
|
|
||||||
dim(reg_data) # 133 27
|
|
||||||
|
|
||||||
# age_bins (to keep consistent with the results table)
|
|
||||||
class(reg_data$age_bins)
|
|
||||||
levels(reg_data$age_bins)
|
|
||||||
#"(18,30]" "(30,40]" "(40,50]" "(50,60]" "(60,70]" "(70,80]"
|
|
||||||
table(reg_data$asthma, reg_data$age_bins)
|
|
||||||
# (18,30] (30,40] (40,50] (50,60] (60,70] (70,80]
|
|
||||||
#0 25 17 25 14 11 1
|
|
||||||
#1 11 8 12 5 3 2
|
|
||||||
|
|
||||||
if (sum(table(reg_data$asthma, reg_data$age_bins)) == nrow(reg_data) ){
|
|
||||||
cat("PASS: age_bins assigned successfully")
|
|
||||||
}else{
|
|
||||||
cat("FAIL: no. mismatch when assigning age_bins")
|
|
||||||
quit()
|
|
||||||
}
|
|
||||||
|
|
||||||
# reassign
|
|
||||||
class(reg_data$age_bins)
|
|
||||||
levels(reg_data$age_bins) <- c("(18,30]","(30,40]","(40,50]","(50,80]","(50,80]","(50,80]")
|
|
||||||
table(reg_data$asthma, reg_data$age_bins)
|
|
||||||
table(reg_data$asthma, reg_data$age_bins)
|
|
||||||
# (18,30] (30,40] (40,50] (50,80]
|
|
||||||
#0 25 17 25 26
|
|
||||||
#1 11 8 12 9
|
|
||||||
|
|
||||||
sum(table(reg_data$asthma, reg_data$age_bins)) == nrow(reg_data)
|
|
||||||
|
|
||||||
#===========================
|
|
||||||
# O2 saturation binning
|
|
||||||
#===========================
|
|
||||||
reg_data$o2_sat_admis = round(reg_data$o2_sat_admis, digits = 0)
|
|
||||||
table(reg_data$o2_sat_admis)
|
|
||||||
tot_o2 = sum(table(reg_data$o2_sat_admis))- table(reg_data$o2_sat_admis)[["-1"]]
|
|
||||||
tot_o2
|
|
||||||
|
|
||||||
o2_sat_bin = cut(reg_data$o2_sat_admis, c(0,92,100))
|
|
||||||
reg_data$o2_sat_bin = o2_sat_bin
|
|
||||||
table(reg_data$o2_sat_bin)
|
|
||||||
|
|
||||||
sum(table(reg_data$o2_sat_bin)) == tot_o2
|
|
||||||
|
|
||||||
#===========================
|
|
||||||
# Onset to initial binning
|
|
||||||
#===========================
|
|
||||||
max_in = max(reg_data$onset_2_initial); max_in #23
|
|
||||||
min_in = min(reg_data$onset_2_initial) - 1 ; min_in # -6
|
|
||||||
|
|
||||||
tot_onset2ini = sum(table(reg_data$onset_2_initial))
|
|
||||||
tot_onset2ini
|
|
||||||
|
|
||||||
onset_initial_bin = cut(reg_data$onset_2_initial, c(min_in, 4, max_in))
|
|
||||||
reg_data$onset_initial_bin = onset_initial_bin
|
|
||||||
|
|
||||||
sum(table(reg_data$onset_initial_bin)) == tot_onset2ini
|
|
||||||
|
|
||||||
#=======================
|
|
||||||
# seasonal flu: sfluv
|
|
||||||
#=======================
|
|
||||||
# should be a factor
|
|
||||||
if (! is.factor(reg_data$sfluv)){
|
|
||||||
reg_data$sfluv = as.factor(reg_data$sfluv)
|
|
||||||
}
|
|
||||||
class(reg_data$sfluv) #[1] "factor"
|
|
||||||
|
|
||||||
levels(reg_data$sfluv)
|
|
||||||
table(reg_data$asthma, reg_data$sfluv)
|
|
||||||
# reassign
|
|
||||||
levels(reg_data$sfluv) <- c("0", "0", "1")
|
|
||||||
table(reg_data$asthma, reg_data$sfluv)
|
|
||||||
|
|
||||||
#=======================
|
|
||||||
# h1n1v
|
|
||||||
#=======================
|
|
||||||
# should be a factor
|
|
||||||
if (! is.factor(reg_data$h1n1v)){
|
|
||||||
reg_data$h1n1v = as.factor(reg_data$h1n1v)
|
|
||||||
}
|
|
||||||
class(reg_data$h1n1v) #[1] "factor"
|
|
||||||
|
|
||||||
levels(reg_data$h1n1v)
|
|
||||||
table(reg_data$asthma, reg_data$h1n1v)
|
|
||||||
# reassign
|
|
||||||
levels(reg_data$h1n1v) <- c("0", "0", "1")
|
|
||||||
table(reg_data$asthma, reg_data$h1n1v)
|
|
||||||
|
|
||||||
#=======================
|
|
||||||
# ethnicity
|
|
||||||
#=======================
|
|
||||||
class(reg_data$ethnicity) # integer
|
|
||||||
table(reg_data$asthma, reg_data$ethnicity)
|
|
||||||
|
|
||||||
reg_data$ethnicity[reg_data$ethnicity == 4] <- 2
|
|
||||||
table(reg_data$asthma, reg_data$ethnicity)
|
|
||||||
|
|
||||||
#=======================
|
|
||||||
# pneumonia
|
|
||||||
#=======================
|
|
||||||
class(reg_data$ia_cxr) # integer
|
|
||||||
# ia_cxr 2 ---> yes pneumonia (1)
|
|
||||||
# 1 ---> no (0)
|
|
||||||
# ! 1 or 2 -- > "unkown"
|
|
||||||
|
|
||||||
# reassign the pneumonia codes
|
|
||||||
#0: not performed
|
|
||||||
#1: normal
|
|
||||||
#2: findings consistent with pneumonia
|
|
||||||
#3: abnormal
|
|
||||||
#-1: not recorded
|
|
||||||
#-2: n/a specified by the clinician # not in the data...
|
|
||||||
#-3: unknown specified by clinician
|
|
||||||
|
|
||||||
|
|
||||||
table(reg_data$ia_cxr)
|
|
||||||
#-3 -1 0 1 2 3
|
|
||||||
#5 48 13 47 17 3
|
|
||||||
|
|
||||||
# change these first else recoding 0 will be a problem as 0 already exists, mind you -2 categ doesn't exist
|
|
||||||
reg_data$ia_cxr[reg_data$ia_cxr == -3 | reg_data$ia_cxr == -1 | reg_data$ia_cxr == 0 | reg_data$ia_cxr == 3 ] <- ""
|
|
||||||
table(reg_data$ia_cxr)
|
|
||||||
# 1 2
|
|
||||||
#69 47 17
|
|
||||||
|
|
||||||
reg_data$ia_cxr[reg_data$ia_cxr == 1] <- 0
|
|
||||||
reg_data$ia_cxr[reg_data$ia_cxr == 2] <- 1
|
|
||||||
table(reg_data$ia_cxr)
|
|
||||||
# 0 1
|
|
||||||
#69 47 17
|
|
||||||
|
|
||||||
#=======================
|
|
||||||
# smoking [tricky one]
|
|
||||||
#=======================
|
|
||||||
class(reg_data$smoking) # integer
|
|
||||||
table(reg_data$asthma, reg_data$smoking)
|
|
||||||
|
|
||||||
# orig
|
|
||||||
# -3 -1 1 2 3 4
|
|
||||||
#0 15 9 22 2 15 30
|
|
||||||
#1 4 2 13 0 4 17
|
|
||||||
|
|
||||||
# -3 -1 1 2 3 4
|
|
||||||
#0 14 9 20 2 15 30
|
|
||||||
#1 5 2 15 0 4 17
|
|
||||||
|
|
||||||
# never smoking, 4 and 2 -- > no (0)
|
|
||||||
#1 and 3 ---> yes (1)
|
|
||||||
#!-3 and -1 ---- > NA
|
|
||||||
|
|
||||||
################# smoking
|
|
||||||
|
|
||||||
#1: current daily ===> categ smoker(1)
|
|
||||||
#2: occasional =====> categ no smoker(0)
|
|
||||||
#3: ex-smoker ===> categ smoker(1)
|
|
||||||
#4: never =====> categ no smoker(0)
|
|
||||||
#-1: not recorded =====> categ blank (NA)
|
|
||||||
#-2: n/a specified by the clinician =====> categ blank (NA)
|
|
||||||
#-3: unknown specified by clinician=====> categ blank (NA)
|
|
||||||
|
|
||||||
table(reg_data$smoking)
|
|
||||||
#-3 -1 1 2 3 4
|
|
||||||
#19 11 35 2 19 47
|
|
||||||
|
|
||||||
# reassign the smoking codes
|
|
||||||
reg_data$smoking[reg_data$smoking == 4 | reg_data$smoking == 2 ] <- 0
|
|
||||||
reg_data$smoking[reg_data$smoking == 1 | reg_data$smoking == 3 ] <- 1
|
|
||||||
reg_data$smoking[reg_data$smoking == -1 | reg_data$smoking == -2 | reg_data$smoking == -3 ] <- ""
|
|
||||||
|
|
||||||
table(reg_data$smoking)
|
|
||||||
# 0 1
|
|
||||||
#30 49 54
|
|
||||||
|
|
||||||
table(reg_data$asthma, reg_data$smoking)
|
|
||||||
|
|
||||||
# orig
|
|
||||||
# 0 1
|
|
||||||
#0 24 32 37
|
|
||||||
#1 6 17 17
|
|
||||||
|
|
||||||
# 0 1
|
|
||||||
#0 23 32 35
|
|
||||||
#1 7 17 19
|
|
||||||
|
|
||||||
################################################################
|
|
||||||
#==================
|
|
||||||
# writing output file
|
|
||||||
#==================
|
|
||||||
outfile_name_reg = "reg_data_recoded_with_NA.csv"
|
|
||||||
outfile_reg = paste0(outdir, outfile_name_reg)
|
|
||||||
|
|
||||||
cat("Writing clinical file for regression:", outfile_reg)
|
|
||||||
|
|
||||||
#write.csv(reg_data, file = outfile_reg)
|
|
||||||
################################################################
|
|
||||||
|
|
||||||
rm(age_bins, max_age_interval, max_in, min_in, o2_sat_bin, onset_initial_bin, tot_o2, tot_onset2ini, meta_data_cols)
|
|
|
@ -4,14 +4,8 @@ setwd('~/git/mosaic_2020/')
|
||||||
getwd()
|
getwd()
|
||||||
########################################################################
|
########################################################################
|
||||||
# TASK: Run regression analysis
|
# TASK: Run regression analysis
|
||||||
# npa
|
# clinical params and npa meds
|
||||||
########################################################################
|
########################################################################
|
||||||
#=================================================================================
|
|
||||||
# TO DO:
|
|
||||||
# Simple stats b/w obesity and non-obesity to consider including in reg analysis
|
|
||||||
# Include NPA statistically sign params
|
|
||||||
# Rerun graphs and plots without asthma?
|
|
||||||
#=================================================================================
|
|
||||||
|
|
||||||
#====================
|
#====================
|
||||||
# Input: source data
|
# Input: source data
|
||||||
|
@ -25,27 +19,29 @@ table(fp_adults_na$ia_exac_copd==1 & fp_adults_na$asthma == 1)
|
||||||
|
|
||||||
table(clinical_df_ics$asthma)
|
table(clinical_df_ics$asthma)
|
||||||
|
|
||||||
|
if ( length(cols_to_extract) == length(clinical_cols) + length(sig_npa_cols) ){
|
||||||
|
cat("PASS: extracting clinical and sign npa cols")
|
||||||
|
} else{
|
||||||
|
cat("FAIL: could not find cols to extract")
|
||||||
|
quit()
|
||||||
|
}
|
||||||
|
|
||||||
|
fp_adults_reg = fp_adults[, cols_to_extract]
|
||||||
|
fp_adults_reg_na = fp_adults_na[, cols_to_extract]
|
||||||
|
|
||||||
#--------------------
|
#--------------------
|
||||||
# Data reassignment
|
# Data reassignment
|
||||||
#--------------------
|
#--------------------
|
||||||
my_data = clinical_df_ics
|
my_data = fp_adults_reg
|
||||||
my_data_na = clinical_df_ics_na
|
my_data_na = fp_adults_reg_na
|
||||||
|
|
||||||
table(my_data$ia_exac_copd==1 & my_data$asthma == 1)
|
table(my_data$ia_exac_copd==1 & my_data$asthma == 1)
|
||||||
table(my_data_na$ia_exac_copd==1 & my_data_na$asthma == 1)
|
table(my_data_na$ia_exac_copd==1 & my_data_na$asthma == 1)
|
||||||
|
|
||||||
# clear variables
|
# clear variables
|
||||||
#rm(fp_adults, fp_adults_na)
|
rm(fp_adults, fp_adults_na, clinical_df_ics, clinical_df_ics_na)
|
||||||
|
|
||||||
#########################################################################
|
#########################################################################
|
||||||
|
|
||||||
if ( names(which(lapply(my_data, class) == "character")) == "mosaic" ){
|
|
||||||
cat("Character class for 1 column only:", "mosaic")
|
|
||||||
}else{
|
|
||||||
cat("More than one character class detected: Resolve!")
|
|
||||||
quit()
|
|
||||||
}
|
|
||||||
|
|
||||||
#============================
|
#============================
|
||||||
# Identifying column types: Reg data
|
# Identifying column types: Reg data
|
||||||
#===========================
|
#===========================
|
||||||
|
@ -57,6 +53,23 @@ my_vars = colnames(my_reg_data)
|
||||||
my_vars
|
my_vars
|
||||||
|
|
||||||
lapply(my_reg_data, class)
|
lapply(my_reg_data, class)
|
||||||
|
|
||||||
|
check_int_vars = my_vars[lapply(my_reg_data, class)%in%c("integer")]
|
||||||
|
check_num_vars = my_vars[lapply(my_reg_data, class)%in%c("numeric")]
|
||||||
|
check_charac_vars = my_vars[lapply(my_reg_data, class)%in%c("character")]
|
||||||
|
check_factor_vars = my_vars[lapply(my_reg_data, class)%in%c("factor")]
|
||||||
|
|
||||||
|
cat("\nNo. of int cols:", length(check_int_vars)
|
||||||
|
, "\nNo. of num cols:", length(check_num_vars)
|
||||||
|
, "\nNo. of char cols:", length(check_charac_vars)
|
||||||
|
, "\nNo. of factor cols:", length(check_factor_vars)
|
||||||
|
)
|
||||||
|
|
||||||
|
# convert char vals to int as these should be int
|
||||||
|
my_reg_data[,check_charac_vars] = lapply(my_reg_data[,check_charac_vars], as.integer)
|
||||||
|
str(my_reg_data$sfluv)
|
||||||
|
|
||||||
|
|
||||||
numerical_vars = c("age"
|
numerical_vars = c("age"
|
||||||
, "vl_pfu_ul_npa1"
|
, "vl_pfu_ul_npa1"
|
||||||
, "los"
|
, "los"
|
||||||
|
@ -64,6 +77,11 @@ numerical_vars = c("age"
|
||||||
, "onsfindeath"
|
, "onsfindeath"
|
||||||
, "o2_sat_admis")
|
, "o2_sat_admis")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
my_reg_data[numerical_vars] <- lapply(my_reg_data[numerical_vars], as.numeric)
|
my_reg_data[numerical_vars] <- lapply(my_reg_data[numerical_vars], as.numeric)
|
||||||
|
|
||||||
my_reg_params = my_vars
|
my_reg_params = my_vars
|
||||||
|
|
|
@ -1,97 +0,0 @@
|
||||||
#!/usr/bin/Rscript
|
|
||||||
getwd()
|
|
||||||
setwd('~/git/mosaic_2020/')
|
|
||||||
getwd()
|
|
||||||
########################################################################
|
|
||||||
# TASK: Run regression analysis
|
|
||||||
# npa
|
|
||||||
########################################################################
|
|
||||||
#=================================================================================
|
|
||||||
# TO DO:
|
|
||||||
# Simple stats b/w obesity and non-obesity to consider including in reg analysis
|
|
||||||
# Include NPA statistically sign params
|
|
||||||
# Rerun graphs and plots without asthma?
|
|
||||||
#=================================================================================
|
|
||||||
|
|
||||||
#====================
|
|
||||||
# Input: source data
|
|
||||||
#====================
|
|
||||||
source("data_extraction_formatting_clinical.R")
|
|
||||||
|
|
||||||
rm(fp_adults, metadata_all)
|
|
||||||
|
|
||||||
########################################################################
|
|
||||||
my_data = reg_data
|
|
||||||
#########################################################################
|
|
||||||
# check factor of each column
|
|
||||||
lapply(my_data, class)
|
|
||||||
|
|
||||||
character_vars <- lapply(my_data, class) == "character"
|
|
||||||
character_vars
|
|
||||||
table(character_vars)
|
|
||||||
|
|
||||||
factor_vars <- lapply(my_data, class) == "factor"
|
|
||||||
table(factor_vars)
|
|
||||||
|
|
||||||
my_data[, character_vars] <- lapply(my_data[, character_vars], as.factor)
|
|
||||||
factor_vars <- lapply(my_data, class) == "factor"
|
|
||||||
factor_vars
|
|
||||||
table(factor_vars)
|
|
||||||
|
|
||||||
# check again
|
|
||||||
lapply(my_data, class)
|
|
||||||
|
|
||||||
table(my_data$ethnicity)
|
|
||||||
my_data$ethnicity = as.factor(my_data$ethnicity)
|
|
||||||
class(my_data$ethnicity)
|
|
||||||
|
|
||||||
colnames(my_data)
|
|
||||||
reg_param = c("age"
|
|
||||||
, "age_bins"
|
|
||||||
#, "death" # outcome
|
|
||||||
, "asthma"
|
|
||||||
, "obesity"
|
|
||||||
, "gender"
|
|
||||||
, "los"
|
|
||||||
, "o2_sat_admis"
|
|
||||||
#, "logistic_outcome"
|
|
||||||
#, "steroid_ics"
|
|
||||||
, "ethnicity"
|
|
||||||
, "smoking"
|
|
||||||
, "sfluv"
|
|
||||||
, "h1n1v"
|
|
||||||
, "ia_cxr"
|
|
||||||
, "max_resp_score"
|
|
||||||
, "T1_resp_score"
|
|
||||||
, "com_noasthma"
|
|
||||||
, "onset_initial_bin")
|
|
||||||
|
|
||||||
for(i in reg_param) {
|
|
||||||
# print (i)
|
|
||||||
p_form = as.formula(paste("death ~ ", i ,sep = ""))
|
|
||||||
model_reg = glm(p_form , family = binomial, data = my_data)
|
|
||||||
print(summary(model_reg))
|
|
||||||
print(exp(cbind(OR = coef(model_reg), confint(model_reg))))
|
|
||||||
#print (PseudoR2(model_reg))
|
|
||||||
cat("=================================================================================\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
full_mod = glm(death ~ asthma +
|
|
||||||
gender +
|
|
||||||
age_bins +
|
|
||||||
los +
|
|
||||||
#ethnicity +
|
|
||||||
onset_initial_bin +
|
|
||||||
o2_sat_bin +
|
|
||||||
com_noasthma +
|
|
||||||
obesity +
|
|
||||||
#ia_cxr +
|
|
||||||
smoking +
|
|
||||||
#sfluv +
|
|
||||||
#h1n1v
|
|
||||||
max_resp_score +
|
|
||||||
T1_resp_score +
|
|
||||||
, family = "binomial", data = my_data)
|
|
||||||
|
|
||||||
summary(full_mod)
|
|
Loading…
Add table
Add a link
Reference in a new issue