#!/usr/bin/Rscript

# Report the working directory, move to the project root, then report it
# again so the change is visible in the log.
getwd()
setwd("~/git/mosaic_2020/")
getwd()

########################################################################
# TASK: read data
########################################################################

# Load libraries, packages and local imports.
# Both paths are relative, so they resolve against the working directory
# set above.
source("Header_TT.R")
source("colnames_clinical_meds.R")

########################################################################
# Output locations.
# Every path keeps its trailing slash because it is built (here with
# paste0()) by plain string concatenation.
maindir <- "~/git/mosaic_2020/"

outdir <- paste0(maindir, "output/")
outdir_stats <- paste0(maindir, "output/stats/")
outdir_stats_na <- paste0(maindir, "output/stats/non_asthmatics/")
outdir_stats_ns <- paste0(maindir, "output/stats/non_severe/")
outdir_plots <- paste0(maindir, "output/plots/")

# Create whichever directories are missing.
# - plain if/&& instead of ifelse(): the condition is a scalar and only the
#   side effect matters
# - recursive = TRUE so a missing parent directory is not an obstacle
# - a directory that cannot be created stops the script rather than being
#   ignored
# The anonymous function keeps the loop variable out of the global workspace.
invisible(lapply(
  c(outdir, outdir_stats, outdir_stats_na, outdir_stats_ns, outdir_plots),
  function(path) {
    if (!dir.exists(path) && !dir.create(path, recursive = TRUE)) {
      stop("Could not create output directory: ", path, call. = FALSE)
    }
  }
))

########################################################################
# static file read: csv

#==============
# all patients
#==============
# The master file is read as latin1.
all_df <- read.csv(
  "/home/backup/MOSAIC/MEDIATOR_Data/master_file/Mosaic_master_file_from_stata.csv",
  fileEncoding = "latin1"
)

# meta data columns
#meta_data_cols = c("mosaic", "gender", "age"
#                   , "adult"
#                   , "flustat", "type"
#                   , "obesity"
#                   , "obese2"
#                   , "height", "height_unit"
#                   , "weight", "weight_unit"
#                   , "ia_height_ftin", "ia_height_m", "ia_weight"
#                   , "visual_est_bmi", "bmi_rating"
#                   )

# Select the columns named in meta_clinical_cols, but only when every one of
# them is present in the data. If any are absent, name them in the log
# instead of skipping the selection silently.
# NOTE(review): meta_clinical_cols is not defined in this file; presumably it
# comes from colnames_clinical_meds.R -- confirm.
if (all(meta_clinical_cols %in% colnames(all_df))) {
  metadata_all <- all_df[, meta_clinical_cols]
} else {
  cat("\nFAIL: columns missing from master file:",
      paste(setdiff(meta_clinical_cols, colnames(all_df)), collapse = ", "))
}

#==============
# adult patients
#==============
# Keep the rows with age >= 18. which() discards rows whose age is NA; a bare
# logical index would turn each of them into a row made entirely of NAs.
adult_df <- all_df[which(all_df$age >= 18), ]

# Cross-check against the `adult` column: every extracted row must have
# adult == 1. The matches are counted with sum() because table(x)[[1]] is the
# FALSE count whenever any FALSE is present (so it can report PASS when no row
# matches) and raises an error on an empty table.
if (sum(adult_df$adult == 1, na.rm = TRUE) == nrow(adult_df)) {
  cat("\nPASS: adult df extracted successfully")
} else {
  cat("\nFAIL: adult df number mismatch!")
}

#=================================
# FLU positive: adult patients
#=================================
# Extract the flu positive population (flustat == 1). which() discards rows
# whose flustat is NA; a bare logical index would turn each of them into a
# row made entirely of NAs.
fp_adults <- adult_df[which(adult_df$flustat == 1), ]

# Every extracted row must have flustat == 1. The matches are counted with
# sum() because table(x)[[1]] is the FALSE count whenever any FALSE is
# present and raises an error on an empty table.
# The messages name the flu positive subset so they can be told apart from
# the adult check in the log.
if (sum(fp_adults$flustat == 1, na.rm = TRUE) == nrow(fp_adults)) {
  cat("\nPASS: flu positive adult df extracted successfully")
} else {
  cat("\nFAIL: flu positive adult df number mismatch!")
}

#-----------------------------------
# asthma and copd status correction
# for conflicting field!
#------------------------------------
# Reassign the copd and asthma status and do some checks

# copd status before recoding: value counts, then the number of NAs
table(fp_adults$ia_exac_copd)
sum(is.na(fp_adults$ia_exac_copd))

# Recode missing values and anything below 1 to 0
fp_adults$ia_exac_copd[is.na(fp_adults$ia_exac_copd)] <- 0
fp_adults$ia_exac_copd[fp_adults$ia_exac_copd < 1] <- 0

# copd status after recoding
table(fp_adults$ia_exac_copd)
sum(is.na(fp_adults$ia_exac_copd))

# check copd and asthma status
table(fp_adults$ia_exac_copd, fp_adults$asthma)

# Rows flagged with both copd and asthma (the original author expected 3).
# which() ignores rows where either field is NA.
copd_with_asthma <- which(fp_adults$ia_exac_copd == 1 & fp_adults$asthma == 1)
cat("\nNo. of patients with both copd and asthma:", length(copd_with_asthma))

# reassign these so they are treated as non-asthmatics, as copd with asthma
# is NOT TRUE asthma
fp_adults$asthma[copd_with_asthma] <- 0
rm(copd_with_asthma)

table(fp_adults$ia_exac_copd, fp_adults$asthma)

# Confirm that no row still has both flags set. The rows are tested directly
# rather than reading cell [2, 2] of the cross-table, which raises "subscript
# out of bounds" whenever one of the two levels is absent from the data.
if (!any(fp_adults$ia_exac_copd == 1 & fp_adults$asthma == 1, na.rm = TRUE)) {
  cat("\nPASS: asthma and copd do not conflict")
} else {
  cat("\nFAIL: asthma and copd conflict not resolved!")
  # stop() rather than quit(): it signals an error (non-zero exit status under
  # Rscript) and does not close an interactive session.
  stop("asthma and copd conflict not resolved!", call. = FALSE)
}

#=============================================
# FLU positive adult patients: without asthma
#=============================================
#cat("\nExtracting flu positive without asthma")
#table(fp_adults$asthma)
#cat("\nNo. of asthmatics:", table(fp_adults$asthma)[[2]]
#    , "\nNo. of non-asthmatics:", table(fp_adults$asthma)[[1]])
#str(fp_adults$asthma)

#table(fp_adults$obesity)
#table(fp_adults$obesity, fp_adults$asthma)

#fp_adults_na = fp_adults[fp_adults$asthma == 0,]
#table(fp_adults_na$obesity)
#table(fp_adults_na$obesity, fp_adults_na$asthma)

#============
# hc
#============
#hc_data<- read.csv("/home/backup/MOSAIC/MEDIATOR_Data/master_file/Mediators_for_HC.csv")
#str(hc_data)
#table(hc_data$Timepoint, hc_data$Sample)

########################################################################

# quick sanity checks

# adult_df is not modified by the asthma/copd correction, so this still
# counts the conflicting rows among all adults.
table(adult_df$ia_exac_copd == 1 & adult_df$asthma == 1) # check this is 4

# fp_adults$asthma was set to 0 wherever copd == 1 earlier in this script, so
# no TRUE is expected here (the count was 3 before that correction).
table(fp_adults$ia_exac_copd == 1 & fp_adults$asthma == 1)

# clear unnecessary variables
# metadata_all is only created when all of its columns were found, so remove
# just the objects that exist; rm() warns about names it cannot find.
rm(list = intersect(c("metadata_all", "all_df", "adult_df"), ls()))