#!/usr/bin/Rscript
getwd()
# The relative source() calls below rely on this working directory.
setwd("~/git/mosaic_2020/")
getwd()
########################################################################
# TASK: read data
#
# Reads the MOSAIC master csv and leaves these objects in the workspace:
#   metadata_all : all patients, restricted to meta_clinical_cols
#   fp_adults    : flu positive adults (age >= 18, flustat == 1) with the
#                  asthma/copd conflict resolved
#   maindir, outdir* : output locations (created if absent)
# all_df and adult_df are removed at the end.
########################################################################
# load libraries, packages and local imports
source("Header_TT.R")
# presumably defines meta_clinical_cols used below -- TODO confirm
source("colnames_clinical_meds.R")

########################################################################
# output directories
########################################################################
maindir <- "~/git/mosaic_2020/"
outdir <- paste0(maindir, "output/")
outdir_stats <- paste0(maindir, "output/stats/")
outdir_stats_na <- paste0(maindir, "output/stats/non_asthmatics/")
outdir_stats_ns <- paste0(maindir, "output/stats/non_severe/")
outdir_stats_s <- paste0(maindir, "output/stats/severe/")
outdir_stats_a <- paste0(maindir, "output/stats/asthma/")
outdir_plots <- paste0(maindir, "output/plots/")

# Create whichever output directories do not exist yet. recursive = TRUE
# makes each call independent of the order in which parents are created.
invisible(lapply(
  c(outdir, outdir_stats, outdir_stats_na, outdir_stats_ns,
    outdir_stats_s, outdir_stats_a, outdir_plots),
  function(out_path) {
    if (!dir.exists(out_path)) {
      dir.create(out_path, recursive = TRUE)
    }
  }
))

########################################################################
# static file read: csv

#==============
# all patients
#==============
all_df <- read.csv(
  "/home/backup/MOSAIC/MEDIATOR_Data/master_file/Mosaic_master_file_from_stata.csv",
  fileEncoding = "latin1"
)

# meta data columns
#meta_data_cols = c("mosaic", "gender", "age"
#                   , "adult"
#                   , "flustat", "type"
#                   , "obesity"
#                   #, "obese2"
#                   #, "height", "height_unit"
#                   #, "weight", "weight_unit"
#                   #, "ia_height_ftin", "ia_height_m", "ia_weight"
#                   #, "visual_est_bmi", "bmi_rating"
#                   )

# Select the clinical metadata columns only if every one of them is present;
# otherwise report which ones are missing instead of skipping silently.
if (all(meta_clinical_cols %in% colnames(all_df))) {
  metadata_all <- all_df[, meta_clinical_cols]
} else {
  cat("\nFAIL: clinical metadata columns missing from master file:",
      paste(setdiff(meta_clinical_cols, colnames(all_df)), collapse = ", "))
}

#==============
# adult patients
#==============
# NOTE(review): a row whose age is NA comes through this subset as an all-NA
# row; the check below then reports FAIL because 'adult' is NA for that row.
adult_df <- all_df[all_df$age >= 18, ]

# Every extracted row must be flagged adult == 1. all() returns NA when a
# value is missing, so isTRUE() turns missing flags into a FAIL.
if (nrow(adult_df) > 0 && isTRUE(all(adult_df$adult == 1))) {
  cat("\nPASS: adult df extracted successfully")
} else {
  cat("\nFAIL: adult df number mismatch!")
}

#=================================
# FLU positive: adult patients
#=================================
# extract the flu positive population
fp_adults <- adult_df[adult_df$flustat == 1, ]

if (nrow(fp_adults) > 0 && isTRUE(all(fp_adults$flustat == 1))) {
  cat("\nPASS: flu positive adult df extracted successfully")
} else {
  cat("\nFAIL: flu positive adult df number mismatch!")
}

#-----------------------------------
# asthma and copd status correction
# for conflicting field!
#------------------------------------
# Recode copd status so that anything below 1, and missing, becomes 0
table(fp_adults$ia_exac_copd); sum(is.na(fp_adults$ia_exac_copd))

fp_adults$ia_exac_copd[fp_adults$ia_exac_copd < 1] <- 0
fp_adults$ia_exac_copd[is.na(fp_adults$ia_exac_copd)] <- 0

table(fp_adults$ia_exac_copd); sum(is.na(fp_adults$ia_exac_copd))

# check copd and asthma status
table(fp_adults$ia_exac_copd, fp_adults$asthma)

# Rows flagged as both copd and asthma: check this is 3
copd_with_asthma <- which(fp_adults$ia_exac_copd == 1 & fp_adults$asthma == 1)
cat("\nNo. of copd patients also flagged as asthmatic:", length(copd_with_asthma))

# reassign these so they are treated as non-asthmatics, as copd with asthma
# is NOT TRUE asthma
fp_adults$asthma[copd_with_asthma] <- 0
rm(copd_with_asthma)

table(fp_adults$ia_exac_copd, fp_adults$asthma)

# Count remaining conflicts directly rather than indexing cell [2, 2] of the
# cross-table, which only works when both variables show exactly two levels.
n_unresolved <- sum(fp_adults$ia_exac_copd == 1 & fp_adults$asthma == 1,
                    na.rm = TRUE)

if (n_unresolved == 0) {
  cat("\nPASS: asthma and copd do not conflict")
} else {
  cat("\nFAIL: asthma and copd conflict not resolved!")
  # stop() gives a non-zero exit status under Rscript and aborts a source()
  # call without closing the R session.
  stop("asthma and copd conflict not resolved", call. = FALSE)
}
rm(n_unresolved)

#=============================================
# FLU positive adult patients: without asthma
#=============================================
#cat("\nExtracting flu positive without asthma")
#table(fp_adults$asthma)
#cat("\nNo. of asthmatics:", table(fp_adults$asthma)[[2]]
#    , "\nNo. of non-asthmatics:", table(fp_adults$asthma)[[1]])

#str(fp_adults$asthma)
#table(fp_adults$obesity)
#table(fp_adults$obesity, fp_adults$asthma)

#fp_adults_na = fp_adults[fp_adults$asthma == 0,]
#table(fp_adults_na$obesity)
#table(fp_adults_na$obesity, fp_adults_na$asthma)

#============
# hc
#============
#hc_data<- read.csv("/home/backup/MOSAIC/MEDIATOR_Data/master_file/Mediators_for_HC.csv")
#str(hc_data)
#table(hc_data$Timepoint, hc_data$Sample)

########################################################################
# quick sanity checks
# adult_df is uncorrected: check the TRUE count is 4
table(adult_df$ia_exac_copd == 1 & adult_df$asthma == 1)
# fp_adults has been corrected above: no TRUE entries expected
# (3 rows were in conflict before the correction)
table(fp_adults$ia_exac_copd == 1 & fp_adults$asthma == 1)

# clear unnecessary variables
#rm(metadata_all)
rm(all_df, adult_df)