#!/usr/bin/Rscript
getwd()
setwd("~/git/mosaic_2020/")
getwd()
########################################################################
# TASK: read data
#
# Reads the master patient file, extracts the metadata columns, and runs
# quick consistency checks on the adult / obesity fields.
########################################################################
# Load libraries, packages and local imports.
# Header_TT.R is resolved relative to the working directory set above.
source("Header_TT.R")

########################################################################
# Output directories: created if they do not already exist.
########################################################################
maindir <- "~/git/mosaic_2020/"

outdir <- paste0(maindir, "output/")
if (!dir.exists(outdir)) {
  dir.create(outdir)
}

outdir_stats <- paste0(maindir, "output/stats/")
if (!dir.exists(outdir_stats)) {
  dir.create(outdir_stats)
}

# NOTE(review): unlike outdir and outdir_stats, this path has no trailing
# slash. Left unchanged because code outside this section may build file
# names from it -- confirm how it is used before normalising.
outdir_plots <- paste0(maindir, "output/plots")
if (!dir.exists(outdir_plots)) {
  dir.create(outdir_plots)
}

########################################################################
# static file read: csv
#==============
# all patients
#==============
all_df <- read.csv(
  "/home/backup/MOSAIC/MEDIATOR_Data/master_file/Mosaic_master_file_from_stata.csv",
  fileEncoding = "latin1"
)

# meta data columns
meta_data_cols <- c(
  "mosaic", "gender", "age", "adult", "flustat", "type",
  "obesity", "obese2", "height", "height_unit", "weight",
  "weight_unit", "visual_est_bmi", "bmi_rating"
)

# Check that the columns to select are present in the data
# (prints the per-column result, then the overall result).
meta_data_cols %in% colnames(all_df)
all(meta_data_cols %in% colnames(all_df))

metadata_all <- all_df[, meta_data_cols]

#============
# hc
#============
#hc_data<- read.csv("/home/backup/MOSAIC/MEDIATOR_Data/master_file/Mediators_for_HC.csv")
#str(hc_data)
#table(hc_data$Timepoint, hc_data$Sample)

########################################################################
# quick checks on obesity and obese2
########################################################################
aki_flupos <- read.csv(paste0(outdir, "aki_flupos.csv"))

# Select the columns directly from all_df so each keeps its own type.
# Building the frame with cbind() goes through a matrix, which coerces
# every column to one common type (e.g. all character if any column is
# character) and would turn the age comparison below into a string
# comparison.
obesity_cols <- c("mosaic", "age", "adult", "flustat", "obesity", "obese2")
obesity_check <- all_df[, obesity_cols]

# Row-wise consistency flag: "PASS" when age >= 18 and adult == 1,
# otherwise "FAIL" (NA where the condition itself is NA).
# The elementwise `&` is required here: `&&` is a scalar operator that
# only looked at the first row (and is an error on vectors from R 4.3).
obesity_check$adult_check <- ifelse(
  all_df$age >= 18 & all_df$adult == 1,
  "PASS",
  "FAIL"
)

table(obesity_check$adult_check)
table(obesity_check$adult)
table(obesity_check$adult, obesity_check$flustat)

# Rows flagged as adult, and rows flagged as adult that also have age >= 18.
ob_adults <- obesity_check[obesity_check$adult == 1, ]
ob_adults2 <- obesity_check[
  obesity_check$adult == 1 & obesity_check$age >= 18,
]
# AHA!

# Check where the discrepancy is: mosaic IDs flagged as adult that do not
# appear in aki_flupos$Subject_ID. The result is assigned and then printed.
# NOTE(review): the variable name `c` is the same as base::c; it is kept
# here in case code outside this section refers to it -- consider renaming.
c <- ob_adults$mosaic[!ob_adults$mosaic %in% aki_flupos$Subject_ID]
c