From 12eee6923a2dd038baa93459f9fa4c6715945de7 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall
Date: Tue, 17 Nov 2020 17:31:22 +0000
Subject: [PATCH] added checks.R

---
 checks.R | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100755 checks.R

diff --git a/checks.R b/checks.R
new file mode 100755
index 0000000..90a296a
--- /dev/null
+++ b/checks.R
@@ -0,0 +1,62 @@
+#!/usr/bin/Rscript
+getwd()
+setwd("~/git/mosaic_2020/") # needed: Header_TT.R below is sourced by relative path
+getwd()
+########################################################################
+# TASK: read data
+########################################################################
+# load libraries, packages and local imports
+source("Header_TT.R")
+########################################################################
+maindir <- "~/git/mosaic_2020/"
+outdir <- paste0(maindir, "output/")
+if (!dir.exists(outdir)) dir.create(outdir) # plain if: scalar condition, run only for its side effect
+
+outdir_stats <- paste0(maindir, "output/stats/")
+if (!dir.exists(outdir_stats)) dir.create(outdir_stats)
+
+outdir_plots <- paste0(maindir, "output/plots") # NOTE(review): no trailing "/" unlike the two above - confirm before paste0()-ing file names
+if (!dir.exists(outdir_plots)) dir.create(outdir_plots)
+########################################################################
+# static file read: csv
+#==============
+# all patients
+#==============
+all_df <- read.csv("/home/backup/MOSAIC/MEDIATOR_Data/master_file/Mosaic_master_file_from_stata.csv"
+                   , fileEncoding = "latin1")
+
+# meta data columns
+meta_data_cols <- c("mosaic", "gender", "age", "adult", "flustat", "type"
+                    , "obesity", "obese2", "height", "height_unit", "weight"
+                    , "weight_unit", "visual_est_bmi", "bmi_rating")
+
+# fail early, naming the absent columns, instead of only printing TRUE/FALSE
+missing_cols <- setdiff(meta_data_cols, colnames(all_df))
+if (length(missing_cols) > 0) stop("columns missing from all_df: ", paste(missing_cols, collapse = ", "), call. = FALSE)
+
+metadata_all <- all_df[, meta_data_cols]
+
+#============
+# hc
+#============
+#hc_data<- read.csv("/home/backup/MOSAIC/MEDIATOR_Data/master_file/Mediators_for_HC.csv")
+#str(hc_data)
+#table(hc_data$Timepoint, hc_data$Sample)
+########################################################################
+# quick checks on obesity and obese2
+aki_flupos <- read.csv(paste0(outdir, "aki_flupos.csv"))
+
+# subset all_df directly: cbind() builds a matrix and would coerce every column to one common type
+obesity_check <- all_df[, c("mosaic", "age", "adult", "flustat", "obesity", "obese2")]
+# element-wise '&' (scalar '&&' errors on vectors in R >= 4.3, used only row 1 before): one PASS/FAIL per row
+obesity_check$adult_check <- ifelse(all_df$age >= 18 & all_df$adult == 1, "PASS", "FAIL")
+table(obesity_check$adult_check)
+
+table(obesity_check$adult)
+table(obesity_check$adult, obesity_check$flustat)
+
+ob_adults <- obesity_check[obesity_check$adult == 1, ]
+ob_adults2 <- obesity_check[obesity_check$adult == 1 & obesity_check$age >= 18, ] # AHA!
+
+# check where the discrepancy is? -> mosaic ids with adult == 1 that do not appear in aki_flupos$Subject_ID
+adults_not_in_aki <- ob_adults$mosaic[!ob_adults$mosaic %in% aki_flupos$Subject_ID]; adults_not_in_aki