62 lines
2.4 KiB
R
Executable file
62 lines
2.4 KiB
R
Executable file
#!/usr/bin/Rscript

# Show the directory the script was launched from, move to the project
# directory, then show the directory again to confirm the move.
# The relative source() call below is resolved against this directory.
getwd()
setwd("~/git/mosaic_2020/")
getwd()

########################################################################
# TASK: read data
########################################################################

# load libraries, packages and local imports
source("Header_TT.R")

########################################################################
# Output locations
#
# Everything this script writes goes beneath `maindir`. The three
# directories below are created if they do not exist yet; directories that
# already exist are left untouched.
maindir <- "~/git/mosaic_2020/"
outdir <- paste0(maindir, "output/")
outdir_stats <- paste0(maindir, "output/stats/")
# NOTE(review): unlike the two paths above, this one has no trailing "/".
# The value is kept unchanged because code elsewhere may already add the
# separator itself; confirm how it is joined with file names before
# normalising it.
outdir_plots <- paste0(maindir, "output/plots")

# A plain `if` is used instead of `ifelse()`: the condition is a single
# TRUE/FALSE guarding a side effect, not a vectorised selection.
# `recursive = TRUE` makes dir.create() build any missing parent directory
# as well, so creation does not depend on the parent already existing.
for (out_path in c(outdir, outdir_stats, outdir_plots)) {
  if (!dir.exists(out_path)) {
    dir.create(out_path, recursive = TRUE)
  }
}

########################################################################
# static file read: csv
#==============
# all patients
#==============
# Read the master CSV (decoded as latin1) into `all_df`.
all_df <- read.csv(
  file = "/home/backup/MOSAIC/MEDIATOR_Data/master_file/Mosaic_master_file_from_stata.csv",
  fileEncoding = "latin1"
)

# meta data columns
meta_data_cols <- c(
  "mosaic", "gender", "age", "adult", "flustat", "type",
  "obesity", "obese2", "height", "height_unit", "weight",
  "weight_unit", "visual_est_bmi", "bmi_rating"
)

# check if these columns to select are present in the data:
# first one TRUE/FALSE flag per requested column, then a single overall flag
is.element(meta_data_cols, names(all_df))
all(is.element(meta_data_cols, names(all_df)))

# Keep only the metadata columns (this errors if any of them is missing).
metadata_all <- all_df[meta_data_cols]

#============
# hc
#============
#hc_data<- read.csv("/home/backup/MOSAIC/MEDIATOR_Data/master_file/Mediators_for_HC.csv")
#str(hc_data)
#table(hc_data$Timepoint, hc_data$Sample)

########################################################################
# quick checks on obesity and obese2
#
# Builds a small table of the age/adult/flu-status/obesity columns from
# `all_df`, flags each row on whether `age` and `adult` agree that the
# patient is an adult, prints a few tabulations, and finally lists the
# adult IDs that do not appear in the previously written aki_flupos.csv.
aki_flupos <- read.csv(paste0(outdir, "aki_flupos.csv"))

# Subset the data frame directly instead of as.data.frame(cbind(...)):
# cbind() goes through a matrix, which forces every column to one common
# type, so a single non-numeric column would turn `age` and `adult` into
# text and break the numeric comparisons below.
obesity_check <- all_df[, c("mosaic", "age", "adult", "flustat", "obesity", "obese2")]

# Row-wise check, so the elementwise `&` is required. The scalar `&&` only
# ever looked at the first row (and is an error for vectors from R 4.3 on),
# which stamped one identical PASS/FAIL value on every row.
# NOTE(review): rows with adult == 0 are labelled "FAIL" by this rule even
# when age < 18 agrees with the flag; confirm that is the intended meaning.
obesity_check$adult_check <- ifelse(
  obesity_check$age >= 18 & obesity_check$adult == 1,
  "PASS",
  "FAIL"
)
table(obesity_check$adult_check)

table(obesity_check$adult)
table(obesity_check$adult, obesity_check$flustat)

# Adults by flag only, and adults by flag AND age; the two differ in size
# when the flag and the recorded age disagree.
ob_adults <- obesity_check[obesity_check$adult == 1, ]
ob_adults2 <- obesity_check[obesity_check$adult == 1 & obesity_check$age >= 18, ] # AHA!

# check where the discrepancy is?
# IDs flagged as adult that are absent from aki_flupos$Subject_ID.
# NOTE(review): the variable name `c` is the same as base::c(). Calls such
# as c(1, 2) still work, but the name is kept only in case other code reads
# it; consider renaming.
c <- ob_adults$mosaic[!ob_adults$mosaic %in% aki_flupos$Subject_ID]
c