mosaic_2020/read_data.R

70 lines
No EOL
2.2 KiB
R
Executable file

#!/usr/bin/Rscript
# Show the working directory the script was started from (the value is
# displayed when top-level results are auto-printed, e.g. under Rscript).
getwd()
# Switch to the hard-coded project checkout so that the relative path used
# by source() below resolves against it.
# NOTE(review): this path is specific to one user's home directory layout;
# the script will stop here on a machine where the directory does not exist.
setwd("~/git/mosaic_2020/")
# Show the working directory again to confirm the change.
getwd()
########################################################################
# TASK: read data
########################################################################
# load libraries, packages and local imports
# (Header_TT.R is looked up relative to the working directory set above;
# its contents are not visible here -- presumably it attaches the packages
# and helper functions used by the project; confirm against that file.)
source("Header_TT.R")
########################################################################
# Output directory layout.
# All paths keep their trailing "/" so that code which builds file names
# with paste0(outdir_*, "name.ext") continues to work unchanged.
maindir <- "~/git/mosaic_2020/"
outdir <- paste0(maindir, "output/")
outdir_stats <- paste0(maindir, "output/stats/")
outdir_plots <- paste0(maindir, "output/plots/")

# Create each output directory if it is not there yet.
# A plain `if` is used because the condition is a single TRUE/FALSE value;
# ifelse() is meant for vectors and its result was being printed as noise.
# recursive = TRUE lets the sub-directories be created even when the parent
# "output/" directory is missing. dir.create() still emits a warning (and
# returns FALSE) if a directory cannot be created.
invisible(lapply(
  c(outdir, outdir_stats, outdir_plots),
  function(dir_path) {
    if (!dir.exists(dir_path)) {
      dir.create(dir_path, recursive = TRUE)
    }
  }
))
########################################################################
# static file read: csv
#==============
# all patients
#==============
# Read the master file for all patients. The file is decoded as latin1.
all_df <- read.csv(
  "/home/backup/MOSAIC/MEDIATOR_Data/master_file/Mosaic_master_file_from_stata.csv",
  fileEncoding = "latin1"
)

# meta data columns
meta_data_cols <- c(
  "mosaic", "gender", "age",
  "adult",
  "flustat", "type",
  "obesity",
  "obese2",
  "height", "height_unit",
  "weight", "weight_unit",
  "ia_height_ftin", "ia_height_m", "ia_weight",
  "visual_est_bmi", "bmi_rating"
)

# Check that every column to select is present in the data.
# Previously the result of this check was only printed; a missing column
# then surfaced as a generic "undefined columns selected" error. Stop here
# instead, naming exactly which columns are absent.
missing_meta_cols <- setdiff(meta_data_cols, colnames(all_df))
if (length(missing_meta_cols) > 0) {
  stop(
    "Metadata columns missing from master file: ",
    paste(missing_meta_cols, collapse = ", "),
    call. = FALSE
  )
}

# Metadata-only view of all patients (columns in the order listed above).
metadata_all <- all_df[, meta_data_cols]
#==============
# adult patients
#==============
# Keep the rows of all_df with age >= 18.
# which() is used so that rows whose age is NA are dropped; indexing with a
# logical vector containing NA would insert a row of all-NA values for each
# such patient.
adult_df <- all_df[which(all_df$age >= 18), ]

# Sanity check: every extracted row should also carry adult == 1.
# sum() counts the TRUE values directly. The earlier table(...)[[1]] took
# the first table cell, which is the FALSE count whenever any row fails the
# test (so an all-FALSE result was reported as PASS) and raised a subscript
# error when the table was empty.
if (sum(adult_df$adult == 1, na.rm = TRUE) == nrow(adult_df)) {
  cat ("PASS: adult df extracted successfully")
} else {
  cat ("FAIL: adult df number mismatch!")
}
#==============
# FLU positive: adult patients
#==============
# Extract the adult rows with flustat == 1.
# which() drops rows whose flustat is NA; plain logical indexing would add
# an all-NA row to the result for each of them.
fp_adults <- adult_df[which(adult_df$flustat == 1), ]
#============
# hc
#============
#hc_data<- read.csv("/home/backup/MOSAIC/MEDIATOR_Data/master_file/Mediators_for_HC.csv")
#str(hc_data)
#table(hc_data$Timepoint, hc_data$Sample)
########################################################################