added Header, read_data.R & data_extraction scripts

This commit is contained in:
Tanushree Tunstall 2020-10-24 23:11:44 +01:00
parent 69e8ac0ea8
commit de5b07edc7
3 changed files with 383 additions and 0 deletions

29
read_data.R Normal file
View file

@ -0,0 +1,29 @@
#!/usr/bin/Rscript
# Report the starting working directory, switch to the project checkout,
# then report it again to confirm the change (the values are auto-printed
# when the script is run at top level, e.g. via Rscript).
# The relative path passed to source() below is resolved against this
# directory, so the setwd() call has to come first.
getwd()
setwd("~/git/mosaic_2020/")
getwd()
########################################################################
# TASK: read data
########################################################################
# load libraries, packages and local imports
# NOTE(review): Header_TT.R is not visible from here; presumably it attaches
# the packages used by the project scripts -- confirm against that file.
source("Header_TT.R")
########################################################################
# TODO: turn this to a repo
# Read the master CSV into all_df (the file name suggests a Stata export).
# The file is decoded as Latin-1 while reading.
all_df <- read.csv(
  file = "/home/pub/Work/MOSAIC/MOSAIC_from_work/MASTER/MOSAIC_2015_MASTER_Aki_stata_20150721/Mosaic_master_file_from_stata.csv",
  fileEncoding = "latin1"
)
#hc_data<-
# meta data columns
# Names of the columns copied from all_df into metadata_all.
meta_data_cols <- c(
  "mosaic", "gender", "age", "adult", "flustat", "type",
  "obesity", "obese2", "height", "height_unit", "weight",
  "weight_unit", "visual_est_bmi", "bmi_rating"
)

# check if these columns to select are present in the data
# (per-column result first, then the overall result; both are shown when
# the script runs at top level)
meta_data_cols %in% colnames(all_df)
all(meta_data_cols %in% colnames(all_df))

# Enforce the check: without this, a missing column only surfaces as the
# generic "undefined columns selected" error from the subsetting below,
# which does not say which column is absent.
if (!all(meta_data_cols %in% colnames(all_df))) {
  stop(
    "Metadata column(s) missing from all_df: ",
    paste(setdiff(meta_data_cols, colnames(all_df)), collapse = ", "),
    call. = FALSE
  )
}

# drop = FALSE keeps the result a data frame even if the column list is
# ever reduced to a single name.
metadata_all <- all_df[, meta_data_cols, drop = FALSE]
########################################################################
# Output locations. The two commented-out names below are placeholders
# that have not been assigned yet.
#outdir =
#outdir_plots =
# Path of the stats output directory inside the project checkout.
outdir_stats <- "~/git/mosaic_2020/output/stats/"