Add steroid_ics data and merge it with the clinical data for completeness
This commit is contained in:
parent
702ff4e327
commit
c5529206d9
1 changed files with 55 additions and 7 deletions
|
@ -3,14 +3,30 @@ getwd()
|
||||||
setwd('~/git/mosaic_2020/')
|
setwd('~/git/mosaic_2020/')
|
||||||
getwd()
|
getwd()
|
||||||
########################################################################
|
########################################################################
|
||||||
# TASK: Extract relevant columns from mosaic adults data
|
# TASK: Extract clinical data columns and recode as required for analysis
|
||||||
# npa
|
# corrects the asthma and copd status for patients
|
||||||
|
# creates age_bins and other intervals for clinical params
|
||||||
|
# merges steroid ics data and outcome var based on T1 resp score
|
||||||
|
# The steroid_ics data file is read from outdir and has been manually sourced
|
||||||
|
|
||||||
|
# TODO: for extra caution add and run checks on the steroid_ics file
|
||||||
########################################################################
|
########################################################################
|
||||||
#====================
|
#====================
|
||||||
# Input: source data
|
# Input: source data
|
||||||
|
# and steroid ics file
|
||||||
|
# This file contains steroid_ics data
|
||||||
|
# and another outcome variable based on T1_resp score
|
||||||
#====================
|
#====================
|
||||||
source("read_data.R")
|
source("read_data.R")
|
||||||
source("colnames_clinical_meds.R")
|
source("colnames_clinical_meds.R")
|
||||||
|
|
||||||
|
# read: steroid_ics file
# The path is built from outdir, which is not defined in the visible part of
# this script (presumably set by one of the sourced scripts -- confirm).
infile_ics <- paste0(outdir, "data_ics.csv")
infile_ics

# The file is manually sourced, so fail early with an explicit message if it
# is absent instead of relying on the lower-level error raised by read.csv().
if (!file.exists(infile_ics)) {
  stop("steroid_ics file not found: ", infile_ics, call. = FALSE)
}

clinical_ics <- read.csv(infile_ics)
str(clinical_ics)

# A header-only file would contribute no data to the later merge; flag it.
if (nrow(clinical_ics) == 0) {
  warning("steroid_ics file has no data rows: ", infile_ics, call. = FALSE)
}
|
||||||
|
|
||||||
########################################################################
|
########################################################################
|
||||||
# quick sanity checks
|
# quick sanity checks
|
||||||
table(adult_df$ia_exac_copd==1 & adult_df$asthma == 1) # check this is 4
|
table(adult_df$ia_exac_copd==1 & adult_df$asthma == 1) # check this is 4
|
||||||
|
@ -21,7 +37,7 @@ table(fp_adults$ia_exac_copd==1 & fp_adults$asthma == 1) # check this is 3
|
||||||
rm(all_df, adult_df, metadata_all)
|
rm(all_df, adult_df, metadata_all)
|
||||||
|
|
||||||
########################################################################
|
########################################################################
|
||||||
# Clinical_data extraction
|
# Clinical_data extraction
|
||||||
########################################################################
|
########################################################################
|
||||||
cat("\nExtracting:", length(clinical_cols), "cols from fp_adults")
|
cat("\nExtracting:", length(clinical_cols), "cols from fp_adults")
|
||||||
|
|
||||||
|
@ -38,6 +54,11 @@ if ( sum(table(clinical_df$obesity)) & sum(table(clinical_df$age>=18)) & sum(tab
|
||||||
table(clinical_df$ia_exac_copd)
|
table(clinical_df$ia_exac_copd)
|
||||||
|
|
||||||
########################################################################
|
########################################################################
|
||||||
|
#==================================
|
||||||
|
# asthma and copd status correction
|
||||||
|
# for conflicting field!
|
||||||
|
#=================================
|
||||||
|
|
||||||
# Reassign the copd and asthma status and do some checks
|
# Reassign the copd and asthma status and do some checks
|
||||||
table(clinical_df$ia_exac_copd); sum(is.na(clinical_df$ia_exac_copd))
|
table(clinical_df$ia_exac_copd); sum(is.na(clinical_df$ia_exac_copd))
|
||||||
|
|
||||||
|
@ -58,6 +79,10 @@ foo<- subset(clinical_df, asthma==1 & ia_exac_copd ==1) # check that its 0
|
||||||
|
|
||||||
rm(check_copd_and_asthma_1, foo)
|
rm(check_copd_and_asthma_1, foo)
|
||||||
#=====================================================================
|
#=====================================================================
|
||||||
|
#=================================
|
||||||
|
# resp scores: In, max and t1 & t2
|
||||||
|
#=================================
|
||||||
|
|
||||||
# count the resp scores
|
# count the resp scores
|
||||||
max_resp_score_table<- table(clinical_df$max_resp_score)
|
max_resp_score_table<- table(clinical_df$max_resp_score)
|
||||||
max_resp_score_table
|
max_resp_score_table
|
||||||
|
@ -297,15 +322,38 @@ table(clinical_df$asthma, clinical_df$smoking)
|
||||||
#1 7 17 19
|
#1 7 17 19
|
||||||
|
|
||||||
################################################################
|
################################################################
|
||||||
#==================
|
|
||||||
|
#=========================
|
||||||
|
# Merge: clinical_df and infile ics
|
||||||
|
#=========================
|
||||||
|
# Columns present in both data frames are used as the merge keys.
merging_cols <- intersect(names(clinical_df), names(clinical_ics))

# With no shared columns merge() returns the Cartesian product of the two
# data frames, which is never what is wanted here, so stop instead.
if (length(merging_cols) == 0) {
  stop("clinical_df and clinical_ics share no columns to merge on",
       call. = FALSE)
}

# Full outer join (all = TRUE): rows unmatched on either side are kept, so
# the row count of the result has to be verified below.
clinical_df_ics <- merge(clinical_df, clinical_ics, by = merging_cols, all = TRUE)
clinical_df_ics

# Expected dimensions of the merged data:
#   rows: identical to BOTH inputs (every row matched exactly once)
#   cols: all columns from both inputs, with the shared keys counted once
expected_ncol <- ncol(clinical_df) + ncol(clinical_ics) - length(merging_cols)

# The merged row count must be compared against each input explicitly;
# `a == b & c` only tests that c is non-zero, not that a == c.
rows_match <- nrow(clinical_df_ics) == nrow(clinical_df) &&
  nrow(clinical_df_ics) == nrow(clinical_ics)

if (rows_match) {
  cat("\nPASS: No. of rows match, nrow =", nrow(clinical_df_ics)
      , "\nChecking ncols...")
  if (ncol(clinical_df_ics) == expected_ncol) {
    cat("\nPASS: No. of cols match, ncol =", ncol(clinical_df_ics))
  } else {
    cat("\nFAIL: ncols mismatch"
        , "\nExpected ncols:", expected_ncol
        , "\nGot:", ncol(clinical_df_ics))
  }
} else {
  # Report the counts that were actually compared, plus the observed value.
  cat("\nFAIL: nrows mismatch"
      , "\nExpected nrows:", nrow(clinical_df)
      , "\nnrows in steroid_ics file:", nrow(clinical_ics)
      , "\nGot:", nrow(clinical_df_ics))
}
|
||||||
|
|
||||||
|
#======================
|
||||||
# writing output file
|
# writing output file
|
||||||
#==================
|
#======================
|
||||||
outfile_name_reg = "clinical_df_recoded.csv"
|
outfile_name_reg = "clinical_df_recoded.csv"
|
||||||
outfile_reg = paste0(outdir, outfile_name_reg)
|
outfile_reg = paste0(outdir, outfile_name_reg)
|
||||||
|
|
||||||
cat("\nWriting clinical file for regression:", outfile_reg)
|
cat("\nWriting clinical file for regression:", outfile_reg)
|
||||||
|
|
||||||
#write.csv(clinical_df, file = outfile_reg)
|
#write.csv(clinical_df_ics, file = outfile_reg)
|
||||||
################################################################
|
################################################################
|
||||||
|
|
||||||
rm(age_bins, max_age_interval, max_in, min_in, o2_sat_bin, onset_initial_bin, tot_o2, tot_onset2ini, meta_data_cols)
|
rm(age_bins, max_age_interval, max_in, min_in, o2_sat_bin, onset_initial_bin, tot_o2, tot_onset2ini, meta_data_cols)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue