renamed file to reflect data_extraction_mediator

This commit is contained in:
Tanushree Tunstall 2020-11-27 17:47:05 +00:00
parent 52a2453327
commit 3a380a5be1
11 changed files with 95 additions and 1963 deletions

View file

@ -33,9 +33,9 @@ table(fp_adults$ia_exac_copd==1 & fp_adults$asthma == 1)
########################################################################
# Clinical_data extraction
########################################################################
#cat("\nExtracting:", length(clinical_cols), "cols from fp_adults")
#cat("\nExtracting:", length(meta_clinical_cols), "cols from fp_adults")
#clinical_df = fp_adults[, clinical_cols]
#clinical_df = fp_adults[, meta_clinical_cols]
# sanity checks
#if ( sum(table(clinical_df$obesity)) & sum(table(clinical_df$age>=18)) & sum(table(clinical_df$death)) & sum(table(clinical_df$asthma)) == nrow(clinical_df) ){
@ -56,24 +56,8 @@ table(fp_adults$ia_exac_copd==1 & fp_adults$asthma == 1)
# Guard: check whether any record is flagged in both ia_exac_copd and asthma.
# The check reads cell [2,2] of the cross-tabulation (second level of each
# variable) and treats a count of 0 as "no conflict".
# NOTE(review): [[2,2]] assumes both variables have at least two observed
# levels; with a single level the index is out of bounds -- confirm.
# NOTE(review): the else-branch below first repairs the conflict in place and
# then reports a failure and calls quit(). This looks like two revisions of the
# branch shown together -- confirm which behaviour is intended.
if ( table(fp_adults$ia_exac_copd, fp_adults$asthma) [[2,2]] == 0){
cat("PASS: asthma and copd do not conflict")
}else{
cat("Conflict detected in asthm and copd filed, attempting to resolve...")
# Reassign the copd and asthma status and do some checks
# (values inside this braced branch are evaluated but not auto-printed)
table(fp_adults$ia_exac_copd); sum(is.na(fp_adults$ia_exac_copd))
# Collapse every ia_exac_copd value below 1 to 0 ...
fp_adults$ia_exac_copd[fp_adults$ia_exac_copd< 1]<- 0
# ... and treat missing ia_exac_copd as 0 as well.
fp_adults$ia_exac_copd[is.na(fp_adults$ia_exac_copd)] <- 0
table(fp_adults$ia_exac_copd); sum(is.na(fp_adults$ia_exac_copd))
# check copd and asthma status
table(fp_adults$ia_exac_copd, fp_adults$asthma)
check_copd_and_asthma_1<- subset(fp_adults, ia_exac_copd ==1 & asthma == 1) # check this is 3
# reassign these 3 so these are treated as non-asthmatics as copd with asthma is NOT TRUE asthma
fp_adults$asthma[fp_adults$ia_exac_copd == 1 & fp_adults$asthma == 1]= 0
table(fp_adults$ia_exac_copd, fp_adults$asthma)
foo<- subset(fp_adults, asthma==1 & ia_exac_copd ==1) # check that it's 0
# Drop the temporary inspection objects.
rm(check_copd_and_asthma_1, foo)
cat("Check status again...")
cat("Conflict detected in asthma and copd filed. Check script: read_data.R")
# Terminates the R session.
quit()
}
#=====================================================================
#=================================
@ -391,10 +375,37 @@ table(fp_adults_ics$steroid_ics)
#str(fp_adults_ics)
#=========================
# remove cols
# clinical_df only
#=========================
# Keep the columns named in meta_clinical_cols plus the steroid_ics column.
clinical_df_ics = fp_adults_ics[, c(meta_clinical_cols, "steroid_ics")]
# Drop the onset_2_initial column from fp_adults_ics.
# NOTE(review): the same statement appears commented out under the
# "FIXME: decide! remove cols" header below -- confirm whether this drop
# should still run.
fp_adults_ics = subset(fp_adults_ics, select = -c(onset_2_initial))
#=========================
# FIXME: decide! remove cols
#=========================
#fp_adults_ics = subset(fp_adults_ics, select = -c(onset_2_initial))
#=========================
# fp_adults_ics: without asthma
#=========================
#fp_adults_ics_na = fp_adults_ics[fp_adults_ics$asthma == 0,]
#=========================
# fp_adults_ics: without severity 3
#=========================
#table(fp_adults_ics$T1_resp_score)
#table(fp_adults_ics$T1_resp_score!=3)#
#fp_adults_ics_ns = fp_adults_ics[fp_adults_ics$T1_resp_score!=3,]
#table(fp_adults_ics_ns$T1_resp_score)
#=========================
# cols_added
#=========================
# Names of the columns listed under the "cols_added" section header.
clinical_cols_added <- c(
  "age_bins",
  "o2_sat_bin",
  "onset_initial_bin",
  "steroid_ics",
  "t1_resp_recoded"
)
#======================
# writing output file
@ -405,20 +416,13 @@ outfile_reg = paste0(outdir, outfile_name_reg)
# Announce the path of the regression output file.
# NOTE(review): the write.csv() call below is commented out, so no file is
# written here even though the message says so -- confirm this is intended.
cat("\nWriting clinical file for regression:", outfile_reg)
#write.csv(fp_adults_ics, file = outfile_reg)
#=========================
# fp_adults_ics: without asthma
#=========================
# Subset to rows where asthma == 0. which() drops positions where the
# comparison is NA; plain logical indexing would instead return an all-NA row
# for every record with a missing asthma value.
fp_adults_ics_na = fp_adults_ics[which(fp_adults_ics$asthma == 0), ]
#=========================
# clinical_df only
#=========================
# Select the columns named in clinical_cols from fp_adults.
# NOTE(review): clinical_df_ics is also assigned earlier from fp_adults_ics
# using meta_clinical_cols plus "steroid_ics"; this later assignment would
# overwrite that value -- confirm which definition is intended.
clinical_df_ics = fp_adults[, clinical_cols]
################################################################
# Remove intermediate objects from the workspace once the derived data frames
# have been built. Each name is listed once: a repeated name makes rm() warn
# that the object is not found on the second removal.
# NOTE(review): clinical_ics is not assigned anywhere in the visible code
# (only clinical_df_ics is) -- confirm the name.
rm(age_bins, max_age_interval, max_in, min_in
, min_age, min_age_interval
, o2_sat_bin, onset_initial_bin, tot_o2
, n_text_code, n1, n2, tot_onset2ini, infile_ics
, meta_data_cols
, fp_adults, clinical_ics)
################################################################