reformatting code to select needed df for analysis
This commit is contained in:
parent
a6cbaab40a
commit
b72c4df796
7 changed files with 243 additions and 102 deletions
|
@ -25,7 +25,7 @@ clinical_cols = c("mosaic"
|
||||||
, "onsfindeath"
|
, "onsfindeath"
|
||||||
, "onset_2_initial"
|
, "onset_2_initial"
|
||||||
, "o2_sat_admis"
|
, "o2_sat_admis"
|
||||||
, "o2_sat_suppl"
|
#, "o2_sat_suppl"
|
||||||
, "ethnicity"
|
, "ethnicity"
|
||||||
, "smoking"
|
, "smoking"
|
||||||
, "ia_cxr"
|
, "ia_cxr"
|
||||||
|
|
|
@ -13,18 +13,25 @@ getwd()
|
||||||
#====================
|
#====================
|
||||||
source("read_data.R")
|
source("read_data.R")
|
||||||
|
|
||||||
|
#============================
|
||||||
|
# Data to use: Important step
|
||||||
|
#============================
|
||||||
|
# select df to use
|
||||||
|
my_data = fp_adults
|
||||||
|
|
||||||
# clear unnecessary variables
|
# clear unnecessary variables
|
||||||
rm(all_df)
|
rm(all_df, adult_df, fp_adults_na)
|
||||||
|
|
||||||
########################################################################
|
########################################################################
|
||||||
|
|
||||||
#=========
|
#=========
|
||||||
# sam
|
# sam
|
||||||
#=========
|
#=========
|
||||||
sam_regex = regex(".*_sam[1-3]{1}$", ignore_case = T)
|
sam_regex = regex(".*_sam[1-3]{1}$", ignore_case = T)
|
||||||
sam_cols_i = str_extract(colnames(adult_df), sam_regex) # not boolean
|
sam_cols_i = str_extract(colnames(my_data), sam_regex) # not boolean
|
||||||
#sam_cols_b = colnames(adult_df)%in%sam_cols_i # boolean
|
#sam_cols_b = colnames(my_data)%in%sam_cols_i # boolean
|
||||||
|
|
||||||
sam_cols = colnames(adult_df)[colnames(adult_df)%in%sam_cols_i]
|
sam_cols = colnames(my_data)[colnames(my_data)%in%sam_cols_i]
|
||||||
|
|
||||||
# this contains log columns + daysamp_samXX: omitting these
|
# this contains log columns + daysamp_samXX: omitting these
|
||||||
sam_regex_log_days = regex("log|day.*_sam[1-3]{1}$", ignore_case = T, perl = T)
|
sam_regex_log_days = regex("log|day.*_sam[1-3]{1}$", ignore_case = T, perl = T)
|
||||||
|
@ -48,7 +55,7 @@ cat("Extracting SAM cols + metadata_cols")
|
||||||
|
|
||||||
if ( length(sam_cols_to_extract) == length(meta_data_cols) + length(sam_cols_clean) ){
|
if ( length(sam_cols_to_extract) == length(meta_data_cols) + length(sam_cols_clean) ){
|
||||||
cat("Extracing", length(sam_cols_to_extract), "columns for sam")
|
cat("Extracing", length(sam_cols_to_extract), "columns for sam")
|
||||||
sam_df = adult_df[, sam_cols_to_extract]
|
sam_df = my_data[, sam_cols_to_extract]
|
||||||
}else{
|
}else{
|
||||||
cat("FAIL: length mismatch"
|
cat("FAIL: length mismatch"
|
||||||
, "Expeceted to extract:", length(meta_data_cols) + length(sam_cols_clean), "columns"
|
, "Expeceted to extract:", length(meta_data_cols) + length(sam_cols_clean), "columns"
|
||||||
|
@ -61,10 +68,10 @@ colnames_sam_df = colnames(sam_df); colnames_sam_df
|
||||||
# serum
|
# serum
|
||||||
#=========
|
#=========
|
||||||
serum_regex = regex(".*_serum[1-3]{1}$", ignore_case = T)
|
serum_regex = regex(".*_serum[1-3]{1}$", ignore_case = T)
|
||||||
serum_cols_i = str_extract(colnames(adult_df), serum_regex) # not boolean
|
serum_cols_i = str_extract(colnames(my_data), serum_regex) # not boolean
|
||||||
#serum_cols_b = colnames(adult_df)%in%serum_cols_i # boolean
|
#serum_cols_b = colnames(my_data)%in%serum_cols_i # boolean
|
||||||
|
|
||||||
serum_cols = colnames(adult_df)[colnames(adult_df)%in%serum_cols_i]
|
serum_cols = colnames(my_data)[colnames(my_data)%in%serum_cols_i]
|
||||||
|
|
||||||
# this contains log columns + dayserump_serumXX: omitting these
|
# this contains log columns + dayserump_serumXX: omitting these
|
||||||
serum_regex_log_days = regex("log|day.*_serum[1-3]{1}$", ignore_case = T, perl = T)
|
serum_regex_log_days = regex("log|day.*_serum[1-3]{1}$", ignore_case = T, perl = T)
|
||||||
|
@ -88,7 +95,7 @@ cat("Extracting SERUM cols + metadata_cols")
|
||||||
|
|
||||||
if ( length(serum_cols_to_extract) == length(meta_data_cols) + length(serum_cols_clean) ){
|
if ( length(serum_cols_to_extract) == length(meta_data_cols) + length(serum_cols_clean) ){
|
||||||
cat("Extracing", length(serum_cols_to_extract), "columns for serum")
|
cat("Extracing", length(serum_cols_to_extract), "columns for serum")
|
||||||
serum_df = adult_df[, serum_cols_to_extract]
|
serum_df = my_data[, serum_cols_to_extract]
|
||||||
}else{
|
}else{
|
||||||
cat("FAIL: length mismatch"
|
cat("FAIL: length mismatch"
|
||||||
, "Expeceted to extract:", length(meta_data_cols) + length(serum_cols_clean), "columns"
|
, "Expeceted to extract:", length(meta_data_cols) + length(serum_cols_clean), "columns"
|
||||||
|
@ -101,10 +108,10 @@ colnames_serum_df = colnames(serum_df); colnames_serum_df
|
||||||
# npa
|
# npa
|
||||||
#=========
|
#=========
|
||||||
npa_regex = regex(".*_npa[1-3]{1}$", ignore_case = T)
|
npa_regex = regex(".*_npa[1-3]{1}$", ignore_case = T)
|
||||||
npa_cols_i = str_extract(colnames(adult_df), npa_regex) # not boolean
|
npa_cols_i = str_extract(colnames(my_data), npa_regex) # not boolean
|
||||||
#npa_cols_b = colnames(adult_df)%in%npa_cols_i # boolean
|
#npa_cols_b = colnames(my_data)%in%npa_cols_i # boolean
|
||||||
|
|
||||||
npa_cols = colnames(adult_df)[colnames(adult_df)%in%npa_cols_i]
|
npa_cols = colnames(my_data)[colnames(my_data)%in%npa_cols_i]
|
||||||
|
|
||||||
# this contains log columns + daynpap_npaXX: omitting these
|
# this contains log columns + daynpap_npaXX: omitting these
|
||||||
npa_regex_log_days = regex("log|day|vl_samptime|ct.*_npa[1-3]{1}$", ignore_case = T, perl = T)
|
npa_regex_log_days = regex("log|day|vl_samptime|ct.*_npa[1-3]{1}$", ignore_case = T, perl = T)
|
||||||
|
@ -128,7 +135,7 @@ cat("Extracting NPA cols + metadata_cols")
|
||||||
|
|
||||||
if ( length(npa_cols_to_extract) == length(meta_data_cols) + length(npa_cols_clean) ){
|
if ( length(npa_cols_to_extract) == length(meta_data_cols) + length(npa_cols_clean) ){
|
||||||
cat("Extracing", length(npa_cols_to_extract), "columns for npa")
|
cat("Extracing", length(npa_cols_to_extract), "columns for npa")
|
||||||
npa_df = adult_df[, npa_cols_to_extract]
|
npa_df = my_data[, npa_cols_to_extract]
|
||||||
}else{
|
}else{
|
||||||
cat("FAIL: length mismatch"
|
cat("FAIL: length mismatch"
|
||||||
, "Expeceted to extract:", length(meta_data_cols) + length(npa_cols_clean), "columns"
|
, "Expeceted to extract:", length(meta_data_cols) + length(npa_cols_clean), "columns"
|
||||||
|
@ -166,21 +173,21 @@ for (i in extra_cols){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tail(colnames_check_f)
|
tail(colnames_check_f)
|
||||||
# write file?
|
|
||||||
quick_check = as.data.frame(cbind(metadata_all$mosaic
|
|
||||||
, metadata_all$adult
|
|
||||||
, metadata_all$age
|
|
||||||
, metadata_all$obesity
|
|
||||||
, metadata_all$obese2
|
|
||||||
))
|
|
||||||
colnames(quick_check) = c("mosaic", "adult", "age", "obesity", "obese2")
|
|
||||||
|
|
||||||
##########################################################################
|
##########################################################################
|
||||||
# LF data
|
# LF data
|
||||||
##########################################################################
|
##########################################################################
|
||||||
cols_to_omit = c("adult", "obese2"
|
cols_to_omit = c("adult"
|
||||||
, "height", "height_unit", "weight"
|
#, "obese2"
|
||||||
, "weight_unit", "visual_est_bmi", "bmi_rating")
|
#, "height", "height_unit", "weight"
|
||||||
|
#, "weight_unit", "visual_est_bmi", "bmi_rating"
|
||||||
|
)
|
||||||
|
|
||||||
|
pivot_cols = meta_data_cols
|
||||||
|
# subselect pivot_cols
|
||||||
|
pivot_cols = meta_data_cols[!meta_data_cols%in%cols_to_omit];pivot_cols
|
||||||
|
ncols_omitted = table(meta_data_cols%in%cols_to_omit)[[2]]
|
||||||
|
ncols_omitted
|
||||||
|
|
||||||
#==============
|
#==============
|
||||||
# lf data: sam
|
# lf data: sam
|
||||||
|
@ -198,11 +205,11 @@ pivot_cols = meta_data_cols
|
||||||
# subselect pivot_cols
|
# subselect pivot_cols
|
||||||
pivot_cols = meta_data_cols[!meta_data_cols%in%cols_to_omit];pivot_cols
|
pivot_cols = meta_data_cols[!meta_data_cols%in%cols_to_omit];pivot_cols
|
||||||
|
|
||||||
if (length(pivot_cols) == length(meta_data_cols) - length(cols_to_omit)){
|
if (length(pivot_cols) == length(meta_data_cols) - ncols_omitted){
|
||||||
cat("PASS: pivot cols successfully extracted")
|
cat("PASS: pivot cols successfully extracted")
|
||||||
}else{
|
}else{
|
||||||
cat("FAIL: length mismatch! pivot cols could not be extracted"
|
cat("FAIL: length mismatch! pivot cols could not be extracted"
|
||||||
, "\nExpected length:", length(meta_data_cols) - length(cols_to_omit)
|
, "\nExpected length:", length(meta_data_cols) - ncols_omitted
|
||||||
, "\nGot:",length(pivot_cols) )
|
, "\nGot:",length(pivot_cols) )
|
||||||
quit()
|
quit()
|
||||||
}
|
}
|
||||||
|
@ -249,11 +256,11 @@ serum_wf = serum_df_adults[wf_cols]
|
||||||
pivot_cols = meta_data_cols
|
pivot_cols = meta_data_cols
|
||||||
pivot_cols = meta_data_cols[!meta_data_cols%in%cols_to_omit];pivot_cols
|
pivot_cols = meta_data_cols[!meta_data_cols%in%cols_to_omit];pivot_cols
|
||||||
|
|
||||||
if (length(pivot_cols) == length(meta_data_cols) - length(cols_to_omit)){
|
if (length(pivot_cols) == length(meta_data_cols) - ncols_omitted){
|
||||||
cat("PASS: pivot cols successfully extracted")
|
cat("PASS: pivot cols successfully extracted")
|
||||||
}else{
|
}else{
|
||||||
cat("FAIL: length mismatch! pivot cols could not be extracted"
|
cat("FAIL: length mismatch! pivot cols could not be extracted"
|
||||||
, "\nExpected length:", length(meta_data_cols) - length(cols_to_omit)
|
, "\nExpected length:", length(meta_data_cols) - ncols_omitted
|
||||||
, "\nGot:",length(pivot_cols) )
|
, "\nGot:",length(pivot_cols) )
|
||||||
quit()
|
quit()
|
||||||
}
|
}
|
||||||
|
@ -296,11 +303,11 @@ npa_wf = npa_df_adults[wf_cols]
|
||||||
pivot_cols = meta_data_cols
|
pivot_cols = meta_data_cols
|
||||||
pivot_cols = meta_data_cols[!meta_data_cols%in%cols_to_omit];pivot_cols
|
pivot_cols = meta_data_cols[!meta_data_cols%in%cols_to_omit];pivot_cols
|
||||||
|
|
||||||
if (length(pivot_cols) == length(meta_data_cols) - length(cols_to_omit)){
|
if (length(pivot_cols) == length(meta_data_cols) - ncols_omitted){
|
||||||
cat("PASS: pivot cols successfully extracted")
|
cat("PASS: pivot cols successfully extracted")
|
||||||
}else{
|
}else{
|
||||||
cat("FAIL: length mismatch! pivot cols could not be extracted"
|
cat("FAIL: length mismatch! pivot cols could not be extracted"
|
||||||
, "\nExpected length:", length(meta_data_cols) - length(cols_to_omit)
|
, "\nExpected length:", length(meta_data_cols) - ncols_omitted
|
||||||
, "\nGot:",length(pivot_cols) )
|
, "\nGot:",length(pivot_cols) )
|
||||||
quit()
|
quit()
|
||||||
}
|
}
|
||||||
|
@ -333,7 +340,7 @@ if (
|
||||||
rm(sam_regex, sam_regex_log_days, sam_cols, sam_cols_clean, sam_cols_i, sam_cols_to_extract, sam_cols_to_omit)
|
rm(sam_regex, sam_regex_log_days, sam_cols, sam_cols_clean, sam_cols_i, sam_cols_to_extract, sam_cols_to_omit)
|
||||||
rm(serum_regex, serum_regex_log_days, serum_cols, serum_cols_clean, serum_cols_i, serum_cols_to_extract, serum_cols_to_omit)
|
rm(serum_regex, serum_regex_log_days, serum_cols, serum_cols_clean, serum_cols_i, serum_cols_to_extract, serum_cols_to_omit)
|
||||||
rm(npa_regex, npa_regex_log_days, npa_cols, npa_cols_clean, npa_cols_i, npa_cols_to_extract, npa_cols_to_omit)
|
rm(npa_regex, npa_regex_log_days, npa_cols, npa_cols_clean, npa_cols_i, npa_cols_to_extract, npa_cols_to_omit)
|
||||||
rm(adult_df)
|
rm(my_data)
|
||||||
rm(colnames_check)
|
rm(colnames_check)
|
||||||
rm(i, j
|
rm(i, j
|
||||||
#, expected_cols
|
#, expected_cols
|
||||||
|
@ -344,3 +351,4 @@ rm(sam_df_adults, serum_df_adults, npa_df_adults)
|
||||||
|
|
||||||
# rm df
|
# rm df
|
||||||
rm(sam_df, serum_df, npa_df)
|
rm(sam_df, serum_df, npa_df)
|
||||||
|
rm(colnames_check_f, fp_adults)
|
||||||
|
|
|
@ -53,31 +53,36 @@ if ( sum(table(clinical_df$obesity)) & sum(table(clinical_df$age>=18)) & sum(tab
|
||||||
|
|
||||||
table(clinical_df$ia_exac_copd)
|
table(clinical_df$ia_exac_copd)
|
||||||
|
|
||||||
|
str(clinical_df)
|
||||||
|
#clinical_df$o2_sat_suppl
|
||||||
|
|
||||||
########################################################################
|
########################################################################
|
||||||
#==================================
|
#==================================
|
||||||
# asthma and copd status correction
|
# Check asthma and copd conflict
|
||||||
# for conflicting field!
|
|
||||||
#=================================
|
#=================================
|
||||||
|
if ( table(fp_adults$ia_exac_copd, fp_adults$asthma) [[2,2]] == 0){
|
||||||
|
cat("PASS: asthma and copd do not conflict")
|
||||||
|
}else{
|
||||||
|
cat("Conflict detected in asthm and copd filed, attempting to resolve...")
|
||||||
|
# Reassign the copd and asthma status and do some checks
|
||||||
|
table(fp_adults$ia_exac_copd); sum(is.na(fp_adults$ia_exac_copd))
|
||||||
|
fp_adults$ia_exac_copd[fp_adults$ia_exac_copd< 1]<- 0
|
||||||
|
fp_adults$ia_exac_copd[is.na(fp_adults$ia_exac_copd)] <- 0
|
||||||
|
table(fp_adults$ia_exac_copd); sum(is.na(fp_adults$ia_exac_copd))
|
||||||
|
|
||||||
|
# check copd and asthma status
|
||||||
|
table(fp_adults$ia_exac_copd, fp_adults$asthma)
|
||||||
|
check_copd_and_asthma_1<- subset(fp_adults, ia_exac_copd ==1 & asthma == 1) # check this is 3
|
||||||
|
|
||||||
|
# reassign these 3 so these are treated as non-asthmatics as copd with asthma is NOT TRUE asthma
|
||||||
|
fp_adults$asthma[fp_adults$ia_exac_copd == 1 & fp_adults$asthma == 1]= 0
|
||||||
|
table(fp_adults$ia_exac_copd, fp_adults$asthma)
|
||||||
|
foo<- subset(fp_adults, asthma==1 & ia_exac_copd ==1) # check that its 0
|
||||||
|
rm(check_copd_and_asthma_1, foo)
|
||||||
|
cat("Check status again...")
|
||||||
|
|
||||||
# Reassign the copd and asthma status and do some checks
|
}
|
||||||
table(clinical_df$ia_exac_copd); sum(is.na(clinical_df$ia_exac_copd))
|
|
||||||
|
|
||||||
clinical_df$ia_exac_copd[clinical_df$ia_exac_copd< 1]<- 0
|
|
||||||
clinical_df$ia_exac_copd[is.na(clinical_df$ia_exac_copd)] <- 0
|
|
||||||
|
|
||||||
table(clinical_df$ia_exac_copd); sum(is.na(clinical_df$ia_exac_copd))
|
|
||||||
|
|
||||||
# check copd and asthma status
|
|
||||||
table(clinical_df$ia_exac_copd, clinical_df$asthma)
|
|
||||||
check_copd_and_asthma_1<- subset(clinical_df, ia_exac_copd ==1 & asthma == 1) # check this is 3
|
|
||||||
|
|
||||||
# reassign these 3 so these are treated as non-asthmatics as copd with asthma is NOT TRUE asthma
|
|
||||||
clinical_df$asthma[clinical_df$ia_exac_copd == 1 & clinical_df$asthma == 1]= 0
|
|
||||||
table(clinical_df$ia_exac_copd, clinical_df$asthma)
|
|
||||||
|
|
||||||
foo<- subset(clinical_df, asthma==1 & ia_exac_copd ==1) # check that its 0
|
|
||||||
|
|
||||||
rm(check_copd_and_asthma_1, foo)
|
|
||||||
#=====================================================================
|
#=====================================================================
|
||||||
#=================================
|
#=================================
|
||||||
# resp scores: In, max and t1 & t2
|
# resp scores: In, max and t1 & t2
|
||||||
|
@ -97,7 +102,7 @@ Inresp_sev<- table(clinical_df$inresp_sev)
|
||||||
Inresp_sev
|
Inresp_sev
|
||||||
|
|
||||||
# Reassign the resp score so all 4 are replace by 3
|
# Reassign the resp score so all 4 are replace by 3
|
||||||
clinical_df$max_resp_score[clinical_df$max_resp_score ==4 ] <- 3
|
clinical_df$max_resp_score[clinical_df$max_resp_score == 4 ] <- 3
|
||||||
revised_resp_score_table<- table(clinical_df$max_resp_score)
|
revised_resp_score_table<- table(clinical_df$max_resp_score)
|
||||||
revised_resp_score_table
|
revised_resp_score_table
|
||||||
|
|
||||||
|
@ -125,29 +130,30 @@ rm(max_resp_score_table, T1_resp_score_table, T2_resp_score_table, Inresp_sev
|
||||||
# age
|
# age
|
||||||
#========
|
#========
|
||||||
# Create categories of variables
|
# Create categories of variables
|
||||||
clinical_df$age = round(clinical_df$age, digits = 0)
|
clinical_df$age_int = round(clinical_df$age, digits = 0)
|
||||||
table(clinical_df$age)
|
table(clinical_df$age_int)
|
||||||
table(clinical_df$asthma, clinical_df$age)
|
table(clinical_df$asthma, clinical_df$age_int)
|
||||||
min(clinical_df$age); max(clinical_df$age)
|
min(clinical_df$age_int); max(clinical_df$age_int)
|
||||||
|
|
||||||
max_age_interval = round_any(max(clinical_df$age), 10, f = ceiling)
|
max_age_interval = round_any(max(clinical_df$age_int), 10, f = ceiling)
|
||||||
max_age_interval
|
max_age_interval
|
||||||
min_age = min(clinical_df$age); min_age #19
|
min_age = min(clinical_df$age_int); min_age #19
|
||||||
min_age_interval = min_age - 1; min_age_interval
|
min_age_interval = min_age - 1; min_age_interval
|
||||||
|
|
||||||
#age_bins = cut(clinical_df$age, c(0,18,30,40,50,60,70,80,90))
|
#age_bins = cut(clinical_df$age_int, c(0,18,30,40,50,60,70,80,90))
|
||||||
age_bins = cut(clinical_df$age, c(min_age_interval, 30, 40, 50, 60, 70, max_age_interval))
|
age_bins = cut(clinical_df$age_int, c(min_age_interval, 30, 40, 50, 60, 70, max_age_interval))
|
||||||
clinical_df$age_bins = age_bins
|
clinical_df$age_bins = age_bins
|
||||||
dim(clinical_df) # 133 27
|
dim(clinical_df) # 133 28
|
||||||
|
|
||||||
# age_bins (to keep consistent with the results table)
|
# age_bins (to keep consistent with the results table)
|
||||||
class(clinical_df$age_bins)
|
class(clinical_df$age_bins)
|
||||||
levels(clinical_df$age_bins)
|
levels(clinical_df$age_bins)
|
||||||
#"(18,30]" "(30,40]" "(40,50]" "(50,60]" "(60,70]" "(70,80]"
|
#"(18,30]" "(30,40]" "(40,50]" "(50,60]" "(60,70]" "(70,80]"
|
||||||
|
|
||||||
table(clinical_df$asthma, clinical_df$age_bins)
|
table(clinical_df$asthma, clinical_df$age_bins)
|
||||||
# (18,30] (30,40] (40,50] (50,60] (60,70] (70,80]
|
# (18,30] (30,40] (40,50] (50,60] (60,70] (70,80]
|
||||||
#0 25 17 25 14 11 1
|
#0 25 17 25 14 11 1
|
||||||
#1 11 8 12 5 3 2
|
#1 11 8 12 5 2 2
|
||||||
|
|
||||||
if (sum(table(clinical_df$asthma, clinical_df$age_bins)) == nrow(clinical_df) ){
|
if (sum(table(clinical_df$asthma, clinical_df$age_bins)) == nrow(clinical_df) ){
|
||||||
cat("\nPASS: age_bins assigned successfully")
|
cat("\nPASS: age_bins assigned successfully")
|
||||||
|
@ -156,7 +162,7 @@ if (sum(table(clinical_df$asthma, clinical_df$age_bins)) == nrow(clinical_df) ){
|
||||||
quit()
|
quit()
|
||||||
}
|
}
|
||||||
|
|
||||||
# reassign
|
# reassign levels
|
||||||
class(clinical_df$age_bins)
|
class(clinical_df$age_bins)
|
||||||
levels(clinical_df$age_bins) <- c("(18,30]","(30,40]","(40,50]","(50,80]","(50,80]","(50,80]")
|
levels(clinical_df$age_bins) <- c("(18,30]","(30,40]","(40,50]","(50,80]","(50,80]","(50,80]")
|
||||||
table(clinical_df$asthma, clinical_df$age_bins)
|
table(clinical_df$asthma, clinical_df$age_bins)
|
||||||
|
@ -170,11 +176,25 @@ sum(table(clinical_df$asthma, clinical_df$age_bins)) == nrow(clinical_df)
|
||||||
#===========================
|
#===========================
|
||||||
# O2 saturation binning
|
# O2 saturation binning
|
||||||
#===========================
|
#===========================
|
||||||
|
clinical_df$o2_sat_admis
|
||||||
|
n1 = sum(is.na(clinical_df$o2_sat_admis))
|
||||||
|
|
||||||
clinical_df$o2_sat_admis = round(clinical_df$o2_sat_admis, digits = 0)
|
clinical_df$o2_sat_admis = round(clinical_df$o2_sat_admis, digits = 0)
|
||||||
table(clinical_df$o2_sat_admis)
|
table(clinical_df$o2_sat_admis)
|
||||||
tot_o2 = sum(table(clinical_df$o2_sat_admis))- table(clinical_df$o2_sat_admis)[["-1"]]
|
tot_o2 = sum(table(clinical_df$o2_sat_admis))- table(clinical_df$o2_sat_admis)[["-1"]]
|
||||||
tot_o2
|
tot_o2
|
||||||
|
|
||||||
|
n_text_code = table(clinical_df$o2_sat_admis)[["-1"]]
|
||||||
|
|
||||||
|
clinical_df$o2_sat_admis[clinical_df$o2_sat_admis <0] <- NA
|
||||||
|
n2 = sum(is.na(clinical_df$o2_sat_admis))
|
||||||
|
|
||||||
|
if (n2 == n1 + n_text_code) {
|
||||||
|
cat ("PASS: -1 code converted to NA")
|
||||||
|
} else{
|
||||||
|
cat("FAIL: something went wrong!")
|
||||||
|
}
|
||||||
|
|
||||||
o2_sat_bin = cut(clinical_df$o2_sat_admis, c(0,92,100))
|
o2_sat_bin = cut(clinical_df$o2_sat_admis, c(0,92,100))
|
||||||
clinical_df$o2_sat_bin = o2_sat_bin
|
clinical_df$o2_sat_bin = o2_sat_bin
|
||||||
table(clinical_df$o2_sat_bin)
|
table(clinical_df$o2_sat_bin)
|
||||||
|
@ -184,6 +204,8 @@ sum(table(clinical_df$o2_sat_bin)) == tot_o2
|
||||||
#===========================
|
#===========================
|
||||||
# Onset to initial binning
|
# Onset to initial binning
|
||||||
#===========================
|
#===========================
|
||||||
|
clinical_df$onset_2_initial
|
||||||
|
|
||||||
max_in = max(clinical_df$onset_2_initial); max_in #23
|
max_in = max(clinical_df$onset_2_initial); max_in #23
|
||||||
min_in = min(clinical_df$onset_2_initial) - 1 ; min_in # -6
|
min_in = min(clinical_df$onset_2_initial) - 1 ; min_in # -6
|
||||||
|
|
||||||
|
@ -198,14 +220,15 @@ sum(table(clinical_df$onset_initial_bin)) == tot_onset2ini
|
||||||
#=======================
|
#=======================
|
||||||
# seasonal flu: sfluv
|
# seasonal flu: sfluv
|
||||||
#=======================
|
#=======================
|
||||||
# should be a factor
|
|
||||||
if (! is.factor(clinical_df$sfluv)){
|
if (! is.factor(clinical_df$sfluv)){
|
||||||
clinical_df$sfluv = as.factor(clinical_df$sfluv)
|
clinical_df$sfluv = as.factor(clinical_df$sfluv)
|
||||||
}
|
}
|
||||||
class(clinical_df$sfluv) #[1] "factor"
|
class(clinical_df$sfluv)
|
||||||
|
|
||||||
levels(clinical_df$sfluv)
|
levels(clinical_df$sfluv)
|
||||||
|
|
||||||
|
table(clinical_df$sfluv)
|
||||||
table(clinical_df$asthma, clinical_df$sfluv)
|
table(clinical_df$asthma, clinical_df$sfluv)
|
||||||
|
|
||||||
# reassign
|
# reassign
|
||||||
levels(clinical_df$sfluv) <- c("0", "0", "1")
|
levels(clinical_df$sfluv) <- c("0", "0", "1")
|
||||||
table(clinical_df$asthma, clinical_df$sfluv)
|
table(clinical_df$asthma, clinical_df$sfluv)
|
||||||
|
@ -213,14 +236,16 @@ table(clinical_df$asthma, clinical_df$sfluv)
|
||||||
#=======================
|
#=======================
|
||||||
# h1n1v
|
# h1n1v
|
||||||
#=======================
|
#=======================
|
||||||
# should be a factor
|
|
||||||
if (! is.factor(clinical_df$h1n1v)){
|
if (! is.factor(clinical_df$h1n1v)){
|
||||||
clinical_df$h1n1v = as.factor(clinical_df$h1n1v)
|
clinical_df$h1n1v = as.factor(clinical_df$h1n1v)
|
||||||
}
|
}
|
||||||
class(clinical_df$h1n1v) #[1] "factor"
|
|
||||||
|
|
||||||
|
class(clinical_df$h1n1v)
|
||||||
levels(clinical_df$h1n1v)
|
levels(clinical_df$h1n1v)
|
||||||
|
|
||||||
|
table(clinical_df$h1n1v)
|
||||||
table(clinical_df$asthma, clinical_df$h1n1v)
|
table(clinical_df$asthma, clinical_df$h1n1v)
|
||||||
|
|
||||||
# reassign
|
# reassign
|
||||||
levels(clinical_df$h1n1v) <- c("0", "0", "1")
|
levels(clinical_df$h1n1v) <- c("0", "0", "1")
|
||||||
table(clinical_df$asthma, clinical_df$h1n1v)
|
table(clinical_df$asthma, clinical_df$h1n1v)
|
||||||
|
@ -229,18 +254,21 @@ table(clinical_df$asthma, clinical_df$h1n1v)
|
||||||
# ethnicity
|
# ethnicity
|
||||||
#=======================
|
#=======================
|
||||||
class(clinical_df$ethnicity) # integer
|
class(clinical_df$ethnicity) # integer
|
||||||
|
table(clinical_df$ethnicity)
|
||||||
table(clinical_df$asthma, clinical_df$ethnicity)
|
table(clinical_df$asthma, clinical_df$ethnicity)
|
||||||
|
|
||||||
clinical_df$ethnicity[clinical_df$ethnicity == 4] <- 2
|
clinical_df$ethnicity[clinical_df$ethnicity == 4] <- 2
|
||||||
|
table(clinical_df$ethnicity)
|
||||||
table(clinical_df$asthma, clinical_df$ethnicity)
|
table(clinical_df$asthma, clinical_df$ethnicity)
|
||||||
|
|
||||||
#=======================
|
#=======================
|
||||||
# pneumonia
|
# pneumonia
|
||||||
#=======================
|
#=======================
|
||||||
|
table(clinical_df$ia_cxr)
|
||||||
class(clinical_df$ia_cxr) # integer
|
class(clinical_df$ia_cxr) # integer
|
||||||
# ia_cxr 2 ---> yes pneumonia (1)
|
# ia_cxr 2 ---> yes pneumonia (1)
|
||||||
# 1 ---> no (0)
|
# 1 ---> no (0)
|
||||||
# ! 1 or 2 -- > "unkown"
|
# ! 1 or 2 -- > "unknown"
|
||||||
|
|
||||||
# reassign the pneumonia codes
|
# reassign the pneumonia codes
|
||||||
#0: not performed
|
#0: not performed
|
||||||
|
@ -251,7 +279,6 @@ class(clinical_df$ia_cxr) # integer
|
||||||
#-2: n/a specified by the clinician # not in the data...
|
#-2: n/a specified by the clinician # not in the data...
|
||||||
#-3: unknown specified by clinician
|
#-3: unknown specified by clinician
|
||||||
|
|
||||||
|
|
||||||
table(clinical_df$ia_cxr)
|
table(clinical_df$ia_cxr)
|
||||||
#-3 -1 0 1 2 3
|
#-3 -1 0 1 2 3
|
||||||
#5 48 13 47 17 3
|
#5 48 13 47 17 3
|
||||||
|
@ -262,6 +289,8 @@ table(clinical_df$ia_cxr)
|
||||||
# 1 2
|
# 1 2
|
||||||
#69 47 17
|
#69 47 17
|
||||||
|
|
||||||
|
sum(is.na(clinical_df$ia_cxr))
|
||||||
|
|
||||||
clinical_df$ia_cxr[clinical_df$ia_cxr == 1] <- 0
|
clinical_df$ia_cxr[clinical_df$ia_cxr == 1] <- 0
|
||||||
clinical_df$ia_cxr[clinical_df$ia_cxr == 2] <- 1
|
clinical_df$ia_cxr[clinical_df$ia_cxr == 2] <- 1
|
||||||
table(clinical_df$ia_cxr)
|
table(clinical_df$ia_cxr)
|
||||||
|
@ -306,7 +335,7 @@ clinical_df$smoking[clinical_df$smoking == 4 | clinical_df$smoking == 2 ] <- 0
|
||||||
clinical_df$smoking[clinical_df$smoking == 1 | clinical_df$smoking == 3 ] <- 1
|
clinical_df$smoking[clinical_df$smoking == 1 | clinical_df$smoking == 3 ] <- 1
|
||||||
clinical_df$smoking[clinical_df$smoking == -1 | clinical_df$smoking == -2 | clinical_df$smoking == -3 ] <- NA
|
clinical_df$smoking[clinical_df$smoking == -1 | clinical_df$smoking == -2 | clinical_df$smoking == -3 ] <- NA
|
||||||
|
|
||||||
table(clinical_df$smoking)
|
table(clinical_df$smoking); sum(is.na(clinical_df$smoking))
|
||||||
# 0 1
|
# 0 1
|
||||||
#30 49 54
|
#30 49 54
|
||||||
|
|
||||||
|
@ -316,17 +345,13 @@ table(clinical_df$asthma, clinical_df$smoking)
|
||||||
# 0 1
|
# 0 1
|
||||||
#0 24 32 37
|
#0 24 32 37
|
||||||
#1 6 17 17
|
#1 6 17 17
|
||||||
|
|
||||||
# 0 1
|
|
||||||
#0 23 32 35
|
|
||||||
#1 7 17 19
|
|
||||||
|
|
||||||
################################################################
|
################################################################
|
||||||
|
|
||||||
#=========================
|
#=========================
|
||||||
# Merge: clinical_df and infile ics
|
# Merge: clinical_df and infile ics
|
||||||
#=========================
|
#=========================
|
||||||
merging_cols = intersect( names(clinical_df), names(clinical_ics) )
|
merging_cols = intersect( names(clinical_df), names(clinical_ics) )
|
||||||
|
merging_cols
|
||||||
|
|
||||||
clinical_df_ics = merge(clinical_df, clinical_ics, by = merging_cols, all = T); clinical_df_ics
|
clinical_df_ics = merge(clinical_df, clinical_ics, by = merging_cols, all = T); clinical_df_ics
|
||||||
|
|
||||||
|
@ -351,6 +376,15 @@ if (nrow(clinical_df_ics) == nrow(clinical_df) & nrow(clinical_ics)){
|
||||||
, "\nExpected nrows:", nrow(fp_adults))
|
, "\nExpected nrows:", nrow(fp_adults))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# change the factor vars to integers
|
||||||
|
str(clinical_df_ics)
|
||||||
|
factor_vars = lapply(clinical_df_ics, class) == "factor"
|
||||||
|
table(factor_vars)
|
||||||
|
clinical_df_ics[, factor_vars] <- lapply(clinical_df_ics[, factor_vars], as.integer)
|
||||||
|
table(factor_vars)
|
||||||
|
|
||||||
|
str(clinical_df_ics)
|
||||||
|
|
||||||
#======================
|
#======================
|
||||||
# writing output file
|
# writing output file
|
||||||
#======================
|
#======================
|
||||||
|
@ -359,9 +393,8 @@ outfile_reg = paste0(outdir, outfile_name_reg)
|
||||||
|
|
||||||
cat("\nWriting clinical file for regression:", outfile_reg)
|
cat("\nWriting clinical file for regression:", outfile_reg)
|
||||||
|
|
||||||
write.csv(clinical_df_ics, file = outfile_reg)
|
#write.csv(clinical_df_ics, file = outfile_reg)
|
||||||
################################################################
|
################################################################
|
||||||
|
|
||||||
rm(age_bins, max_age_interval, max_in, min_in
|
rm(age_bins, max_age_interval, max_in, min_in
|
||||||
, o2_sat_bin, onset_initial_bin, tot_o2
|
, o2_sat_bin, onset_initial_bin, tot_o2
|
||||||
, tot_onset2ini, meta_data_cols
|
, tot_onset2ini, meta_data_cols
|
||||||
|
|
|
@ -214,4 +214,36 @@ comb_stats_categ_df_f = comb_stats_categ_df[order(comb_stats_categ_df$p_signif
|
||||||
# write output file
|
# write output file
|
||||||
#******************
|
#******************
|
||||||
cat("Chisq and fishers test results in:", outfile_clin_categ)
|
cat("Chisq and fishers test results in:", outfile_clin_categ)
|
||||||
write.csv(comb_stats_categ_df_f, outfile_clin_categ, row.names = FALSE)
|
#write.csv(comb_stats_categ_df_f, outfile_clin_categ, row.names = FALSE)
|
||||||
|
|
||||||
|
#==================
|
||||||
|
#0 date not recorded
|
||||||
|
#-1 not recorded
|
||||||
|
#-2 n/a specified by clinician
|
||||||
|
#-3 unknown specified by
|
||||||
|
|
||||||
|
|
||||||
|
chisq_test(table(clinical_df_ics$obesity, clinical_df_ics$smoking))
|
||||||
|
|
||||||
|
chisq_test(table(clinical_df_ics$obesity, clinical_df_ics$max_resp_score))
|
||||||
|
|
||||||
|
chisq_test(table(clinical_df_ics$obesity, clinical_df_ics$T1_resp_score))
|
||||||
|
chisq_test(table(clinical_df_ics$obesity, clinical_df_ics$t1_resp_recoded))
|
||||||
|
|
||||||
|
|
||||||
|
obese_df = clinical_df_ics[clinical_df_ics$obesity == 1,]
|
||||||
|
not_ob_df = clinical_df_ics[clinical_df_ics$obesity == 0,]
|
||||||
|
|
||||||
|
wilcox.test(obese_df$age, not_ob_df$age, paired = F)
|
||||||
|
wilcox.test(obese_df$los, not_ob_df$los, paired = F)
|
||||||
|
wilcox.test(obese_df$o2_sat_admis, not_ob_df$o2_sat_admis, paired = F)
|
||||||
|
wilcox.test(obese_df$onset_2_initial, not_ob_df$onset_2_initial, paired = F)
|
||||||
|
wilcox.test(obese_df$onset2final, not_ob_df$onset2final, paired = F)
|
||||||
|
wilcox.test(obese_df$onsfindeath, not_ob_df$onsfindeath, paired = F)
|
||||||
|
|
||||||
|
clinical_df_ics$age
|
||||||
|
clinical_df_ics$los
|
||||||
|
clinical_df_ics$o2_sat_admis #***** (already bin)
|
||||||
|
clinical_df_ics$onset_2_initial # ***** (already bin)
|
||||||
|
clinical_df_ics$onset2final
|
||||||
|
clinical_df_ics$onsfindeath
|
||||||
|
|
|
@ -25,27 +25,28 @@ outfile_clinical_unpaired
|
||||||
# Unpaired stats for clinical data b/w groups: wilcoxon UNpaired analysis
|
# Unpaired stats for clinical data b/w groups: wilcoxon UNpaired analysis
|
||||||
# No correction required
|
# No correction required
|
||||||
########################################################################
|
########################################################################
|
||||||
|
str(clinical_df_ics)
|
||||||
numerical_cols = c("age"
|
numerical_cols = c("age"
|
||||||
#, "vl_pfu_ul_npa1"
|
, "vl_pfu_ul_npa1"
|
||||||
, "los"
|
, "los"
|
||||||
, "onset2final"
|
, "onset2final"
|
||||||
, "onsfindeath"
|
, "onsfindeath"
|
||||||
, "onset_2_initial"
|
#, "onset_2_initial" # already bin
|
||||||
, "o2_sat_admis")
|
#, "o2_sat_admis"# already bin
|
||||||
|
)
|
||||||
|
|
||||||
metadata_cols = c("mosaic", "obesity")
|
metadata_cols = c("mosaic", "obesity")
|
||||||
|
|
||||||
clinical_df_numerical = clinical_df[, c(metadata_cols, numerical_cols)]
|
clinical_df_numerical = clinical_df_ics[, c(metadata_cols, numerical_cols)]
|
||||||
|
|
||||||
pivot_cols = metadata_cols
|
pivot_cols = metadata_cols
|
||||||
#pivot_cols = metadata_cols[!meta_data_cols%in%cols_to_omit];pivot_cols
|
#pivot_cols = metadata_cols[!meta_data_cols%in%cols_to_omit];pivot_cols
|
||||||
expected_rows_clinical_lf = nrow(clinical_df_numerical) * (length(clinical_df_numerical) - length(pivot_cols)); expected_rows_clinical_lf
|
expected_rows_clinical_lf = nrow(clinical_df_numerical) * (length(clinical_df_numerical) - length(pivot_cols)); expected_rows_clinical_lf
|
||||||
|
|
||||||
|
# lf data colnames
|
||||||
keycol <- "clinical_params"
|
keycol <- "clinical_params"
|
||||||
valuecol <- "value"
|
valuecol <- "value"
|
||||||
gathercols <- c("age", "los", "onset2final", "onsfindeath", "onset_2_initial", "o2_sat_admis")
|
gathercols <- numerical_cols
|
||||||
|
|
||||||
clinical_lf = gather_(clinical_df_numerical, keycol, valuecol, gathercols)
|
clinical_lf = gather_(clinical_df_numerical, keycol, valuecol, gathercols)
|
||||||
|
|
||||||
|
@ -70,12 +71,15 @@ stats_un_clinical = compare_means(value~obesity
|
||||||
#, data = clinical_lf_comp
|
#, data = clinical_lf_comp
|
||||||
, paired = FALSE)
|
, paired = FALSE)
|
||||||
|
|
||||||
|
head(stats_un_clinical)
|
||||||
|
|
||||||
|
# rstatix
|
||||||
stat_df <- clinical_lf %>%
|
stat_df <- clinical_lf %>%
|
||||||
group_by(clinical_params) %>%
|
group_by(clinical_params) %>%
|
||||||
wilcox_test(value ~ obesity, paired = F) %>%
|
wilcox_test(value ~ obesity, paired = F) %>%
|
||||||
add_significance("p")
|
add_significance("p")
|
||||||
stat_df$p_format = round(stat_df$p, digits = 3)
|
stat_df$p_format = round(stat_df$p, digits = 3)
|
||||||
|
stat_df
|
||||||
|
|
||||||
#----------------------------------------
|
#----------------------------------------
|
||||||
# calculate n_obs for each clinical param: Overall
|
# calculate n_obs for each clinical param: Overall
|
||||||
|
@ -101,31 +105,39 @@ n_all_gp = merge(n_all, n_gp
|
||||||
#----------------------------------------
|
#----------------------------------------
|
||||||
# calculate n_obs for each clinical param: complete cases
|
# calculate n_obs for each clinical param: complete cases
|
||||||
#----------------------------------------
|
#----------------------------------------
|
||||||
n_comp = data.frame(table(clinical_lf_comp$clinical_params))
|
n_comp = data.frame(table(clinical_lf$clinical_params))
|
||||||
colnames(n_comp) = c("clinical_params", "n_complete")
|
colnames(n_comp) = c("clinical_params", "n_complete")
|
||||||
n_comp$clinical_params = as.character(n_comp$clinical_params)
|
n_comp$clinical_params = as.character(n_comp$clinical_params)
|
||||||
n_comp
|
n_comp
|
||||||
|
|
||||||
n_gp_comp_lf = data.frame(table(clinical_lf_comp$clinical_params, clinical_lf_comp$obesity)); n_gp_comp_lf
|
n_gp_comp_lf = data.frame(table(clinical_lf$clinical_params
|
||||||
|
, clinical_lf$obesity)); n_gp_comp_lf
|
||||||
n_gp_comp = spread(n_gp_comp_lf, "Var2", "Freq"); n_gp_comp
|
n_gp_comp = spread(n_gp_comp_lf, "Var2", "Freq"); n_gp_comp
|
||||||
colnames(n_gp_comp)
|
colnames(n_gp_comp)
|
||||||
colnames(n_gp_comp) = c("clinical_params"
|
colnames(n_gp_comp) = c("clinical_params"
|
||||||
, paste0("n_complete_gp", colnames(n_gp_comp)[2])
|
, paste0("n_complete_gp", colnames(n_gp_comp)[2])
|
||||||
, paste0("n_complete_gp", colnames(n_gp_comp)[3]))
|
, paste0("n_complete_gp", colnames(n_gp_comp)[3]))
|
||||||
|
|
||||||
|
#---------
|
||||||
|
# merge 1
|
||||||
|
#---------
|
||||||
n_comp_gp = merge(n_comp, n_gp_comp
|
n_comp_gp = merge(n_comp, n_gp_comp
|
||||||
, by = intersect( names(n_comp), names(n_gp_comp))
|
, by = intersect( names(n_comp), names(n_gp_comp))
|
||||||
, all = T)
|
, all = T)
|
||||||
|
n_comp_gp
|
||||||
|
|
||||||
|
#---------
|
||||||
|
# merge 2
|
||||||
|
#---------
|
||||||
merge_cols = intersect(names(n_all_gp), names(n_comp_gp)); merge_cols
|
merge_cols = intersect(names(n_all_gp), names(n_comp_gp)); merge_cols
|
||||||
|
|
||||||
n_df = merge(n_all_gp, n_comp_gp, by = merge_cols, all = T); n_df
|
n_df = merge(n_all_gp, n_comp_gp, by = merge_cols, all = T); n_df
|
||||||
|
|
||||||
#==================================
|
#----------------------------------
|
||||||
# Merge: merge stats + n_obs df
|
# Merge 3: merge stats + n_obs df
|
||||||
#===================================
|
#----------------------------------
|
||||||
merging_cols = intersect(names(stats_un_clinical), names(n_df)); merging_cols
|
merging_cols = intersect(names(stats_un_clinical), names(n_df)); merging_cols
|
||||||
|
|
||||||
if (all(n_df$clinical_params%in%stats_un_clinical$clinical_params)) {
|
if (all(n_df$clinical_params%in%stats_un_clinical$clinical_params)) {
|
||||||
cat("PASS: merging stats and n_obs on column/s:", merging_cols)
|
cat("PASS: merging stats and n_obs on column/s:", merging_cols)
|
||||||
stats_un_clinical = merge(stats_un_clinical, n_df, by = merging_cols, all = T)
|
stats_un_clinical = merge(stats_un_clinical, n_df, by = merging_cols, all = T)
|
||||||
|
@ -188,6 +200,7 @@ if( length(my_col_order2) == ncol(stats_clinical_df) && (all(my_col_order2%in%co
|
||||||
quit()
|
quit()
|
||||||
}
|
}
|
||||||
# assign nice column names like replace "." with "_"
|
# assign nice column names like replace "." with "_"
|
||||||
|
# same ordering as my_col_order2, just minor formatting
|
||||||
colnames(stats_clinical_df_f) = c("clinical_params"
|
colnames(stats_clinical_df_f) = c("clinical_params"
|
||||||
, "method"
|
, "method"
|
||||||
, "group1"
|
, "group1"
|
||||||
|
@ -208,4 +221,4 @@ colnames(stats_clinical_df_f)
|
||||||
# write output file
|
# write output file
|
||||||
#******************
|
#******************
|
||||||
cat("UNpaired stats for clinical data for groups in:", outfile_clinical_unpaired)
|
cat("UNpaired stats for clinical data for groups in:", outfile_clinical_unpaired)
|
||||||
#write.csv(stats_clinical_df_f, outfile_clinical_unpaired, row.names = FALSE)
|
write.csv(stats_clinical_df_f, outfile_clinical_unpaired, row.names = FALSE)
|
||||||
|
|
|
@ -365,6 +365,14 @@ colnames(combined_unpaired_stats_f) = c("mediator"
|
||||||
, "p_bon_signif")
|
, "p_bon_signif")
|
||||||
|
|
||||||
colnames(combined_unpaired_stats_f)
|
colnames(combined_unpaired_stats_f)
|
||||||
|
|
||||||
|
# count how many meds are significant
|
||||||
|
n_sig = length(combined_unpaired_stats_f$mediator[combined_unpaired_stats_f$p_signif<0.05])
|
||||||
|
cat("\nTotal no. of statistically significant mediators in", toupper(my_sample_type)
|
||||||
|
, "are:", n_sig)
|
||||||
|
|
||||||
|
sig_meds = combined_unpaired_stats_f[combined_unpaired_stats_f$p_signif<0.05,]
|
||||||
|
|
||||||
########################################################################
|
########################################################################
|
||||||
#******************
|
#******************
|
||||||
# write output file
|
# write output file
|
||||||
|
|
61
read_data.R
61
read_data.R
|
@ -30,11 +30,11 @@ meta_data_cols = c("mosaic", "gender", "age"
|
||||||
, "adult"
|
, "adult"
|
||||||
, "flustat", "type"
|
, "flustat", "type"
|
||||||
, "obesity"
|
, "obesity"
|
||||||
, "obese2"
|
#, "obese2"
|
||||||
, "height", "height_unit"
|
#, "height", "height_unit"
|
||||||
, "weight", "weight_unit"
|
#, "weight", "weight_unit"
|
||||||
, "ia_height_ftin", "ia_height_m", "ia_weight"
|
#, "ia_height_ftin", "ia_height_m", "ia_weight"
|
||||||
, "visual_est_bmi", "bmi_rating"
|
#, "visual_est_bmi", "bmi_rating"
|
||||||
)
|
)
|
||||||
|
|
||||||
# check if these columns to select are present in the data
|
# check if these columns to select are present in the data
|
||||||
|
@ -55,9 +55,9 @@ if (table(adult_df$adult == 1)[[1]] == nrow(adult_df) ){
|
||||||
cat ("\nFAIL: adult df number mismatch!")
|
cat ("\nFAIL: adult df number mismatch!")
|
||||||
}
|
}
|
||||||
|
|
||||||
#==============
|
#=================================
|
||||||
# FLU positive: adult patients
|
# FLU positive: adult patients
|
||||||
#==============
|
#=================================
|
||||||
# extract the flu positive population
|
# extract the flu positive population
|
||||||
fp_adults = adult_df[adult_df$flustat == 1,]
|
fp_adults = adult_df[adult_df$flustat == 1,]
|
||||||
|
|
||||||
|
@ -67,6 +67,53 @@ if (table(fp_adults$flustat == 1)[[1]] == nrow(fp_adults) ){
|
||||||
cat ("\nFAIL: adult df number mismatch!")
|
cat ("\nFAIL: adult df number mismatch!")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#=============================================
|
||||||
|
# FLU positive adult patients: without asthma
|
||||||
|
#=============================================
|
||||||
|
#-----------------------------------
|
||||||
|
# asthma and copd status correction
|
||||||
|
# for conflicting field!
|
||||||
|
#------------------------------------
|
||||||
|
# Reassign the copd and asthma status and do some checks
|
||||||
|
table(fp_adults$ia_exac_copd); sum(is.na(fp_adults$ia_exac_copd))
|
||||||
|
|
||||||
|
fp_adults$ia_exac_copd[fp_adults$ia_exac_copd< 1]<- 0
|
||||||
|
fp_adults$ia_exac_copd[is.na(fp_adults$ia_exac_copd)] <- 0
|
||||||
|
|
||||||
|
table(fp_adults$ia_exac_copd); sum(is.na(fp_adults$ia_exac_copd))
|
||||||
|
|
||||||
|
# check copd and asthma status
|
||||||
|
table(fp_adults$ia_exac_copd, fp_adults$asthma)
|
||||||
|
check_copd_and_asthma_1<- subset(fp_adults, ia_exac_copd ==1 & asthma == 1) # check this is 3
|
||||||
|
|
||||||
|
# reassign these 3 so these are treated as non-asthmatics as copd with asthma is NOT TRUE asthma
|
||||||
|
fp_adults$asthma[fp_adults$ia_exac_copd == 1 & fp_adults$asthma == 1]= 0
|
||||||
|
table(fp_adults$ia_exac_copd, fp_adults$asthma)
|
||||||
|
|
||||||
|
foo<- subset(fp_adults, asthma==1 & ia_exac_copd ==1) # check that its 0
|
||||||
|
|
||||||
|
rm(check_copd_and_asthma_1, foo)
|
||||||
|
|
||||||
|
if ( table(fp_adults$ia_exac_copd, fp_adults$asthma) [[2,2]] == 0 ){
|
||||||
|
cat("\nPASS: asthma and copd do not conflict")
|
||||||
|
} else{
|
||||||
|
cat ("\nFAIL: asthma and copd conflict not resolved!")
|
||||||
|
quit()
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("\nExtracting flu positive without asthma")
|
||||||
|
table(fp_adults$asthma)
|
||||||
|
cat("\nNo. of asthmatics:", table(fp_adults$asthma)[[2]]
|
||||||
|
, "\nNo. of non-asthmatics:", table(fp_adults$asthma)[[1]])
|
||||||
|
str(fp_adults$asthma)
|
||||||
|
|
||||||
|
table(fp_adults$obesity)
|
||||||
|
table(fp_adults$obesity, fp_adults$asthma)
|
||||||
|
|
||||||
|
fp_adults_na = fp_adults[fp_adults$asthma == 0,]
|
||||||
|
table(fp_adults_na$obesity)
|
||||||
|
table(fp_adults_na$obesity, fp_adults_na$asthma)
|
||||||
|
|
||||||
#============
|
#============
|
||||||
# hc
|
# hc
|
||||||
#============
|
#============
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue