reformatting code to select needed df for analysis
This commit is contained in:
parent
a6cbaab40a
commit
b72c4df796
7 changed files with 243 additions and 102 deletions
|
@ -25,27 +25,28 @@ outfile_clinical_unpaired
|
|||
# Unpaired stats for clinical data between groups: unpaired Wilcoxon analysis
|
||||
# No correction required
|
||||
########################################################################
|
||||
|
||||
str(clinical_df_ics)
|
||||
numerical_cols = c("age"
|
||||
#, "vl_pfu_ul_npa1"
|
||||
, "vl_pfu_ul_npa1"
|
||||
, "los"
|
||||
, "onset2final"
|
||||
, "onsfindeath"
|
||||
, "onset_2_initial"
|
||||
, "o2_sat_admis")
|
||||
#, "onset_2_initial" # already bin
|
||||
#, "o2_sat_admis"# already bin
|
||||
)
|
||||
|
||||
metadata_cols = c("mosaic", "obesity")
|
||||
|
||||
clinical_df_numerical = clinical_df[, c(metadata_cols, numerical_cols)]
|
||||
clinical_df_numerical = clinical_df_ics[, c(metadata_cols, numerical_cols)]
|
||||
|
||||
pivot_cols = metadata_cols
|
||||
#pivot_cols = metadata_cols[!meta_data_cols%in%cols_to_omit];pivot_cols
|
||||
expected_rows_clinical_lf = nrow(clinical_df_numerical) * (length(clinical_df_numerical) - length(pivot_cols)); expected_rows_clinical_lf
|
||||
|
||||
|
||||
# column names for the long-format (lf) data
|
||||
keycol <- "clinical_params"
|
||||
valuecol <- "value"
|
||||
gathercols <- c("age", "los", "onset2final", "onsfindeath", "onset_2_initial", "o2_sat_admis")
|
||||
gathercols <- numerical_cols
|
||||
|
||||
clinical_lf = gather_(clinical_df_numerical, keycol, valuecol, gathercols)
|
||||
|
||||
|
@ -70,12 +71,15 @@ stats_un_clinical = compare_means(value~obesity
|
|||
#, data = clinical_lf_comp
|
||||
, paired = FALSE)
|
||||
|
||||
head(stats_un_clinical)
|
||||
|
||||
# rstatix
|
||||
stat_df <- clinical_lf %>%
|
||||
group_by(clinical_params) %>%
|
||||
wilcox_test(value ~ obesity, paired = F) %>%
|
||||
add_significance("p")
|
||||
stat_df$p_format = round(stat_df$p, digits = 3)
|
||||
stat_df
|
||||
|
||||
#----------------------------------------
|
||||
# calculate n_obs for each clinical param: Overall
|
||||
|
@ -101,31 +105,39 @@ n_all_gp = merge(n_all, n_gp
|
|||
#----------------------------------------
|
||||
# calculate n_obs for each clinical param: complete cases
|
||||
#----------------------------------------
|
||||
n_comp = data.frame(table(clinical_lf_comp$clinical_params))
|
||||
n_comp = data.frame(table(clinical_lf$clinical_params))
|
||||
colnames(n_comp) = c("clinical_params", "n_complete")
|
||||
n_comp$clinical_params = as.character(n_comp$clinical_params)
|
||||
n_comp
|
||||
|
||||
n_gp_comp_lf = data.frame(table(clinical_lf_comp$clinical_params, clinical_lf_comp$obesity)); n_gp_comp_lf
|
||||
n_gp_comp_lf = data.frame(table(clinical_lf$clinical_params
|
||||
, clinical_lf$obesity)); n_gp_comp_lf
|
||||
n_gp_comp = spread(n_gp_comp_lf, "Var2", "Freq"); n_gp_comp
|
||||
colnames(n_gp_comp)
|
||||
colnames(n_gp_comp) = c("clinical_params"
|
||||
, paste0("n_complete_gp", colnames(n_gp_comp)[2])
|
||||
, paste0("n_complete_gp", colnames(n_gp_comp)[3]))
|
||||
|
||||
|
||||
#---------
|
||||
# merge 1
|
||||
#---------
|
||||
n_comp_gp = merge(n_comp, n_gp_comp
|
||||
, by = intersect( names(n_comp), names(n_gp_comp))
|
||||
, all = T)
|
||||
n_comp_gp
|
||||
|
||||
#---------
|
||||
# merge 2
|
||||
#---------
|
||||
merge_cols = intersect(names(n_all_gp), names(n_comp_gp)); merge_cols
|
||||
|
||||
n_df = merge(n_all_gp, n_comp_gp, by = merge_cols, all = T); n_df
|
||||
|
||||
#==================================
|
||||
# Merge: merge stats + n_obs df
|
||||
#===================================
|
||||
#----------------------------------
|
||||
# Merge 3: merge stats + n_obs df
|
||||
#----------------------------------
|
||||
merging_cols = intersect(names(stats_un_clinical), names(n_df)); merging_cols
|
||||
|
||||
if (all(n_df$clinical_params%in%stats_un_clinical$clinical_params)) {
|
||||
cat("PASS: merging stats and n_obs on column/s:", merging_cols)
|
||||
stats_un_clinical = merge(stats_un_clinical, n_df, by = merging_cols, all = T)
|
||||
|
@ -188,6 +200,7 @@ if( length(my_col_order2) == ncol(stats_clinical_df) && (all(my_col_order2%in%co
|
|||
quit()
|
||||
}
|
||||
# assign tidy column names, e.g. replace "." with "_"
|
||||
# same ordering as my_col_order2, just minor formatting
|
||||
colnames(stats_clinical_df_f) = c("clinical_params"
|
||||
, "method"
|
||||
, "group1"
|
||||
|
@ -208,4 +221,4 @@ colnames(stats_clinical_df_f)
|
|||
# write output file
|
||||
#******************
|
||||
cat("UNpaired stats for clinical data for groups in:", outfile_clinical_unpaired)
|
||||
#write.csv(stats_clinical_df_f, outfile_clinical_unpaired, row.names = FALSE)
|
||||
write.csv(stats_clinical_df_f, outfile_clinical_unpaired, row.names = FALSE)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue