From d8041fb4943e76bf2c7436ca9d10dda05fb5fe69 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall
Date: Thu, 5 May 2022 19:32:34 +0100
Subject: [PATCH] added count_vars_ML.R to check numbers for revised counts

---
Note: the script body was revised in review; the blob id on the index line
below predates that revision.

 scripts/count_vars_ML.R | 77 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 scripts/count_vars_ML.R

diff --git a/scripts/count_vars_ML.R b/scripts/count_vars_ML.R
new file mode 100644
index 0000000..4cdf80c
--- /dev/null
+++ b/scripts/count_vars_ML.R
@@ -0,0 +1,77 @@
+# count numbers for ML
+# Prints row counts and frequency tables for label columns of merged_df3 so
+# the revised counts can be checked by eye. table() drops NA by default, so
+# each sum(table(x)) is the number of non-NA values in x.
+
+# One gene config is active at a time; the others are left commented out.
+#source("~/git/LSHTM_analysis/config/alr.R")
+source("~/git/LSHTM_analysis/config/embb.R")
+
+#source("~/git/LSHTM_analysis/config/gid.R")
+#source("~/git/LSHTM_analysis/config/pnca.R")
+
+# merged_df3 is not defined in this script; presumably the script sourced
+# below creates it -- TODO confirm.
+source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
+
+nrow(merged_df3)
+##############################################
+#==============
+# mutation_info: revised labels
+#==============
+table(merged_df3$mutation_info)
+sum(table(merged_df3$mutation_info))
+##############################################
+
+#==============
+# dst, dst_mode: revised labels
+#==============
+table(merged_df3$dst) # orig
+sum(table(merged_df3$dst))
+
+table(merged_df3$dst_mode)
+#table(merged_df3[dr_muts_col])
+# total for dst_mode, to compare with the dst total above
+sum(table(merged_df3$dst_mode))
+
+##############################################
+#==============
+# drtype: revised labels
+#==============
+table(merged_df3$drtype) #orig
+
+table(merged_df3$drtype_mode)
+# mapping 2.1: numeric
+# drtype_map = {'XDR': 5
+#              , 'Pre-XDR': 4
+#              , 'MDR': 3
+#              , 'Pre-MDR': 2
+#              , 'Other': 1
+#              , 'Sensitive': 0}
+
+# create a labels col that is mapped based on drtype_mode
+# Codes and labels are paired explicitly, following drtype_map above, so a
+# code that is absent from the data cannot shift the remaining labels (which
+# positional levels<- on as.factor() output could do). Values outside 0:5
+# become NA here and would show up as a lower total in the check below.
+merged_df3$drtype_mode_labels <- factor(merged_df3$drtype_mode
+                                        , levels = 0:5
+                                        , labels = c('Sensitive', 'Other'
+                                                     , 'Pre-MDR', 'MDR'
+                                                     , 'Pre-XDR', 'XDR'))
+
+levels(merged_df3$drtype_mode_labels)
+
+# check
+#table(merged_df3$drtype)
+table(merged_df3$drtype_mode)
+table(merged_df3$drtype_mode_labels)
+sum(table(merged_df3$drtype_mode_labels))
+##############################################
+# lineage
+table(merged_df3$lineage)
+sum(table(merged_df3$lineage))
+# lineage_labels is not tabulated above; its total is kept as a cross-check
+# against the lineage total -- presumably a relabelled copy, TODO confirm.
+sum(table(merged_df3$lineage_labels))
+