diff --git a/scripts/plotting/lineage_dist_plots.R b/scripts/plotting/lineage_dist_plots.R new file mode 100644 index 0000000..a425f37 --- /dev/null +++ b/scripts/plotting/lineage_dist_plots.R @@ -0,0 +1,114 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Lineage dist plots: ggridges + +# Output: 1 or 2 SVGs for PS stability + +########################################################## +# Installing and loading required packages +########################################################## +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting/") +getwd() + +source("Header_TT.R") # also loads all my functions + +#=========== +# input +#=========== +#drug = "streptomycin" +#gene = "gid" +source("get_plotting_dfs.R") + +spec = matrix(c( + "drug" , "d", 1, "character", + "gene" , "g", 1, "character", + "data_file1" , "fa", 2, "character", + "data_file2" , "fb", 2, "character" +), byrow = TRUE, ncol = 4) + +opt = getopt(spec) + +drug = opt$drug +gene = opt$gene +infile_params = opt$data_file1 +infile_metadata = opt$data_file2 + +if(is.null(drug)|is.null(gene)) { + stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)") +} + +#======= +# output +#======= +lineage_dist_dm_om_ps = "lineage_dist_dm_om_PS.svg" +plot_lineage_dist_dm_om_ps = paste0(plotdir,"/", lineage_dist_dm_om_ps) +#======================================================================== + +########################### +# Data for plots +# you need merged_df2 or merged_df2_comp +# since this is one-many relationship +# i.e the same SNP can belong to multiple lineages +# using the _comp dataset means +# we lose some muts and at this level, we should use +# as much info as available, hence use df with NA +########################### + +#=================== +# Data for plots +#=================== +# quick checks +table(merged_df2$mutation_info_labels); levels(merged_df2$lineage_labels) +table(merged_df2$lineage_labels); levels(merged_df2$mutation_info_labels) + +lin_dist_plot = merged_df2[merged_df2$lineage_labels%in%c("L1", "L2", "L3", "L4"),] +table(lin_dist_plot$lineage_labels); nlevels(lin_dist_plot$lineage_labels) + +# refactor +lin_dist_plot$lineage_labels = factor(lin_dist_plot$lineage_labels) +nlevels(lin_dist_plot$lineage_labels) + +#----------------------------------------------------------------------- +# IMPORTANT RESULTS to put inside table or text for interactive plots + +sum(table(lin_dist_plot$lineage_labels)) #{RESULT: Total number of samples for lineage} + +table(lin_dist_plot$lineage_labels)#{RESULT: No of samples within lineage} + +length(unique(lin_dist_plot$mutationinformation))#{Result: No. of unique mutations selected lineages contribute to} +length(lin_dist_plot$mutationinformation) + +u2 = unique(merged_df2$mutationinformation) +u = unique(lin_dist_plot$mutationinformation) +check = u2[!u2%in%u]; print(check) #{Muts not present within selected lineages} +#----------------------------------------------------------------------- +# without facet +linP_dm_om = lineage_distP(lin_dist_plot + , with_facet = F + , x_axis = "deepddg" + , y_axis = "lineage_labels" + , x_lab = "DeepDDG" + , leg_label = "Mutation Class" +) +linP_dm_om + +# with facet +linP_dm_om_facet = lineage_distP(lin_dist_plot + , with_facet = T + , facet_wrap_var = "mutation_info_labels" + , leg_label = "Mutation Class" + , leg_pos_wf = "none" + , leg_dir_wf = "horizontal" + +) +linP_dm_om_facet + +#================= +# output plot: +# without facet +#================= +svg(plot_lineage_dist_dm_om_ps) +linP_dm_om + +dev.off()