LSHTM_analysis/scripts/plotting/lineage_dist_plots.R

143 lines
4.5 KiB
R

#!/usr/bin/env Rscript
#########################################################
# TASK: Lineage dist plots: ggridges
# Output: 1 or 2 SVGs for PS stability
##########################################################
# Installing and loading required packages
##########################################################
# Show the current working directory, switch to the plotting scripts
# directory, then show it again to confirm the change.
getwd()
# NOTE(review): hard-coded, user-specific path; this fails on any machine
# where the repository is not checked out under ~/git/LSHTM_analysis.
setwd("~/git/LSHTM_analysis/scripts/plotting/")
getwd()
# Load the shared header script. Presumably this is where the packages and
# helpers used further down (e.g. getopt(), lineage_distP()) come from --
# they are not defined in this script; confirm.
source("~/git/LSHTM_analysis/scripts/Header_TT.R") # also loads all my functions
#===========
# input
#===========
# Placeholder values for interactive use.
# NOTE(review): both are unconditionally replaced by the parsed command-line
# options below, so they never act as defaults when run through Rscript.
drug = "streptomycin"
gene = "gid"
#source("get_plotting_dfs.R")

# Command-line option specification, one row per option:
#   long name, short flag, argument flag, value type
# Argument flag: 1 = the option takes a mandatory value,
#                2 = the option takes an optional value.
# NOTE(review): the short flags "fa" and "fb" are longer than one character;
# short flags are conventionally a single character, so these two options
# may only be reachable as --data_file1 / --data_file2 -- confirm against
# the getopt documentation.
spec = matrix(c(
  "drug"       , "d" , 1, "character",
  "gene"       , "g" , 1, "character",
  "data_file1" , "fa", 2, "character",
  "data_file2" , "fb", 2, "character"
), byrow = TRUE, ncol = 4)

opt = getopt(spec)

# Options that were not supplied on the command line come back as NULL.
drug = opt$drug
gene = opt$gene
infile_params = opt$data_file1
infile_metadata = opt$data_file2

# --drug and --gene are both mandatory. An if() condition needs a single
# logical value, so use the scalar, short-circuiting `||` rather than the
# element-wise `|`.
if (is.null(drug) || is.null(gene)) {
  stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
}
#=======
# output
#=======
# Name of the SVG written at the end of the script, and its full path
# inside plotdir (plotdir is not defined in this script).
lineage_dist_dm_om_ps = "lineage_dist_dm_om_PS.svg"
plot_lineage_dist_dm_om_ps = paste(plotdir, lineage_dist_dm_om_ps, sep = "/")
#========================================================================
###########################
# Data for plots
# Use merged_df2_combined or merged_df2_combined_comp here,
# because this is a one-to-many relationship,
# i.e. the same SNP can belong to multiple lineages.
# Using the _comp dataset means we lose some mutations,
# and at this level we should use as much information
# as is available; hence use the df with NA.
###########################
#===================
# Data for plots
#===================
# quick checks
# Quick look at the input: counts per mutation class and per lineage,
# plus the factor levels of both label columns.
table(merged_df2_combined$mutation_info_labels)
levels(merged_df2_combined$lineage_labels)
table(merged_df2_combined$lineage_labels)
levels(merged_df2_combined$mutation_info_labels)

# Keep only the rows that belong to the lineages of interest.
sel_lineages = c("L1", "L2", "L3", "L4")
lin_dist_plot = merged_df2_combined[
  merged_df2_combined$lineage_labels %in% sel_lineages,
]
table(lin_dist_plot$lineage_labels)
nlevels(lin_dist_plot$lineage_labels)

# Rebuild the factor so that levels left without any rows are dropped.
lin_dist_plot$lineage_labels = factor(lin_dist_plot$lineage_labels)
nlevels(lin_dist_plot$lineage_labels)

#-----------------------------------------------------------------------
# IMPORTANT RESULTS to put inside table or text for interactive plots
# {RESULT: Total number of samples for lineage}
sum(table(lin_dist_plot$lineage_labels))
# {RESULT: No of samples within lineage}
table(lin_dist_plot$lineage_labels)
# {Result: No. of unique mutations selected lineages contribute to}
length(unique(lin_dist_plot$mutationinformation))
# Total number of rows (mutation entries, duplicates included)
length(lin_dist_plot$mutationinformation)

# {Muts not present within selected lineages}:
# unique mutations in the full data that are absent from the subset.
u = unique(lin_dist_plot$mutationinformation)
u2 = unique(merged_df2_combined$mutationinformation)
check = u2[!(u2 %in% u)]
print(check)
#-----------------------------------------------------------------------
# Choice of x-axis variable for the lineage distribution plots.
#
# The original code assigned my_x_and_t eleven times in a row, so only the
# final assignment ever took effect. The options now live in one lookup
# (data column -> axis label) and a single selector picks the active one.
x_axis_labels = c(
    duet_scaled         = "mCSM-DUET"
  , foldx_scaled        = "FoldX"
  #, deepddg_scaled     = "DeepDDG"
  , ddg_dynamut2_scaled = "Dynamut2"
  , ddg_dynamut_scaled  = "Dynamut"
  , ddg_mcsm_scaled     = "mCSM"
  , ddg_sdm_scaled      = "SDM"
  , ddg_duet_scaled     = "DUET-d"
  , ddg_encom_scaled    = "EnCOM-Stability"
  , dds_encom_scaled    = "EnCOM-Flexibility"
  , mcsm_na_scaled      = "mCSM-NA"
  # TO DO
  , affinity_scaled     = "mCSM-Lig" #ligdist< 10
)

# Active option: unchanged from the original script's effective value.
# NOTE(review): this is the entry marked TO DO above (the ligdist < 10 filter
# is not applied in this script), while the output file is named
# "lineage_dist_dm_om_PS.svg". Confirm whether "duet_scaled" was intended.
sel_x_axis = "affinity_scaled"

# Fail early on a typo instead of passing NA on to the plotting function.
stopifnot(length(sel_x_axis) == 1L, sel_x_axis %in% names(x_axis_labels))

# Same shape as before: c(<data column>, <axis label>), without names.
my_x_and_t = c(sel_x_axis, x_axis_labels[[sel_x_axis]])
#=====================
# Plot: without facet
#=====================
# Lineage distribution plot on a single panel (no facetting).
# lineage_distP() is defined outside this script; the first element of
# my_x_and_t is passed as the x-axis variable and the second as its label.
# with_facet uses the reserved word FALSE rather than F, because F is an
# ordinary variable that can be reassigned elsewhere.
linP_dm_om = lineage_distP(lin_dist_plot
                           , x_axis = my_x_and_t[1]
                           , x_lab = my_x_and_t[2]
                           , y_axis = "lineage_labels"
                           , leg_label = "Mutation Class"
                           , with_facet = FALSE)
# Display the plot (auto-printed when run at top level).
linP_dm_om
#=====================
# Plot: with facet
#=====================
# Lineage distribution plot facetted by the "mutation_info_labels" column.
# lineage_distP() is defined outside this script; the legend position and
# direction arguments are passed through to it unchanged.
# with_facet uses the reserved word TRUE rather than T, because T is an
# ordinary variable that can be reassigned elsewhere.
linP_dm_om_facet = lineage_distP(lin_dist_plot
                                 , x_axis = my_x_and_t[1]
                                 , x_lab = my_x_and_t[2]
                                 , y_axis = "lineage_labels"
                                 , with_facet = TRUE
                                 , facet_wrap_var = "mutation_info_labels"
                                 , leg_label = "Mutation Class"
                                 , leg_pos_wf = "none"
                                 , leg_dir_wf = "horizontal")
# Display the plot (auto-printed when run at top level).
# NOTE(review): this facetted plot is never written to a file in the part of
# the script visible here.
linP_dm_om_facet
#=================
# output plot:
# without facet
#=================
# Write the un-facetted plot to the SVG file.
# The plot is drawn with an explicit print(): a bare object is only
# auto-printed at top level, so when this script is run through source()
# nothing would be drawn and the SVG would come out empty.
# (Assumes linP_dm_om is a plot object whose print method renders it.)
svg(plot_lineage_dist_dm_om_ps)
print(linP_dm_om)
dev.off()