#!/usr/bin/env Rscript  
#########################################################
# TASK: Lineage dist plots: ggridges

# Output: 1 or 2 SVGs for PS stability

##########################################################
# 				Installing and loading required packages 			 
##########################################################
getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting/")
getwd()

source("Header_TT.R") # also loads all my functions

#===========
# input
#===========
#drug = "streptomycin"
#gene = "gid"
source("get_plotting_dfs.R")

spec = matrix(c(
  "drug"       , "d",  1, "character",
  "gene"       , "g",  1, "character",
  "data_file1" , "fa", 2, "character",
  "data_file2" , "fb", 2, "character" 
), byrow = TRUE, ncol = 4)

opt = getopt(spec)

drug            = opt$drug
gene            = opt$gene
infile_params   = opt$data_file1
infile_metadata = opt$data_file2

if(is.null(drug)|is.null(gene)) {
  stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
}

#=======
# output
#=======
lineage_dist_dm_om_ps      = "lineage_dist_dm_om_PS.svg"
plot_lineage_dist_dm_om_ps =  paste0(plotdir,"/", lineage_dist_dm_om_ps)
#========================================================================

###########################
# Data for plots
# you need merged_df2 or merged_df2_comp
# since this is one-many relationship 
# i.e the same SNP can belong to multiple lineages
# using the _comp dataset means
# we lose some muts and at this level, we should use
# as much info as available, hence use df with NA
###########################

#===================
# Data for plots
#===================
# quick checks
table(merged_df2$mutation_info_labels); levels(merged_df2$lineage_labels)
table(merged_df2$lineage_labels); levels(merged_df2$mutation_info_labels)

lin_dist_plot = merged_df2[merged_df2$lineage_labels%in%c("L1", "L2", "L3", "L4"),]
table(lin_dist_plot$lineage_labels); nlevels(lin_dist_plot$lineage_labels)

# refactor
lin_dist_plot$lineage_labels = factor(lin_dist_plot$lineage_labels)
nlevels(lin_dist_plot$lineage_labels)

#-----------------------------------------------------------------------
# IMPORTANT RESULTS to put inside table or text for interactive plots

sum(table(lin_dist_plot$lineage_labels)) #{RESULT: Total number of samples for lineage}

table(lin_dist_plot$lineage_labels)#{RESULT: No of samples within lineage}

length(unique(lin_dist_plot$mutationinformation))#{Result: No. of unique mutations selected lineages contribute to}
length(lin_dist_plot$mutationinformation)

u2 = unique(merged_df2$mutationinformation)
u = unique(lin_dist_plot$mutationinformation)
check = u2[!u2%in%u]; print(check) #{Muts not present within selected lineages}
#-----------------------------------------------------------------------
# without facet
linP_dm_om = lineage_distP(lin_dist_plot
              , with_facet = F
              , x_axis = "deepddg"
              , y_axis = "lineage_labels"
              , x_lab = "DeepDDG"
              , leg_label = "Mutation Class"
)
linP_dm_om

# with facet
linP_dm_om_facet = lineage_distP(lin_dist_plot
              , with_facet = T
              , facet_wrap_var = "mutation_info_labels"
              , leg_label = "Mutation Class"
              , leg_pos_wf = "none"
              , leg_dir_wf = "horizontal"
              
)
linP_dm_om_facet

#=================
# output plot:
# without facet
#=================
svg(plot_lineage_dist_dm_om_ps)
linP_dm_om

dev.off()