Added foldx_scaled and deepddg_scaled values to combine_df.py, and also used that script to merge all the dfs, so that merged_df2 and merged_df3 are in fact what we need for downstream processing

This commit is contained in:
Tanushree Tunstall 2021-09-10 16:58:36 +01:00
parent dda5d1ea93
commit 4ba4ff602e
5 changed files with 354 additions and 977 deletions

View file

@ -16,9 +16,9 @@ source("Header_TT.R") # also loads all my functions
#===========
# input
#===========
#drug = "streptomycin"
#gene = "gid"
source("get_plotting_dfs.R")
drug = "streptomycin"
gene = "gid"
#source("get_plotting_dfs.R")
spec = matrix(c(
"drug" , "d", 1, "character",
@ -47,7 +47,7 @@ plot_lineage_dist_dm_om_ps = paste0(plotdir,"/", lineage_dist_dm_om_ps)
###########################
# Data for plots
# you need merged_df2 or merged_df2_comp
# you need merged_df2_combined or merged_df2_combined_comp
# since this is a one-to-many relationship
# i.e. the same SNP can belong to multiple lineages
# using the _comp dataset means
@ -59,10 +59,12 @@ plot_lineage_dist_dm_om_ps = paste0(plotdir,"/", lineage_dist_dm_om_ps)
# Data for plots
#===================
# quick checks
table(merged_df2$mutation_info_labels); levels(merged_df2$lineage_labels)
table(merged_df2$lineage_labels); levels(merged_df2$mutation_info_labels)
table(merged_df2_combined$mutation_info_labels); levels(merged_df2_combined$lineage_labels)
table(merged_df2_combined$lineage_labels); levels(merged_df2_combined$mutation_info_labels)
lin_dist_plot = merged_df2[merged_df2$lineage_labels%in%c("L1", "L2", "L3", "L4"),]
sel_lineages = c("L1", "L2", "L3", "L4")
lin_dist_plot = merged_df2_combined[merged_df2_combined$lineage_labels%in%sel_lineages,]
table(lin_dist_plot$lineage_labels); nlevels(lin_dist_plot$lineage_labels)
# refactor
@ -79,29 +81,55 @@ table(lin_dist_plot$lineage_labels)#{RESULT: No of samples within lineage}
length(unique(lin_dist_plot$mutationinformation))#{Result: No. of unique mutations selected lineages contribute to}
length(lin_dist_plot$mutationinformation)
u2 = unique(merged_df2$mutationinformation)
u2 = unique(merged_df2_combined$mutationinformation)
u = unique(lin_dist_plot$mutationinformation)
check = u2[!u2%in%u]; print(check) #{Muts not present within selected lineages}
#-----------------------------------------------------------------------
# without facet
my_x_and_t = c("duet_scaled", "mCSM-DUET")
my_x_and_t = c("foldx_scaled", "FoldX")
#my_x_and_t = c("deepddg_scaled", "DeepDDG")
my_x_and_t = c("ddg_dynamut2_scaled", "Dynamut2")
my_x_and_t = c("ddg_dynamut_scaled", "Dynamut")
my_x_and_t = c("ddg_mcsm_scaled", "mCSM")
my_x_and_t = c("ddg_sdm_scaled", "SDM")
my_x_and_t = c("ddg_duet_scaled", "DUET-d")
my_x_and_t = c("ddg_encom_scaled", "EnCOM-Stability")
my_x_and_t = c("dds_encom_scaled", "EnCOM-Flexibility")
my_x_and_t = c("mcsm_na_scaled", "mCSM-NA")
# TO DO
my_x_and_t = c("affinity_scaled", "mCSM-Lig") #ligdist< 10
#=====================
# Plot: without facet
#=====================
linP_dm_om = lineage_distP(lin_dist_plot
, with_facet = F
, x_axis = "deepddg"
, x_axis = my_x_and_t[1]
, x_lab = my_x_and_t[2]
, y_axis = "lineage_labels"
, x_lab = "DeepDDG"
, leg_label = "Mutation Class"
)
, with_facet = F)
linP_dm_om
# with facet
#=====================
# Plot: with facet
#=====================
linP_dm_om_facet = lineage_distP(lin_dist_plot
, with_facet = T
, facet_wrap_var = "mutation_info_labels"
, leg_label = "Mutation Class"
, leg_pos_wf = "none"
, leg_dir_wf = "horizontal"
)
, x_axis = my_x_and_t[1]
, x_lab = my_x_and_t[2]
, y_axis = "lineage_labels"
, with_facet = T
, facet_wrap_var = "mutation_info_labels"
, leg_label = "Mutation Class"
, leg_pos_wf = "none"
, leg_dir_wf = "horizontal")
linP_dm_om_facet
#=================
@ -109,6 +137,7 @@ linP_dm_om_facet
# without facet
#=================
svg(plot_lineage_dist_dm_om_ps)
linP_dm_om
dev.off()