From 04253b961f82df6b253d8ed374e424ca9b2d5bc9 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 22 Aug 2022 21:56:13 +0100 Subject: [PATCH] lots of per-plot configs --- scripts/functions/bp_lineage.R | 15 +- scripts/functions/bp_lineage_diversity.R | 13 +- scripts/functions/corr_plot_data.R | 18 +- scripts/functions/dm_om_data.R | 6 +- scripts/functions/lineage_dist.R | 11 +- scripts/functions/position_count_bp.R | 3 +- scripts/plotting/get_plotting_dfs.R | 6 +- .../alr/dm_om_plots_layout_alr.R | 164 +++++++ .../plotting/plotting_thesis/basic_barplots.R | 46 +- .../plotting_thesis/basic_barplots_UPDATED.R | 357 +++++++++++++++ .../plotting/plotting_thesis/dm_om_plots.R | 48 ++- .../gid/basic_barplots_layout_gid.R | 271 ++++++++++++ .../plotting_thesis/gid/dm_om_plots_gid.R | 320 ++++++++++++++ .../plotting_thesis/gid/gg_pairs_all_gid.R | 178 ++++++++ scripts/plotting/plotting_thesis/just.R | 38 ++ .../katg/basic_barplots_katg.R | 364 ++++++++++++++++ .../katg/basic_barplots_layput_katg.R | 309 +++++++++++++ .../katg/dm_om_plots_layout_katg.R | 179 ++++++++ .../plotting_thesis/katg/gg_pairs_all_katg.R | 203 +++++++++ .../katg/pe_sens_site_count_katg.R | 179 ++++++++ .../katg/prominent_effects_katg.R | 331 ++++++++++++++ .../katg/sensitivity_count_katg.R | 65 +++ .../plotting/plotting_thesis/linage_bp_dist.R | 69 ++- .../plotting_thesis/linage_bp_dist_layout.R | 36 +- .../plotting_thesis/pe_sens_site_count.R | 78 ++++ .../plotting_thesis/prominent_effects.R | 405 ------------------ .../rpob/basic_barplots_layout_rpob.R | 348 +++++++++++++++ .../rpob/basic_barplots_rpob.R | 372 ++++++++++++++++ .../rpob/dm_om_plots_layout_rpob.R | 179 ++++++++ .../plotting_thesis/rpob/gg_pairs_all_rpob.R | 203 +++++++++ .../plotting_thesis/rpob/katg_other_plots.R | 2 + .../plotting_thesis/rpob/linage_bp_dist.R | 172 ++++++++ .../rpob/linage_bp_dist_layout.R | 30 ++ .../rpob/pe_sens_site_count_rpob.R | 179 ++++++++ .../rpob/prominent_effects_rpob.R | 398 +++++++++++++++++ .../rpob/sensitivity_count_rpob.R | 66 +++ 36 files changed, 5121 insertions(+), 540 deletions(-) create mode 100644 scripts/plotting/plotting_thesis/alr/dm_om_plots_layout_alr.R create mode 100644 scripts/plotting/plotting_thesis/basic_barplots_UPDATED.R create mode 100644 scripts/plotting/plotting_thesis/gid/basic_barplots_layout_gid.R create mode 100644 scripts/plotting/plotting_thesis/gid/dm_om_plots_gid.R create mode 100644 scripts/plotting/plotting_thesis/gid/gg_pairs_all_gid.R create mode 100644 scripts/plotting/plotting_thesis/just.R create mode 100644 scripts/plotting/plotting_thesis/katg/basic_barplots_katg.R create mode 100644 scripts/plotting/plotting_thesis/katg/basic_barplots_layput_katg.R create mode 100644 scripts/plotting/plotting_thesis/katg/dm_om_plots_layout_katg.R create mode 100644 scripts/plotting/plotting_thesis/katg/gg_pairs_all_katg.R create mode 100644 scripts/plotting/plotting_thesis/katg/pe_sens_site_count_katg.R create mode 100644 scripts/plotting/plotting_thesis/katg/prominent_effects_katg.R create mode 100644 scripts/plotting/plotting_thesis/katg/sensitivity_count_katg.R create mode 100644 scripts/plotting/plotting_thesis/pe_sens_site_count.R delete mode 100644 scripts/plotting/plotting_thesis/prominent_effects.R create mode 100644 scripts/plotting/plotting_thesis/rpob/basic_barplots_layout_rpob.R create mode 100644 scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R create mode 100644 scripts/plotting/plotting_thesis/rpob/dm_om_plots_layout_rpob.R create mode 100644 scripts/plotting/plotting_thesis/rpob/gg_pairs_all_rpob.R create mode 100644 scripts/plotting/plotting_thesis/rpob/katg_other_plots.R create mode 100644 scripts/plotting/plotting_thesis/rpob/linage_bp_dist.R create mode 100644 scripts/plotting/plotting_thesis/rpob/linage_bp_dist_layout.R create mode 100644 scripts/plotting/plotting_thesis/rpob/pe_sens_site_count_rpob.R create mode 100644 scripts/plotting/plotting_thesis/rpob/prominent_effects_rpob.R create mode 100644 scripts/plotting/plotting_thesis/rpob/sensitivity_count_rpob.R diff --git a/scripts/functions/bp_lineage.R b/scripts/functions/bp_lineage.R index 643d0a5..cc8103f 100644 --- a/scripts/functions/bp_lineage.R +++ b/scripts/functions/bp_lineage.R @@ -13,15 +13,15 @@ lin_count_bp <- function( lf_data = lin_lf , display_label_col = "p_count" , bar_stat_stype = "identity" , x_lab_angle = 90 - , d_lab_size = 5 + , d_lab_size = 2.3 , d_lab_hjust = 0.5 , d_lab_vjust = 0.5 , d_lab_col = "black" - , my_xats = 20 # x axis text size - , my_yats = 20 # y axis text size - , my_xals = 22 # x axis label size - , my_yals = 22 # y axis label size - , my_lls = 22 # legend label size + , my_xats = 8 # x axis text size + , my_yats = 8 # y axis text size + , my_xals = 10 # x axis label size + , my_yals = 10 # y axis label size + , my_lls = 10 # legend label size , bar_col_labels = c("Mutations", "Total Samples") , bar_col_values = c("grey50", "gray75") , bar_leg_name = "" @@ -56,7 +56,8 @@ lin_count_bp <- function( lf_data = lin_lf , axis.title.y = element_text(size = my_yals , colour = "black") , legend.position = leg_location - , legend.text = element_text(size = my_lls)) + + , legend.text = element_text(size = my_lls) + , legend.key.size = unit(my_lls, 'pt')) + geom_label(aes(label = eval(parse(text = display_label_col))) , size = d_lab_size diff --git a/scripts/functions/bp_lineage_diversity.R b/scripts/functions/bp_lineage_diversity.R index 68617f2..2a5d6a3 100644 --- a/scripts/functions/bp_lineage_diversity.R +++ b/scripts/functions/bp_lineage_diversity.R @@ -14,15 +14,15 @@ lin_count_bp_diversity <- function( lf_data = lin_wf , display_label_col = "snp_diversity_f" , bar_stat_stype = "identity" , x_lab_angle = 90 - , d_lab_size = 5 + , d_lab_size = 2.3 , d_lab_hjust = 0.5 , d_lab_vjust = 0.5 , d_lab_col = "black" - , my_xats = 20 # x axis text size - , my_yats = 20 # y axis text size - , my_xals = 22 # x axis label size - , my_yals = 22 # y axis label size - , my_lls = 22 # legend label size + , my_xats = 8 # x axis text size + , my_yats = 8 # y axis text size + , my_xals = 10 # x axis label size + , my_yals = 10 # y axis label size + , my_lls = 10 # legend label size , bar_col_labels = "" #c("Mutations", "Total Samples") , bar_col_values = c("gray50", "gray75") , bar_leg_name = "" @@ -64,6 +64,7 @@ lin_count_bp_diversity <- function( lf_data = lin_wf , colour = "black") , legend.position = leg_location , legend.text = element_text(size = my_lls) + , legend.key.size = unit(my_lls, 'pt') , plot.title = element_text(size = my_lls , colour = title_colour , hjust = 0.5) diff --git a/scripts/functions/corr_plot_data.R b/scripts/functions/corr_plot_data.R index ab479c2..0384df9 100644 --- a/scripts/functions/corr_plot_data.R +++ b/scripts/functions/corr_plot_data.R @@ -7,9 +7,6 @@ # LigDist_colname #from globals: plotting_globals.R # ppi2Dist_colname #from globals: plotting_globals.R # naDist_colname #from globals: plotting_globals.R -geneL_normal = c("pnca") -geneL_na = c("gid", "rpob") -geneL_ppi2 = c("alr", "embb", "katg", "rpob") corr_data_extract <- function(df , gene @@ -75,11 +72,24 @@ corr_data_extract <- function(df if (tolower(gene)%in%geneL_na){ colnames_to_extract = c(common_colnames,"mcsm_na_affinity", naDist_colname) - display_colnames = c(display_common_colnames, "mCSM-NA", "NCA-Dist") + display_colnames = c(display_common_colnames, "mCSM-NA", "NA-Dist") corr_df = df[,colnames_to_extract] colnames(corr_df) = display_colnames } + # SPECIAL case for rpob as it exists in both ppi and na + if (tolower(gene)%in%c("rpob")){ + colnames_to_extract = c(common_colnames + , "mcsm_ppi2_affinity", ppi2Dist_colname + , "mcsm_na_affinity", naDist_colname) + + display_colnames = c(display_common_colnames + ,"mCSM-PPI2", "PPI-Dist" + ,"mCSM-NA", "NA-Dist" ) + + corr_df = df[,colnames_to_extract] + colnames(corr_df) = display_colnames + } # [optional] arg: extract_scaled_cols if (extract_scaled_cols){ cat("\nExtracting scaled columns as well...\n") diff --git a/scripts/functions/dm_om_data.R b/scripts/functions/dm_om_data.R index 48c290b..8fd8470 100644 --- a/scripts/functions/dm_om_data.R +++ b/scripts/functions/dm_om_data.R @@ -734,7 +734,7 @@ dm_om_wf_lf_data <- function(df # mcsm-ppi2 affinity # data filtered by cut off #======================== - if (tolower(gene)%in%geneL_ppi2){ + if (tolower(gene)%in%geneL_ppi2 || tolower(gene)%in%geneL_both){ #----------------- # mCSM-PPI2: WF and lF #----------------- @@ -776,13 +776,11 @@ dm_om_wf_lf_data <- function(df } - - #==================== # mcsm-NA affinity # data filtered by cut off #==================== - if (tolower(gene)%in%geneL_na){ + if (tolower(gene)%in%geneL_na|| tolower(gene)%in%geneL_both){ #--------------- # mCSM-NA: WF and lF #----------------- diff --git a/scripts/functions/lineage_dist.R b/scripts/functions/lineage_dist.R index 4e3689a..e19f2ba 100644 --- a/scripts/functions/lineage_dist.R +++ b/scripts/functions/lineage_dist.R @@ -20,11 +20,11 @@ lineage_distP <- function(plotdf , fill_categ = "mutation_info_labels" , fill_categ_cols = c("#E69F00", "#999999") , label_categories = c("LABEL1", "LABEL2") - , my_ats = 15 # axis text size - , my_als = 20 # axis label size - , my_leg_ts = 16 - , my_leg_title = 16 - , my_strip_ts = 20 + , my_ats = 15 # 15 axis text size + , my_als = 20 # 20 axis label size + , my_leg_ts = 16 #16 + , my_leg_title = 16 #16 + , my_strip_ts = 20 #20 , leg_pos = c(0.8, 0.9) , leg_pos_wf = c("top", "left", "bottom", "right") , leg_dir_wf = c("horizontal", "vertical") @@ -57,6 +57,7 @@ lineage_distP <- function(plotdf , axis.title.y = element_blank() , strip.text = element_text(size = my_strip_ts) , legend.text = element_text(size = my_leg_ts) + , legend.key.size = unit(my_leg_ts, 'pt') , legend.title = element_text(size = my_leg_title) , legend.position = c(0.8, 0.9)) + labs(x = x_lab diff --git a/scripts/functions/position_count_bp.R b/scripts/functions/position_count_bp.R index d81932d..d334ae4 100755 --- a/scripts/functions/position_count_bp.R +++ b/scripts/functions/position_count_bp.R @@ -132,7 +132,8 @@ site_snp_count_bp <- function (plotdf scale_x_continuous(breaks = unique(snpsBYpos_df$snpsBYpos)) + geom_label(stat = "count", aes(label = ..count..) , color = "black" - , size = geom_ls) + + , size = geom_ls + , position = position_dodge2(width = 1)) + theme(axis.text.x = element_text(size = axis_text_size , angle = 0) , axis.text.y = element_text(size = axis_text_size diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R index aae338e..da0687a 100644 --- a/scripts/plotting/get_plotting_dfs.R +++ b/scripts/plotting/get_plotting_dfs.R @@ -49,9 +49,13 @@ if (!exists("infile_params") && exists("gene")){ cat("\nInput file for mcsm comb data not specified, assuming filename: ", infile_params, "\n") } -# Input 1: read _comb_afor.csv +# Input 1: cat("\nReading mcsm combined data file: ", infile_params) mcsm_df = read.csv(infile_params, header = T) +if (tolower(gene)%in%c("rpob")){ + mcsm_df = mcsm_df[mcsm_df$position!=1148,] +} + pd_df = plotting_data(mcsm_df , gene = gene # ADDED , lig_dist_colname = LigDist_colname diff --git a/scripts/plotting/plotting_thesis/alr/dm_om_plots_layout_alr.R b/scripts/plotting/plotting_thesis/alr/dm_om_plots_layout_alr.R new file mode 100644 index 0000000..64d17a0 --- /dev/null +++ b/scripts/plotting/plotting_thesis/alr/dm_om_plots_layout_alr.R @@ -0,0 +1,164 @@ +# source dm_om_plots.R +#============ +# Plot labels +#============ +tit1 = "Stability changes" +tit2 = "Genomic measure" +tit3 = "Distance to partners" +tit4 = "Evolutionary Conservation" +tit5 = "Affinity changes" +pt_size = 30 + +theme_georgia <- function(...) { + theme_gray(base_family = "sans", ...) + + theme(plot.title = element_text(face = "bold")) +} + + +title_theme <- calc_element("plot.title", theme_georgia()) + +pt1 = ggdraw() + + draw_label( + tit1, + fontfamily = title_theme$family, + fontface = title_theme$face, + #size = title_theme$size + size = pt_size + ) + +pt2 = ggdraw() + + draw_label( + tit2, + fontfamily = title_theme$family, + fontface = title_theme$face, + size = pt_size + ) + +pt3 = ggdraw() + + draw_label( + tit3, + fontfamily = title_theme$family, + fontface = title_theme$face, + size = pt_size + ) + +pt4 = ggdraw() + + draw_label( + tit4, + fontfamily = title_theme$family, + fontface = title_theme$face, + size = pt_size + ) + + +pt5 = ggdraw() + + draw_label( + tit5, + fontfamily = title_theme$family, + fontface = title_theme$face, + size = pt_size + ) + +#====================== +# Output plot function +#====================== +OutPlot_dm_om = function(x){ + + # dist b/w plot title and plot + relH_tp = c(0.08, 0.92) + + my_label_size = 25 + #---------------- + # Top panel + #---------------- + top_panel = cowplot::plot_grid( + cowplot::plot_grid(pt1, + cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P + , nrow = 1 + , labels = c("A", "B", "C", "D") + , label_size = my_label_size) + , ncol = 1 + , rel_heights = relH_tp + ), + NULL, + cowplot::plot_grid(pt2, + cowplot::plot_grid(genomicsP + , nrow = 1 + , labels = c("E") + , label_size = my_label_size) + , ncol = 1 + , rel_heights = relH_tp + ), + NULL, + cowplot::plot_grid(pt3, + cowplot::plot_grid( #distanceP + distanceP_lig + , distanceP_ppi2 + #, distanceP_na + , nrow = 1 + , labels = c("F", "G") + , label_size = my_label_size) + , ncol = 1 + , rel_heights = relH_tp + ), + nrow = 1, + rel_widths = c(2/7, 0.1/7, 0.5/7, 0.1/7, 1/7) + ) + + #---------------- + # Bottom panel + #---------------- + bottom_panel = cowplot::plot_grid( + cowplot::plot_grid(pt4, + cowplot::plot_grid(consurfP, proveanP, snap2P + , nrow = 1 + , labels = c("H", "I", "J") + , label_size = my_label_size) + , ncol = 1 + , rel_heights =relH_tp + ),NULL, + cowplot::plot_grid(pt5, + cowplot::plot_grid(mcsmligP, mcsmlig2P + , mcsmppi2P + #, mcsmnaP + , nrow = 1 + , labels = c("K", "L", "M") + , label_size = my_label_size) + , ncol = 1 + , rel_heights = relH_tp + ),NULL, + nrow = 1, + rel_widths = c(3/6,0.1/6,3/6, 0.1/6 ) + ) + + #------------------------------- + # combine: Top and Bottom panel + #------------------------------- + cowplot::plot_grid (top_panel, bottom_panel + , nrow =2 + , rel_widths = c(1, 1) + , align = "hv") +} + +#===================== +# OutPlot: svg and png +#====================== +dm_om_combinedP = paste0(outdir_images + ,tolower(gene) + ,"_dm_om_all.svg") + +cat("DM OM plots with stats:", dm_om_combinedP) +svg(dm_om_combinedP, width = 32, height = 18) + +OutPlot_dm_om() +dev.off() + + +dm_om_combinedP_png = paste0(outdir_images + ,tolower(gene) + ,"_dm_om_all.png") +cat("DM OM plots with stats:", dm_om_combinedP_png) +png(dm_om_combinedP_png, width = 32, height = 18, units = "in", res = 300) + +OutPlot_dm_om() +dev.off() diff --git a/scripts/plotting/plotting_thesis/basic_barplots.R b/scripts/plotting/plotting_thesis/basic_barplots.R index 61b3540..10f1dc4 100644 --- a/scripts/plotting/plotting_thesis/basic_barplots.R +++ b/scripts/plotting/plotting_thesis/basic_barplots.R @@ -183,7 +183,6 @@ if (tolower(gene)%in%geneL_na){ } ##################################################################### - # ------------------------------ # bp site site count: mCSM-lig # < 10 Ang ligand @@ -203,10 +202,10 @@ posC_lig = site_snp_count_bp(plotdf = df3_lig , axis_label_size = 10) posC_lig -# ------------------------------ +#------------------------------ # bp site site count: ppi2 # < 10 Ang interface -# ------------------------------ +#------------------------------ if (tolower(gene)%in%geneL_ppi2){ posC_ppi2 = site_snp_count_bp(plotdf = df3_ppi2 @@ -223,10 +222,10 @@ if (tolower(gene)%in%geneL_ppi2){ posC_ppi2 } -# ------------------------------ +#------------------------------ # bp site site count: NCA dist # < 10 Ang nca -# ------------------------------ +#------------------------------ if (tolower(gene)%in%geneL_na){ posC_nca = site_snp_count_bp(plotdf = df3_na @@ -242,15 +241,11 @@ if (tolower(gene)%in%geneL_na){ , axis_label_size = 10) posC_nca } - - #=============================================================== - - -# ------------------------------ +#------------------------------ # bp site site count: ALL # <10 Ang ligand -# ------------------------------ +#------------------------------ posC_all = site_snp_count_bp(plotdf = df3 , df_colname = "position" , xaxis_title = "Number of nsSNPs" @@ -282,34 +277,6 @@ consurfP = stability_count_bp(plotdf = df3 consurfP -#################### -# Sensitivity count: Mutations -#################### -table(df3$sensitivity) - -rect_sens=data.frame(mutation_class=c("Resistant","Sensitive") - , tile_colour =c("red","blue") - , numbers = c(table(df3$sensitivity)[[1]], table(df3$sensitivity)[[2]])) - - - -sensP = ggplot(rect_sens, aes(mutation_class, y = 0 - , fill = tile_colour - , label = paste0("n=", numbers) - )) + - geom_tile(width = 1, height = 1) + # make square tiles - geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + # add white text in the middle - scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame - coord_fixed() + # make sure tiles are square - #coord_flip()+ scale_x_reverse() + - # theme_void() # remove any axis markings - theme_nothing() # remove any axis markings -sensP - -# sensP2 = sensP + -# coord_flip() + scale_x_reverse() -# sensP2 - ############################################################## #=================== # Stability @@ -421,5 +388,4 @@ snap2P = stability_count_bp(plotdf = df3 , ltis = 11 , geom_ls = 2.5) snap2P - ##################################################################################### diff --git a/scripts/plotting/plotting_thesis/basic_barplots_UPDATED.R b/scripts/plotting/plotting_thesis/basic_barplots_UPDATED.R new file mode 100644 index 0000000..96572e0 --- /dev/null +++ b/scripts/plotting/plotting_thesis/basic_barplots_UPDATED.R @@ -0,0 +1,357 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Barplots +# basic barplots with outcome +# basic barplots with frequency of count of mutations +######################################################### +#============= +# Data: Input +#============== +#source("~/git/LSHTM_analysis/config/pnca.R") +#source("~/git/LSHTM_analysis/config/embb.R") +#source("~/git/LSHTM_analysis/config/gid.R") + +#source("~/git/LSHTM_analysis/config/alr.R") +#source("~/git/LSHTM_analysis/config/katg.R") +source("~/git/LSHTM_analysis/config/rpob.R") + +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") +#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above + +cat("\nSourced plotting cols as well:", length(plotting_cols)) + +#################################################### +class(merged_df3) + +df3 = subset(merged_df3, select = -c(pos_count)) + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +cat("plots will output to:", outdir_images) + +########################################################### +#------------------------------ +# plot default sizes +#------------------------------ +#========================= +# Affinity outcome +# check this var: outcome_cols_affinity +# get from preformatting or put in globals +#========================== +DistCutOff +LigDist_colname # = "ligand_distance" # from globals +ppi2Dist_colname +naDist_colname + +########################################################### +# get plotting data within the distance +df3_lig = df3[df3[[LigDist_colname]]DistCutOff,"mCSM-lig"]=0 +corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mmCSM-lig"]=0 + +if (tolower(gene)%in%geneL_ppi2){ + corr_affinity_df[corr_affinity_df["PPI-Dist"]>DistCutOff,"mCSM-PPI2"]=0 +} + +if (tolower(gene)%in%geneL_na){ + corr_affinity_df[corr_affinity_df["NCA-Dist"]>DistCutOff,"mCSM-NA"]=0 +} + +# count 0 +#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100 +unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0) +unmasked_vals + +########################################################## +#================ +# Stability +#================ +corr_ps_colnames = c(static_cols + , "DUET" + , "FoldX" + , "DeepDDG" + , "Dynamut2" + , aff_dist_cols + , "dst_mode") + +corr_df_ps = corr_plotdf[, corr_ps_colnames] + +# Plot #1 +plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability estimates") +########################################################## +#================ +# Conservation +#================ +corr_conservation_cols = c( static_cols + , "ConSurf" + , "SNAP2" + , "PROVEAN" + #, aff_dist_cols + , "dst_mode" +) + +corr_df_cons = corr_plotdf[, corr_conservation_cols] + +# Plot #2 +plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation estimates") + +########################################################## +#================ +# Affinity: lig, ppi and na as applicable +#================ +#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]DistCutOff,"mCSM-lig"]=0 +corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mmCSM-lig"]=0 + +if (tolower(gene)%in%geneL_ppi2){ + corr_affinity_df[corr_affinity_df["PPI-Dist"]>DistCutOff,"mCSM-PPI2"]=0 +} + +if (tolower(gene)%in%geneL_na){ + corr_affinity_df[corr_affinity_df["NCA-Dist"]>DistCutOff,"mCSM-NA"]=0 +} + +# count 0 +#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100 +unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0) +unmasked_vals + +########################################################## +#================ +# Stability +#================ +corr_ps_colnames = c(static_cols + , "DUET" + , "FoldX" + , "DeepDDG" + , "Dynamut2" + , aff_dist_cols + , "dst_mode") + +corr_df_ps = corr_plotdf[, corr_ps_colnames] + +# Plot #1 +plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability estimates") + +########################################################## +#================ +# Conservation +#================ +corr_conservation_cols = c( static_cols + , "ConSurf" + , "SNAP2" + , "PROVEAN" + #, aff_dist_cols + , "dst_mode" +) + +corr_df_cons = corr_plotdf[, corr_conservation_cols] + +# Plot #2 +plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation estimates") + +########################################################## +#================ +# Affinity: lig, ppi and na as applicable +#================ +#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]10 +head(str_df) + +# replace in place affinity values >10 +str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0 +str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0 + +#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10 +if (tolower(gene)%in%geneL_ppi2){ + str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0 +} + +# na gene: replace in place na affinity values where na dist >10 +if (tolower(gene)%in%geneL_na){ + str_df[str_df["nca_distance"]>10,"mcsm_na_scaled"]=0 +} + +colnames(str_df) +head(str_df) + +scaled_cols_tc = other_cols[grep("scaled", other_cols)] + + +################################################ +#=============== +# whole df +#=============== +give_col=function(x,y,df=str_df){ + df[df[[pos_colname]]==x,y] +} + +for (i in unique(str_df[[pos_colname]]) ){ + print(i) + #cat(length(unique(str_df[[pos_colname]]))) + + biggest = max(abs(give_col(i,scaled_cols_tc))) + + str_df[str_df[[pos_colname]]==i,'abs_max_effect'] = biggest + str_df[str_df[[pos_colname]]==i,'effect_type']= names( + give_col(i,scaled_cols_tc)[which( + abs( + give_col(i,scaled_cols_tc) + ) == biggest, arr.ind=T + )[, "col"]])[1] + + effect_name = unique(str_df[str_df[[pos_colname]]==i,'effect_type'])#[1] # pick first one in case we have multiple exact values + + # get index/rowname for value of max effect, and then use it to get the original sign + # here + #ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T)) + ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c(pos_colname,effect_name)][effect_name])== biggest, arr.ind=T)) + + str_df[str_df[[pos_colname]]==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1] +} + +# ends with suffix 2 if dups +str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values +colnames(str_df) +table(str_df$effect_type) + +# check +str_df_check = str_df[str_df[[pos_colname]]%in%c(24, 32, 160, 303, 334),] + +#================ +# for Plots +#================ +str_df_short = str_df[, c("mutationinformation", + #"position", + pos_colname, + "sensitivity" + , "effect_type" + , "effect_sign")] + +table(str_df_short$effect_type) +table(str_df_short$effect_sign) +str(str_df_short) + +# assign pe outcome +str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS") +table(str_df_short$pe_outcome ) +table(str_df_short$effect_sign) + +#============== +# group effect type: +# lig, ppi2, nuc. acid, stability +#============== +affcols = c("affinity_scaled", "mmcsm_lig_scaled") +ppi2_cols = c("mcsm_ppi2_scaled") + +#lig +table(str_df_short$effect_type) +str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols + , "lig" + , str_df_short$effect_type) +table(str_df_short$effect_grouped) + +#ppi2 +str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols + , "ppi2" + , str_df_short$effect_grouped) +table(str_df_short$effect_grouped) + + +#stability +str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig", + "ppi2" + ) + , "stability" + , str_df_short$effect_grouped) + +table(str_df_short$effect_grouped) + +# create a sign as well +str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_" + , str_df_short$effect_grouped) + +table(str_df_short$pe_effect_outcome) + +##################################################################### +# Chimera: for colouring +#################################################################### + +#------------------------------------- +# get df with unique position +#-------------------------------------- +#data[!duplicated(data$x), ] +str_df_plot = str_df_short[!duplicated(str_df[[pos_colname]]),] + +if (nrow(str_df_plot) == length(unique(str_df[[pos_colname]]))){ + cat("\nPASS: successfully extracted df with unique positions") +}else{ + stop("\nAbort: Could not extract df with unique positions") +} + +#------------------------------------- +# generate colours for effect types +#-------------------------------------- +str_df_plot_cols = str_df_plot[, c(pos_colname, + "sensitivity", + "pe_outcome", + "effect_grouped", + "pe_effect_outcome")] +head(str_df_plot_cols) + +# colour intensity based on sign +#str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light") +str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$pe_outcome=="DD", "bright", "light") + +table(str_df_plot_cols$colour_hue); table(str_df_plot$pe_outcome) +head(str_df_plot_cols) + +# colour based on effect +table(str_df_plot_cols$pe_effect_outcome) + +# colors = c("#ffd700" #gold +# , "#f0e68c" #khaki +# , "#da70d6"# orchid +# , "#ff1493"# deeppink +# , "#a0522d" #sienna +# , "#d2b48c" # tan +# , "#00BFC4" #, "#007d85" #blue +# , "#F8766D" )# red + +pe_colour_map = c("DD_lig" = "#ffd700" # gold + , "SS_lig" = "#f0e68c" # khaki + + , "DD_nucleic_acid"= "#a0522d" # sienna + , "SS_nucleic_acid"= "#d2b48c" # tan + + , "DD_ppi2" = "#da70d6" # orchid + , "SS_ppi2" = "#ff1493" # deeppink + + , "DD_stability" = "#f8766d" # red + , "SS_stability" = "#00BFC4") # blue + +#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE) + +#map the colours +str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$pe_effect_outcome + ,function(x){pe_colour_map[[x]]} + )) +head(str_df_plot_cols$colour_map) +table(str_df_plot_cols$colour_map) +table(str_df_plot_cols$pe_effect_outcome) + +# str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue +# , "_" +# , str_df_plot_cols$colour_map) +# head(str_df_plot_cols$colours) +# table(str_df_plot_cols$colours) +# +# +# class(str_df_plot_cols$colour_map) +# str(str_df_plot_cols) + +# sort by colour +head(str_df_plot_cols) +str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ] +head(str_df_plot_cols) + +#====================================== +# write file with prominent effects +#====================================== +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv")) + +################################ +# printing for chimera +############################### +chain_suffix = ".A" +str_df_plot_cols$pos_chain = paste0(str_df_plot_cols[[pos_colname]], chain_suffix) +table(str_df_plot_cols$colour_map) +table(str_df_plot_cols$pe_effect_outcome) + +#=================================================== +#------------------- +# Ligand Affinity +#------------------- +# -ve Lig Aff +dd_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_lig",] +if (nrow(dd_lig) == table(str_df_plot_cols$pe_effect_outcome)[['DD_lig']]){ + dd_lig_pos = dd_lig[[pos_colname]] +}else{ + stop("Abort: DD affinity colour numbers mismtatch") +} + +# +ve Lig Aff +ss_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_lig",] +if (!empty(ss_lig)){ + if (nrow(ss_lig) == table(str_df_plot_cols$pe_effect_outcome)[['SS_lig']]){ + ss_lig_pos = ss_lig[[pos_colname]] + }else{ + stop("Abort: SS affinity colour numbers mismtatch") + } + #put in chimera cmd + paste0(dd_lig_pos, chain_suffix) + paste0(ss_lig_pos, chain_suffix) +} + + + + +#=================================================== +#------------------- +# PPI2 Affinity +#------------------- +# -ve PPI2 +dd_ppi2 = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_ppi2",] +if (nrow(dd_ppi2) == table(str_df_plot_cols$pe_effect_outcome)[['DD_ppi2']]){ + dd_ppi2_pos = dd_ppi2[[pos_colname]] +}else{ + stop("Abort: DD PPI2 colour numbers mismtatch") +} + +# +ve PPI2 +ss_ppi2 = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_ppi2",] +if (nrow(ss_ppi2) == table(str_df_plot_cols$pe_effect_outcome)[['SS_ppi2']]){ + ss_ppi2_pos = ss_ppi2[[pos_colname]] +}else{ + stop("Abort: SS PPI2 colour numbers mismtatch") +} + +#put in chimera cmd +paste0(dd_ppi2_pos,chain_suffix) +paste0(ss_ppi2_pos,chain_suffix) + +#========================================================= +#------------------------ +# Stability +#------------------------ +# -ve Stability +dd_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_stability",] +if (nrow(dd_stability) == table(str_df_plot_cols$pe_effect_outcome)[['DD_stability']]){ + dd_stability_pos = dd_stability[[pos_colname]] +}else{ + stop("Abort: DD Stability colour numbers mismtatch") +} + +# +ve Stability +ss_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_stability",] +if (nrow(ss_stability) == table(str_df_plot_cols$pe_effect_outcome)[['SS_stability']]){ + ss_stability_pos = ss_stability[[pos_colname]] +}else{ + stop("Abort: SS Stability colour numbers mismtatch") +} + +#put in chimera cmd +# stabiliting first as it has less numbers +paste0(ss_stability_pos, chain_suffix) +paste0(dd_stability_pos, chain_suffix) +#################################################################### + diff --git a/scripts/plotting/plotting_thesis/katg/sensitivity_count_katg.R b/scripts/plotting/plotting_thesis/katg/sensitivity_count_katg.R new file mode 100644 index 0000000..66caef2 --- /dev/null +++ b/scripts/plotting/plotting_thesis/katg/sensitivity_count_katg.R @@ -0,0 +1,65 @@ +#========================= +# Count Sensitivity +# Mutations and positions +#========================= +pos_colname_c ="position" + +sensP_df = merged_df3[,c("mutationinformation", + #"position", + pos_colname_c, + "sensitivity")] + +head(sensP_df) +table(sensP_df$sensitivity) + +#--------------- +# Total unique positions +#---------------- +tot_mut_pos = length(unique(sensP_df[[pos_colname_c]])) +cat("\nNo of Tot muts sites:", tot_mut_pos) + +# resistant mut pos +sens_site_allR = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="R"] +sens_site_UR = unique(sens_site_allR) +length(sens_site_UR) + +# Sensitive mut pos +sens_site_allS = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="S"] +sens_site_US = unique(sens_site_allS) +length(sens_site_UR) + +#--------------- +# Common Sites +#---------------- +common_pos = intersect(sens_site_UR,sens_site_US) +site_Cc = length(common_pos) +cat("\nNo of Common sites:", site_Cc + , "\nThese are:", common_pos) + +#--------------- +# Resistant muts +#---------------- +site_R = sens_site_UR[!sens_site_UR%in%common_pos] +site_Rc = length(site_R) + +if ( length(sens_site_allR) == table(sensP_df$sensitivity)[['R']] ){ + cat("\nNo of R muts:", length(sens_site_allR) + , "\nNo. of R sites:",site_Rc + , "\nThese are:", site_R +) +} + +#--------------- +# Sensitive muts +#---------------- +site_S = sens_site_US[!sens_site_US%in%common_pos] +site_Sc = length(site_S) + +if ( length(sens_site_allS) == table(sensP_df$sensitivity)[['S']] ){ + cat("\nNo of S muts:", length(sens_site_allS) + , "\nNo. of S sites:", site_Sc + , "\nThese are:", site_S) +} + +######################### + diff --git a/scripts/plotting/plotting_thesis/linage_bp_dist.R b/scripts/plotting/plotting_thesis/linage_bp_dist.R index bfe6118..dde9ed0 100644 --- a/scripts/plotting/plotting_thesis/linage_bp_dist.R +++ b/scripts/plotting/plotting_thesis/linage_bp_dist.R @@ -34,7 +34,12 @@ if ( all(table(nsample_lin$sensitivity)== table(nsample_lin$mutation_info_labels ################################################### # Lineage barplots # ################################################### - +my_xats = 8 # x axis text size # were 25 +my_yats = 8# y axis text sized_lab_size +my_xals = 8 # x axis label size +my_yals = 8 # y axis label size +my_lls = 8 # legend label size +d_lab_size = 2.3 #=============================== # lineage sample and SNP count #=============================== @@ -46,13 +51,13 @@ lin_countP = lin_count_bp(lf_data = lineage_dfL[['lin_lf']] , bar_fill_categ = "count_categ" , display_label_col = "p_count" , bar_stat_stype = "identity" - , d_lab_size = 8 + , d_lab_size = d_lab_size , d_lab_col = "black" - , my_xats = 25 # x axis text size - , my_yats = 25 # y axis text sized_lab_size - , my_xals = 25 # x axis label size - , my_yals = 25 # y axis label size - , my_lls = 25 # legend label size + , my_xats = my_xats # x axis text size + , my_yats = my_yats # y axis text sized_lab_size + , my_xals = my_xals # x axis label size + , my_yals = my_yals # y axis label size + , my_lls = my_lls # legend label size , bar_col_labels = c("SNPs", "Total Samples") , bar_col_values = c("grey50", "gray75") , bar_leg_name = "" @@ -73,12 +78,12 @@ lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']] , display_label_col = "snp_diversity_f" , bar_stat_stype = "identity" , x_lab_angle = 90 - , d_lab_size =9 - , my_xats = 25 # x axis text size - , my_yats = 25 # y axis text size - , my_xals = 25 # x axis label size - , my_yals = 25 # y axis label size - , my_lls = 25 # legend label size + , d_lab_size = d_lab_size + , my_xats = my_xats # x axis text size + , my_yats = my_yats # y axis text sized_lab_size + , my_xals = my_xals # x axis label size + , my_yals = my_yals # y axis label size + , my_lls = my_lls # legend label size , y_log10 = F , y_scale_percent = F , leg_location = "top" @@ -86,28 +91,9 @@ lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']] , bp_plot_title = "nsSNP diversity" , title_colour = "black" #"chocolate4" , subtitle_text = NULL - , sts = 20 + , sts = 10 , subtitle_colour = "#350E20FF") lin_diversityP -#============================================= -# Output plots: Lineage count and Diversity -#============================================= -# lineage_bp_CL = paste0(outdir_images -# ,tolower(gene) -# ,"_lineage_bp_CL_Tall.svg") -# -# cat("Lineage barplots:", lineage_bp_CL) -# svg(lineage_bp_CL, width = 8, height = 15) -# -# cowplot::plot_grid(lin_countP, lin_diversityP -# #, labels = c("(a)", "(b)", "(c)", "(d)") -# , nrow = 2 -# # , ncols = 2 -# , labels = "AUTO" -# , label_size = 25) -# -# dev.off() -######################################################################## ################################################### @@ -119,6 +105,13 @@ lin_diversityP # , "foldx_scaled" # , "avg_stability_scaled") +my_ats = 8 # x axis text size # were 25 +my_als = 8# y axis text sized_lab_size +my_leg_ts = 8 # x axis label size +my_leg_title = 8 # y axis label size +my_strip_ts = 8 # + + my_xlabel = paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel #plotdf = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),] @@ -133,11 +126,11 @@ linP_dm_om = lineage_distP(merged_df2 , fill_categ_cols = c("red", "blue") , label_categories = c("Resistant", "Sensitive") , leg_label = "Mutation group" - , my_ats = 22 # axis text size - , my_als = 22 # axis label size - , my_leg_ts = 22 - , my_leg_title = 22 - , my_strip_ts = 22 + , my_ats = my_ats # axis text size + , my_als = my_als # axis label size + , my_leg_ts = my_leg_ts + , my_leg_title = my_leg_title + , my_strip_ts = my_strip_ts , alpha = 0.56 ) diff --git a/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R b/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R index 00f3adc..aae8f3d 100644 --- a/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R +++ b/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R @@ -1,23 +1,47 @@ #!/usr/bin/env Rscript - +source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/linage_bp_dist.R") ########################################### # TASK: generate plots for lineage # Individual plots in #lineage_bp_both.R #linage_dist_ens_stability.R ########################################### -my_label_size = 25 +# svg +# my_label_size = 12 +# linPlots_combined = paste0(outdir_images +# , tolower(gene) +# ,"_linP_combined.svg") +# +# cat("\nOutput plot:", linPlots_combined) +# svg(linPlots_combined, width = 18, height = 12) +# +# cowplot::plot_grid( +# cowplot::plot_grid(lin_countP, lin_diversityP +# , nrow = 2 +# , rel_heights = c(1.2,1) +# , labels = "AUTO" +# , label_size = my_label_size), +# NULL, +# linP_dm_om, +# nrow = 1, +# labels = c("", "", "C"), +# label_size = my_label_size, +# rel_widths = c(35, 3, 52) +# ) +# dev.off() +# png linPlots_combined = paste0(outdir_images - , tolower(gene) - ,"_linP_combined.svg") + , tolower(gene) + ,"_linP_combined.png") cat("\nOutput plot:", linPlots_combined) -svg(linPlots_combined, width = 18, height = 12) +png(linPlots_combined, width = 9, height = 6, units = "in" ,res = 300) cowplot::plot_grid( cowplot::plot_grid(lin_countP, lin_diversityP , nrow = 2 + , rel_heights = c(1.2,1) , labels = "AUTO" , label_size = my_label_size), NULL, @@ -27,4 +51,4 @@ cowplot::plot_grid( label_size = my_label_size, rel_widths = c(35, 3, 52) ) -dev.off() \ No newline at end of file +dev.off() diff --git a/scripts/plotting/plotting_thesis/pe_sens_site_count.R b/scripts/plotting/plotting_thesis/pe_sens_site_count.R new file mode 100644 index 0000000..3bba656 --- /dev/null +++ b/scripts/plotting/plotting_thesis/pe_sens_site_count.R @@ -0,0 +1,78 @@ +############################################################## +# PE count +############################################################## +rects <- data.frame(x = 1:6, + colors = c("#ffd700" #gold + , "#f0e68c" #khaki + , "#da70d6"# orchid + , "#ff1493"# deeppink + , "#00BFC4" #, "#007d85" #blue + , "#F8766D" )# red, +) +rects + +rects$text = c("-ve Lig" + , "+ve Lig" + , "+ve PPI2" + , "-ve PPI2" + , "+ve stability" + , "-ve stability") + +# FOR EMBB ONLY +rects$numbers = c(38, 0, 22, 9, 108, 681) +rects$num_labels = paste0("n=", rects$numbers) + +rects + +#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text + +peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) + + geom_tile(width = 1, height = 1) + # make square tiles + geom_text(color = "black", size = 1.7) + # add white text in the middle + scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame + coord_fixed() + # make sure tiles are square + coord_flip()+ scale_x_reverse() + + # theme_void() # remove any axis markings + theme_nothing() # remove any axis markings +peP + +peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) + + geom_tile() + # make square tiles + geom_text(color = "black", size = 1.6) + # add white text in the middle + scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame + coord_fixed() + # make sure tiles are square + theme_nothing() # remove any axis markings +peP2 + + +######################################################## +# MANUAL process +#=============================== +# Sensitivity count: Site +#============================== +table(df3$sensitivity) +#-------- +# embb +#-------- +#rsc = 54 +#ccc = 46 +#ssc = 470 + +rect_rs_siteC =data.frame(mutation_class=c("A_Resistant sites" + , "B_Common sites" + , "C_Sensitive sites"), + tile_colour =c("red", + "purple", + "blue"), + numbers = c(rsc, ccc, ssc), + order = c(1, 2, 3)) + +rect_rs_siteC$labels = paste0(rect_rs_siteC$mutation_class, "\nn=", rect_rs_siteC$ numbers) + +sens_siteP = ggplot(rect_rs_siteC, aes(mutation_class, y = 0, + fill = tile_colour, + label = paste0("n=", numbers))) + + geom_tile(width = 1, height = 1) + + geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + + theme_nothing() +sens_siteP diff --git a/scripts/plotting/plotting_thesis/prominent_effects.R b/scripts/plotting/plotting_thesis/prominent_effects.R deleted file mode 100644 index ad03b14..0000000 --- a/scripts/plotting/plotting_thesis/prominent_effects.R +++ /dev/null @@ -1,405 +0,0 @@ -#!/usr/bin/env Rscript -#source("~/git/LSHTM_analysis/config/alr.R") -source("~/git/LSHTM_analysis/config/embb.R") -#source("~/git/LSHTM_analysis/config/katg.R") -#source("~/git/LSHTM_analysis/config/gid.R") -#source("~/git/LSHTM_analysis/config/pnca.R") -#source("~/git/LSHTM_analysis/config/rpob.R") - -# get plotting dfs -source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") - -source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") - -length(all_stability_cols); length(raw_stability_cols) -length(scaled_stability_cols); length(outcome_stability_cols) -length(affinity_dist_colnames) - -static_cols = c("mutationinformation", "position", "sensitivity") -other_cols_all = c(scaled_stability_cols, scaled_affinity_cols, affinity_dist_colnames) - -#omit avg cols and foldx_scaled_signC cols -other_cols = other_cols_all[grep("avg", other_cols_all, invert = T)] -other_cols = other_cols[grep("foldx_scaled_signC",other_cols, invert = T )] -other_cols - -cols_to_extract = c(static_cols, other_cols) -expected_ncols = length(static_cols) + length(other_cols) - -str_df = merged_df3[, cols_to_extract] - -if (ncol(str_df) == expected_ncols){ - cat("\nPASS: successfully extracted cols for calculating prominent effects") -}else{ - stop("\nAbort: Could not extract cols for calculating prominent effects") -} - -#========================= -# Masking affinity columns -#========================= -# First make values for affinity cols 0 when their corresponding dist >10 -head(str_df) - -# replace in place affinity values >10 -str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0 -str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0 - -#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10 - -if (tolower(gene)%in%geneL_ppi2){ - str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0 -} - -# na gene: replace in place na affinity values where na dist >10 -if (tolower(gene)%in%geneL_na){ - str_df[str_df["XXXX"]>10,"mcsm_na_scaled"]=0 -} - -colnames(str_df) - -head(str_df) - -# get names of cols to calculate the prominent effects from -scaled_cols_tc = c("duet_scaled", "deepddg_scaled" - , "ddg_dynamut2_scaled", "foldx_scaled","affinity_scaled" - , "mmcsm_lig_scaled" , "mcsm_ppi2_scaled") - -#-------------------------------- -#get rowmax for absolute values -#-------------------------------- -#str_df$row_maximum = apply(str_df[,-1], 1, function(x){max(abs(x))}) -#str_df$row_maximum = apply(str_df[,scaled_cols_tc], 1, function(x){max(abs(x))}) - -#correct -#BOO= abs(str_df[,scaled_cols_tc]) == str_df[,'row_maximum']; head(BOO) -#also corr but weird -#POO = which(abs(str_df[,scaled_cols_tc]) == str_df[,'row_maximum'], arr.ind =T); head(POO) - -################################################ -# #------------- -# # short df: try -# #------------- -# df2_short = str_df[str_df$position%in%c(167, 423, 427),] -# df2_short = str_df[str_df$position%in%c(170, 167, 493, 453, 435, 433, 480, 456, 445),] -# df2_short = str_df[str_df$position%in%c(435, 480),] -# -# -# give_col=function(x,y,df=df2_short){ -# df[df$position==x,y] -# } -# -# for (i in unique(df2_short$position) ){ -# print(i) -# #print(paste0("\nNo. of unique positions:", length(unique(df2$position))) ) -# #cat(length(unique(df2$position))) -# #df2_short[df2_short$position==i,scaled_cols_tc] -# -# biggest = max(abs(give_col(i,scaled_cols_tc))) -# -# df2_short[df2_short$position==i,'abs_max_effect'] = biggest -# df2_short[df2_short$position==i,'effect_type']= names( -# give_col(i,scaled_cols_tc)[which( -# abs( -# give_col(i,scaled_cols_tc) -# ) == biggest, arr.ind=T -# )[, "col"]]) -# -# effect_name = df2_short[df2_short$position==i,'effect_type'][1] # pick first one in case we have multiple exact values -# -# # get index/rowname for value of max effect, and then use it to get the original sign -# # here -# #df2_short[df2_short$position==i,c(effect_name)] -# #which(abs(df2_short[df2_short$position==i,c('position',effect_name)][effect_name])==biggest, arr.ind=T) -# ind = rownames(which(abs(df2_short[df2_short$position==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T)) -# df2_short[df2_short$position==i,'effect_sign'] = sign(df2_short[effect_name][ind,]) -# } -# -# # ends with suffix 2 if dups -# df2_short$effect_type = sub("\\.[0-9]+", "", df2_short$effect_type) # cull duplicate effect types that happen when there are exact duplicate values -# -# View(df2_short) - - - -#=============== -# whole df -#=============== -give_col=function(x,y,df=str_df){ - df[df$position==x,y] -} - -for (i in unique(str_df$position) ){ - print(i) - #cat(length(unique(str_df$position))) - - biggest = max(abs(give_col(i,scaled_cols_tc))) - - str_df[str_df$position==i,'abs_max_effect'] = biggest - str_df[str_df$position==i,'effect_type']= names( - give_col(i,scaled_cols_tc)[which( - abs( - give_col(i,scaled_cols_tc) - ) == biggest, arr.ind=T - )[, "col"]])[1] - - effect_name = unique(str_df[str_df$position==i,'effect_type'])#[1] # pick first one in case we have multiple exact values - - # get index/rowname for value of max effect, and then use it to get the original sign - # here - ind = rownames(which(abs(str_df[str_df$position==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T)) - str_df[str_df$position==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1] -} - -# ends with suffix 2 if dups -str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values - -colnames(str_df) - -# check -str_df_check = str_df[str_df$position%in%c(24, 32,160, 303, 334),] -table(str_df$effect_type) - -#================ -# for Plots -#================ -str_df_short = str_df[, c("mutationinformation","position","sensitivity" - , "effect_type" - , "effect_sign")] - -table(str_df_short$effect_type) -table(str_df_short$effect_sign) -str(str_df_short) - -# assign pe outcome -str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS") -table(str_df_short$pe_outcome ) -table(str_df_short$effect_sign) - -#============== -# group effect type: -# lig, ppi2, nuc. acid, stability -#============== - -affcols = c("affinity_scaled", "mmcsm_lig_scaled") -ppi2_cols = c("mcsm_ppi2_scaled") -#nuc_na_cols = c("mcsm_a_scaled") - - -#lig -table(str_df_short$effect_type) -str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols - , "lig" - , str_df_short$effect_type) -table(str_df_short$effect_grouped) - -#ppi2 -str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols - , "ppi2" - , str_df_short$effect_grouped) -table(str_df_short$effect_grouped) - -#stability -str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig", "ppi2") - , "stability" - , str_df_short$effect_grouped) - -table(str_df_short$effect_grouped) - -# create a sign as well -str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_" - , str_df_short$effect_grouped) - -table(str_df_short$pe_effect_outcome) - - - -##################################################################### -# Chimera: for colouring -#################################################################### - -#------------------------------------- -# get df with unique position -#-------------------------------------- -#data[!duplicated(data$x), ] -str_df_plot = str_df[!duplicated(str_df$position),] - -if (nrow(str_df_plot) == length(unique(str_df$position))){ - cat("\nPASS: successfully extracted df with unique positions") -}else{ - stop("\nAbort: Could not extract df with unique positions") -} - -#------------------------------------- -# generate colours for effect types -#-------------------------------------- -str_df_plot_cols = str_df_plot[, c("position", "sensitivity" - , affinity_dist_colnames - , "abs_max_effect" - , "effect_type" - , "effect_sign")] -head(str_df_plot_cols) - -# colour intensity based on sign -str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light") -table(str_df_plot_cols$colour_hue) -head(str_df_plot_cols) -# colour based on effect -table(str_df_plot_cols$effect_type) - -pe_colour_map = c("affinity_scaled" = "salmon" - , "mmcsm_lig_scaled" = "salmon" - , "mcsm_ppi2_scaled" = "pink" - , "mcsm_na_scaled" = "orange" - , "duet_scaled" = "dimgray" - , "deepddg_scaled" = "dimgray" - , "ddg_dynamut2_scaled"= "dimgray" - , "foldx_scaled" = "dimgray") - -#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE) - -#map the colours -str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$effect_type - ,function(x){pe_colour_map[[x]]} - )) - -str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue - , "_" - , str_df_plot_cols$colour_map) -head(str_df_plot_cols$colours) -table(str_df_plot_cols$colours) - - -class(str_df_plot_cols$colour_map) -str(str_df_plot_cols) - -# sort by colour -head(str_df_plot_cols) -str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ] -head(str_df_plot_cols) - -#====================================== -# write file with prominent effects -#====================================== -outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") -write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv")) - -################################ -# printing for chimera -############################### -str_df_plot_cols$pos_chain = paste0(str_df_plot_cols$position, ".B,") -table(str_df_plot_cols$colour_map) - -#=================================================== -#------------------- -# Ligand Affinity -#------------------- -foo = str_df_plot_cols[str_df_plot_cols$colours=="yellow",] -all(foo2$effect_sign == 1) - -foo1 = str_df_plot_cols[str_df_plot_cols$colours=="bright_salmon",] -all(foo3$effect_sign == -1) - -#light salmon: stabilising affinity -table(str_df_plot_cols$colours) - -affinity_pos_l = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="light_salmon"] -affinity_pos_lc = paste(affinity_pos_l, collapse = "") -affinity_pos_lc -table(str_df_plot_cols$colours)[["light_salmon"]] - -#bright salmon: DEstabilsing affinity -affinity_pos_b = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="bright_salmon"] -affinity_pos_bc = paste(affinity_pos_b, collapse = "") -affinity_pos_bc -table(str_df_plot_cols$colours)[["bright_salmon"]] - -c1 = length(affinity_pos_l) + length(affinity_pos_b) == table(str_df_plot_cols$colour_map)[["salmon"]] - -if (c1){ - cat("PASS: affinity colour numbers checked") -}else{ - stop("Abort: affinity colour numbers mismtatch") -} - -#put in chimera cmd -affinity_pos_lc -affinity_pos_bc - -#=================================================== -#------------------- -# ppi2 Affinity -#------------------- -foo2 = str_df_plot_cols[str_df_plot_cols$colours=="light_pink",] -all(foo2$effect_sign == 1) - -foo3 = str_df_plot_cols[str_df_plot_cols$colours=="bright_pink",] -all(foo3$effect_sign == -1) - -#light_pink: stabilising affinity -table(str_df_plot_cols$colours) - -ppi2_pos_l = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="light_pink"] -ppi2_pos_lc = paste(ppi2_pos_l, collapse = "") -ppi2_pos_lc -table(str_df_plot_cols$colours)[["light_pink"]] - -#bright pink: DEstabilsing affinity -ppi2_pos_b = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="bright_pink"] -ppi2_pos_bc = paste(ppi2_pos_b, collapse = "") -ppi2_pos_bc -table(str_df_plot_cols$colours)[["bright_pink"]] - -c2 = length(ppi2_pos_l) + length(ppi2_pos_b) == table(str_df_plot_cols$colour_map)[["pink"]] - -if (c2){ - cat("PASS: ppi2 colour numbers checked") -}else{ - stop("Abort: ppi2 colour numbers mismtatch") -} - -#put in chimera cmd -ppi2_pos_lc -ppi2_pos_bc - -#========================================================= -#------------------- -# Stability -#------------------- -foo4 = str_df_plot_cols[str_df_plot_cols$colours=="light_dimgray",] -all(foo2$effect_sign == 1) - -foo5 = str_df_plot_cols[str_df_plot_cols$colours=="bright_dimgray",] -all(foo3$effect_sign == -1) - -#light_dimgray: stabilising Stability -table(str_df_plot_cols$colours) - -stab_pos_l = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="light_dimgray"] -stab_pos_lc = paste(stab_pos_l, collapse = "") -stab_pos_lc -table(str_df_plot_cols$colours)[["light_dimgray"]] - -#bright_dimgray pink: DEstabilsing Stability -stab_pos_b = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="bright_dimgray"] -stab_pos_bc = paste(stab_pos_b, collapse = "") -stab_pos_bc -table(str_df_plot_cols$colours)[["bright_dimgray"]] - -c3 = length(stab_pos_l) + length(stab_pos_b) == table(str_df_plot_cols$colour_map)[["dimgray"]] - -if (c3){ - cat("PASS: stability colour numbers checked") -}else{ - stop("Abort: stability colour numbers mismtatch") -} - -#put in chimera cmd -stab_pos_lc -stab_pos_bc - - -# stab tool count -table(str_df_plot_cols$effect_type) - -table(str_df_plot_cols$effect_type, str_df_plot_cols$effect_sign) - diff --git a/scripts/plotting/plotting_thesis/rpob/basic_barplots_layout_rpob.R b/scripts/plotting/plotting_thesis/rpob/basic_barplots_layout_rpob.R new file mode 100644 index 0000000..43080ff --- /dev/null +++ b/scripts/plotting/plotting_thesis/rpob/basic_barplots_layout_rpob.R @@ -0,0 +1,348 @@ +#source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pe_sens_site_count_rpob.R") + +if ( tolower(gene)%in%c("rpob") ){ + cat("\nPlots available for layout are:") + + duetP + foldxP + deepddgP + dynamut2P + proveanP + snap2P + + mLigP + mmLigP + posC_lig + + ppi2P + posC_ppi2 + + nca_distP + posC_nca + + peP2 + sens_siteP + peP # not used + sensP # not used +} + + +#======================== +# Common title settings +#========================= +theme_georgia <- function(...) { + theme_gray(base_family = "sans", ...) + + theme(plot.title = element_text(face = "bold")) +} +title_theme <- calc_element("plot.title", theme_georgia()) + +############################################################### +common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol) + +# extract common legends +# lig affinity +common_legend_outcome = get_legend(mLigP + + guides(color = guide_legend(nrow = 1)) + + theme(legend.position = "top")) + +# stability +common_legend_outcome = get_legend(duetP + + guides(color = guide_legend(nrow = 1)) + + theme(legend.position = "top")) +# conservation +cons_common_legend_outcome = get_legend(snap2P + + guides(color = guide_legend(nrow = 1)) + + theme(legend.position = "top")) +################################################################### +#================================== +# Stability+Conservation: COMBINE +#================================== +tt_size = 10 +#---------------------------- +# stability and consv title +#---------------------------- +tt_stab = ggdraw() + + draw_label( + paste0("Stability outcome"), + fontfamily = title_theme$family, + fontface = title_theme$face, + #size = title_theme$size + size = tt_size + ) + +tt_cons = ggdraw() + + draw_label( + paste0("Conservation outcome"), + fontfamily = title_theme$family, + fontface = title_theme$face, + size = tt_size + ) + +#---------------------- +# Output plot +#----------------------- +stab_cons_CLP = paste0(outdir_images + ,tolower(gene) + ,"_stab_cons_BP_CLP.png") + +print(paste0("plot filename:", stab_cons_CLP)) +png(stab_cons_CLP, units = "in", width = 10, height = 5, res = 300 ) + +cowplot::plot_grid( + cowplot::plot_grid( + cowplot::plot_grid( + tt_stab, + common_legend_outcome, + nrow = 2 + ), + cowplot::plot_grid( + duetP, + foldxP, + deepddgP, + dynamut2P, + nrow = 1, + labels = c("A", "B", "C", "D"), + label_size = 12), + nrow = 2, + rel_heights=c(1,10) + ), + NULL, + cowplot::plot_grid( + cowplot::plot_grid( + cowplot::plot_grid( + tt_cons, + cons_common_legend_outcome, + nrow = 2 + ), + cowplot::plot_grid( + proveanP, + snap2P, + nrow=1, + labels = c("E", "F"), + align = "hv"), + nrow = 2, + rel_heights = c(1, 10), + label_size = 12), + nrow=1 + ), + rel_widths = c(2,0.15,1), + nrow=1 +) + +dev.off() + +################################################################# +#======================================= +# Affinity barplots: COMBINE ALL four +#======================================== +ligT = paste0(common_bp_title, " ligand") +lig_affT = ggdraw() + + draw_label( + ligT, + fontfamily = title_theme$family, + fontface = title_theme$face, + #size = title_theme$size + size = 8 + ) + +p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT + , common_legend_outcome + , nrow=2), + cowplot::plot_grid(mLigP, mmLigP, posC_lig + , nrow = 1 + , rel_widths = c(1,0.65,1.8) + , align = "h"), + nrow = 2, + rel_heights = c(1,8) + +) +p1 +########################################################### +ncaT = paste0(common_bp_title, " Nucleic Acid") +nca_affT = ggdraw() + + draw_label( + ncaT, + fontfamily = title_theme$family, + fontface = title_theme$face, + #size = title_theme$size + size = 8 + ) + +p2 = cowplot::plot_grid(cowplot::plot_grid(nca_affT + , common_legend_outcome + , nrow=2), + cowplot::plot_grid(nca_distP, posC_nca + , nrow = 1 + , rel_widths = c(1,1.8) + , align = "h"), + nrow = 2, + rel_heights = c(1,8) +) +p2 +########################################################### +ppi2T = paste0(common_bp_title, " PP-interface") +ppi2_affT = ggdraw() + + draw_label( + ppi2T, + fontfamily = title_theme$family, + fontface = title_theme$face, + size = 8 + ) + +p3 = cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome, nrow=2), + cowplot::plot_grid(ppi2P, posC_ppi2 + , nrow = 1 + , rel_widths = c(1,1.9) + , align = "h"), + nrow = 2, + rel_heights = c(1,8) +) +p3 + +#### Combine p1+p2+p3 #### +w = 11.79 +h = 3.5 +mut_impact_CLP = paste0(outdir_images + ,tolower(gene) + ,"_mut_impactCLP.png") + +#svg(affP, width = 20, height = 5.5) +print(paste0("plot filename:", mut_impact_CLP)) +png(mut_impact_CLP, units = "in", width = w, height = h, res = 300 ) + +cowplot::plot_grid(p1, + p2, + p3, + #p4, + nrow = 1, + labels = "AUTO", + label_size = 12, + rel_widths = c(2.5,2,2) + #, rel_heights = c(1) +) + +dev.off() +w = 11.79 +h = 3.5 +mut_impact_CLP = paste0(outdir_images + ,tolower(gene) + ,"_mut_impactCLP.png") + +#svg(affP, width = 20, height = 5.5) +print(paste0("plot filename:", mut_impact_CLP)) +png(mut_impact_CLP, units = "in", width = w, height = h, res = 300 ) + +cowplot::plot_grid(p1, + p2, + p3, + #p4, + nrow = 1, + labels = "AUTO", + label_size = 12, + rel_widths = c(2.5,2,2) + #, rel_heights = c(1) +) + +dev.off() + +#### Combine p4: ALL pos count #### +# PE + All position count +peT_allT = ggdraw() + + draw_label( + paste0("All mutation sites"), + fontfamily = title_theme$family, + fontface = title_theme$face, + #size = title_theme$size + size = 8 + ) + +p4 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2 + , rel_widths = c(1,3),axis = "lr"), + cowplot::plot_grid( + peP2, posC_all, + nrow = 2, + rel_widths = c(1,1), + align = "v", + axis = "lr", + rel_heights = c(1,8) + ), + rel_heights = c(1,18), + nrow = 2,axis = "lr") +p4 + +w = 3.5 +h = 4 +mut_impact_CLP_PE = paste0(outdir_images + ,tolower(gene) + ,"_mut_impactCLP_PE.png") + +print(paste0("plot filename:", mut_impact_CLP_PE)) +png(mut_impact_CLP_PE, units = "in", width = w, height = h, res = 300 ) + +cowplot::plot_grid(p4, + nrow = 1, + labels = c("D"), + label_size = 12#, + #rel_widths = c(2.5,2,2) + #, rel_heights = c(1) +) + +dev.off() + +################################################## +sensP +consurfP +#================= +#### Combine sensitivity + ConSurf #### +# or ConSurf +#================= +w = 3 +h = 3 +# sens_conP = paste0(outdir_images +# ,tolower(gene) +# ,"_sens_cons_CLP.png") +# +# print(paste0("plot filename:", sens_conP)) +# png(sens_conP, units = "in", width = w, height = h, res = 300 ) +# +# cowplot::plot_grid(sensP, consurfP, +# nrow = 2, +# rel_heights = c(1, 1.5) +# ) +# +# dev.off() + +conCLP = paste0(outdir_images + ,tolower(gene) + ,"_consurf_BP.png") + +print(paste0("plot filename:", conCLP)) +png(conCLP, units = "in", width = w, height = h, res = 300 ) +consurfP + +dev.off() +#================================ +# Sensitivity mutation numbers: geom_tile +#================================ +sensCLP = paste0(outdir_images + ,tolower(gene) + ,"_sensN_tile.png") + +print(paste0("plot filename:", sensCLP)) +png(sensCLP, units = "in", width = 1, height = 1, res = 300 ) +sensP +dev.off() +#================================ +# Sensitivity SITE numbers: geom_tile +#================================ +sens_siteCLP = paste0(outdir_images + ,tolower(gene) + ,"_sens_siteC_tile.png") + +print(paste0("plot filename:", sens_siteCLP)) +png(sens_siteCLP, units = "in", width = 1.2, height = 1, res = 300 ) +sens_siteP +dev.off() + +########################################################### + diff --git a/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R b/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R new file mode 100644 index 0000000..769c32c --- /dev/null +++ b/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R @@ -0,0 +1,372 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Barplots +# basic barplots with outcome +# basic barplots with frequency of count of mutations +######################################################### +#============= +# Data: Input +#============== +#source("~/git/LSHTM_analysis/config/pnca.R") +#source("~/git/LSHTM_analysis/config/embb.R") +#source("~/git/LSHTM_analysis/config/gid.R") + +#source("~/git/LSHTM_analysis/config/alr.R") +#source("~/git/LSHTM_analysis/config/katg.R") +#source("~/git/LSHTM_analysis/config/rpob.R") + +#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") +#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above + +#cat("\nSourced plotting cols as well:", length(plotting_cols)) + +#################################################### +class(merged_df3) + +df3 = subset(merged_df3, select = -c(pos_count)) + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +cat("plots will output to:", outdir_images) + +########################################################## +# blue, red bp +sts = 8 +lts = 8 +ats = 8 +als = 8 +ltis = 8 +geom_ls = 2.2 + +#pos_count +subtitle_size = 8 +geom_ls_pc = 2.2 +leg_text_size = 8 +axis_text_size = 8 +axis_label_size = 8 + +########################################################### +#------------------------------ +# plot default sizes +#------------------------------ +#========================= +# Affinity outcome +# check this var: outcome_cols_affinity +# get from preformatting or put in globals +#========================== +DistCutOff +LigDist_colname # = "ligand_distance" # from globals +ppi2Dist_colname +naDist_colname + +########################################################### +# get plotting data within the distance +df3_lig = df3[df3[[LigDist_colname]]DistCutOff,"mCSM-lig"]=0 +corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mmCSM-lig"]=0 + +if (tolower(gene)%in%geneL_ppi2){ + corr_affinity_df[corr_affinity_df["PPI-Dist"]>DistCutOff,"mCSM-PPI2"]=0 +} + +if (tolower(gene)%in%geneL_na){ + corr_affinity_df[corr_affinity_df["NCA-Dist"]>DistCutOff,"mCSM-NA"]=0 +} + +# count 0 +#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100 +unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0) +unmasked_vals + +########################################################## +#================ +# Stability +#================ +corr_ps_colnames = c(static_cols + , "DUET" + , "FoldX" + , "DeepDDG" + , "Dynamut2" + , aff_dist_cols + , "dst_mode") + +corr_df_ps = corr_plotdf[, corr_ps_colnames] + +# Plot #1 +plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability estimates") + +########################################################## +#================ +# Conservation +#================ +corr_conservation_cols = c( static_cols + , "ConSurf" + , "SNAP2" + , "PROVEAN" + #, aff_dist_cols + , "dst_mode" +) + +corr_df_cons = corr_plotdf[, corr_conservation_cols] + +# Plot #2 +plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation estimates") + +########################################################## +#================ +# Affinity: lig, ppi and na as applicable +#================ +#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]10 +head(str_df) + +# replace in place affinity values >10 +str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0 +str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0 + +#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10 +if (tolower(gene)%in%geneL_ppi2){ + str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0 +} + +# na gene: replace in place na affinity values where na dist >10 +if (tolower(gene)%in%geneL_na){ + str_df[str_df["nca_distance"]>10,"mcsm_na_scaled"]=0 +} + +colnames(str_df) +head(str_df) + +# get names of cols to calculate the prominent effects from +# scaled_cols_tc = c("duet_scaled", +# "deepddg_scaled", +# "ddg_dynamut2_scaled", +# "foldx_scaled", +# "affinity_scaled", +# "mmcsm_lig_scaled", +# "mcsm_ppi2_scaled", "mcsm_na_scaled") + +scaled_cols_tc = other_cols[grep("scaled", other_cols)] + +#-------------------------------- +#get rowmax for absolute values +#-------------------------------- +#str_df$row_maximum = apply(str_df[,-1], 1, function(x){max(abs(x))}) +#str_df$row_maximum = apply(str_df[,scaled_cols_tc], 1, function(x){max(abs(x))}) + +#correct +#BOO= abs(str_df[,scaled_cols_tc]) == str_df[,'row_maximum']; head(BOO) +#also corr but weird +#POO = which(abs(str_df[,scaled_cols_tc]) == str_df[,'row_maximum'], arr.ind =T); head(POO) + +################################################ + + +#=============== +# whole df +#=============== +give_col=function(x,y,df=str_df){ + df[df[[pos_colname]]==x,y] +} + +for (i in unique(str_df[[pos_colname]]) ){ + print(i) + #cat(length(unique(str_df[[pos_colname]]))) + + biggest = max(abs(give_col(i,scaled_cols_tc))) + + str_df[str_df[[pos_colname]]==i,'abs_max_effect'] = biggest + str_df[str_df[[pos_colname]]==i,'effect_type']= names( + give_col(i,scaled_cols_tc)[which( + abs( + give_col(i,scaled_cols_tc) + ) == biggest, arr.ind=T + )[, "col"]])[1] + + effect_name = unique(str_df[str_df[[pos_colname]]==i,'effect_type'])#[1] # pick first one in case we have multiple exact values + + # get index/rowname for value of max effect, and then use it to get the original sign + # here + #ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T)) + ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c(pos_colname,effect_name)][effect_name])== biggest, arr.ind=T)) + + str_df[str_df[[pos_colname]]==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1] +} + +# ends with suffix 2 if dups +str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values +colnames(str_df) +table(str_df$effect_type) + +# check +str_df_check = str_df[str_df[[pos_colname]]%in%c(24, 32, 160, 303, 334),] + +#================ +# for Plots +#================ +str_df_short = str_df[, c("mutationinformation", + #"position", + pos_colname, + "sensitivity" + , "effect_type" + , "effect_sign")] + +table(str_df_short$effect_type) +table(str_df_short$effect_sign) +str(str_df_short) + +# assign pe outcome +str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS") +table(str_df_short$pe_outcome ) +table(str_df_short$effect_sign) + +#============== +# group effect type: +# lig, ppi2, nuc. acid, stability +#============== +affcols = c("affinity_scaled", "mmcsm_lig_scaled") +ppi2_cols = c("mcsm_ppi2_scaled") +nuc_na_cols = c("mcsm_na_scaled") + + +#lig +table(str_df_short$effect_type) +str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols + , "lig" + , str_df_short$effect_type) +table(str_df_short$effect_grouped) + +#ppi2 +str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols + , "ppi2" + , str_df_short$effect_grouped) +table(str_df_short$effect_grouped) + + +#na +str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%nuc_na_cols + , "nucleic_acid" + , str_df_short$effect_grouped) +table(str_df_short$effect_grouped) + +#stability +str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig", + "ppi2", + "nucleic_acid") + , "stability" + , str_df_short$effect_grouped) + +table(str_df_short$effect_grouped) + +# create a sign as well +str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_" + , str_df_short$effect_grouped) + +table(str_df_short$pe_effect_outcome) + +##################################################################### +# Chimera: for colouring +#################################################################### + +#------------------------------------- +# get df with unique position +#pos_colname = "position" +if (length(merged_df3[[pos_colname]]) == length(merged_df3$X5uhc_position) ){ + pos_colname = "X5uhc_position" +}else{ + stop("Abort: position colname could not be found") +} +#-------------------------------------- +#data[!duplicated(data$x), ] +str_df_plot = str_df_short[!duplicated(str_df[[pos_colname]]),] + +if (nrow(str_df_plot) == length(unique(str_df[[pos_colname]]))){ + cat("\nPASS: successfully extracted df with unique positions") +}else{ + stop("\nAbort: Could not extract df with unique positions") +} + +#------------------------------------- +# generate colours for effect types +#-------------------------------------- +str_df_plot_cols = str_df_plot[, c(pos_colname, + "sensitivity", + "pe_outcome", + "effect_grouped", + "pe_effect_outcome")] +head(str_df_plot_cols) + +# colour intensity based on sign +#str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light") +str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$pe_outcome=="DD", "bright", "light") + +table(str_df_plot_cols$colour_hue); table(str_df_plot$pe_outcome) +head(str_df_plot_cols) + +# colour based on effect +table(str_df_plot_cols$pe_effect_outcome) + +# colors = c("#ffd700" #gold +# , "#f0e68c" #khaki +# , "#da70d6"# orchid +# , "#ff1493"# deeppink +# , "#a0522d" #sienna +# , "#d2b48c" # tan +# , "#00BFC4" #, "#007d85" #blue +# , "#F8766D" )# red + +pe_colour_map = c("DD_lig" = "#ffd700" # gold + , "SS_lig" = "#f0e68c" # khaki + + , "DD_nucleic_acid"= "#a0522d" # sienna + , "SS_nucleic_acid"= "#d2b48c" # tan + + , "DD_ppi2" = "#da70d6" # orchid + , "SS_ppi2" = "#ff1493" # deeppink + + , "DD_stability" = "#f8766d" # red + , "SS_stability" = "#00BFC4") # blue + +#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE) + +#map the colours +str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$pe_effect_outcome + ,function(x){pe_colour_map[[x]]} + )) +head(str_df_plot_cols$colour_map) +table(str_df_plot_cols$colour_map) +table(str_df_plot_cols$pe_effect_outcome) + +# str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue +# , "_" +# , str_df_plot_cols$colour_map) +# head(str_df_plot_cols$colours) +# table(str_df_plot_cols$colours) +# +# +# class(str_df_plot_cols$colour_map) +# str(str_df_plot_cols) + +# sort by colour +head(str_df_plot_cols) +str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ] +head(str_df_plot_cols) + +#====================================== +# write file with prominent effects +#====================================== +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv")) + +################################ +# printing for chimera +############################### +str_df_plot_cols$pos_chain = paste0(str_df_plot_cols[[pos_colname]], ".C,") +table(str_df_plot_cols$colour_map) +table(str_df_plot_cols$pe_effect_outcome) + +#=================================================== +#------------------- +# Ligand Affinity +#------------------- +# -ve Lig Aff +dd_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_lig",] +if (nrow(dd_lig) == table(str_df_plot_cols$pe_effect_outcome)[['DD_lig']]){ + dd_lig_pos = dd_lig[[pos_colname]] +}else{ + stop("Abort: DD affinity colour numbers mismtatch") +} + +# +ve Lig Aff +ss_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_lig",] +if (nrow(ss_lig) == table(str_df_plot_cols$pe_effect_outcome)[['SS_lig']]){ + ss_lig_pos = ss_lig[[pos_colname]] +}else{ + stop("Abort: SS affinity colour numbers mismtatch") +} + +#put in chimera cmd +paste0(dd_lig_pos, ".C") +paste0(ss_lig_pos, ".C") + +#=================================================== +#------------------------ +# Nucleic Acid Affinity +#------------------------ +# -ve NA aff +dd_nca = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_nucleic_acid",] +if (nrow(dd_nca) == table(str_df_plot_cols$pe_effect_outcome)[['DD_nucleic_acid']]){ + dd_nca_pos = dd_nca[[pos_colname]] +}else{ + stop("Abort: DD nucleic_acid colour numbers mismtatch") +} + +# +ve NA aff +ss_nca = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_nucleic_acid",] +if (nrow(ss_nca) == table(str_df_plot_cols$pe_effect_outcome)[['SS_nucleic_acid']]){ + ss_nca_pos = ss_nca[[pos_colname]] +}else{ + stop("Abort: SS nucleic_acid colour numbers mismtatch") +} + +#put in chimera cmd +paste0(dd_nca_pos, ".C") +paste0(ss_nca_pos, ".C") +#=================================================== +#------------------- +# PPI2 Affinity +#------------------- +# -ve PPI2 +dd_ppi2 = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_ppi2",] +if (nrow(dd_ppi2) == table(str_df_plot_cols$pe_effect_outcome)[['DD_ppi2']]){ + dd_ppi2_pos = dd_ppi2[[pos_colname]] +}else{ + stop("Abort: DD PPI2 colour numbers mismtatch") +} + +# +ve PPI2 +ss_ppi2 = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_ppi2",] +if (nrow(ss_ppi2) == table(str_df_plot_cols$pe_effect_outcome)[['SS_ppi2']]){ + ss_ppi2_pos = ss_ppi2[[pos_colname]] +}else{ + stop("Abort: SS PPI2 colour numbers mismtatch") +} + +#put in chimera cmd +paste0(dd_ppi2_pos, ".C") +paste0(ss_ppi2_pos, ".C") + +#========================================================= +#------------------------ +# Stability +#------------------------ +# -ve Stability +dd_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_stability",] +if (nrow(dd_stability) == table(str_df_plot_cols$pe_effect_outcome)[['DD_stability']]){ + dd_stability_pos = dd_stability[[pos_colname]] +}else{ + stop("Abort: DD Stability colour numbers mismtatch") +} + +# +ve Stability +ss_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_stability",] +if (nrow(ss_stability) == table(str_df_plot_cols$pe_effect_outcome)[['SS_stability']]){ + ss_stability_pos = ss_stability[[pos_colname]] +}else{ + stop("Abort: SS Stability colour numbers mismtatch") +} + +#put in chimera cmd +# stabiliting first as it has less numbers +paste0(ss_stability_pos, ".C") +paste0(dd_stability_pos, ".C") +#################################################################### + diff --git a/scripts/plotting/plotting_thesis/rpob/sensitivity_count_rpob.R b/scripts/plotting/plotting_thesis/rpob/sensitivity_count_rpob.R new file mode 100644 index 0000000..a0fc5dd --- /dev/null +++ b/scripts/plotting/plotting_thesis/rpob/sensitivity_count_rpob.R @@ -0,0 +1,66 @@ +#========================= +# Count Sensitivity +# Mutations and positions +#========================= +pos_colname_c ="X5uhc_position" +#pos_colname_c ="position" + +sensP_df = merged_df3[,c("mutationinformation", + #"position", + pos_colname_c, + "sensitivity")] + +head(sensP_df) +table(sensP_df$sensitivity) + +#--------------- +# Total unique positions +#---------------- +tot_mut_pos = length(unique(sensP_df[[pos_colname_c]])) +cat("\nNo of Tot muts sites:", tot_mut_pos) + +# resistant mut pos +sens_site_allR = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="R"] +sens_site_UR = unique(sens_site_allR) +length(sens_site_UR) + +# Sensitive mut pos +sens_site_allS = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="S"] +sens_site_US = unique(sens_site_allS) +length(sens_site_UR) + +#--------------- +# Common Sites +#---------------- +common_pos = intersect(sens_site_UR,sens_site_US) +site_Cc = length(common_pos) +cat("\nNo of Common sites:", site_Cc + , "\nThese are:", common_pos) + +#--------------- +# Resistant muts +#---------------- +site_R = sens_site_UR[!sens_site_UR%in%common_pos] +site_Rc = length(site_R) + +if ( length(sens_site_allR) == table(sensP_df$sensitivity)[['R']] ){ + cat("\nNo of R muts:", length(sens_site_allR) + , "\nNo. of R sites:",site_Rc + , "\nThese are:", site_R +) +} + +#--------------- +# Sensitive muts +#---------------- +site_S = sens_site_US[!sens_site_US%in%common_pos] +site_Sc = length(site_S) + +if ( length(sens_site_allS) == table(sensP_df$sensitivity)[['S']] ){ + cat("\nNo of S muts:", length(sens_site_allS) + , "\nNo. of S sites:", site_Sc + , "\nThese are:", site_S) +} + +######################### +