added pnca plot dir to generate plots that weren't covered in the paper

2022-09-05 14:02:04 +01:00 · 2022-09-05 14:02:04 +01:00 · 2cec743ae0
commit 2cec743ae0
parent 1dacebbaf6
8 changed files with 1391 additions and 0 deletions
--- a/scripts/plotting/plotting_thesis/pnca/basic_barplots_pnca.R
+++ b/scripts/plotting/plotting_thesis/pnca/basic_barplots_pnca.R
@ -0,0 +1,364 @@
 #!/usr/bin/env Rscript   
 #########################################################
 # TASK: Barplots
 # basic barplots with outcome
 # basic barplots with frequency of count of mutations
 #########################################################
 #=============
 # Data: Input
 #==============
 #source("~/git/LSHTM_analysis/config/pnca.R")
 #source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
 #cat("\nSourced plotting cols as well:", length(plotting_cols))
 ####################################################
 # Show the class of the merged data frame supplied by the calling environment.
 class(merged_df3)
 # Working copy of the data without the pos_count column.
 df3 <- subset(merged_df3, select = -c(pos_count))
 #=======
 # output
 #=======
 # Thesis image directory for the current gene (gene is defined outside this file).
 outdir_images <- paste0(
   "~/git/Writing/thesis/images/results/",
   tolower(gene),
   "/"
 )
 cat("plots will output to:", outdir_images)
 ##########################################################
 # blue, red bp
 # Sizes handed to the stability_count_bp() calls in this file.
 sts <- lts <- ats <- als <- ltis <- 8
 geom_ls <- 2.2
 # Sizes handed to the site_snp_count_bp() calls in this file.
 subtitle_size <- 8
 geom_ls_pc <- 2.2
 leg_text_size <- axis_text_size <- axis_label_size <- 8
 ###########################################################
 #------------------------------
 # plot default sizes
 #------------------------------
 #=========================
 # Affinity outcome
 # check this var: outcome_cols_affinity
 # get from preformatting or put in globals
 #==========================
 # Print the distance settings supplied by the calling environment.
 DistCutOff
 LigDist_colname  # = "ligand_distance" # from globals
 ppi2Dist_colname
 naDist_colname
 ###########################################################
 # Keep only the rows whose distance is below DistCutOff.
 # which() discards rows where the distance is NA; a bare logical index
 # would turn every NA into a row filled with NA values, which would then
 # be carried into the plots.
 df3_lig  <- df3[which(df3[[LigDist_colname]]  < DistCutOff), ]
 df3_ppi2 <- df3[which(df3[[ppi2Dist_colname]] < DistCutOff), ]
 df3_na   <- df3[which(df3[[naDist_colname]]   < DistCutOff), ]
 common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)
 #------------------------------
 # barplot for ligand affinity:
 # <10 Ang of ligand
 #------------------------------
 # Bar plot of the "ligand_outcome" column for the rows of df3_lig, drawn by
 # stability_count_bp() (defined outside this file).
 # leg_title and bp_plot_title are not passed.
 mLigP <- stability_count_bp(
   plotdf = df3_lig,
   df_colname = "ligand_outcome",
   yaxis_title = "Number of nsSNPs",
   leg_position = "none",
   subtitle_text = "mCSM\nLig",
   bar_fill_values = c("#F8766D", "#00BFC4"),
   subtitle_colour = "black",
   sts = sts,
   lts = lts,
   ats = ats,
   als = als,
   ltis = ltis,
   geom_ls = geom_ls
 )
 mLigP
 #------------------------------
 # barplot for ligand affinity:
 # <10 Ang of ligand
 # mmCSM-lig: will be the same no. of sites but the effect will be different
 #------------------------------
 # Bar plot of the "mmcsm_lig_outcome" column for the same df3_lig rows.
 # The y-axis title is blank; leg_title, label_categories and bp_plot_title
 # are not passed.
 mmLigP <- stability_count_bp(
   plotdf = df3_lig,
   df_colname = "mmcsm_lig_outcome",
   yaxis_title = "",
   leg_position = "none",
   subtitle_text = "mmCSM\nLig",
   bar_fill_values = c("#F8766D", "#00BFC4"),
   subtitle_colour = "black",
   sts = sts,
   lts = lts,
   ats = ats,
   als = als,
   ltis = ltis,
   geom_ls = geom_ls
 )
 mmLigP
 #------------------------------
 # barplot for ppi2 affinity
 #  <10 Ang of interface
 #------------------------------
 # Only built for genes listed in geneL_ppi2 (defined outside this file).
 if (tolower(gene) %in% geneL_ppi2) {
   # Bar plot of the "mcsm_ppi2_outcome" column for the rows of df3_ppi2.
   # leg_title, label_categories and bp_plot_title are not passed.
   ppi2P <- stability_count_bp(
     plotdf = df3_ppi2,
     df_colname = "mcsm_ppi2_outcome",
     yaxis_title = "Number of nsSNPs",
     leg_position = "none",
     subtitle_text = "mCSM\nPPI2",
     bar_fill_values = c("#F8766D", "#00BFC4"),
     subtitle_colour = "black",
     sts = sts,
     lts = lts,
     ats = ats,
     als = als,
     ltis = ltis,
     geom_ls = geom_ls
   )
   ppi2P
 }
 #------------------------------
 # barplot for nucleic acid affinity (mCSM-NA)
 # rows of df3_na, i.e. naDist_colname distance below DistCutOff
 #------------------------------
 # Only built for genes listed in geneL_na (defined outside this file).
 if (tolower(gene) %in% geneL_na) {
   # leg_title, label_categories and bp_plot_title are not passed.
   nca_distP <- stability_count_bp(
     plotdf = df3_na,
     df_colname = "mcsm_na_outcome",
     yaxis_title = "Number of nsSNPs",
     leg_position = "none",
     subtitle_text = "mCSM\nNA",
     bar_fill_values = c("#F8766D", "#00BFC4"),
     subtitle_colour = "black",
     sts = sts,
     lts = lts,
     ats = ats,
     als = als,
     ltis = ltis,
     geom_ls = geom_ls
   )
   nca_distP
 }
 #####################################################################
 # ------------------------------
 # bp site site count: mCSM-lig
 # < 10 Ang ligand
 # ------------------------------
 common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)
 # Site/nsSNP count plot for the rows of df3_lig, keyed on the "position"
 # column and drawn by site_snp_count_bp() (defined outside this file).
 posC_lig <- site_snp_count_bp(
   plotdf = df3_lig,
   df_colname = "position",
   xaxis_title = "Number of nsSNPs",
   yaxis_title = "Number of Sites",
   subtitle_colour = "chocolate4",
   subtitle_text = "",
   subtitle_size = subtitle_size,
   geom_ls = geom_ls_pc,
   leg_text_size = leg_text_size,
   axis_text_size = axis_text_size,
   axis_label_size = axis_label_size
 )
 posC_lig
 #------------------------------
 # bp site site count: ppi2
 # < 10 Ang interface
 #------------------------------
 # Only built for genes listed in geneL_ppi2 (defined outside this file).
 if (tolower(gene) %in% geneL_ppi2) {
   # Same site/nsSNP count plot, restricted to the rows of df3_ppi2.
   posC_ppi2 <- site_snp_count_bp(
     plotdf = df3_ppi2,
     df_colname = "position",
     xaxis_title = "Number of nsSNPs",
     yaxis_title = "Number of Sites",
     subtitle_colour = "chocolate4",
     subtitle_text = "",
     subtitle_size = subtitle_size,
     geom_ls = geom_ls_pc,
     leg_text_size = leg_text_size,
     axis_text_size = axis_text_size,
     axis_label_size = axis_label_size
   )
   posC_ppi2
 }
 #------------------------------
 # bp site site count: NCA dist
 # < 10 Ang nca
 #------------------------------
 # Only built for genes listed in geneL_na (defined outside this file).
 if (tolower(gene) %in% geneL_na) {
   # Same site/nsSNP count plot, restricted to the rows of df3_na.
   posC_nca <- site_snp_count_bp(
     plotdf = df3_na,
     df_colname = "position",
     xaxis_title = "Number of nsSNPs",
     yaxis_title = "Number of Sites",
     subtitle_colour = "chocolate4",
     subtitle_text = "",
     subtitle_size = subtitle_size,
     geom_ls = geom_ls_pc,
     leg_text_size = leg_text_size,
     axis_text_size = axis_text_size,
     axis_label_size = axis_label_size
   )
   posC_nca
 }
 #===============================================================
 #------------------------------
 # bp site count: ALL
 # uses the whole of df3 (no distance filter)
 #------------------------------
 posC_all <- site_snp_count_bp(
   plotdf = df3,
   df_colname = "position",
   xaxis_title = "Number of nsSNPs",
   yaxis_title = "Number of Sites",
   subtitle_colour = "chocolate4",
   subtitle_text = "All mutations sites",
   subtitle_size = subtitle_size,
   geom_ls = geom_ls_pc,
   leg_text_size = leg_text_size,
   axis_text_size = axis_text_size,
   axis_label_size = axis_label_size
 )
 posC_all
 ##################################################################
 # Bar plot of the "consurf_outcome" column over all of df3, with the legend
 # on top. leg_title and label_categories are not passed.
 consurfP <- stability_count_bp(
   plotdf = df3,
   df_colname = "consurf_outcome",
   yaxis_title = "Number of nsSNPs",
   leg_position = "top",
   subtitle_text = "ConSurf",
   bar_fill_values = consurf_colours, # from globals
   subtitle_colour = "black",
   sts = sts,
   lts = lts,
   ats = ats,
   als = als,
   ltis = ltis,
   geom_ls = geom_ls
 )
 consurfP
 ##############################################################
 # Larger sizes used by the stability and conservation outcome plots below.
 sts_so <- lts_so <- ats_so <- als_so <- ltis_so <- 10
 geom_ls_so <- 2.5
 #===================
 # Stability
 #===================
 # duetP: bar plot of the "duet_outcome" column over all of df3.
 # label_categories is not passed.
 duetP <- stability_count_bp(
   plotdf = df3,
   df_colname = "duet_outcome",
   leg_title = "mCSM-DUET",
   yaxis_title = "Number of nsSNPs",
   leg_position = "none",
   subtitle_text = "mCSM-DUET",
   bar_fill_values = c("#F8766D", "#00BFC4"),
   subtitle_colour = "black",
   sts = sts_so,
   lts = lts_so,
   ats = ats_so,
   als = als_so,
   ltis = ltis_so,
   geom_ls = geom_ls_so
 )
 duetP
 # foldx
 # Bar plot of the "foldx_outcome" column over all of df3 (blank y-axis title).
 # leg_title, label_categories and subtitle_colour are not passed.
 foldxP <- stability_count_bp(
   plotdf = df3,
   df_colname = "foldx_outcome",
   yaxis_title = "",
   leg_position = "none",
   subtitle_text = "FoldX",
   bar_fill_values = c("#F8766D", "#00BFC4"),
   sts = sts_so,
   lts = lts_so,
   ats = ats_so,
   als = als_so,
   ltis = ltis_so,
   geom_ls = geom_ls_so
 )
 foldxP
 # deepddg
 # Bar plot of the "deepddg_outcome" column over all of df3 (blank y-axis title).
 # leg_title, label_categories and subtitle_colour are not passed.
 deepddgP <- stability_count_bp(
   plotdf = df3,
   df_colname = "deepddg_outcome",
   yaxis_title = "",
   leg_position = "none",
   subtitle_text = "DeepDDG",
   bar_fill_values = c("#F8766D", "#00BFC4"),
   sts = sts_so,
   lts = lts_so,
   ats = ats_so,
   als = als_so,
   ltis = ltis_so,
   geom_ls = geom_ls_so
 )
 deepddgP
 # dynamut2: bar plot of the "ddg_dynamut2_outcome" column over all of df3
 # (blank y-axis title). leg_title, label_categories and subtitle_colour
 # are not passed.
 dynamut2P <- stability_count_bp(
   plotdf = df3,
   df_colname = "ddg_dynamut2_outcome",
   yaxis_title = "",
   leg_position = "none",
   subtitle_text = "Dynamut2",
   bar_fill_values = c("#F8766D", "#00BFC4"),
   sts = sts_so,
   lts = lts_so,
   ats = ats_so,
   als = als_so,
   ltis = ltis_so,
   geom_ls = geom_ls_so
 )
 dynamut2P
 # provean
 # Bar plot of the "provean_outcome" column over all of df3, using the
 # two-tone pink fill. leg_title, label_categories and subtitle_colour
 # are not passed.
 proveanP <- stability_count_bp(
   plotdf = df3,
   df_colname = "provean_outcome",
   yaxis_title = "Number of nsSNPs",
   leg_position = "none", # top
   subtitle_text = "PROVEAN",
   bar_fill_values = c("#D01C8B", "#F1B6DA"), # light pink and deep
   sts = sts_so,
   lts = lts_so,
   ats = ats_so,
   als = als_so,
   ltis = ltis_so,
   geom_ls = geom_ls_so
 )
 proveanP
 # snap2
 # Bar plot of the "snap2_outcome" column over all of df3 (blank y-axis
 # title), using the two-tone pink fill. leg_title, label_categories and
 # subtitle_colour are not passed.
 snap2P <- stability_count_bp(
   plotdf = df3,
   df_colname = "snap2_outcome",
   yaxis_title = "",
   leg_position = "none", # top
   subtitle_text = "SNAP2",
   bar_fill_values = c("#D01C8B", "#F1B6DA"), # light pink and deep
   sts = sts_so,
   lts = lts_so,
   ats = ats_so,
   als = als_so,
   ltis = ltis_so,
   geom_ls = geom_ls_so
 )
 snap2P
 #####################################################################################
--- a/scripts/plotting/plotting_thesis/pnca/basic_barplots_pnca_layout.R
+++ b/scripts/plotting/plotting_thesis/pnca/basic_barplots_pnca_layout.R
@ -0,0 +1,261 @@
 #=============
 # Data: Input
 #==============
 #source("~/git/LSHTM_analysis/config/pnca.R")
 #source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
 # Run the scripts that build the individual plot objects arranged below.
 source(file = "/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pnca/basic_barplots_pnca.R")
 source(file = "/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pnca/pe_sens_site_count_pnca.R")
 # For pncA, reference every plot object the layouts rely on; an object that
 # was not created raises an error here.
 if (tolower(gene) %in% c("pnca")) {
   cat("\nPlots available for layout are:")
   duetP
   foldxP
   deepddgP
   dynamut2P
   proveanP
   snap2P
   mLigP
   mmLigP
   posC_lig
   #ppi2P
   #posC_ppi2
   peP2
   sens_siteP
   peP   # not used
   sensP # not used
 }
 #========================
 # Common title settings
 #=========================
 # Grey theme with the "sans" base family and a bold plot title.
 # Any arguments are forwarded to theme_gray().
 theme_georgia <- function(...) {
   base_theme <- theme_gray(base_family = "sans", ...)
   bold_title <- theme(plot.title = element_text(face = "bold"))
   base_theme + bold_title
 }
 # Resolved plot.title element of that theme; its family and face are
 # reused for the titles drawn with draw_label().
 title_theme <- calc_element("plot.title", theme_georgia())
 ###############################################################
 common_bp_title <- paste0("Sites <", DistCutOff, angstroms_symbol)
 # extract common legends
 # Outcome legend, taken from duetP and used by both the stability layout and
 # the ligand-affinity layout in this script.
 # A legend was previously also extracted from mLigP under this same name,
 # but it was overwritten straight away and never read, so it is no longer
 # computed. NOTE(review): mLigP and duetP use the same two fill colours;
 # confirm their legend labels match before relying on a single legend.
 common_legend_outcome <- get_legend(
   duetP +
     guides(color = guide_legend(nrow = 1)) +
     theme(legend.position = "top")
 )
 # conservation
 cons_common_legend_outcome <- get_legend(
   snap2P +
     guides(color = guide_legend(nrow = 1)) +
     theme(legend.position = "top")
 )
 ###################################################################
 #==================================
 # Stability+Conservation: COMBINE
 #==================================
 # Font size shared by the two section titles.
 tt_size <- 10
 #----------------------------
 # stability and consv title
 #----------------------------
 # Each title is a drawn label using the family and face of title_theme.
 tt_stab <- ggdraw() +
   draw_label(
     "Stability outcome",
     fontfamily = title_theme$family,
     fontface = title_theme$face,
     size = tt_size
   )
 tt_cons <- ggdraw() +
   draw_label(
     "Conservation outcome",
     fontfamily = title_theme$family,
     fontface = title_theme$face,
     size = tt_size
   )
 #----------------------
 # Output plot
 #-----------------------
 # Write the combined stability (A-D) and conservation (E-F) figure to a PNG.
 stab_cons_CLP <- paste0(outdir_images, tolower(gene), "_stab_cons_BP_CLP.png")
 print(paste0("plot filename:", stab_cons_CLP))
 png(stab_cons_CLP, units = "in", width = 10, height = 5, res = 300)
 # print() is explicit so the figure reaches the png device even when this
 # script is run through source(), where top-level values are not auto-printed.
 print(
   cowplot::plot_grid(
     # Left: stability title + legend above the four stability plots.
     cowplot::plot_grid(
       cowplot::plot_grid(
         tt_stab,
         common_legend_outcome,
         nrow = 2
       ),
       cowplot::plot_grid(
         duetP,
         foldxP,
         deepddgP,
         dynamut2P,
         nrow = 1,
         labels = c("A", "B", "C", "D"),
         label_size = 12
       ),
       nrow = 2,
       rel_heights = c(1, 10)
     ),
     # Empty spacer column between the two halves.
     NULL,
     # Right: conservation title + legend above the two conservation plots.
     cowplot::plot_grid(
       cowplot::plot_grid(
         cowplot::plot_grid(
           tt_cons,
           cons_common_legend_outcome,
           nrow = 2
         ),
         cowplot::plot_grid(
           proveanP,
           snap2P,
           nrow = 1,
           labels = c("E", "F"),
           # label_size must sit on the grid that carries the labels; it was
           # previously given to the enclosing grid (which has no labels), so
           # E and F were drawn at the default size instead of matching A-D.
           label_size = 12,
           align = "hv"
         ),
         nrow = 2,
         rel_heights = c(1, 10)
       ),
       nrow = 1
     ),
     rel_widths = c(2, 0.15, 1),
     nrow = 1
   )
 )
 dev.off()
 #################################################################
 #=======================================
 # Affinity barplots: COMBINE ALL four
 #========================================
 # Title text and drawn title for the ligand-affinity panel.
 ligT <- paste0(common_bp_title, " ligand")
 lig_affT <- ggdraw() +
   draw_label(
     ligT,
     fontfamily = title_theme$family,
     fontface = title_theme$face,
     size = 8
   )
 # p1: title + legend row above the two ligand outcome plots and the
 # ligand site-count plot.
 p1 <- cowplot::plot_grid(
   cowplot::plot_grid(
     lig_affT,
     common_legend_outcome,
     nrow = 2
   ),
   cowplot::plot_grid(
     mLigP,
     mmLigP,
     posC_lig,
     nrow = 1,
     rel_widths = c(1, 0.65, 1.8),
     align = "h"
   ),
   nrow = 2,
   rel_heights = c(1, 8)
 )
 p1
 #### Combine p1 ####
 # Write the ligand-affinity panel (p1) to a PNG.
 # This step used to appear twice, verbatim, writing the same file twice;
 # it now runs once.
 w <- 11.79
 h <- 3.5
 mut_impact_CLP <- paste0(outdir_images, tolower(gene), "_mut_impactCLP.png")
 #svg(affP, width = 20, height = 5.5)
 print(paste0("plot filename:", mut_impact_CLP))
 png(mut_impact_CLP, units = "in", width = w, height = h, res = 300)
 # print() is explicit so the figure reaches the png device even when this
 # script is run through source(), where top-level values are not auto-printed.
 print(
   cowplot::plot_grid(
     p1,
     nrow = 1,
     labels = "AUTO",
     label_size = 12,
     rel_widths = c(2.5, 2, 2)
     #, rel_heights = c(1)
   )
 )
 dev.off()
 ##################################################
 # Display the two plots considered for the next figure.
 sensP
 consurfP
 #=================
 #### Combine sensitivity + ConSurf ####
 # or ConSurf
 #=================
 # Width and height (inches) used for the ConSurf PNG.
 w <- h <- 3
 # sens_conP =  paste0(outdir_images
 #                          ,tolower(gene)
 #                          ,"_sens_cons_CLP.png")
 # 
 # print(paste0("plot filename:", sens_conP))
 # png(sens_conP, units = "in", width = w, height = h, res = 300 )
 # 
 # cowplot::plot_grid(sensP, consurfP,
 #                    nrow = 2,
 #                    rel_heights = c(1, 1.5)
 #                    )
 # 
 # dev.off()
 # Each plot below is print()ed explicitly so it reaches the png device even
 # when this script is run through source(), where top-level values are not
 # auto-printed.
 conCLP <- paste0(outdir_images, tolower(gene), "_consurf_BP.png")
 print(paste0("plot filename:", conCLP))
 png(conCLP, units = "in", width = w, height = h, res = 300)
 print(consurfP)
 dev.off()
 #================================
 # Sensitivity mutation numbers: geom_tile
 #================================
 sensCLP <- paste0(outdir_images, tolower(gene), "_sensN_tile.png")
 print(paste0("plot filename:", sensCLP))
 png(sensCLP, units = "in", width = 1, height = 1, res = 300)
 print(sensP)
 dev.off()
 #================================
 # Sensitivity SITE numbers: geom_tile
 #================================
 sens_siteCLP <- paste0(outdir_images, tolower(gene), "_sens_siteC_tile.png")
 print(paste0("plot filename:", sens_siteCLP))
 png(sens_siteCLP, units = "in", width = 1.2, height = 1, res = 300)
 print(sens_siteP)
 dev.off()
 ###########################################################
--- a/scripts/plotting/plotting_thesis/pnca/lineage_bp_dist.R
+++ b/scripts/plotting/plotting_thesis/pnca/lineage_bp_dist.R
@ -0,0 +1,165 @@
 #!/usr/bin/env Rscript  
 #########################################################
 # TASK: Lineage plots [merged_df2]
 # Count
 # Diversity
 # Average stability dist
 # Average affinity dist: optional
 #########################################################
 #=======
 # output
 #=======
 # outdir_images = paste0("~/git/Writing/thesis/images/results/"
 #                        , tolower(gene), "/")
 # cat("plots will output to:", outdir_images)
 #########################################################
 #===============
 #Quick numbers checks
 #===============
 # Samples belonging to lineages L1-L4.
 nsample_lin <- merged_df2[merged_df2$lineage %in% c("L1", "L2", "L3", "L4"), ]
 # Tabulate sensitivity once and reuse it for both checks and the printout.
 sens_counts <- table(nsample_lin$sensitivity)
 # Check 1: per-category counts of sensitivity and mutation_info_labels agree.
 if (all(sens_counts == table(nsample_lin$mutation_info_labels))) {
   cat("\nTotal no. of samples belonging to L1-l4 for", gene, ":", nrow(nsample_lin)
       , "\nCounting R and S samples")
   # Check 2: every sample is counted. table() drops NA, so a shortfall means
   # some samples have no sensitivity value.
   if (sum(sens_counts) == nrow(nsample_lin)) {
     cat("\nPASSNumbers cross checked:")
     print(sens_counts)
   } else {
     # Previously this case produced no output at all.
     warning(
       "sensitivity is missing (NA) for ",
       nrow(nsample_lin) - sum(sens_counts),
       " of the L1-L4 samples",
       call. = FALSE
     )
   }
 } else {
   stop("Abort: Numbers mismatch. Please check")
 }
 ########################################################################
 ###################################################
 #                  Lineage barplots               #
 ###################################################
 # Text sizes passed to the lineage bar plots below (were 25):
 #   my_xats / my_yats : x / y axis text size
 #   my_xals / my_yals : x / y axis label size
 #   my_lls            : legend label size
 my_xats <- my_yats <- my_xals <- my_yals <- my_lls <- 8
 # Size passed as d_lab_size to the bar plots.
 d_lab_size <- 2.3
 #===============================
 # lineage sample and SNP count
 #===============================
 # Bar plot built from lineage_dfL[["lin_lf"]] for lineages L1-L4, drawn by
 # lin_count_bp() (defined outside this file).
 # Logical flags are spelled FALSE rather than F: F is an ordinary variable
 # that can be reassigned, FALSE is a reserved word.
 lin_countP <- lin_count_bp(
   lf_data = lineage_dfL[["lin_lf"]],
   all_lineages = FALSE,
   x_categ = "sel_lineages",
   y_count = "p_count",
   use_lineages = c("L1", "L2", "L3", "L4"),
   bar_fill_categ = "count_categ",
   display_label_col = "p_count",
   bar_stat_stype = "identity",
   d_lab_size = d_lab_size,
   d_lab_col = "black",
   my_xats = my_xats, # x axis text size
   my_yats = my_yats, # y axis text size
   my_xals = my_xals, # x axis label size
   my_yals = my_yals, # y axis label size
   my_lls = my_lls,   # legend label size
   bar_col_labels = c("nsSNPs", "Total Samples"),
   bar_col_values = c("grey50", "gray75"),
   bar_leg_name = "",
   leg_location = "top",
   y_log10 = FALSE,
   y_scale_percent = FALSE,
   y_label = c("Count")
 )
 lin_countP
 #===============================
 # lineage SNP diversity count
 #===============================
 # Bar plot built from lineage_dfL[["lin_wf"]] for lineages L1-L4, drawn by
 # lin_count_bp_diversity() (defined outside this file). all_lineages is
 # not passed.
 # Logical flags are spelled FALSE rather than F: F is an ordinary variable
 # that can be reassigned, FALSE is a reserved word.
 lin_diversityP <- lin_count_bp_diversity(
   lf_data = lineage_dfL[["lin_wf"]],
   x_categ = "sel_lineages",
   y_count = "snp_diversity",
   use_lineages = c("L1", "L2", "L3", "L4"),
   display_label_col = "snp_diversity_f",
   bar_stat_stype = "identity",
   x_lab_angle = 90,
   d_lab_size = d_lab_size,
   my_xats = my_xats, # x axis text size
   my_yats = my_yats, # y axis text size
   my_xals = my_xals, # x axis label size
   my_yals = my_yals, # y axis label size
   my_lls = my_lls,   # legend label size
   y_log10 = FALSE,
   y_scale_percent = FALSE,
   leg_location = "top",
   y_label = "Percent", #"SNP diversity"
   bp_plot_title = "nsSNP diversity",
   title_colour = "black", #"chocolate4"
   subtitle_text = NULL,
   sts = 10,
   subtitle_colour = "#350E20FF"
 )
 lin_diversityP
 ###################################################
 #                   Stability dist                #
 ###################################################
 # scaled_cols_stability = c("duet_scaled"       
 #                           , "deepddg_scaled"   
 #                           , "ddg_dynamut2_scaled"
 #                           , "foldx_scaled"
 #                           , "avg_stability_scaled")
 # Sizes passed to lineage_distP() below (were 25); the meanings are taken
 # from the argument names they are passed to:
 #   my_ats       : axis text size
 #   my_als       : axis label size
 #   my_leg_ts    : presumably legend text size -- confirm against lineage_distP()
 #   my_leg_title : presumably legend title size -- confirm against lineage_distP()
 #   my_strip_ts  : presumably strip text size -- confirm against lineage_distP()
 my_ats <- my_als <- my_leg_ts <- my_leg_title <- my_strip_ts <- 8
 # x-axis label; stability_suffix is defined outside this file.
 my_xlabel <- paste0("Average stability ", "(", stability_suffix, ")")
 my_xlabel
 #plotdf = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),]
 # Plot of "avg_stability_scaled" against "lineage_labels" for lineages L1-L4,
 # filled by "sensitivity", drawn by lineage_distP() (defined outside this file).
 # with_facet is spelled FALSE rather than F: F is an ordinary variable that
 # can be reassigned, FALSE is a reserved word.
 linP_dm_om <- lineage_distP(
   merged_df2,
   with_facet = FALSE,
   x_axis = "avg_stability_scaled",
   y_axis = "lineage_labels",
   x_lab = my_xlabel,
   use_lineages = c("L1", "L2", "L3", "L4"),
   #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
   fill_categ = "sensitivity",
   fill_categ_cols = c("red", "blue"),
   label_categories = c("Resistant", "Sensitive"),
   leg_label = "Mutation group",
   my_ats = my_ats, # axis text size
   my_als = my_als, # axis label size
   my_leg_ts = my_leg_ts,
   my_leg_title = my_leg_title,
   my_strip_ts = my_strip_ts,
   alpha = 0.56
 )
 linP_dm_om
 ###################################################
 #                 Affinity dist [OPTIONAL]        #
 ###################################################
 # scaled_cols_affinity = c("affinity_scaled" 
 #                          , "mmcsm_lig_scaled" 
 #                          , "mcsm_ppi2_scaled" 
 #                          , "mcsm_na_scaled"
 #                          , "avg_lig_affinity_scaled")
 # lineage_distP(merged_df2
 #               , with_facet = F
 #               , x_axis = "avg_lig_affinity_scaled"
 #               , y_axis = "lineage_labels"
 #               , x_lab = my_xlabel
 #               , use_lineages = c("L1", "L2", "L3", "L4")
 #               #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
 #               , fill_categ = "sensitivity"
 #               , fill_categ_cols = c("red", "blue")
 #               , label_categories = c("Resistant", "Sensitive")
 #               , leg_label = "Mutation group"
 #               , my_ats = 22 # axis text size
 #               , my_als = 22 # axis label size
 #               , my_leg_ts = 22
 #               , my_leg_title = 22
 #               , my_strip_ts = 22
 #               , alpha = 0.56
 # )
--- a/scripts/plotting/plotting_thesis/pnca/lineage_bp_dist_layout.R
+++ b/scripts/plotting/plotting_thesis/pnca/lineage_bp_dist_layout.R
@ -0,0 +1,62 @@
 #!/usr/bin/env Rscript  
 # Run the script that creates lin_countP, lin_diversityP and linP_dm_om.
 source(file = "/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pnca/lineage_bp_dist.R")
 #=======
 # output
 #=======
 #outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
 #cat("plots will output to:", outdir_images)
 ###########################################
 # TASK: generate plots for lineage
 # Individual plots in 
 #lineage_bp_both.R
 #linage_dist_ens_stability.R
 ###########################################
 # svg
 # linPlots_combined = paste0(outdir_images
 #                         , tolower(gene)
 #                         ,"_linP_combined.svg")
 # 
 # cat("\nOutput plot:", linPlots_combined)
 # svg(linPlots_combined, width = 18, height = 12)
 # 
 # cowplot::plot_grid(
 #   cowplot::plot_grid(lin_countP, lin_diversityP
 #                      , nrow = 2
 #                      , rel_heights = c(1.2,1)
 #                      , labels = "AUTO"
 #                      , label_size = my_label_size),
 #   NULL,
 #   linP_dm_om,
 #   nrow = 1,
 #   labels = c("", "", "C"),
 #   label_size = my_label_size,
 #   rel_widths = c(35, 3, 52)
 # )
 # dev.off()
 # png
 # Write the combined lineage figure to a PNG: count (A) and diversity (B)
 # stacked on the left, a spacer, and the stability plot (C) on the right.
 my_label_size <- 12
 linPlots_combined <- paste0(outdir_images, tolower(gene), "_linP_combined.png")
 cat("\nOutput plot:", linPlots_combined)
 png(linPlots_combined, width = 9, height = 6, units = "in", res = 300)
 # print() is explicit so the figure reaches the png device even when this
 # script is run through source(), where top-level values are not auto-printed.
 print(
   cowplot::plot_grid(
     cowplot::plot_grid(
       lin_countP,
       lin_diversityP,
       nrow = 2,
       rel_heights = c(1.2, 1),
       labels = "AUTO",
       label_size = my_label_size
     ),
     NULL,
     linP_dm_om,
     nrow = 1,
     labels = c("", "", "C"),
     label_size = my_label_size,
     rel_widths = c(35, 3, 52)
   )
 )
 dev.off()
--- a/scripts/plotting/plotting_thesis/pnca/pe_sens_site_count_pnca.R
+++ b/scripts/plotting/plotting_thesis/pnca/pe_sens_site_count_pnca.R
@ -0,0 +1,173 @@
 source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pnca/prominent_effects_pnca.R")
 source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pnca/sensitivity_count_pnca.R")
 ##############################################################
 #                         PE count
 #pe_colour_map = c("DD_lig"           = "#f0e68c" # khaki
 #                  , "SS_lig"         = "#ffd700"  # gold
 #                  , "DD_nucleic_acid"= "#d2b48c"  # sandybrown
 #                  , "SS_nucleic_acid"= "#a0522d"  # sienna
 #                  , "DD_ppi2"        = "#da70d6"  # orchid
 #                  , "SS_ppi2"        = "#ff1493"  # deeppink
 #                  , "DD_stability"   = "#f8766d"  # red
 #                  , "SS_stability"   = "#00BFC4") # blue
 table(str_df_plot_cols$pe_effect_outcome)
 ##############################################################
 #===========
 #PE count:
 # lig, ppi2, stability
 #===========
 # One row per tile, in display order. `colors` holds the hex fills that
 # pe_colour_map uses for DD_lig, SS_lig, DD_ppi2, SS_ppi2, DD_stability and
 # SS_stability; `text` (added below) is the matching caption.
 rects <- data.frame(x=1:6,
                     colors = c("#f0e68c" ,
                                "#ffd700" ,
                                "#da70d6" ,
                                "#ff1493" ,
                                "#f8766d" ,
                                "#00BFC4")
                     )
 rects$text =  c("-ve Lig"
                , "+ve Lig"
                , "-ve PPI2"
                , "+ve PPI2"
                , "-ve stability"
                , "+ve stability"
 )
 # Count positions per outcome with sum(== ...) rather than table(...)[[name]]:
 # indexing a table by a name that is absent stops with "subscript out of
 # bounds", whereas sum() simply yields 0 for an outcome that never occurs.
 # na.rm = TRUE mirrors table(), which ignores NA by default.
 cell1 = sum(str_df_plot_cols$pe_effect_outcome == "DD_lig", na.rm = TRUE)
 # counted instead of the previous hard-coded 0; still 0 when SS_lig is absent
 cell2 = sum(str_df_plot_cols$pe_effect_outcome == "SS_lig", na.rm = TRUE)
 #cell3 = sum(str_df_plot_cols$pe_effect_outcome == "DD_nucleic_acid", na.rm = TRUE)
 #cell4 = sum(str_df_plot_cols$pe_effect_outcome == "SS_nucleic_acid", na.rm = TRUE)
 cell5 = sum(str_df_plot_cols$pe_effect_outcome == "DD_ppi2", na.rm = TRUE)
 cell6 = sum(str_df_plot_cols$pe_effect_outcome == "SS_ppi2", na.rm = TRUE)
 cell7 = sum(str_df_plot_cols$pe_effect_outcome == "DD_stability", na.rm = TRUE)
 cell8 = sum(str_df_plot_cols$pe_effect_outcome == "SS_stability", na.rm = TRUE)
 #rects$numbers = c(38, 0, 22, 9, 108, 681) #for embb
 rects$numbers = c(cell1, cell2,
                   #cell3, cell4,
                   cell5, cell6,
                   cell7, cell8)
 # label shown under each caption, e.g. "n=38"
 rects$num_labels = paste0("n=", rects$numbers)
 rects
 #------
 # Plot
 #------
 #https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
 # Strip of coloured tiles, one per row of `rects`, each labelled with its
 # caption and count. x is flipped onto the vertical axis and reversed.
 peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
  geom_tile(width = 1, height = 1) + # unit-sized tiles
  geom_text(color = "black", size = 1.7) + # black caption + count centred on each tile
  scale_fill_identity(guide = "none") + # use the hex codes in `colors` as-is, no legend
  # A plot holds a single coordinate system: the coord_fixed() that used to
  # precede coord_flip() was replaced by it (with a console message) and never
  # took effect, so it is omitted. The rendered plot is unchanged.
  coord_flip()+ scale_x_reverse() +
  # theme_void() # remove any axis markings
  theme_nothing() # remove any axis markings
 peP
 #------
 # Plot: this one is better
 #------
 # Same tiles as a single row: default tile size, 1:1 aspect ratio, black
 # caption + count on every tile, fills taken verbatim from `colors`.
 peP2 <- ggplot(
   data = rects,
   mapping = aes(x = x, y = 0, fill = colors,
                 label = paste0(text, "\n", num_labels))
 ) +
   geom_tile() +
   geom_text(color = "black", size = 1.6) +
   scale_fill_identity(guide = "none") +
   coord_fixed() +
   theme_nothing()
 peP2
 ########################################################
 # From: script sensitivity_count per gene
 #===============================
 # Sensitivity count: SITE
 #===============================
 #--------
 # embb
 #--------
 #rsc = 54
 #ccc = 46
 #ssc = 470
 # Site counts (resistant-only, common, sensitive-only) as computed by
 # sensitivity_count_pnca.R; each is echoed after assignment.
 rsc <- site_Rc
 rsc
 ccc <- site_Cc
 ccc
 ssc <- site_Sc
 ssc
 # one tile per site class, in display order
 rect_rs_siteC <- data.frame(
   x = 1:3,
   colors = c("red", "purple", "blue")
 )
 rect_rs_siteC
 rect_rs_siteC$text <- c("Resistant", "Common", "Sensitive")
 rect_rs_siteC$numbers <- c(rsc, ccc, ssc)
 rect_rs_siteC$num_labels <- paste0("n=", rect_rs_siteC$numbers)
 rect_rs_siteC
 #------
 # Plot
 #------
 # Row of unit tiles; only the count is shown, on a semi-transparent white label.
 sens_siteP <- ggplot(
   data = rect_rs_siteC,
   mapping = aes(x = x, y = 0, fill = colors, label = num_labels)
   # alternative label: paste0(text, "\n", num_labels)
 ) +
   geom_tile(width = 1, height = 1) +
   geom_label(color = "black", size = 1.7, fill = "white", alpha = 0.7) +
   scale_fill_identity(guide = "none") +
   coord_fixed() +
   theme_nothing()
 sens_siteP
 ################################################################
 #===============================
 # Sensitivity count: Mutations
 #===============================
 table(sensP_df$sensitivity)
 # number of resistant / sensitive mutations (rows of sensP_df)
 muts_Rc <- table(sensP_df$sensitivity)[["R"]]
 muts_Sc <- table(sensP_df$sensitivity)[["S"]]
 rect_sens <- data.frame(
   x = 1:2,
   colors = c("red", "blue")
 )
 rect_sens$text <- c("Resistant", "Sensitive")
 rect_sens$numbers <- c(muts_Rc, muts_Sc)
 rect_sens$num_labels <- paste0("n=", rect_sens$numbers)
 rect_sens
 #------
 # Plot
 #------
 # Two unit tiles, each labelled with class name and count.
 sensP <- ggplot(
   data = rect_sens,
   mapping = aes(x = x, y = 0, fill = colors,
                 label = paste0(text, "\n", num_labels))
 ) +
   geom_tile(width = 1, height = 1) +
   geom_label(color = "black", size = 1.7, fill = "white", alpha = 0.7) +
   scale_fill_identity(guide = "none") +
   coord_fixed() +
   theme_nothing()
 sensP
 # flipped variant of the same plot, with x reversed
 sensP2 <- sensP +
   coord_flip() +
   scale_x_reverse()
 sensP2
--- a/scripts/plotting/plotting_thesis/pnca/plot_data_pnca.R
+++ b/scripts/plotting/plotting_thesis/pnca/plot_data_pnca.R
@ -0,0 +1,16 @@
 #!/usr/bin/env Rscript   
 #=============
 # Data: Input
 #==============
 source("~/git/LSHTM_analysis/config/pnca.R")
 source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
 ####################################################
 class(merged_df3)
 # plotting copy of the merged data, minus the pos_count column
 df3 <- subset(merged_df3, select = -pos_count)
 #=======
 # output
 #=======
 # gene-specific image directory (gene name lower-cased, trailing slash kept)
 outdir_images <- paste0(
   "~/git/Writing/thesis/images/results/",
   tolower(gene),
   "/"
 )
 cat("plots will output to:", outdir_images)
--- a/scripts/plotting/plotting_thesis/pnca/prominent_effects_pnca.R
+++ b/scripts/plotting/plotting_thesis/pnca/prominent_effects_pnca.R
@ -0,0 +1,285 @@
 ########################################################
 pos_colname <- "position"
 #-------------
 # column-name vectors below come from
 # ~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R
 #-------------
 length(all_stability_cols)
 length(raw_stability_cols)
 length(scaled_stability_cols)
 length(outcome_stability_cols)
 length(affinity_dist_colnames)
 # identifying columns kept alongside the effect columns
 static_cols <- c("mutationinformation", pos_colname, "sensitivity")
 other_cols_all <- c(scaled_stability_cols,
                     scaled_affinity_cols,
                     affinity_dist_colnames)
 # drop the averaged columns, then the foldx_scaled_signC column(s)
 other_cols <- other_cols_all[!grepl("avg", other_cols_all)]
 other_cols <- other_cols[!grepl("foldx_scaled_signC", other_cols)]
 other_cols
 cols_to_extract <- c(static_cols, other_cols)
 cat("\nExtracting cols:", cols_to_extract)
 expected_ncols <- length(static_cols) + length(other_cols)
 expected_ncols
 str_df <- merged_df3[, cols_to_extract]
 # sanity check on the width of the extracted data frame
 if (ncol(str_df) != expected_ncols) {
   stop("\nAbort: Could not extract cols for calculating prominent effects")
 }
 cat("\nPASS: successfully extracted cols for calculating prominent effects")
 #=========================
 # Masking affinity columns
 #=========================
 # First make values for affinity cols 0 when their corresponding dist >10
 head(str_df)
 # Zero the ligand-affinity scores in place for rows whose ligand distance
 # exceeds 10, so they cannot be picked as the prominent effect.
 str_df[str_df[, "ligand_distance"] > 10, "affinity_scaled"] <- 0
 str_df[str_df[, "ligand_distance"] > 10, "mmcsm_lig_scaled"] <- 0
 # ppi2 genes only: same masking for the ppi2 score, using the interface distance
 if (tolower(gene) %in% geneL_ppi2) {
   str_df[str_df[, "interface_dist"] > 10, "mcsm_ppi2_scaled"] <- 0
 }
 # nucleic-acid genes only: same masking for the na score, using nca_distance
 if (tolower(gene) %in% geneL_na) {
   str_df[str_df[, "nca_distance"] > 10, "mcsm_na_scaled"] <- 0
 }
 colnames(str_df)
 head(str_df)
 # columns compared when looking for the largest effect: every retained
 # column whose name contains "scaled"
 scaled_cols_tc <- other_cols[grepl("scaled", other_cols)]
 ################################################
 #===============
 # whole df
 #===============
 # Return column(s) `y` of `df` for the rows whose position equals `x`.
 # The position column is looked up through the global `pos_colname`;
 # `df` defaults to the global `str_df`.
 give_col <- function(x, y, df = str_df) {
   at_position <- df[[pos_colname]] == x
   df[at_position, y]
 }
 # For every distinct position, find the single largest absolute value among
 # the scaled columns (scaled_cols_tc) across all rows (mutations) at that
 # position, and record on every row of that position:
 #   abs_max_effect - that largest absolute value
 #   effect_type    - name of the column holding it
 #   effect_sign    - sign of the original (non-absolute) value
 # NOTE(review): max() is called without na.rm, so an NA in any scaled column
 # at a position would presumably make `biggest` NA - confirm inputs are NA-free.
 for (i in unique(str_df[[pos_colname]]) ){
  print(i) # progress output: the position being processed
  #cat(length(unique(str_df[[pos_colname]])))
  # largest absolute scaled value over all rows and scaled columns at position i
  biggest     = max(abs(give_col(i,scaled_cols_tc)))
  str_df[str_df[[pos_colname]]==i,'abs_max_effect'] = biggest
  # which(..., arr.ind=T) gives the row/col of every cell equal to `biggest`;
  # the "col" indices select those columns and the first column name is kept
  str_df[str_df[[pos_colname]]==i,'effect_type']= names(
    give_col(i,scaled_cols_tc)[which(
      abs(
        give_col(i,scaled_cols_tc)
      ) == biggest, arr.ind=T
    )[, "col"]])[1]
  # all rows at this position were just given the same effect_type, so
  # unique() yields that one name
  effect_name = unique(str_df[str_df[[pos_colname]]==i,'effect_type'])#[1] # pick first one in case we have multiple exact values
  # get the row name(s) of the row(s) at this position where the chosen column
  # attains the max absolute value, then use them to read back the original sign
  #ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T))
  ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c(pos_colname,effect_name)][effect_name])== biggest, arr.ind=T))
  # sign of the first matching value (-1, 0 or 1), written to all rows at position i
  str_df[str_df[[pos_colname]]==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1]
 }
 # Strip the first ".<digits>" occurrence from effect_type; per the original
 # note this removes the numeric suffix that appears on duplicated column
 # names when exact duplicate values occur.
 # NOTE(review): the pattern is not anchored to the end of the name - confirm
 # that no real column name contains ".<digits>".
 str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values
 colnames(str_df)
 table(str_df$effect_type)
 # spot-check subset for a handful of positions
 str_df_check <- str_df[str_df[[pos_colname]] %in% c(24, 32, 160, 303, 334), ]
 #================
 # for Plots
 #================
 # keep only the identifiers plus the prominent-effect type and sign
 str_df_short <- str_df[, c("mutationinformation",
                            pos_colname,
                            "sensitivity",
                            "effect_type",
                            "effect_sign")]
 table(str_df_short$effect_type)
 table(str_df_short$effect_sign)
 str(str_df_short)
 # outcome label from the sign: negative -> "DD", anything else -> "SS"
 str_df_short$pe_outcome <- ifelse(str_df_short$effect_sign < 0, "DD", "SS")
 table(str_df_short$pe_outcome)
 table(str_df_short$effect_sign)
 #==============
 # group effect type:
 # lig, ppi2, nuc. acid, stability
 #==============
 affcols <- c("affinity_scaled", "mmcsm_lig_scaled")
 # step 1: both ligand-affinity columns collapse to "lig"; others keep their name
 table(str_df_short$effect_type)
 str_df_short$effect_grouped <- ifelse(str_df_short$effect_type %in% affcols,
                                       "lig",
                                       str_df_short$effect_type)
 table(str_df_short$effect_grouped)
 # step 2: everything that is not "lig" becomes "stability"
 str_df_short$effect_grouped <- ifelse(str_df_short$effect_grouped %in% c("lig"),
                                       str_df_short$effect_grouped,
                                       "stability")
 table(str_df_short$effect_grouped)
 # combined label, e.g. "DD_lig" or "SS_stability"
 str_df_short$pe_effect_outcome <- paste0(str_df_short$pe_outcome,
                                          "_",
                                          str_df_short$effect_grouped)
 table(str_df_short$pe_effect_outcome)
 #####################################################################
 # Chimera: for colouring
 ####################################################################
 #-------------------------------------
 # get df with unique position
 #--------------------------------------
 #data[!duplicated(data$x), ]   
 # keep the first row seen for every position
 str_df_plot <- str_df_short[!duplicated(str_df[[pos_colname]]), ]
 if (nrow(str_df_plot) != length(unique(str_df[[pos_colname]]))) {
   stop("\nAbort: Could not extract df with unique positions")
 }
 cat("\nPASS: successfully extracted df with unique positions")
 #-------------------------------------
 # generate colours for effect types
 #--------------------------------------
 str_df_plot_cols <- str_df_plot[, c(pos_colname,
                                     "sensitivity",
                                     "pe_outcome",
                                     "effect_grouped",
                                     "pe_effect_outcome")]
 head(str_df_plot_cols)
 # colour intensity from the outcome: "DD" -> bright, otherwise light
 #str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light")
 str_df_plot_cols$colour_hue <- ifelse(str_df_plot_cols$pe_outcome == "DD",
                                       "bright",
                                       "light")
 table(str_df_plot_cols$colour_hue)
 table(str_df_plot$pe_outcome)
 head(str_df_plot_cols)
 # lookup: combined outcome label -> hex colour
 table(str_df_plot_cols$pe_effect_outcome)
 pe_colour_map <- c(
   "DD_lig"          = "#f0e68c", # khaki
   "SS_lig"          = "#ffd700", # gold
   "DD_nucleic_acid" = "#d2b48c", # sandybrown
   "SS_nucleic_acid" = "#a0522d", # sienna
   "DD_ppi2"         = "#da70d6", # orchid
   "SS_ppi2"         = "#ff1493", # deeppink
   "DD_stability"    = "#f8766d", # red
   "SS_stability"    = "#00BFC4"  # blue
 )
 #unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE)
 # translate each label to its colour; [[ ]] stops on a label missing from the map
 str_df_plot_cols$colour_map <- vapply(
   str_df_plot_cols$pe_effect_outcome,
   function(outcome) pe_colour_map[[outcome]],
   character(1),
   USE.NAMES = FALSE
 )
 head(str_df_plot_cols$colour_map)
 table(str_df_plot_cols$colour_map)
 table(str_df_plot_cols$pe_effect_outcome)
 # str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue
 #                                   , "_"
 #                                   , str_df_plot_cols$colour_map)
 # head(str_df_plot_cols$colours)
 # table(str_df_plot_cols$colours)
 #
 # class(str_df_plot_cols$colour_map)
 # str(str_df_plot_cols)
 # order rows by colour code
 head(str_df_plot_cols)
 str_df_plot_cols <- str_df_plot_cols[order(str_df_plot_cols$colour_map), ]
 head(str_df_plot_cols)
 #======================================
 # write file with prominent effects
 #======================================
 outdir_images <- paste0("~/git/Writing/thesis/images/results/",
                         tolower(gene),
                         "/")
 # written before pos_chain is added below, so the CSV does not contain it
 write.csv(
   str_df_plot_cols,
   file = paste0(outdir_images, tolower(gene), "_prominent_effects.csv")
 )
 ################################
 # printing for chimera
 ###############################
 # position with the chain suffix appended, e.g. "<position>.A"
 chain_suffix <- ".A"
 str_df_plot_cols$pos_chain <- paste0(str_df_plot_cols[[pos_colname]],
                                      chain_suffix)
 table(str_df_plot_cols$colour_map)
 table(str_df_plot_cols$pe_effect_outcome)
 #===================================================
 #-------------------
 # Ligand Affinity
 #-------------------
 # -ve Lig Aff
 # Positions whose prominent effect is a ligand-affinity effect, printed as a
 # comma-separated "<position><chain_suffix>" list for pasting into chimera.
 # Both signs are handled the same way: the expected count is taken with
 # sum(== ...), which is 0 for an absent outcome (table(...)[[name]] would stop
 # with "subscript out of bounds"), and the list is printed only when there is
 # at least one position (paste0() on an empty vector would print a bare suffix).
 dd_lig        = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_lig",]
 if (nrow(dd_lig) == sum(str_df_plot_cols$pe_effect_outcome == "DD_lig", na.rm = TRUE)){
  dd_lig_pos  = dd_lig[[pos_colname]]
 }else{
  stop("Abort: DD affinity colour numbers mismtatch")
 }
 if (nrow(dd_lig) > 0){
  #put in chimera cmd
  toString(paste0(dd_lig_pos, chain_suffix))
 }
 # +ve Lig Aff
 ss_lig            = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_lig",]
 if (nrow(ss_lig)  == sum(str_df_plot_cols$pe_effect_outcome == "SS_lig", na.rm = TRUE)){
  ss_lig_pos      = ss_lig[[pos_colname]]
 }else{
  stop("Abort: SS affinity colour numbers mismtatch")
 }
 if (nrow(ss_lig) > 0){
  #put in chimera cmd
  toString(paste0(ss_lig_pos, chain_suffix))
 }
 #=========================================================
 #------------------------
 # Stability
 #------------------------
 # -ve Stability
 # rows whose prominent effect is a negative-sign (DD) stability effect
 dd_stability <- str_df_plot_cols[str_df_plot_cols$pe_effect_outcome == "DD_stability", ]
 if (nrow(dd_stability) != table(str_df_plot_cols$pe_effect_outcome)[["DD_stability"]]) {
   stop("Abort: DD Stability colour numbers mismtatch")
 }
 dd_stability_pos <- dd_stability[[pos_colname]]
 # +ve Stability
 ss_stability <- str_df_plot_cols[str_df_plot_cols$pe_effect_outcome == "SS_stability", ]
 if (nrow(ss_stability) != table(str_df_plot_cols$pe_effect_outcome)[["SS_stability"]]) {
   stop("Abort: SS Stability colour numbers mismtatch")
 }
 ss_stability_pos <- ss_stability[[pos_colname]]
 # comma-separated "<position><chain_suffix>" lists to paste into the chimera cmd
 toString(paste0(dd_stability_pos, chain_suffix))
 toString(paste0(ss_stability_pos, chain_suffix))
 ####################################################################
--- a/scripts/plotting/plotting_thesis/pnca/sensitivity_count_pnca.R
+++ b/scripts/plotting/plotting_thesis/pnca/sensitivity_count_pnca.R
@ -0,0 +1,65 @@
 #=========================
 # Count Sensitivity
 # Mutations and positions
 #=========================
 # Mutation, position and sensitivity columns used for all counts below.
 pos_colname_c ="position"
 sensP_df = merged_df3[,c("mutationinformation",
                         #"position",
                         pos_colname_c,
                         "sensitivity")]
 head(sensP_df)
 table(sensP_df$sensitivity)
 #---------------
 # Total unique positions
 #----------------
 tot_mut_pos = length(unique(sensP_df[[pos_colname_c]]))
 cat("\nNo of Tot muts sites:", tot_mut_pos)
 # resistant mut pos: one entry per resistant mutation, then de-duplicated.
 # which() keeps only rows where the comparison is TRUE, so a missing
 # sensitivity value cannot inject an NA position into the lists.
 sens_site_allR = sensP_df[[pos_colname_c]][which(sensP_df$sensitivity=="R")]
 sens_site_UR   = unique(sens_site_allR)
 length(sens_site_UR)
 # Sensitive mut pos
 sens_site_allS = sensP_df[[pos_colname_c]][which(sensP_df$sensitivity=="S")]
 sens_site_US = unique(sens_site_allS)
 # report the sensitive set (this previously repeated the resistant count)
 length(sens_site_US)
 #---------------
 # Common Sites
 #----------------
 # positions that carry both resistant and sensitive mutations
 common_pos <- intersect(sens_site_UR, sens_site_US)
 site_Cc <- length(common_pos)
 cat("\nNo of Common sites:", site_Cc,
     "\nThese are:", common_pos)
 #---------------
 # Resistant muts
 #----------------
 # resistant positions that are not shared with the sensitive set
 site_R <- sens_site_UR[!(sens_site_UR %in% common_pos)]
 site_Rc <- length(site_R)
 # summary is printed only when the per-mutation list agrees with table()
 if (length(sens_site_allR) == table(sensP_df$sensitivity)[["R"]]) {
   cat("\nNo of R muts:", length(sens_site_allR),
       "\nNo. of R sites:", site_Rc,
       "\nThese are:", site_R)
 }
 #---------------
 # Sensitive muts
 #----------------
 # sensitive positions that are not shared with the resistant set
 site_S <- sens_site_US[!(sens_site_US %in% common_pos)]
 site_Sc <- length(site_S)
 if (length(sens_site_allS) == table(sensP_df$sensitivity)[["S"]]) {
   cat("\nNo of S muts:", length(sens_site_allS),
       "\nNo. of S sites:", site_Sc,
       "\nThese are:", site_S)
 }
 #########################