From 6c6709e41e58c42b43b7b68c8b0e4e9cfafd77fc Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Wed, 2 Mar 2022 11:44:04 +0000 Subject: [PATCH] various changes --- scripts/Header_TT.R | 5 ++ scripts/functions/bp_lineage_diversity.R | 93 +++++++++++++++++++++ scripts/functions/consurfP.R | 13 +-- scripts/functions/tests/data_for_testingF.R | 63 ++++++++++++++ scripts/functions/tests/test_consurfP.R | 36 ++++---- scripts/functions/tests/test_logo_plots.R | 17 ++-- 6 files changed, 198 insertions(+), 29 deletions(-) create mode 100644 scripts/functions/bp_lineage_diversity.R create mode 100644 scripts/functions/tests/data_for_testingF.R diff --git a/scripts/Header_TT.R b/scripts/Header_TT.R index e9c83e2..e336ed2 100755 --- a/scripts/Header_TT.R +++ b/scripts/Header_TT.R @@ -48,6 +48,11 @@ if (!require("shinyBS")) { library(shinyBS) } +if (!require("shinydashboard")) { + install.packages("shinydashboard", dependencies = TRUE) + library(shinydashboard) +} + if (!require("gridExtra")) { install.packages("gridExtra", dependencies = TRUE) library(gridExtra) diff --git a/scripts/functions/bp_lineage_diversity.R b/scripts/functions/bp_lineage_diversity.R new file mode 100644 index 0000000..ee0cada --- /dev/null +++ b/scripts/functions/bp_lineage_diversity.R @@ -0,0 +1,93 @@ +######################################## +# Lineage barplot +# Lineage and nsSNP count barplot +# Lineage Diversity barplot +######################################## + +lin_count_bp_diversity <- function( lf_data = lin_wf + , x_categ = "sel_lineages" + , y_count = "snp_diversity" + #, bar_fill_categ = "count_categ" + , display_label_col = "snp_diversity_f" + , bar_stat_stype = "identity" + , x_lab_angle = 90 + , d_lab_size = 5 + , d_lab_hjust = 0.5 + , d_lab_vjust = 0.5 + , d_lab_col = "black" + , my_xats = 20 # x axis text size + , my_yats = 20 # y axis text size + , my_xals = 22 # x axis label size + , my_yals = 22 # y axis label size + , my_lls = 22 # legend label size + , bar_col_labels = c("Mutations", "Total Samples") + , bar_col_values = c("grey50", "gray75") + , bar_leg_name = "" + , leg_location = "top" + , y_log10 = FALSE + , y_scale_percent = FALSE + #, y_label = c("Count", "SNP diversity") + , y_label = c("SNP diversity") + ) { + g = ggplot(lf_data + , aes( x = factor( eval(parse(text = x_categ)), ordered = T ) + , y = eval(parse(text = y_count)) + #, fill = eval(parse(text = bar_fill_categ)) + ) ) + + OutPlot = g + geom_bar( stat = bar_stat_stype + , position = position_stack(reverse = TRUE) + #, alpha = 1 + #, colour = "grey75" + ) + + theme(axis.text.x = element_text(size = my_xats + , angle = x_lab_angle) + , axis.text.y = element_text(size = my_yats + , angle = 90 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = my_xals + , colour = "black") + , axis.title.y = element_text(size = my_yals + , colour = "black") + , legend.position = leg_location + , legend.text = element_text(size = my_lls)) + + + geom_label(aes(label = eval(parse(text = display_label_col))) + , size = d_lab_size + , hjust = d_lab_hjust + , vjust = d_lab_vjust + , colour = d_lab_col + , show.legend = FALSE + #, check_overlap = TRUE + , position = position_stack(reverse = T)) + + + scale_fill_manual(values = bar_col_values + , name = bar_leg_name + , labels = bar_col_labels) + + labs(title = "" + , x = "" + , y = y_label + , colour = "black") + + if (y_log10){ + + OutPlot = OutPlot + + scale_y_continuous(trans = "log10" + , labels = trans_format("log10", math_format(10^.x) ) ) + } + + if (y_scale_percent){ + + OutPlot = OutPlot + + scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + + #scale_y_continuous(labels = scales::percent) + + + labs(title = "" + , x = "" + , y = y_label + , colour = "black") + } + + return(OutPlot) +} diff --git a/scripts/functions/consurfP.R b/scripts/functions/consurfP.R index f7e73f5..f976c62 100644 --- a/scripts/functions/consurfP.R +++ b/scripts/functions/consurfP.R @@ -28,6 +28,8 @@ wideP_consurf <- function(plotdf , "1": "Variable" , "2", "3", "4", "5", "6", "7", "8" , "9": "Conserved") + , panel_col = "black" + , panel_col_fill = "black" # axes title and label sizes , x_axls = 12 # x-axis label size @@ -38,6 +40,7 @@ wideP_consurf <- function(plotdf , ptitle = "" , xlab = "" , ylab = "" + , pts = 20 # plot margins , t_margin = 0.5 @@ -282,9 +285,11 @@ wideP_consurf <- function(plotdf , vjust = 0) , axis.title.x = element_text(size = x_axls) , axis.title.y = element_text(size = y_axls ) - , panel.background = element_rect(fill = "black", color = "black") + , panel.background = element_rect(fill = panel_col_fill, color = panel_col) , panel.grid.major = element_line(color = "black") , panel.grid.minor = element_line(color = "black") + , plot.title = element_text(size = pts + , hjust = 0.5) , plot.margin = margin(t = t_margin , r = r_margin , b = b_margin @@ -400,8 +405,6 @@ wideP_consurf <- function(plotdf out = g1 } - - ##################################################### # #============================================ # # x-axis: geom_tiles ~ ligand distance @@ -548,13 +551,13 @@ wideP_consurf <- function(plotdf , legend2 , ncol = 1 , align = "hv" - , rel_heights = c(3/4,1)) + , rel_heights = c(2/4,3/4)) out2 = cowplot::plot_grid( out + theme(legend.position = "none") , legs , ncol = 2 , align = "hv" - , rel_widths = c(9/10, 0.5/10) + , rel_widths = c(9/10, 0.4/10) ) }else{ out2 = cowplot::plot_grid( out + theme(legend.position = "none") diff --git a/scripts/functions/tests/data_for_testingF.R b/scripts/functions/tests/data_for_testingF.R new file mode 100644 index 0000000..ce2d376 --- /dev/null +++ b/scripts/functions/tests/data_for_testingF.R @@ -0,0 +1,63 @@ +############################################################################ +# merged_df3 = read.csv("~/git/Data/cycloserine/output/alr_all_params.csv"); source("~/git/LSHTM_analysis/config/alr.R") +# if ( tolower(gene) == "alr") { +# aa_pos_lig1 = NULL +# aa_pos_lig2 = NULL +# aa_pos_lig3 = NULL +# p_title = gene +# } + +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") +########################################################################### +# merged_df3 = read.csv("~/git/Data/ethambutol/output/embb_all_params.csv"); source("~/git/LSHTM_analysis/config/embb.R") +# if ( tolower(gene) == "embb") { +# aa_pos_lig1 = aa_pos_ca +# aa_pos_lig2 = aa_pos_cdl +# aa_pos_lig3 = aa_pos_dsl +# p_title = gene +# } +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +########################################################################### +merged_df3 = read.csv("~/git/Data/streptomycin/output/gid_all_params.csv") + +source("~/git/LSHTM_analysis/config/gid.R") +if ( tolower(gene) == "gid") { + aa_pos_lig1 = aa_pos_rna + aa_pos_lig2 = aa_pos_sam + aa_pos_lig3 = aa_pos_amp + p_title = gene +} +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +########################################################################### +# merged_df3 = read.csv("~/git/Data/isoniazid/output/katg_all_params.csv"); source("~/git/LSHTM_analysis/config/katg.R") +# if ( tolower(gene) == "katg") { +# aa_pos_lig1 = aa_pos_hem +# aa_pos_lig2 = NULL +# aa_pos_lig3 = NULL +# p_title = gene +# } +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +########################################################################### +# merged_df3 = read.csv("~/git/Data/pyrazinamide/output/pnca_all_params.csv"); source("~/git/LSHTM_analysis/config/pnca.R") +# if ( tolower(gene) == "pnca") { +# aa_pos_lig1 = aa_pos_fe +# aa_pos_lig2 = NULL +# aa_pos_lig3 = NULL +# p_title = gene +# } +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +########################################################################### +merged_df3 = read.csv("~/git/Data/rifampicin/output/rpob_all_params.csv"); source("~/git/LSHTM_analysis/config/rpob.R") +if ( tolower(gene) == "rpob") { + aa_pos_lig1 = NULL + aa_pos_lig2 = NULL + aa_pos_lig3 = NULL + p_title = gene +} +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +######################################################################### diff --git a/scripts/functions/tests/test_consurfP.R b/scripts/functions/tests/test_consurfP.R index 8ac8750..a4f1e3a 100644 --- a/scripts/functions/tests/test_consurfP.R +++ b/scripts/functions/tests/test_consurfP.R @@ -15,13 +15,13 @@ source("~/git/LSHTM_analysis/scripts/functions/consurfP.R") # p_title = gene # } ########################################################################### -merged_df3 = read.csv("~/git/Data/ethambutol/output/embb_all_params.csv"); source("~/git/LSHTM_analysis/config/embb.R") -if ( tolower(gene) == "embb") { - aa_pos_lig1 = aa_pos_ca - aa_pos_lig2 = aa_pos_cdl - aa_pos_lig3 = aa_pos_dsl - p_title = gene -} +# merged_df3 = read.csv("~/git/Data/ethambutol/output/embb_all_params.csv"); source("~/git/LSHTM_analysis/config/embb.R") +# if ( tolower(gene) == "embb") { +# aa_pos_lig1 = aa_pos_ca +# aa_pos_lig2 = aa_pos_cdl +# aa_pos_lig3 = aa_pos_dsl +# p_title = gene +# } ########################################################################### # merged_df3 = read.csv("~/git/Data/streptomycin/output/gid_all_params.csv"); source("~/git/LSHTM_analysis/config/gid.R") # if ( tolower(gene) == "gid") { @@ -47,13 +47,13 @@ if ( tolower(gene) == "embb") { # p_title = gene # } ########################################################################### -# merged_df3 = read.csv("~/git/Data/rifampicin/output/rpob_all_params.csv"); source("~/git/LSHTM_analysis/config/rpob.R") -# if ( tolower(gene) == "rpob") { -# aa_pos_lig1 = NULL -# aa_pos_lig2 = NULL -# aa_pos_lig3 = NULL -# p_title = gene -# } +merged_df3 = read.csv("~/git/Data/rifampicin/output/rpob_all_params.csv"); source("~/git/LSHTM_analysis/config/rpob.R") +if ( tolower(gene) == "rpob") { + aa_pos_lig1 = NULL + aa_pos_lig2 = NULL + aa_pos_lig3 = NULL + p_title = gene +} ######################################################################### consurf_palette1 = c("0" = "yellow2" @@ -99,15 +99,19 @@ wideP_consurf(plotdf = merged_df3 , "1" = "Variable" , "2", "3", "4", "5", "6", "7", "8" , "9" = "Conserved") + , panel_col = "black" + , panel_col_fill = "black" # axes title and label sizes - , x_axts = 8 + , x_axts = 8 , y_axts = 12 , x_axls = 12 , y_axls = 15 , default_xtc = "black" , ptitle = p_title - , xlab = "" + , xlab = "" # ylab is above + , pts = 20 + # x-axis: text colour , xtext_colour_aa = F diff --git a/scripts/functions/tests/test_logo_plots.R b/scripts/functions/tests/test_logo_plots.R index 1414876..c52c431 100644 --- a/scripts/functions/tests/test_logo_plots.R +++ b/scripts/functions/tests/test_logo_plots.R @@ -16,10 +16,10 @@ LP1<- LogoPlotCustomH (plot_df = merged_df3 , x_axis_colname = "position" , y_axis_colname = "or_mychisq" , symbol_colname = "mutant_type" - , y_axis_log = T + , y_axis_log = F , log_value = log10 - , y_axis_increment = 100 - , rm_empty_y = T + , y_axis_increment = 50 + , rm_empty_y = F , my_logo_col = 'chemistry' , x_lab = "Wild-type position" , y_lab = "Odds Ratio" @@ -45,7 +45,7 @@ LP2<- LogoPlotSnps(plot_df = merged_df3 , x_axis_colname = "position" , symbol_mut_colname = "mutant_type" , symbol_wt_colname = "wild_type" - , omit_snp_count = c(1)# can be 0,1, 2, etc.# DD + , omit_snp_count = c(0)# can be 0,1, 2, etc.# DD , my_logo_col = "chemistry" #DD , x_lab = "Wild-type position" , y_lab = "nsSNP count" @@ -75,8 +75,9 @@ LP2<- LogoPlotSnps(plot_df = merged_df3 # to select a small dataset: see test_ed_pfm_data.R ##################################################### -LP3<- LogoPlotMSA(msaSeq_mut = msa_seq - , msaSeq_wt = wt_seq +LP3<- LogoPlotMSA(unified_msa + #msaSeq_mut = msa_seq + #, msaSeq_wt = wt_seq , logo_type = c("EDLogo") # "EDLogo", bits_pfm", "probability_pfm", "bits_raw", "probability_raw") , EDScore_type = c("log") , bg_prob = NULL @@ -90,7 +91,7 @@ LP3<- LogoPlotMSA(msaSeq_mut = msa_seq #, y_lab_mut , x_ats = 10 , x_tangle = 90 - , y_ats = 15 + , y_ats = 12 , y_tangle = 0 , x_tts = 13 , y_tts = 13 @@ -106,7 +107,7 @@ out_logoP = cowplot::plot_grid(LP3, LP1, LP2 , nrow = 3 , ncol = 1 , rel_width = c(1/3, 0.5/3, 1/3) - , rel_heights = c(1, 1, 1) + , rel_heights = c(0.8/2, 0.5/2, 0.7/2) , align = "hv") out_logoP