From 7317156bbaf9e2f27aaea6c767da699232946a27 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Wed, 26 Jan 2022 15:53:53 +0000 Subject: [PATCH] updated docs for the logo functions and tested all of them again --- scripts/functions/ed_pfm_data.R | 1 - scripts/functions/logoP_msa.R | 42 +++++++++++- scripts/functions/logoP_or.R | 58 +++++++++------- scripts/functions/logoP_snp.R | 30 +++++--- scripts/functions/tests/test_logo_plots.R | 84 +++++++++++------------ 5 files changed, 135 insertions(+), 80 deletions(-) diff --git a/scripts/functions/ed_pfm_data.R b/scripts/functions/ed_pfm_data.R index 924dac8..49bf9cd 100644 --- a/scripts/functions/ed_pfm_data.R +++ b/scripts/functions/ed_pfm_data.R @@ -1,4 +1,3 @@ -library(Logolas) source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R") ##################################################################################### # DataED_PFM(): diff --git a/scripts/functions/logoP_msa.R b/scripts/functions/logoP_msa.R index a3ab3f9..fb198f6 100644 --- a/scripts/functions/logoP_msa.R +++ b/scripts/functions/logoP_msa.R @@ -1,8 +1,46 @@ +##################################################################################### +# LogoPlotMSA(): +# Input: + # Data: + # msaSeq_mut: MSA chr vector for muts + # msaSeq_wt: MSA chr vector for wt + + # Logo type params: + # logo_type = c("EDLogo", "bits_pfm", "probability_pfm", "bits_raw", "probability_raw") + # EDLogo: calculated from the Logolas package based on PFM matrix (scaled). 
+ #The required content from the package is sourced locally within 'my_logolas.R' + # bits_pfm: Information Content based on PFM scaled matrix (my_logolas.R) + # probability_pfm: Probability based on PFM scaled matrix (my_logolas.R) + # bits_raw: Information Content based on Raw MSA (ggseqlogo) + # probability_raw: Probability based on Raw MSA (ggseqlogo) + + # EDScore_type = c("log", "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL") + # bg_prob: background probability, default is equal i.e. NULL. + # This is used by the internal call to DataED_PFM(). This func takes these args. I have used it here for + # completeness and allow nuanced plot control + + # my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor") + # --> if clustalx and taylor, set variable to black bg + white font + # --> if chemistry and hydrophobicity, then grey bg + black font + + # ...other params + +# Returns: Logo plots from MSA both mutant and wt (for comparability) +# For my case, I always use it as it helps see what is at the wild-type already! + +# TODO: SHINY +# drop down: logo_type +# drop down: ED score type +# drop down/enter field : bg probability (in the actual plot function!) +# drop down: my_logo_col +# Make it hover over position and then get the corresponding data table! +################################################################################### + + ########################################### LogoPlotMSA <- function(msaSeq_mut # chr vector , msaSeq_wt # chr vector - #, msa_method = c("custom") # can be "bits", "probability" or "custom" - , logo_type = c("EDLogo") #"bits_pfm", "probability_pfm", "bits_raw", "probability_raw") # can be "bits", "probability" or "custom" + , logo_type = c("EDLogo") #"bits_pfm", "probability_pfm", "bits_raw", "probability_raw") , EDScore_type = c("log") # see if this relevant, or source function should have it! 
, bg_prob = NULL , my_logo_col = "chemistry" diff --git a/scripts/functions/logoP_or.R b/scripts/functions/logoP_or.R index 96bd0a5..f7e4c14 100644 --- a/scripts/functions/logoP_or.R +++ b/scripts/functions/logoP_or.R @@ -1,46 +1,52 @@ -#logo plots +# Input: + # Data: +# plot_df: merged_df3 containing the OR column to use as y-axis or any other relevant column -# create functions +# x_axis_colname = "position" +# y_axis_colname = "or_mychisq" +# symbol_colname = "mutant_type" +# y_axis_log = F +# log_value = log10 + # if used, y-axis label has "Log" appended to it -# one with OR - # --> select/drop down option to remove empty positions - # --> select/drop down option for colour - # --> if clustalx and taylor, set variable to black bg + white font - # --> if chemistry and hydrophobicity, then grey bg + black font +# my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor") + # --> if clustalx and taylor, set variable to black bg + white font + # --> if chemistry and hydrophobicity, then grey bg + black font - # --> select/drop down option for log scale - # --> should include WT +# rm_empty_y = F + # option to remove empty positions i.e. positions with no associated y-val -# one for multiple muts - # --> select/drop down option to filter count of nsSNPs - # --> select/drop down option for colour - # --> should include WT +# y_axis_log = F + # option to use log scale + # FIXME Minor bug: if used with rm_empty_y, sometimes the labels are too small to render(!?) + # so positions appear empty despite having y-vals + +# ...other params + +# Returns: Logo plot from combined data containing specific y-value such as OR, etc by position. + +# TODO: SHINY +# select/drop down option to remove empty positions +# select/drop down option for colour +# select/drop down option for log scale +# include WT # Make it hover over position and then get the corresponding data table! 
-#%%====================================================================== +#################################################################################### + #================== # logo data: OR #================== -# NOTE: my_logo_col -# Can be one of these: 'chemistry', 'hydrophobicity', 'clustalx', 'taylor' -# if 'chemistry' or 'hydrophobicity' --> then bg is grey with black font (x, y and labels) -# if 'clustalx'or 'taylor' --> then bg is black with white font (x, y and labels) - #, theme_bgc = - #, xfont_bgc = - #, yfont_bgc = - #, xtt_col = - #, ytt_col = - LogoPlotCustomH <- function(plot_df , x_axis_colname = "position" , y_axis_colname = "or_mychisq" , symbol_colname = "mutant_type" + , my_logo_col = "chemistry" + , rm_empty_y = F , y_axis_log = F , log_value = log10 , y_axis_increment = 5 - , rm_empty_y = F - , my_logo_col = "chemistry" , x_lab = "Position" , y_lab = "Odds Ratio" , x_ats = 12 # text size diff --git a/scripts/functions/logoP_snp.R b/scripts/functions/logoP_snp.R index bee8e9f..2917153 100644 --- a/scripts/functions/logoP_snp.R +++ b/scripts/functions/logoP_snp.R @@ -1,17 +1,29 @@ -#logo plots +#################################################################################### +# Input: + # Data + # plot_df: merged_df3 containing the OR column to use as y-axis or any other relevant column -# one for multiple muts - # --> select/drop down option to filter count of nsSNPs - # --> select/drop down option for colour - # --> should include WT +# x_axis_colname = "position" +# symbol_mut_colname = "mutant_type" +# symbol_wt_colname = "mutant_type" +# omit_snp_count = c(0, 1, 2...) 
can be used to filter positions with specified snp count -# Data used +# my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor") +# --> if clustalx and taylor, set variable to black bg + white font +# --> if chemistry and hydrophobicity, then grey bg + black font -#tab_mt # mutant logo plot -#tab_wt # wt logo plot +# ...other params + +# Returns: Logo plot from combined data containing all nsSNPs per position. + # Helps to see the overview of SNP diversity + +# TODO: SHINY +# select/drop down: omit_snp_count +# select/drop down: my_logo_col +# should include WT?? # Make it hover over position and then get the corresponding data table! -#%%====================================================================== +#################################################################################### #================== # logo data: OR diff --git a/scripts/functions/tests/test_logo_plots.R b/scripts/functions/tests/test_logo_plots.R index 94d42b4..cf38699 100644 --- a/scripts/functions/tests/test_logo_plots.R +++ b/scripts/functions/tests/test_logo_plots.R @@ -12,29 +12,29 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") # mainly OR # script: logoP_or.R ################################ -# LogoPlotCustomH (plot_df = merged_df3 -# , x_axis_colname = "position" -# , y_axis_colname = "or_mychisq" -# , symbol_colname = "mutant_type" -# , y_axis_log = T -# , log_value = log10 -# , y_axis_increment = 100 -# , rm_empty_y = T -# , my_logo_col = 'chemistry' -# , x_lab = "Wild-type position" -# , y_lab = "Odds Ratio" -# , x_ats = 10 # text size -# , x_tangle = 90 # text angle -# , y_ats = 22 -# , y_tangle = 0 -# , x_tts = 19 # title size -# , y_tts = 22 -# #, leg_pos = c(0.05,-0.12) -# , leg_pos = "top" -# , leg_dir = "horizontal" -# , leg_ts = 15 # leg text size -# , leg_tts = 16 # leg title size -# ) +LogoPlotCustomH (plot_df = merged_df3 + , x_axis_colname = "position" + , y_axis_colname = "or_mychisq" + , symbol_colname = "mutant_type" + , 
y_axis_log = T + , log_value = log10 + , y_axis_increment = 100 + , rm_empty_y = T + , my_logo_col = 'chemistry' + , x_lab = "Wild-type position" + , y_lab = "Odds Ratio" + , x_ats = 10 # text size + , x_tangle = 90 # text angle + , y_ats = 22 + , y_tangle = 0 + , x_tts = 19 # title size + , y_tts = 22 + #, leg_pos = c(0.05,-0.12) + , leg_pos = "top" + , leg_dir = "horizontal" + , leg_ts = 15 # leg text size + , leg_tts = 16 # leg title size +) ######################################## @@ -42,25 +42,25 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") # wild-type and mutant aa # script: logoP_snp.R ######################################## -# LogoPlotSnps(plot_df = merged_df3 -# , x_axis_colname = "position" -# , symbol_mut_colname = "mutant_type" -# , symbol_wt_colname = "wild_type" -# , omit_snp_count = c(1)# can be 0,1, 2, etc. -# , my_logo_col = "chemistry" -# , x_lab = "Wild-type position" -# , y_lab = "nsSNP count" -# , x_ats = 10 # text size -# , x_tangle = 90 # text angle -# , y_ats = 18 -# , y_tangle = 0 -# , x_tts = 18 # title size -# , y_tts = 18 -# , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) -# , leg_dir = "horizontal" # can be vertical or horizontal -# , leg_ts = 14 # leg text size -# , leg_tts = 16 # leg title size -# ) +LogoPlotSnps(plot_df = merged_df3 + , x_axis_colname = "position" + , symbol_mut_colname = "mutant_type" + , symbol_wt_colname = "wild_type" + , omit_snp_count = c(1)# can be 0,1, 2, etc. + , my_logo_col = "chemistry" + , x_lab = "Wild-type position" + , y_lab = "nsSNP count" + , x_ats = 10 # text size + , x_tangle = 90 # text angle + , y_ats = 18 + , y_tangle = 0 + , x_tts = 18 # title size + , y_tts = 18 + , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) + , leg_dir = "horizontal" # can be vertical or horizontal + , leg_ts = 14 # leg text size + , leg_tts = 16 # leg title size +) #################################################### # Logo plot MSA