updated docs for the logo functions and tested all of them again

This commit is contained in:
Tanushree Tunstall 2022-01-26 15:53:53 +00:00
parent 2f7f40efb1
commit 7317156bba
5 changed files with 135 additions and 80 deletions

View file

@ -1,4 +1,3 @@
library(Logolas)
source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
#####################################################################################
# DataED_PFM():

View file

@ -1,8 +1,46 @@
#####################################################################################
# LogoPlotMSA():
# Input:
# Data:
# msaSeq_mut: MSA chr vector for muts
# msaSeq_wt: MSA chr vector for wt
# Logo type params:
# logo_type = c("EDLogo", "bits_pfm", "probability_pfm", "bits_raw", "probability_raw")
# EDLogo: calculated from the Logolas package based on PFM matrix (scaled).
#The required content from the package is sourced locally within 'my_logolas.R'
# bits_pfm: Information Content based on PFM scaled matrix (my_logolas.R)
# probability_pfm: Probability based on PFM scaled matrix (my_logolas.R)
# bits_raw: Information Content based on Raw MSA (ggseqlogo)
# probability_raw: Probability based on Raw MSA (ggseqlogo)
# EDScore_type = c("log", "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL")
# bg_prob: background probability, default is equal i.e NULL.
# These are used by the internal call to DataED_PFM(), which takes these args. They are exposed here for
# completeness and to allow nuanced plot control
# my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor")
# --> if clustalx and taylor, set variable to black bg + white font
# --> if chemistry and hydrophobicity, then grey bg + black font
# ...other params
# Returns: Logo plots from MSA both mutant and wt (for comparability)
# For my case, I always use it as it helps see what is at the wild-type already!
# TODO: SHINY
# drop down: logo_type
# drop down: ED score type
# drop down/enter field : bg probability (in the actual plot function!)
# drop down: my_logo_col
# Make it hover over position and then get the corresponding data table!
###################################################################################
###########################################
LogoPlotMSA <- function(msaSeq_mut # chr vector
, msaSeq_wt # chr vector
#, msa_method = c("custom") # can be "bits", "probability" or "custom"
, logo_type = c("EDLogo") #"bits_pfm", "probability_pfm", "bits_raw", "probability_raw") # can be "bits", "probability" or "custom"
, logo_type = c("EDLogo") #"bits_pfm", "probability_pfm", "bits_raw", "probability_raw")
, EDScore_type = c("log") # see if this relevant, or source function should have it!
, bg_prob = NULL
, my_logo_col = "chemistry"

View file

@ -1,46 +1,52 @@
#logo plots
# Input:
# Data:
# plot_df: merged_df3 containing the OR column to use as y-axis or any other relevant column
# create functions
# x_axis_colname = "position"
# y_axis_colname = "or_mychisq"
# symbol_colname = "mutant_type"
# y_axis_log = F
# log_value = log10
# if used, y-axis label has "Log" appended to it
# one with OR
# --> select/drop down option to remove empty positions
# --> select/drop down option for colour
# --> if clustalx and taylor, set variable to black bg + white font
# --> if chemistry and hydrophobicity, then grey bg + black font
# my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor")
# --> if clustalx and taylor, set variable to black bg + white font
# --> if chemistry and hydrophobicity, then grey bg + black font
# --> select/drop down option for log scale
# --> should include WT
# rm_empty_y = F
# option to remove empty positions i.e positions with no associated y-val
# one for multiple muts
# --> select/drop down option to filter count of nsSNPs
# --> select/drop down option for colour
# --> should include WT
# y_axis_log = F
# option to use log scale
# FIXME Minor bug: if used with rm_empty_y, sometimes the labels are too small to render(!?)
# so positions appear empty despite having y-vals
# ...other params
# Returns: Logo plot from combined data containing specific y-value such as OR, etc by position.
# TODO: SHINY
# select/drop down option to remove empty positions
# select/drop down option for colour
# select/drop down option for log scale
# include WT
# Make it hover over position and then get the corresponding data table!
#%%======================================================================
####################################################################################
#==================
# logo data: OR
#==================
# NOTE: my_logo_col
# Can be one of these: 'chemistry', 'hydrophobicity', 'clustalx', 'taylor'
# if 'chemistry' or 'hydrophobicity' --> then bg is grey with black font (x, y and labels)
# if 'clustalx' or 'taylor' --> then bg is black with white font (x, y and labels)
#, theme_bgc =
#, xfont_bgc =
#, yfont_bgc =
#, xtt_col =
#, ytt_col =
LogoPlotCustomH <- function(plot_df
, x_axis_colname = "position"
, y_axis_colname = "or_mychisq"
, symbol_colname = "mutant_type"
, my_logo_col = "chemistry"
, rm_empty_y = F
, y_axis_log = F
, log_value = log10
, y_axis_increment = 5
, rm_empty_y = F
, my_logo_col = "chemistry"
, x_lab = "Position"
, y_lab = "Odds Ratio"
, x_ats = 12 # text size

View file

@ -1,17 +1,29 @@
#logo plots
####################################################################################
# Input:
# Data
# plot_df: merged_df3 containing the OR column to use as y-axis or any other relevant column
# one for multiple muts
# --> select/drop down option to filter count of nsSNPs
# --> select/drop down option for colour
# --> should include WT
# x_axis_colname = "position"
# symbol_mut_colname = "mutant_type"
# symbol_wt_colname = "wild_type"
# omit_snp_count = c(0, 1, 2...) can be used to filter positions with specified snp count
# Data used
# my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor")
# --> if clustalx and taylor, set variable to black bg + white font
# --> if chemistry and hydrophobicity, then grey bg + black font
#tab_mt # mutant logo plot
#tab_wt # wt logo plot
# ...other params
# Returns: Logo plot from combined data containing all nsSNPs per position.
# Helps to see the overview of SNP diversity
# TODO: SHINY
# select/drop down: omit_snp_count
# select/drop down: my_logo_col
# should include WT??
# Make it hover over position and then get the corresponding data table!
#%%======================================================================
####################################################################################
#==================
# logo data: OR

View file

@ -12,29 +12,29 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# mainly OR
# script: logoP_or.R
################################
# LogoPlotCustomH (plot_df = merged_df3
# , x_axis_colname = "position"
# , y_axis_colname = "or_mychisq"
# , symbol_colname = "mutant_type"
# , y_axis_log = T
# , log_value = log10
# , y_axis_increment = 100
# , rm_empty_y = T
# , my_logo_col = 'chemistry'
# , x_lab = "Wild-type position"
# , y_lab = "Odds Ratio"
# , x_ats = 10 # text size
# , x_tangle = 90 # text angle
# , y_ats = 22
# , y_tangle = 0
# , x_tts = 19 # title size
# , y_tts = 22
# #, leg_pos = c(0.05,-0.12)
# , leg_pos = "top"
# , leg_dir = "horizontal"
# , leg_ts = 15 # leg text size
# , leg_tts = 16 # leg title size
# )
# Test call: odds-ratio logo plot by position, drawn with LogoPlotCustomH().
# NOTE(review): merged_df3 is presumably created by the sourced
# get_plotting_dfs.R -- confirm before running this call on its own.
LogoPlotCustomH(
  plot_df = merged_df3,
  x_axis_colname = "position",
  y_axis_colname = "or_mychisq",
  symbol_colname = "mutant_type",
  # Spell out TRUE rather than T: T is an ordinary variable that can be
  # reassigned, TRUE is a reserved word.
  y_axis_log = TRUE,
  # Function applied for the log scale; passed as a function object.
  log_value = log10,
  y_axis_increment = 100,
  rm_empty_y = TRUE,
  my_logo_col = "chemistry",
  x_lab = "Wild-type position",
  y_lab = "Odds Ratio",
  x_ats = 10, # text size
  x_tangle = 90, # text angle
  y_ats = 22,
  y_tangle = 0,
  x_tts = 19, # title size
  y_tts = 22,
  # Alternative legend placement by coordinates: leg_pos = c(0.05, -0.12)
  leg_pos = "top",
  leg_dir = "horizontal",
  leg_ts = 15, # leg text size
  leg_tts = 16 # leg title size
)
########################################
@ -42,25 +42,25 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# wild-type and mutant aa
# script: logoP_snp.R
########################################
# LogoPlotSnps(plot_df = merged_df3
# , x_axis_colname = "position"
# , symbol_mut_colname = "mutant_type"
# , symbol_wt_colname = "wild_type"
# , omit_snp_count = c(1)# can be 0,1, 2, etc.
# , my_logo_col = "chemistry"
# , x_lab = "Wild-type position"
# , y_lab = "nsSNP count"
# , x_ats = 10 # text size
# , x_tangle = 90 # text angle
# , y_ats = 18
# , y_tangle = 0
# , x_tts = 18 # title size
# , y_tts = 18
# , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
# , leg_dir = "horizontal" # can be vertical or horizontal
# , leg_ts = 14 # leg text size
# , leg_tts = 16 # leg title size
# )
# Test call: nsSNP logo plot by position, drawn with LogoPlotSnps(), using
# the mutant and wild-type symbol columns of merged_df3.
# NOTE(review): merged_df3 is presumably created by the sourced
# get_plotting_dfs.R -- confirm before running this call on its own.
LogoPlotSnps(
  plot_df = merged_df3,
  x_axis_colname = "position",
  symbol_mut_colname = "mutant_type",
  symbol_wt_colname = "wild_type",
  # SNP counts to filter on; can be 0, 1, 2, etc.
  omit_snp_count = c(1),
  my_logo_col = "chemistry",
  x_lab = "Wild-type position",
  y_lab = "nsSNP count",
  # Axis text: size and angle
  x_ats = 10,
  x_tangle = 90,
  y_ats = 18,
  y_tangle = 0,
  # Axis title sizes
  x_tts = 18,
  y_tts = 18,
  # Legend: position can be top, left, right and bottom or c(0.8, 0.9);
  # direction can be vertical or horizontal
  leg_pos = "top",
  leg_dir = "horizontal",
  leg_ts = 14, # leg text size
  leg_tts = 16 # leg title size
)
####################################################
# Logo plot MSA