updated docs for the logo functions and tested all of them again

This commit is contained in:
Tanushree Tunstall 2022-01-26 15:53:53 +00:00
parent 2f7f40efb1
commit 7317156bba
5 changed files with 135 additions and 80 deletions

View file

@ -1,4 +1,3 @@
library(Logolas)
source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R") source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R")
##################################################################################### #####################################################################################
# DataED_PFM(): # DataED_PFM():

View file

@ -1,8 +1,46 @@
#####################################################################################
# LogoPlotMSA():
# Input:
# Data:
# msaSeq_mut: MSA chr vector for muts
# msaSeq_wt: MSA chr vector for wt
# Logo type params:
# logo_type = c("EDLogo", "bits_pfm", "probability_pfm", "bits_raw", "probability_raw")
# EDLogo: calculated from the Logolas package based on PFM matrix (scaled).
# The required content from the package is sourced locally within 'my_logolas.R'
# bits_pfm: Information Content based on PFM scaled matrix (my_logolas.R)
# probability_pfm: Probability based on PFM scaled matrix (my_logolas.R)
# bits_raw: Information Content based on Raw MSA (ggseqlogo)
# probability_raw: Probability based on Raw MSA (ggseqlogo)
# EDScore_type = c("log", "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL")
# bg_prob: background probability, default is equal i.e NULL.
# This is used by the internal call to DataED_PFM(). This func takes these args. I have exposed them here for
# completeness and to allow nuanced plot control
# my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor")
# --> if clustalx and taylor, set variable to black bg + white font
# --> if chemistry and hydrophobicity, then grey bg + black font
# ...other params
# Returns: Logo plots from MSA both mutant and wt (for comparability)
# For my case, I always use it as it helps see what is at the wild-type already!
# TODO: SHINY
# drop down: logo_type
# drop down: ED score type
# drop down/enter field : bg probability (in the actual plot function!)
# drop down: my_logo_col
# Make it hover over position and then get the corresponding data table!
###################################################################################
########################################### ###########################################
LogoPlotMSA <- function(msaSeq_mut # chr vector LogoPlotMSA <- function(msaSeq_mut # chr vector
, msaSeq_wt # chr vector , msaSeq_wt # chr vector
#, msa_method = c("custom") # can be "bits", "probability" or "custom" , logo_type = c("EDLogo") #"bits_pfm", "probability_pfm", "bits_raw", "probability_raw")
, logo_type = c("EDLogo") #"bits_pfm", "probability_pfm", "bits_raw", "probability_raw") # can be "bits", "probability" or "custom"
, EDScore_type = c("log") # see if this relevant, or source function should have it! , EDScore_type = c("log") # see if this relevant, or source function should have it!
, bg_prob = NULL , bg_prob = NULL
, my_logo_col = "chemistry" , my_logo_col = "chemistry"

View file

@ -1,46 +1,52 @@
#logo plots # Input:
# Data:
# plot_df: merged_df3 containing the OR column to use as y-axis or any other relevant column
# create functions # x_axis_colname = "position"
# y_axis_colname = "or_mychisq"
# symbol_colname = "mutant_type"
# y_axis_log = F
# log_value = log10
# if used, y-axis label has "Log" appended to it
# one with OR # my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor")
# --> select/drop down option to remove empty positions # --> if clustalx and taylor, set variable to black bg + white font
# --> select/drop down option for colour # --> if chemistry and hydrophobicity, then grey bg + black font
# --> if clustalx and taylor, set variable to black bg + white font
# --> if chemistry and hydrophobicity, then grey bg + black font
# --> select/drop down option for log scale # rm_empty_y = F
# --> should include WT # option to remove empty positions i.e. positions with no associated y-val
# one for multiple muts # y_axis_log = F
# --> select/drop down option to filter count of nsSNPs # option to use log scale
# --> select/drop down option for colour # FIXME Minor bug: if used with rm_empty_y, sometimes the labels are too small to render(!?)
# --> should include WT # so positions appear empty despite having y-vals
# ...other params
# Returns: Logo plot from combined data containing specific y-value such as OR, etc by position.
# TODO: SHINY
# select/drop down option to remove empty positions
# select/drop down option for colour
# select/drop down option for log scale
# include WT
# Make it hover over position and then get the corresponding data table! # Make it hover over position and then get the corresponding data table!
#%%====================================================================== ####################################################################################
#================== #==================
# logo data: OR # logo data: OR
#================== #==================
# NOTE: my_logo_col
# Can be one of these: 'chemistry', 'hydrophobicity', 'clustalx', 'taylor'
# if 'chemistry' or 'hydrophobicity' --> then bg is grey with black font (x, y and labels)
# if 'clustalx' or 'taylor' --> then bg is black with white font (x, y and labels)
#, theme_bgc =
#, xfont_bgc =
#, yfont_bgc =
#, xtt_col =
#, ytt_col =
LogoPlotCustomH <- function(plot_df LogoPlotCustomH <- function(plot_df
, x_axis_colname = "position" , x_axis_colname = "position"
, y_axis_colname = "or_mychisq" , y_axis_colname = "or_mychisq"
, symbol_colname = "mutant_type" , symbol_colname = "mutant_type"
, my_logo_col = "chemistry"
, rm_empty_y = F
, y_axis_log = F , y_axis_log = F
, log_value = log10 , log_value = log10
, y_axis_increment = 5 , y_axis_increment = 5
, rm_empty_y = F
, my_logo_col = "chemistry"
, x_lab = "Position" , x_lab = "Position"
, y_lab = "Odds Ratio" , y_lab = "Odds Ratio"
, x_ats = 12 # text size , x_ats = 12 # text size

View file

@ -1,17 +1,29 @@
#logo plots ####################################################################################
# Input:
# Data
# plot_df: merged_df3 containing the OR column to use as y-axis or any other relevant column
# one for multiple muts # x_axis_colname = "position"
# --> select/drop down option to filter count of nsSNPs # symbol_mut_colname = "mutant_type"
# --> select/drop down option for colour # symbol_wt_colname = "wild_type"
# --> should include WT # omit_snp_count = c(0, 1, 2...) can be used to filter positions with specified snp count
# Data used # my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor")
# --> if clustalx and taylor, set variable to black bg + white font
# --> if chemistry and hydrophobicity, then grey bg + black font
#tab_mt # mutant logo plot # ...other params
#tab_wt # wt logo plot
# Returns: Logo plot from combined data containing all nsSNPs per position.
# Helps to see the overview of SNP diversity
# TODO: SHINY
# select/drop down: omit_snp_count
# select/drop down: my_logo_col
# should include WT??
# Make it hover over position and then get the corresponding data table! # Make it hover over position and then get the corresponding data table!
#%%====================================================================== ####################################################################################
#================== #==================
# logo data: OR # logo data: OR

View file

@ -12,29 +12,29 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# mainly OR # mainly OR
# script: logoP_or.R # script: logoP_or.R
################################ ################################
# LogoPlotCustomH (plot_df = merged_df3 LogoPlotCustomH (plot_df = merged_df3
# , x_axis_colname = "position" , x_axis_colname = "position"
# , y_axis_colname = "or_mychisq" , y_axis_colname = "or_mychisq"
# , symbol_colname = "mutant_type" , symbol_colname = "mutant_type"
# , y_axis_log = T , y_axis_log = T
# , log_value = log10 , log_value = log10
# , y_axis_increment = 100 , y_axis_increment = 100
# , rm_empty_y = T , rm_empty_y = T
# , my_logo_col = 'chemistry' , my_logo_col = 'chemistry'
# , x_lab = "Wild-type position" , x_lab = "Wild-type position"
# , y_lab = "Odds Ratio" , y_lab = "Odds Ratio"
# , x_ats = 10 # text size , x_ats = 10 # text size
# , x_tangle = 90 # text angle , x_tangle = 90 # text angle
# , y_ats = 22 , y_ats = 22
# , y_tangle = 0 , y_tangle = 0
# , x_tts = 19 # title size , x_tts = 19 # title size
# , y_tts = 22 , y_tts = 22
# #, leg_pos = c(0.05,-0.12) #, leg_pos = c(0.05,-0.12)
# , leg_pos = "top" , leg_pos = "top"
# , leg_dir = "horizontal" , leg_dir = "horizontal"
# , leg_ts = 15 # leg text size , leg_ts = 15 # leg text size
# , leg_tts = 16 # leg title size , leg_tts = 16 # leg title size
# ) )
######################################## ########################################
@ -42,25 +42,25 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# wild-type and mutant aa # wild-type and mutant aa
# script: logoP_snp.R # script: logoP_snp.R
######################################## ########################################
# LogoPlotSnps(plot_df = merged_df3 LogoPlotSnps(plot_df = merged_df3
# , x_axis_colname = "position" , x_axis_colname = "position"
# , symbol_mut_colname = "mutant_type" , symbol_mut_colname = "mutant_type"
# , symbol_wt_colname = "wild_type" , symbol_wt_colname = "wild_type"
# , omit_snp_count = c(1)# can be 0,1, 2, etc. , omit_snp_count = c(1)# can be 0,1, 2, etc.
# , my_logo_col = "chemistry" , my_logo_col = "chemistry"
# , x_lab = "Wild-type position" , x_lab = "Wild-type position"
# , y_lab = "nsSNP count" , y_lab = "nsSNP count"
# , x_ats = 10 # text size , x_ats = 10 # text size
# , x_tangle = 90 # text angle , x_tangle = 90 # text angle
# , y_ats = 18 , y_ats = 18
# , y_tangle = 0 , y_tangle = 0
# , x_tts = 18 # title size , x_tts = 18 # title size
# , y_tts = 18 , y_tts = 18
# , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9)
# , leg_dir = "horizontal" # can be vertical or horizontal , leg_dir = "horizontal" # can be vertical or horizontal
# , leg_ts = 14 # leg text size , leg_ts = 14 # leg text size
# , leg_tts = 16 # leg title size , leg_tts = 16 # leg title size
# ) )
#################################################### ####################################################
# Logo plot MSA # Logo plot MSA