reran plots with current lig dist

This commit is contained in:
Tanushree Tunstall 2021-06-30 17:35:57 +01:00
parent b679068a5e
commit a6f0832a42
2 changed files with 30 additions and 141 deletions

View file

@ -28,8 +28,6 @@ if(is.null(drug)|is.null(gene)) {
#=========== #===========
# Input # Input
#=========== #===========
source("../functions/bp_subcolours.R")
source("get_plotting_dfs.R") source("get_plotting_dfs.R")
#=========== #===========
@ -44,58 +42,10 @@ bp_subcols_lig = "barplot_coloured_LIG.svg"
plot_bp_subcols_lig = paste0(plotdir, "/", bp_subcols_lig) plot_bp_subcols_lig = paste0(plotdir, "/", bp_subcols_lig)
############################################################################## ##############################################################################
#==================== #********************
# Data for plots: PS # generate plot: PS
#==================== # NO axis colours
# sanity checks #********************
str(my_df_u)
upos = unique(my_df_u$position)
# should be a factor
if (is.factor(my_df_u$duet_outcome)){
print("duet_outcome is factor")
}else{
print("convert duet_outcome to factor")
my_df_u$duet_outcome = as.factor(my_df_u$duet_outcome)
}
is.factor(my_df_u$duet_outcome)
table(my_df_u$duet_outcome)
# should be -1 and 1
min(my_df_u$duet_scaled)
max(my_df_u$duet_scaled)
tapply(my_df_u$duet_scaled, my_df_u$duet_outcome, min)
tapply(my_df_u$duet_scaled, my_df_u$duet_outcome, max)
#=======================================================
# Barplot (unordered) colour each nsSNP by stability
## My colour FUNCTION: based on group and subgroup
# in my case;
# df = df
# group = duet_outcome
# subgroup = normalised score i.e duet_scaled
#========================================================
# check unique values in normalised data
u = unique(my_df_u$duet_scaled)
my_grp = my_df_u$duet_scaled #no rounding
#-------------------------------------------------
# Run this section if rounding is to be used
#n = 3
#my_df_u$duet_scaledR = round(my_df_u$duet_scaled, n)
#ur = unique(my_df_u$duet_scaledR)
#my_grp = my_df_u$duet_scaledR # rounding
#---------------------------------------------------
my_df_u$group <- paste0(my_df_u$duet_outcome, "_", my_grp, sep = "")
# Call the function to create the palette based on the group defined above
colours <- ColourPalleteMulti(my_df_u, "duet_outcome", "my_grp")
print(paste0("Colour palette generated for: ", length(colours), " colours"))
# axis label size # axis label size
my_xaxls = 12 my_xaxls = 12
my_yaxls = 20 my_yaxls = 20
@ -104,18 +54,15 @@ my_yaxls = 20
my_xaxts = 18 my_xaxts = 18
my_yaxts = 20 my_yaxts = 20
my_title = "Protein stability (DUET)" title_ps = "Protein stability (DUET)"
#********************
# generate plot: PS
# NO axis colours
#********************
print(paste0("plot name:", plot_bp_subcols_duet)) print(paste0("plot name:", plot_bp_subcols_duet))
svg(plot_bp_subcols_duet, width = 26, height = 4) svg(plot_bp_subcols_duet, width = 26, height = 4)
g = ggplot(my_df_u, aes(factor(position, ordered = T))) g = ggplot(subcols_df_ps, aes(factor(position, ordered = T)))
outPlot_bp_ps = g + outPlot_bp_ps = g +
geom_bar(aes(fill = group), colour = "grey") + geom_bar(aes(fill = group), colour = "grey") +
scale_fill_manual( values = colours scale_fill_manual( values = subcols_ps
, guide = "none") + , guide = "none") +
theme( axis.text.x = element_text(size = my_xaxls theme( axis.text.x = element_text(size = my_xaxls
, angle = 90 , angle = 90
@ -128,7 +75,7 @@ outPlot_bp_ps = g +
, axis.title.x = element_text(size = my_xaxts) , axis.title.x = element_text(size = my_xaxts)
, axis.title.y = element_text(size = my_yaxts ) ) + , axis.title.y = element_text(size = my_yaxts ) ) +
labs(title = "" labs(title = ""
#title = my_title #title = title_ps
, x = "Position" , x = "Position"
, y = "Frequency") , y = "Frequency")
@ -136,66 +83,10 @@ print(outPlot_bp_ps)
dev.off() dev.off()
#################################################### ####################################################
#==================== #******************
# Data for plots: LIG # generate plot: LIG
#==================== # NO axis colours
# sanity checks #******************
str(my_df_u_lig)
upos = unique(my_df_u_lig$position)
# should be a factor
if (is.factor(my_df_u_lig$ligand_outcome)){
print("ligand_outcome is factor")
}else{
print("convert ligand_outcome to factor")
my_df_u_lig$ligand_outcome = as.factor(my_df_u_lig$ligand_outcome)
}
is.factor(my_df_u_lig$ligand_outcome)
table(my_df_u_lig$ligand_outcome)
# should be -1 and 1
min(my_df_u_lig$affinity_scaled)
max(my_df_u_lig$affinity_scaled)
tapply(my_df_u_lig$affinity_scaled, my_df_u_lig$ligand_outcome, min)
tapply(my_df_u_lig$affinity_scaled, my_df_u_lig$ligand_outcome, max)
#=======================================================
# Barplot (unordered) colour each nsSNP by stability
## My colour FUNCTION: based on group and subgroup
# in my case;
# df = my_df_u_lig
# group = ligand_outcome
# subgroup = normalised score i.e affinity_scaled
#========================================================
# check unique values in normalised data
u_lig = unique(my_df_u_lig$affinity_scaled)
my_grp_lig = my_df_u_lig$affinity_scaled #no rounding
#-------------------------------------------------
# Run this section if rounding is to be used
#n = 3
#my_df_u_lig$affinity_scaledR = round(my_df_u_lig$affinity_scaled, n)
#ur_lig = unique(my_df_u_lig$affinity_scaledR)
#my_grp_lig = my_df_u_lig$affinity_scaledR # rounding
#---------------------------------------------------
my_df_u_lig$group_lig <- paste0(my_df_u_lig$ligand_outcome, "_"
, my_grp_lig
, sep = "")
# Call the function to create the palette based on the group defined above
colours_lig <- ColourPalleteMulti(my_df_u_lig
, "ligand_outcome"
, "my_grp_lig")
print(paste0("Colour palette generated for: "
, length(colours_lig)
, " colours_lig"))
my_title_lig = "Ligand Affinity"
# axis label size # axis label size
my_xaxls = 12 my_xaxls = 12
my_yaxls = 20 my_yaxls = 20
@ -204,17 +95,15 @@ my_yaxls = 20
my_xaxts = 18 my_xaxts = 18
my_yaxts = 20 my_yaxts = 20
#****************** title_lig = "Ligand Affinity"
# generate plot: LIG
# NO axis colours
#******************
print(paste0("plot name:", plot_bp_subcols_lig)) print(paste0("plot name:", plot_bp_subcols_lig))
svg(plot_bp_subcols_lig, width = 26, height = 4) svg(plot_bp_subcols_lig, width = 26, height = 4)
g2 = ggplot(my_df_u_lig, aes(factor(position, ordered = T))) g2 = ggplot(subcols_df_lig, aes(factor(position, ordered = T)))
outPlot_bp_lig = g2 + outPlot_bp_lig = g2 +
geom_bar(aes(fill = group_lig), colour = "grey") + geom_bar(aes(fill = group_lig), colour = "grey") +
scale_fill_manual( values = colours_lig scale_fill_manual( values = subcols_lig
, guide = "none") + , guide = "none") +
theme( axis.text.x = element_text(size = my_xaxls theme( axis.text.x = element_text(size = my_xaxls
, angle = 90 , angle = 90
@ -227,7 +116,7 @@ outPlot_bp_lig = g2 +
, axis.title.x = element_text(size = my_xaxts) , axis.title.x = element_text(size = my_xaxts)
, axis.title.y = element_text(size = my_yaxts ) ) + , axis.title.y = element_text(size = my_yaxts ) ) +
labs(title = "" labs(title = ""
#title = my_title_lig #title = title_lig
, x = "Position" , x = "Position"
, y = "Frequency") , y = "Frequency")
@ -235,4 +124,4 @@ print(outPlot_bp_lig)
dev.off() dev.off()
######################################################################= ######################################################################=
# End of script # End of script
######################################################################= ######################################################################=

View file

@ -39,8 +39,8 @@ import_dirs(drug, gene)
#--------------------------- #---------------------------
# call: plotting_data() # call: plotting_data()
#--------------------------- #---------------------------
if (!exists("infile_params") && exists("gene")){ #if (!exists("infile_params") && exists("gene")){
#if (!is.character(infile_params) && exists("gene")){ # when running as cmd if (!is.character(infile_params) && exists("gene")){ # when running as cmd
#in_filename_params = paste0(tolower(gene), "_all_params.csv") #in_filename_params = paste0(tolower(gene), "_all_params.csv")
in_filename_params = paste0(tolower(gene), "_comb_afor.csv") # part combined for gid in_filename_params = paste0(tolower(gene), "_comb_afor.csv") # part combined for gid
infile_params = paste0(outdir, "/", in_filename_params) infile_params = paste0(outdir, "/", in_filename_params)
@ -62,8 +62,8 @@ dup_muts = pd_df[[4]]
#-------------------------------- #--------------------------------
# call: combining_dfs_plotting() # call: combining_dfs_plotting()
#-------------------------------- #--------------------------------
if (!exists("infile_metadata") && exists("gene")){ #if (!exists("infile_metadata") && exists("gene")){
#if (!is.character(infile_metadata) && exists("gene")){ # when running as cmd if (!is.character(infile_metadata) && exists("gene")){ # when running as cmd
in_filename_metadata = paste0(tolower(gene), "_metadata.csv") # part combined for gid in_filename_metadata = paste0(tolower(gene), "_metadata.csv") # part combined for gid
infile_metadata = paste0(outdir, "/", in_filename_metadata) infile_metadata = paste0(outdir, "/", in_filename_metadata)
cat("\nInput file for gene metadata not specified, assuming filename: ", infile_metadata, "\n") cat("\nInput file for gene metadata not specified, assuming filename: ", infile_metadata, "\n")
@ -95,19 +95,19 @@ merged_df3_comp_lig = all_plot_dfs[[8]]
#################################################################### ####################################################################
# can include: mutation, or_kin, pwald, af_kin # can include: mutation, or_kin, pwald, af_kin
cols_to_select = c("mutationinformation", "drtype" cols_to_select = c("mutationinformation", "drtype"
#, "wild_type" , "wild_type"
, "position" , "position"
#, "mutant_type" , "mutant_type"
, "chain", "ligand_id", "ligand_distance" , "chain", "ligand_id", "ligand_distance"
, "duet_stability_change", "duet_outcome", "duet_scaled" , "duet_stability_change", "duet_outcome", "duet_scaled"
, "ligand_affinity_change", "ligand_outcome", "affinity_scaled" , "ligand_affinity_change", "ligand_outcome", "affinity_scaled"
, "ddg", "foldx_scaled", "foldx_outcome" , "ddg", "foldx_scaled", "foldx_outcome"
, "deepddg", "deepddg_outcome" , "deepddg", "deepddg_outcome"
, "asa", "rsa", "rd_values", "kd_values") , "asa", "rsa", "rd_values", "kd_values"
#, "af", "or_mychisq", "pval_fisher" , "af", "or_mychisq", "pval_fisher"
#, "or_fisher", "or_logistic", "pval_logistic") , "or_fisher", "or_logistic", "pval_logistic"
#, "wt_prop_water", "mut_prop_water", "wt_prop_polarity", "mut_prop_polarity" , "wt_prop_water", "mut_prop_water", "wt_prop_polarity", "mut_prop_polarity"
#, "wt_calcprop", "mut_calcprop") , "wt_calcprop", "mut_calcprop")
#======================= #=======================
# Data for sub colours # Data for sub colours