From a6f0832a42cd7947e801ea773c6ac7e88ebb9523 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Wed, 30 Jun 2021 17:35:57 +0100 Subject: [PATCH] reran plots with current lig dist --- scripts/plotting/barplots_subcolours.R | 149 ++++--------------------- scripts/plotting/get_plotting_dfs.R | 22 ++-- 2 files changed, 30 insertions(+), 141 deletions(-) diff --git a/scripts/plotting/barplots_subcolours.R b/scripts/plotting/barplots_subcolours.R index 80a4b53..4e4806a 100755 --- a/scripts/plotting/barplots_subcolours.R +++ b/scripts/plotting/barplots_subcolours.R @@ -28,8 +28,6 @@ if(is.null(drug)|is.null(gene)) { #=========== # Input #=========== -source("../functions/bp_subcolours.R") - source("get_plotting_dfs.R") #=========== @@ -44,58 +42,10 @@ bp_subcols_lig = "barplot_coloured_LIG.svg" plot_bp_subcols_lig = paste0(plotdir, "/", bp_subcols_lig) ############################################################################## -#==================== -# Data for plots: PS -#==================== -# sanity checks -str(my_df_u) -upos = unique(my_df_u$position) - -# should be a factor -if (is.factor(my_df_u$duet_outcome)){ - print("duet_outcome is factor") -}else{ - print("convert duet_outcome to factor") - my_df_u$duet_outcome = as.factor(my_df_u$duet_outcome) -} - -is.factor(my_df_u$duet_outcome) -table(my_df_u$duet_outcome) - -# should be -1 and 1 -min(my_df_u$duet_scaled) -max(my_df_u$duet_scaled) - -tapply(my_df_u$duet_scaled, my_df_u$duet_outcome, min) -tapply(my_df_u$duet_scaled, my_df_u$duet_outcome, max) - -#======================================================= -# Barplot (unordered) colour each nsSNP by stability -## My colour FUNCTION: based on group and subgroup -# in my case; -# df = df -# group = duet_outcome -# subgroup = normalised score i.e duet_scaled -#======================================================== -# check unique values in normalised data -u = unique(my_df_u$duet_scaled) -my_grp = my_df_u$duet_scaled #no rounding - -#------------------------------------------------- -# Run this section if rounding is to be used -#n = 3 -#my_df_u$duet_scaledR = round(my_df_u$duet_scaled, n) -#ur = unique(my_df_u$duet_scaledR) -#my_grp = my_df_u$duet_scaledR # rounding -#--------------------------------------------------- - -my_df_u$group <- paste0(my_df_u$duet_outcome, "_", my_grp, sep = "") - -# Call the function to create the palette based on the group defined above -colours <- ColourPalleteMulti(my_df_u, "duet_outcome", "my_grp") -print(paste0("Colour palette generated for: ", length(colours), " colours")) - - +#******************** +# generate plot: PS +# NO axis colours +#******************** # axis label size my_xaxls = 12 my_yaxls = 20 @@ -104,18 +54,15 @@ my_yaxls = 20 my_xaxts = 18 my_yaxts = 20 -my_title = "Protein stability (DUET)" -#******************** -# generate plot: PS -# NO axis colours -#******************** +title_ps = "Protein stability (DUET)" + print(paste0("plot name:", plot_bp_subcols_duet)) svg(plot_bp_subcols_duet, width = 26, height = 4) -g = ggplot(my_df_u, aes(factor(position, ordered = T))) +g = ggplot(subcols_df_ps, aes(factor(position, ordered = T))) outPlot_bp_ps = g + geom_bar(aes(fill = group), colour = "grey") + - scale_fill_manual( values = colours + scale_fill_manual( values = subcols_ps , guide = "none") + theme( axis.text.x = element_text(size = my_xaxls , angle = 90 @@ -128,7 +75,7 @@ outPlot_bp_ps = g + , axis.title.x = element_text(size = my_xaxts) , axis.title.y = element_text(size = my_yaxts ) ) + labs(title = "" - #title = my_title + #title = title_ps , x = "Position" , y = "Frequency") @@ -136,66 +83,10 @@ print(outPlot_bp_ps) dev.off() #################################################### -#==================== -# Data for plots: LIG -#==================== -# sanity checks -str(my_df_u_lig) -upos = unique(my_df_u_lig$position) - -# should be a factor -if (is.factor(my_df_u_lig$ligand_outcome)){ - print("ligand_outcome is factor") -}else{ - print("convert ligand_outcome to factor") - my_df_u_lig$ligand_outcome = as.factor(my_df_u_lig$ligand_outcome) -} - -is.factor(my_df_u_lig$ligand_outcome) -table(my_df_u_lig$ligand_outcome) - -# should be -1 and 1 -min(my_df_u_lig$affinity_scaled) -max(my_df_u_lig$affinity_scaled) - -tapply(my_df_u_lig$affinity_scaled, my_df_u_lig$ligand_outcome, min) -tapply(my_df_u_lig$affinity_scaled, my_df_u_lig$ligand_outcome, max) - -#======================================================= -# Barplot (unordered) colour each nsSNP by stability -## My colour FUNCTION: based on group and subgroup -# in my case; -# df = my_df_u_lig -# group = ligand_outcome -# subgroup = normalised score i.e affinity_scaled -#======================================================== -# check unique values in normalised data -u_lig = unique(my_df_u_lig$affinity_scaled) -my_grp_lig = my_df_u_lig$affinity_scaled #no rounding - -#------------------------------------------------- -# Run this section if rounding is to be used -#n = 3 -#my_df_u_lig$affinity_scaledR = round(my_df_u_lig$affinity_scaled, n) -#ur_lig = unique(my_df_u_lig$affinity_scaledR) -#my_grp_lig = my_df_u_lig$affinity_scaledR # rounding -#--------------------------------------------------- - -my_df_u_lig$group_lig <- paste0(my_df_u_lig$ligand_outcome, "_" - , my_grp_lig - , sep = "") - -# Call the function to create the palette based on the group defined above -colours_lig <- ColourPalleteMulti(my_df_u_lig - , "ligand_outcome" - , "my_grp_lig") - -print(paste0("Colour palette generated for: " - , length(colours_lig) - , " colours_lig")) - -my_title_lig = "Ligand Affinity" - +#****************** +# generate plot: LIG +# NO axis colours +#****************** # axis label size my_xaxls = 12 my_yaxls = 20 @@ -204,17 +95,15 @@ my_yaxls = 20 my_xaxts = 18 my_yaxts = 20 -#****************** -# generate plot: LIG -# NO axis colours -#****************** +title_lig = "Ligand Affinity" + print(paste0("plot name:", plot_bp_subcols_lig)) svg(plot_bp_subcols_lig, width = 26, height = 4) -g2 = ggplot(my_df_u_lig, aes(factor(position, ordered = T))) +g2 = ggplot(subcols_df_lig, aes(factor(position, ordered = T))) outPlot_bp_lig = g2 + geom_bar(aes(fill = group_lig), colour = "grey") + - scale_fill_manual( values = colours_lig + scale_fill_manual( values = subcols_lig , guide = "none") + theme( axis.text.x = element_text(size = my_xaxls , angle = 90 @@ -227,7 +116,7 @@ outPlot_bp_lig = g2 + , axis.title.x = element_text(size = my_xaxts) , axis.title.y = element_text(size = my_yaxts ) ) + labs(title = "" - #title = my_title_lig + #title = title_lig , x = "Position" , y = "Frequency") @@ -235,4 +124,4 @@ print(outPlot_bp_lig) dev.off() ######################################################################= # End of script -######################################################################= +######################################################################= \ No newline at end of file diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R index 6261dba..299baa1 100644 --- a/scripts/plotting/get_plotting_dfs.R +++ b/scripts/plotting/get_plotting_dfs.R @@ -39,8 +39,8 @@ import_dirs(drug, gene) #--------------------------- # call: plotting_data() #--------------------------- -if (!exists("infile_params") && exists("gene")){ -#if (!is.character(infile_params) && exists("gene")){ # when running as cmd +#if (!exists("infile_params") && exists("gene")){ +if (!is.character(infile_params) && exists("gene")){ # when running as cmd #in_filename_params = paste0(tolower(gene), "_all_params.csv") in_filename_params = paste0(tolower(gene), "_comb_afor.csv") # part combined for gid infile_params = paste0(outdir, "/", in_filename_params) @@ -62,8 +62,8 @@ dup_muts = pd_df[[4]] #-------------------------------- # call: combining_dfs_plotting() #-------------------------------- -if (!exists("infile_metadata") && exists("gene")){ -#if (!is.character(infile_metadata) && exists("gene")){ # when running as cmd +#if (!exists("infile_metadata") && exists("gene")){ +if (!is.character(infile_metadata) && exists("gene")){ # when running as cmd in_filename_metadata = paste0(tolower(gene), "_metadata.csv") # part combined for gid infile_metadata = paste0(outdir, "/", in_filename_metadata) cat("\nInput file for gene metadata not specified, assuming filename: ", infile_metadata, "\n") @@ -95,19 +95,19 @@ merged_df3_comp_lig = all_plot_dfs[[8]] #################################################################### # can include: mutation, or_kin, pwald, af_kin cols_to_select = c("mutationinformation", "drtype" - #, "wild_type" + , "wild_type" , "position" - #, "mutant_type" + , "mutant_type" , "chain", "ligand_id", "ligand_distance" , "duet_stability_change", "duet_outcome", "duet_scaled" , "ligand_affinity_change", "ligand_outcome", "affinity_scaled" , "ddg", "foldx_scaled", "foldx_outcome" , "deepddg", "deepddg_outcome" - , "asa", "rsa", "rd_values", "kd_values") - #, "af", "or_mychisq", "pval_fisher" - #, "or_fisher", "or_logistic", "pval_logistic") - #, "wt_prop_water", "mut_prop_water", "wt_prop_polarity", "mut_prop_polarity" - #, "wt_calcprop", "mut_calcprop") + , "asa", "rsa", "rd_values", "kd_values" + , "af", "or_mychisq", "pval_fisher" + , "or_fisher", "or_logistic", "pval_logistic" + , "wt_prop_water", "mut_prop_water", "wt_prop_polarity", "mut_prop_polarity" + , "wt_calcprop", "mut_calcprop") #======================= # Data for sub colours