diff --git a/scripts/plotting/barplots_subcolours_PS.R b/scripts/plotting/barplots_subcolours_PS.R index 9d79eb0..5e80eec 100644 --- a/scripts/plotting/barplots_subcolours_PS.R +++ b/scripts/plotting/barplots_subcolours_PS.R @@ -3,105 +3,71 @@ setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() ######################################################### -# TASK: - +# TASK: output barplot by position with each bar coloured by +# its stability value and NO coloured positions indicated ######################################################### +#======================================================================= -######################################################################## -# Installing and loading required packages and functions # -######################################################################## +############################################################ +# 1: Installing and loading required packages and functions +############################################################# -source("Header_TT.R") +#source("Header_TT.R") +library(ggplot2) +library(data.table) source("barplot_colour_function.R") +source("plotting_data.R") -######################################################################## -# Read file: call script for combining df for PS # -######################################################################## -#????????????? 
-# +# should return the following dfs, directories and variables +# mut_pos_cols +# my_df +# my_df_u +# my_df_u_lig +# dup_muts + +cat(paste0("Directories imported:" + , "\ndatadir:", datadir + , "\nindir:", indir + , "\noutdir:", outdir + , "\nplotdir:", plotdir)) + +cat(paste0("Variables imported:" + , "\ndrug:", drug + , "\ngene:", gene + , "\ngene_match:", gene_match + , "\nLength of upos:", length(upos) + , "\nAngstrom symbol:", angstroms_symbol)) + +# clear excess variable +rm(my_df, upos, dup_muts, my_df_u_lig) ######################################################## -#%% variable assignment: input and output paths & filenames -drug = "pyrazinamide" -gene = "pncA" -gene_match = paste0(gene,"_p.") -cat(gene_match) - -#============= -# directories -#============= -datadir = paste0("~/git/Data") -indir = paste0(datadir, "/", drug, "/input") -outdir = paste0("~/git/Data", "/", drug, "/output") - -#====== -# input -#====== -#in_filename = "mcsm_complex1_normalised.csv" -in_filename_params = paste0(tolower(gene), "_all_params.csv") -infile_params = paste0(outdir, "/", in_filename_params) -cat(paste0("Input file:", infile_params) ) #======= # output #======= -subcols_bp_duet = "barplot_subcols_DUET.svg" -outPlot_subcols_bp_duet = paste0(outdir, "/plots/", subcols_bp_duet) +print(paste0("plot will be in:", plotdir)) +bp_subcols_duet = "barplot_coloured_PS.svg" +plot_bp_subcols_duet = paste0(plotdir, "/", bp_subcols_duet) -#%%=============================================================== -########################### -# Read file: struct params -########################### -cat("Reading struct params including mcsm:", in_filename_params) - -my_df = read.csv(infile_params - #, stringsAsFactors = F - , header = T) - -cat("Input dimensions:", dim(my_df)) - -# clear variables -rm(in_filename_params, infile_params) - -# quick checks -colnames(my_df) -str(my_df) - -# check for duplicate mutations -if ( length(unique(my_df$mutationinformation)) != 
length(my_df$mutationinformation)){ - cat(paste0("CAUTION:", " Duplicate mutations identified" - , "\nExtracting these...")) - dup_muts = my_df[duplicated(my_df$mutationinformation),] - dup_muts_nu = length(unique(dup_muts$mutationinformation)) - cat(paste0("\nDim of duplicate mutation df:", nrow(dup_muts) - , "\nNo. of unique duplicate mutations:", dup_muts_nu - , "\n\nExtracting df with unique mutations only")) - my_df_u = my_df[!duplicated(my_df$mutationinformation),] -}else{ - cat(paste0("No duplicate mutations detected")) - my_df_u = my_df -} - -upos = unique(my_df_u$position) -cat("Dim of clean df:"); cat(dim(my_df_u)) -cat("\nNo. of unique mutational positions:"); cat(length(upos)) - -######################################################################## -# end of data extraction and cleaning for plots # -######################################################################## #=================== # Data for plots #=================== # REASSIGNMENT as necessary df = my_df_u -rm(my_df) - # sanity checks +str(df) upos = unique(df$position) # should be a factor -is.factor(my_df$duet_outcome) -#[1] TRUE +if (is.factor(df$duet_outcome)){ + print("duet_outcome is factor") +}else{ + print("convert duet_outcome to factor") + df$duet_outcome = as.factor(df$duet_outcome) +} + +is.factor(df$duet_outcome) table(df$duet_outcome) @@ -167,12 +133,7 @@ my_yaxts = 15 #****************** # generate plot: NO axis colours -# no ordering of x-axis #****************** -# plot name and location -print(paste0("plot will be in:", outdir)) -bp_subcols_duet = "barplot_coloured_PS.svg" -plot_bp_subcols_duet = paste0(outdir, "/plots/", bp_subcols_duet) print(paste0("plot name:", plot_bp_subcols_duet)) svg(plot_bp_subcols_duet, width = 26, height = 4) @@ -192,7 +153,8 @@ outPlot = g + , vjust = 0) , axis.title.x = element_text(size = my_xaxts) , axis.title.y = element_text(size = my_yaxts ) ) + - labs(title = my_title + labs(title = "" + #title = my_title , x = "position" , y = 
"Frequency") diff --git a/scripts/plotting/barplots_subcolours_aa_PS.R b/scripts/plotting/barplots_subcolours_aa_PS.R index f6444ce..84b5811 100644 --- a/scripts/plotting/barplots_subcolours_aa_PS.R +++ b/scripts/plotting/barplots_subcolours_aa_PS.R @@ -75,6 +75,15 @@ my_axis_colours = mut_pos_cols$lab_fg # now clear mut_pos_cols rm(mut_pos_cols) + +#======= +# output +#======= +# plot name and location +print(paste0("plot will be in:", plotdir)) +bp_aa_subcols_duet = "barplot_acoloured_PS.svg" +plot_bp_aa_subcols_duet = paste0(plotdir, "/", bp_aa_subcols_duet) + #======================================================================= #================ # Data for plots @@ -125,7 +134,13 @@ snp_count = sort(unique(snpsBYpos_df$snpsBYpos)) # sanity checks # should be a factor -df$duet_outcome = as.factor(df$duet_outcome) +if (is.factor(df$duet_outcome)){ + print("duet_outcome is factor") +}else{ + print("convert duet_outcome to factor") + df$duet_outcome = as.factor(df$duet_outcome) +} + is.factor(df$duet_outcome) table(df$duet_outcome) @@ -189,14 +204,6 @@ my_yats = 18 #****************** # generate plot: with axis colours #****************** -# plot name and location -# outdir/ (should be imported from reading file) -plotdir = paste0(outdir, "/", "plots") #should be imported from reading file -print(paste0("plot will be in:", plotdir)) - -bp_aa_subcols_duet = "barplot_acoloured_PS.svg" -plot_bp_aa_subcols_duet = paste0(outdir, "/plots/", bp_aa_subcols_duet) - print(paste0("plot name:", plot_bp_aa_subcols_duet)) svg(plot_bp_aa_subcols_duet, width = 26, height = 4) @@ -230,6 +237,7 @@ outPlot = g + , axis.title.y = element_text(size = my_yals ) , axis.ticks.x = element_blank()) + labs(title = "" + #title = my_title , x = "position" , y = "Frequency") diff --git a/scripts/plotting/running_plot_scripts b/scripts/plotting/running_plot_scripts index d2f8f50..a77cc90 100644 --- a/scripts/plotting/running_plot_scripts +++ b/scripts/plotting/running_plot_scripts @@ -3,20 
+3,75 @@ #======== #======================= -#1) plotting_data.R +plotting_data.R #======================= ??? update how to run #======================= -# mcsm_mean_stability.R +mcsm_mean_stability.R #======================= ??? update how to run # input: calls the "plotting_data.R" # output: _mean_stability.csv #====================== -# replaceBfactor_pdb.R +replaceBfactor_pdb.R #======================= # input: 2 files; pdb file and _mean_stability.csv (output from "mcsm_mean_stability.R") # output: 2 pdb files with bfactors replaced with 1) mean_duet and 2) mean_affinity values +#======================= +basic_barplots_PS.R +#======================= +# input: calls the "plotting_data.R" +output plots: 2 svgs + 1) basic_barplot_PS.svg + 2) position_count_PS.svg + +#======================= +barplot_colour_function.R +#======================= +function that generates stability colours (red to blue) based on the +corresponding value. It is sourced by other scripts. + +#======================= +subcols_axis_PS.R +# assigns colours to position numbers with bg and fg colours +# sourced by other scripts. +#======================= +# input: calls the "plotting_data.R" +# sourced by other scripts, no output per se!
+ +#======================= +barplots_subcolours_aa_PS.R +# barplot coloured by stability and position numbers coloured by active site colours +#======================= +# input: calls the "subcols_axis.R" and "barplot_colour_function.R" +# output plots: 1 svg + 1) barplot_acoloured_PS.svg + +#======================= +barplots_subcolours_PS.R +# same as the above script except no positional colouring +#======================= +# input: calls the "plotting_data.R" and "barplot_colour_function.R" +# output plots: 1 svg + 1) barplot_coloured_PS.svg + + + +######################################################################## +# ligand plots +######################################################################## + +#======================= +basic_barplots_LIG.R +# generates two basic barplots +# No. of stabilising and destabilising mutations for ligand_distance<10A +# No. of SNPs per site +#======================= +# input: calls the "plotting_data.R" +# output plots: 2 svgs + 1) basic_barplot_LIG.svg + 2) position_count_LIg.svg +