diff --git a/scripts/plotting/barplots_subcolours_PS.R b/scripts/plotting/barplots_subcolours_PS.R index d5b4e3f..9d79eb0 100644 --- a/scripts/plotting/barplots_subcolours_PS.R +++ b/scripts/plotting/barplots_subcolours_PS.R @@ -1,5 +1,5 @@ getwd() -setwd('~/git/LSHTM_analysis/scripts/plotting') +setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() ######################################################### @@ -11,8 +11,8 @@ getwd() # Installing and loading required packages and functions # ######################################################################## -source('Header_TT.R') -source('barplot_colour_function.R') +source("Header_TT.R") +source("barplot_colour_function.R") ######################################################################## # Read file: call script for combining df for PS # @@ -21,43 +21,43 @@ source('barplot_colour_function.R') # ######################################################## #%% variable assignment: input and output paths & filenames -drug = 'pyrazinamide' -gene = 'pncA' -gene_match = paste0(gene,'_p.') +drug = "pyrazinamide" +gene = "pncA" +gene_match = paste0(gene,"_p.") cat(gene_match) #============= # directories #============= -datadir = paste0('~/git/Data') -indir = paste0(datadir, '/', drug, '/input') -outdir = paste0('~/git/Data', '/', drug, '/output') +datadir = paste0("~/git/Data") +indir = paste0(datadir, "/", drug, "/input") +outdir = paste0("~/git/Data", "/", drug, "/output") #====== # input #====== -#in_filename = 'mcsm_complex1_normalised.csv' -in_filename_params = paste0(tolower(gene), '_all_params.csv') -infile_params = paste0(outdir, '/', in_filename_params) -cat(paste0('Input file:', infile_params) ) +#in_filename = "mcsm_complex1_normalised.csv" +in_filename_params = paste0(tolower(gene), "_all_params.csv") +infile_params = paste0(outdir, "/", in_filename_params) +cat(paste0("Input file:", infile_params) ) #======= # output #======= -subcols_bp_duet = 'barplot_subcols_DUET.svg' -outPlot_subcols_bp_duet = 
paste0(outdir, '/plots/', subcols_bp_duet) +subcols_bp_duet = "barplot_subcols_DUET.svg" +outPlot_subcols_bp_duet = paste0(outdir, "/plots/", subcols_bp_duet) #%%=============================================================== ########################### # Read file: struct params ########################### -cat('Reading struct params including mcsm:', in_filename_params) +cat("Reading struct params including mcsm:", in_filename_params) my_df = read.csv(infile_params #, stringsAsFactors = F , header = T) -cat('Input dimensions:', dim(my_df)) +cat("Input dimensions:", dim(my_df)) # clear variables rm(in_filename_params, infile_params) @@ -68,22 +68,22 @@ str(my_df) # check for duplicate mutations if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformation)){ - cat(paste0('CAUTION:', ' Duplicate mutations identified' - , '\nExtracting these...')) + cat(paste0("CAUTION:", " Duplicate mutations identified" + , "\nExtracting these...")) dup_muts = my_df[duplicated(my_df$mutationinformation),] dup_muts_nu = length(unique(dup_muts$mutationinformation)) - cat(paste0('\nDim of duplicate mutation df:', nrow(dup_muts) - , '\nNo. of unique duplicate mutations:', dup_muts_nu - , '\n\nExtracting df with unique mutations only')) + cat(paste0("\nDim of duplicate mutation df:", nrow(dup_muts) + , "\nNo. of unique duplicate mutations:", dup_muts_nu + , "\n\nExtracting df with unique mutations only")) my_df_u = my_df[!duplicated(my_df$mutationinformation),] }else{ - cat(paste0('No duplicate mutations detected')) + cat(paste0("No duplicate mutations detected")) my_df_u = my_df } upos = unique(my_df_u$position) -cat('Dim of clean df:'); cat(dim(my_df_u)) -cat('\nNo. of unique mutational positions:'); cat(length(upos)) +cat("Dim of clean df:"); cat(dim(my_df_u)) +cat("\nNo. 
of unique mutational positions:"); cat(length(upos)) ######################################################################## # end of data extraction and cleaning for plots # @@ -154,7 +154,7 @@ df$group <- paste0(df$duet_outcome, "_", my_grp, sep = "") # Call the function to create the palette based on the group defined above colours <- ColourPalleteMulti(df, "duet_outcome", "my_grp") -print(paste0('Colour palette generated for: ', length(colours), ' colours')) +print(paste0("Colour palette generated for: ", length(colours), " colours")) my_title = "Protein stability (DUET)" # axis label size @@ -170,10 +170,10 @@ my_yaxts = 15 # no ordering of x-axis #****************** # plot name and location -print(paste0('plot will be in:', outdir)) +print(paste0("plot will be in:", outdir)) bp_subcols_duet = "barplot_coloured_PS.svg" plot_bp_subcols_duet = paste0(outdir, "/plots/", bp_subcols_duet) -print(paste0('plot name:', plot_bp_subcols_duet)) +print(paste0("plot name:", plot_bp_subcols_duet)) svg(plot_bp_subcols_duet, width = 26, height = 4) @@ -181,7 +181,7 @@ g = ggplot(df, aes(factor(position, ordered = T))) outPlot = g + geom_bar(aes(fill = group), colour = "grey") + scale_fill_manual( values = colours - , guide = 'none') + + , guide = "none") + theme( axis.text.x = element_text(size = my_xaxls , angle = 90 , hjust = 1 diff --git a/scripts/plotting/barplots_subcolours_aa_PS.R b/scripts/plotting/barplots_subcolours_aa_PS.R index 9a57f11..f29c872 100644 --- a/scripts/plotting/barplots_subcolours_aa_PS.R +++ b/scripts/plotting/barplots_subcolours_aa_PS.R @@ -1,5 +1,5 @@ getwd() -setwd('~/git/LSHTM_analysis/scripts/plotting') +setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() ######################################################### @@ -11,8 +11,8 @@ getwd() # 1: Installing and loading required packages and functions ############################################################ -#source('Header_TT.R') -source('barplot_colour_function.R') +#source("Header_TT.R") 
+source("barplot_colour_function.R") ############################################################ # 2: Read file: struct params data with columns containing @@ -134,7 +134,7 @@ df$group <- paste0(df$duet_outcome, "_", my_grp, sep = "") # Call the function to create the palette based on the group defined above colours <- ColourPalleteMulti(df, "duet_outcome", "my_grp") -print(paste0('Colour palette generated for: ', length(colours), ' colours')) +print(paste0("Colour palette generated for: ", length(colours), " colours")) my_title = "Protein stability (DUET)" #======================== @@ -158,12 +158,12 @@ my_yats = 18 #****************** # plot name and location # outdir/ (should be imported from reading file) -print(paste0('plot will be in:', outdir)) +print(paste0("plot will be in:", outdir)) bp_aa_subcols_duet = "barplot_acoloured_PS.svg" plot_bp_aa_subcols_duet = paste0(outdir, "/plots/", bp_aa_subcols_duet) -print(paste0('plot name:', plot_bp_aa_subcols_duet)) +print(paste0("plot name:", plot_bp_aa_subcols_duet)) svg(plot_bp_aa_subcols_duet, width = 26, height = 4) @@ -176,13 +176,13 @@ outPlot = g + , clip = "off") + geom_bar(aes(fill = group), colour = "grey") + scale_fill_manual(values = colours - , guide = 'none') + + , guide = "none") + geom_tile(aes(,-0.8, width = 0.95, height = 0.85) , fill = df$lab_bg) + geom_tile(aes(,-1.2, width = 0.95, height = -0.2) , fill = df$lab_bg2) + -# Here it's important to specify that your axis goes from 1 to max number of levels +# Here it's important to specify that your axis goes from 1 to max number of levels theme(axis.text.x = element_text(size = my_xats , angle = 90 , hjust = 1 diff --git a/scripts/plotting/basic_barplots_PS.R b/scripts/plotting/basic_barplots_PS.R index e4bbc04..f2a26fc 100644 --- a/scripts/plotting/basic_barplots_PS.R +++ b/scripts/plotting/basic_barplots_PS.R @@ -1,5 +1,5 @@ getwd() -setwd('~/git/LSHTM_analysis/scripts/plotting') +setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() 
######################################################### @@ -22,48 +22,48 @@ getwd() ######################################################### #%% variable assignment: input and output paths & filenames -drug = 'pyrazinamide' -gene = 'pncA' -gene_match = paste0(gene,'_p.') +drug = "pyrazinamide" +gene = "pncA" +gene_match = paste0(gene,"_p.") cat(gene_match) #============= # directories #============= -datadir = paste0('~/git/Data') -indir = paste0(datadir, '/', drug, '/input') -outdir = paste0('~/git/Data', '/', drug, '/output') +datadir = paste0("~/git/Data") +indir = paste0(datadir, "/", drug, "/input") +outdir = paste0("~/git/Data", "/", drug, "/output") #====== # input #====== -#in_filename = 'mcsm_complex1_normalised.csv' -in_filename_params = paste0(tolower(gene), '_all_params.csv') -infile_params = paste0(outdir, '/', in_filename_params) -cat(paste0('Input file 1:', infile_params) ) +#in_filename = "mcsm_complex1_normalised.csv" +in_filename_params = paste0(tolower(gene), "_all_params.csv") +infile_params = paste0(outdir, "/", in_filename_params) +cat(paste0("Input file 1:", infile_params) ) #======= # output #======= # plot 1 -basic_bp_duet = 'basic_barplot_PS.svg' -plot_basic_bp_duet = paste0(outdir, '/plots/', basic_bp_duet) +basic_bp_duet = "basic_barplot_PS.svg" +plot_basic_bp_duet = paste0(outdir, "/plots/", basic_bp_duet) # plot 2 -pos_count_duet = 'position_count_PS.svg' -plot_pos_count_duet = paste0(outdir, '/plots/', pos_count_duet) +pos_count_duet = "position_count_PS.svg" +plot_pos_count_duet = paste0(outdir, "/plots/", pos_count_duet) #%%=============================================================== ########################### # Read file: struct params ########################### -cat('Reading struct params including mcsm:', in_filename_params) +cat("Reading struct params including mcsm:", in_filename_params) my_df = read.csv(infile_params #, stringsAsFactors = F , header = T) -cat('Input dimensions:', dim(my_df)) +cat("Input dimensions:", 
dim(my_df)) # clear variables rm(in_filename_params, infile_params) @@ -74,22 +74,22 @@ str(my_df) # check for duplicate mutations if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformation)){ - cat(paste0('CAUTION:', ' Duplicate mutations identified' - , '\nExtracting these...')) + cat(paste0("CAUTION:", " Duplicate mutations identified" + , "\nExtracting these...")) dup_muts = my_df[duplicated(my_df$mutationinformation),] dup_muts_nu = length(unique(dup_muts$mutationinformation)) - cat(paste0('\nDim of duplicate mutation df:', nrow(dup_muts) - , '\nNo. of unique duplicate mutations:', dup_muts_nu - , '\n\nExtracting df with unique mutations only')) + cat(paste0("\nDim of duplicate mutation df:", nrow(dup_muts) + , "\nNo. of unique duplicate mutations:", dup_muts_nu + , "\n\nExtracting df with unique mutations only")) my_df_u = my_df[!duplicated(my_df$mutationinformation),] }else{ - cat(paste0('No duplicate mutations detected')) + cat(paste0("No duplicate mutations detected")) my_df_u = my_df } upos = unique(my_df_u$position) -cat('Dim of clean df:'); cat(dim(my_df_u)) -cat('\nNo. of unique mutational positions:'); cat(length(upos)) +cat("Dim of clean df:"); cat(dim(my_df_u)) +cat("\nNo. 
of unique mutational positions:"); cat(length(upos)) ######################################################################## # end of data extraction and cleaning for plots # @@ -109,9 +109,9 @@ library(ggplot2) #**************** # Plot 1:Count of stabilising and destabilsing muts #**************** -#svg('basic_barplots_PS.svg') +#svg("basic_barplots_PS.svg") svg(plot_basic_bp_duet) -print(paste0('plot filename:', basic_bp_duet)) +print(paste0("plot filename:", basic_bp_duet)) my_ats = 25 # axis text size my_als = 22 # axis label size @@ -138,7 +138,7 @@ prinfFile = g + geom_bar(aes(fill = duet_outcome) , plot.title = element_blank()) + labs(title = "" , y = "Number of SNPs" - #, fill='DUET Outcome' + #, fill="DUET Outcome" ) + scale_fill_discrete(name = "DUET Outcome" , labels = c("Destabilising", "Stabilising")) @@ -178,9 +178,9 @@ foo = select(df, mutationinformation #write.csv(foo, "/pos_count_freq.csv") #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -#svg('position_count_PS.svg') +#svg("position_count_PS.svg") svg(plot_pos_count_duet) -print(paste0('plot filename:', plot_pos_count_duet)) +print(paste0("plot filename:", plot_pos_count_duet)) my_ats = 25 # axis text size my_als = 22 # axis label size diff --git a/scripts/plotting/mcsm_mean_stability.R b/scripts/plotting/mcsm_mean_stability.R index 0f4db91..1281cae 100644 --- a/scripts/plotting/mcsm_mean_stability.R +++ b/scripts/plotting/mcsm_mean_stability.R @@ -1,5 +1,5 @@ getwd() -setwd('~/git/LSHTM_analysis/scripts/plotting') +setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() ######################################################### @@ -17,25 +17,25 @@ require(dplyr) #======================================================== #%% variable assignment: input and output paths & filenames -drug = 'pyrazinamide' -gene = 'pncA' -gene_match = paste0(gene,'_p.') +drug = "pyrazinamide" +gene = "pncA" +gene_match = paste0(gene,"_p.") cat(gene_match) #============= # directories #============= -datadir = paste0('~/git/Data') 
-indir = paste0(datadir, '/', drug, '/input') -outdir = paste0('~/git/Data', '/', drug, '/output') +datadir = paste0("~/git/Data") +indir = paste0(datadir, "/", drug, "/input") +outdir = paste0("~/git/Data", "/", drug, "/output") #====== # input #====== -#in_filename = 'mcsm_complex1_normalised.csv' -in_filename_params = paste0(tolower(gene), '_all_params.csv') -infile_params = paste0(outdir, '/', in_filename_params) -cat(paste0('Input file 1:', infile_params) ) +#in_filename = "mcsm_complex1_normalised.csv" +in_filename_params = paste0(tolower(gene), "_all_params.csv") +infile_params = paste0(outdir, "/", in_filename_params) +cat(paste0("Input file 1:", infile_params) ) #======= # output @@ -48,13 +48,13 @@ print(paste0("Output file:", outfile_mean_stability)) ########################### # Read file: struct params ########################### -cat('Reading struct params including mcsm:', in_filename_params) +cat("Reading struct params including mcsm:", in_filename_params) my_df = read.csv(infile_params #, stringsAsFactors = F , header = T) -cat('Input dimensions:', dim(my_df)) +cat("Input dimensions:", dim(my_df)) # clear variables rm(in_filename_params, infile_params) @@ -65,23 +65,23 @@ str(my_df) # check for duplicate mutations if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformation)){ - cat(paste0('CAUTION:', ' Duplicate mutations identified' - , '\nExtracting these...')) + cat(paste0("CAUTION:", " Duplicate mutations identified" + , "\nExtracting these...")) dup_muts = my_df[duplicated(my_df$mutationinformation),] dup_muts_nu = length(unique(dup_muts$mutationinformation)) - cat(paste0('\nDim of duplicate mutation df:', nrow(dup_muts) - , '\nNo. of unique duplicate mutations:', dup_muts_nu - , '\n\nExtracting df with unique mutations only')) + cat(paste0("\nDim of duplicate mutation df:", nrow(dup_muts) + , "\nNo. 
of unique duplicate mutations:", dup_muts_nu + , "\n\nExtracting df with unique mutations only")) my_df_u = my_df[!duplicated(my_df$mutationinformation),] }else{ - cat(paste0('No duplicate mutations detected')) + cat(paste0("No duplicate mutations detected")) my_df_u = my_df } upos = unique(my_df_u$position) -cat('Dim of clean df:') +cat("Dim of clean df:") cat(dim(my_df_u)) -cat('\nNo. of unique mutational positions:'); cat(length(upos)) +cat("\nNo. of unique mutational positions:"); cat(length(upos)) ######################################################################## # end of data extraction and cleaning for plots # @@ -160,4 +160,4 @@ cat("Finished writing file:\n" , "\nNo. of cols:", ncol(combined_df)) # end of script -#=============================================================== \ No newline at end of file +#=============================================================== diff --git a/scripts/plotting/subcols_axis_PS.R b/scripts/plotting/subcols_axis_PS.R index 2e930af..5b9b0f4 100644 --- a/scripts/plotting/subcols_axis_PS.R +++ b/scripts/plotting/subcols_axis_PS.R @@ -1,5 +1,5 @@ getwd() -setwd('~/git/LSHTM_analysis/scripts/plotting') +setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() ######################################################### @@ -11,8 +11,8 @@ getwd() # Installing and loading required packages and functions # ######################################################################## -#source('Header_TT.R') -#source('barplot_colour_function.R') +#source("Header_TT.R") +#source("barplot_colour_function.R") ######################################################################## # Read file: call script for combining df for PS # @@ -21,25 +21,25 @@ getwd() # ######################################################## #%% variable assignment: input and output paths & filenames -drug = 'pyrazinamide' -gene = 'pncA' -gene_match = paste0(gene,'_p.') +drug = "pyrazinamide" +gene = "pncA" +gene_match = paste0(gene,"_p.") cat(gene_match) 
#============= # directories #============= -datadir = paste0('~/git/Data') -indir = paste0(datadir, '/', drug, '/input') -outdir = paste0('~/git/Data', '/', drug, '/output') +datadir = paste0("~/git/Data") +indir = paste0(datadir, "/", drug, "/input") +outdir = paste0("~/git/Data", "/", drug, "/output") #====== # input #====== -#in_filename = 'mcsm_complex1_normalised.csv' -in_filename_params = paste0(tolower(gene), '_all_params.csv') -infile_params = paste0(outdir, '/', in_filename_params) -cat(paste0('Input file:', infile_params) ) +#in_filename = "mcsm_complex1_normalised.csv" +in_filename_params = paste0(tolower(gene), "_all_params.csv") +infile_params = paste0(outdir, "/", in_filename_params) +cat(paste0("Input file:", infile_params) ) #======= # output @@ -50,13 +50,13 @@ cat(paste0('Input file:', infile_params) ) ########################### # Read file: struct params ########################### -cat('Reading struct params including mcsm:', in_filename_params) +cat("Reading struct params including mcsm:", in_filename_params) my_df = read.csv(infile_params #, stringsAsFactors = F , header = T) -cat('Input dimensions:', dim(my_df)) +cat("Input dimensions:", dim(my_df)) # clear variables rm(in_filename_params, infile_params) @@ -67,22 +67,22 @@ str(my_df) # check for duplicate mutations if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformation)){ - cat(paste0('CAUTION:', ' Duplicate mutations identified' - , '\nExtracting these...')) + cat(paste0("CAUTION:", " Duplicate mutations identified" + , "\nExtracting these...")) dup_muts = my_df[duplicated(my_df$mutationinformation),] dup_muts_nu = length(unique(dup_muts$mutationinformation)) - cat(paste0('\nDim of duplicate mutation df:', nrow(dup_muts) - , '\nNo. of unique duplicate mutations:', dup_muts_nu - , '\n\nExtracting df with unique mutations only')) + cat(paste0("\nDim of duplicate mutation df:", nrow(dup_muts) + , "\nNo. 
of unique duplicate mutations:", dup_muts_nu + , "\n\nExtracting df with unique mutations only")) my_df_u = my_df[!duplicated(my_df$mutationinformation),] }else{ - cat(paste0('No duplicate mutations detected')) + cat(paste0("No duplicate mutations detected")) my_df_u = my_df } upos = unique(my_df_u$position) -cat('Dim of clean df:'); cat(dim(my_df_u)) -cat('\nNo. of unique mutational positions:'); cat(length(upos)) +cat("Dim of clean df:"); cat(dim(my_df_u)) +cat("\nNo. of unique mutational positions:"); cat(length(upos)) #====================================================== # create a new df with unique position numbers and cols position = unique(my_df$position) #130 @@ -174,7 +174,7 @@ mut_pos_cols = merge(position_cols, aa_cols_ref , all.x = TRUE) head(mut_pos_cols) -# replace NA's +# replace NA's # :column "lab_bg" with "white" # : column "lab_fg" with "black" mut_pos_cols$lab_bg[is.na(mut_pos_cols$lab_bg)] <- "white"