From ac34de9e794cc0a16217ba8374da5cda7fbe1a3e Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Fri, 31 Jan 2020 15:30:08 +0000 Subject: [PATCH] added subaxis plots for PS and lig separately --- mcsm_analysis/pyrazinamide/scripts/.Rhistory | 437 +++++++++++++++--- .../plotting/barplots_subcolours_aa_PS.R | 8 +- .../scripts/plotting/subcols_axis_LIG.R | 208 +++++++++ .../{subcols_axis.R => subcols_axis_PS.R} | 29 +- 4 files changed, 601 insertions(+), 81 deletions(-) create mode 100644 mcsm_analysis/pyrazinamide/scripts/plotting/subcols_axis_LIG.R rename mcsm_analysis/pyrazinamide/scripts/plotting/{subcols_axis.R => subcols_axis_PS.R} (94%) diff --git a/mcsm_analysis/pyrazinamide/scripts/.Rhistory b/mcsm_analysis/pyrazinamide/scripts/.Rhistory index 715e7a4..efb7b8e 100644 --- a/mcsm_analysis/pyrazinamide/scripts/.Rhistory +++ b/mcsm_analysis/pyrazinamide/scripts/.Rhistory @@ -1,38 +1,18 @@ -getwd() -setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting") -getwd() -source("../Header_TT.R") -source("../barplot_colour_function.R") -############################################################ -# Output dir for plots -############################################################ -out_dir = "~/git/Data/pyrazinamide/output/plots" -source("subcols_axis.R") -table(mut_pos_cols$lab_bg) -#blue cornflowerblue green purple white yellow -#2 2 2 4 117 3 -sum( table(mut_pos_cols$lab_bg) ) == nrow(mut_pos_cols) # should be True -table(mut_pos_cols$lab_bg2) -#green white -#2 128 -sum( table(mut_pos_cols$lab_bg2) ) == nrow(mut_pos_cols) # should be True -table(mut_pos_cols$lab_fg) -#black white -#124 6 -sum( table(mut_pos_cols$lab_fg) ) == nrow(mut_pos_cols) # should be True -# very important! -my_axis_colours = mut_pos_cols$lab_fg -# now clear mut_pos_cols -rm(mut_pos_cols) -########################### -# 2: Plot: DUET scores -########################### -#========================== -# Plot 2: Barplot with scores (unordered) -# corresponds to DUET_outcome -# Stacked Barplot with colours: DUET_outcome @ position coloured by +max(my_df2$Dis_lig_Ang) #9.847 +min(my_df2$Dis_lig_Ang) #3.047 +# count no of unique positions +length(unique(my_df2$Position))#47 +#count no of unique mutations +length(unique(my_df2$Mutationinformation)) #110 +#clear variable to avoid confusion +rm(my_df) +#%%%%%%%%%% +# Reassign +#%%%%%%%%%% +my_df = my_df2 +# Stacked Barplot with colours: Lig_outcome @ position coloured by # stability scores. This is a barplot where each bar corresponds -# to a SNP and is coloured by its corresponding DUET stability value. +# to a SNP and is coloured by its corresponding PredAff stability value. # Normalised values (range between -1 and 1 ) to aid visualisation # NOTE: since barplot plots discrete values, colour = score, so number of # colours will be equal to the no. of unique normalised scores @@ -41,7 +21,7 @@ rm(mut_pos_cols) #============================ # sanity checks upos = unique(my_df$Position) -str(my_df$DUET_outcome) +str(my_df$Lig_outcome) colnames(my_df) #=========================== # Data preparation for plots @@ -49,44 +29,146 @@ colnames(my_df) #!!!!!!!!!!!!!!!!! # REASSIGNMENT df <- my_df -#!!!!!!!!!!!!!!!!! rm(my_df) # sanity checks # should be a factor -is.factor(df$DUET_outcome) -#TRUE -table(df$DUET_outcome) -#Destabilizing Stabilizing -#288 47 +is.factor(df$Lig_outcome) +table(df$Lig_outcome) +# sanity checks +# should be a factor +is.factor(df$Lig_outcome); as.factor(df$Lig_outcome) +# sanity checks +# should be a factor +is.factor(df$Lig_outcome); as.factor(df$Lig_outcome) +#=========================== +# Data preparation for plots +#=========================== +#!!!!!!!!!!!!!!!!! +# REASSIGNMENT +df <- my_df +getwd() +setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting") +getwd() +############################################################ +# 1: Installing and loading required packages and functions +############################################################ +source("../Header_TT.R") +source("../barplot_colour_function.R") +############################################################ +# Output dir for plots +############################################################ +out_dir = "~/git/Data/pyrazinamide/output/plots" +############################################################ +# 2: call script the prepares the data with columns containing +# colours for axis labels +############################################################ +source("subcols_axis.R") +# this should return +#mut_pos_cols: 130, 4 +#my_df: 335, 39 +# clear excess variable +# "mut_pos_cols" is just for inspection in case you need to cross check +# position numbers and colours +# open file from deskptop ("sample_axis_cols") for cross checking +table(mut_pos_cols$lab_bg) +sum( table(mut_pos_cols$lab_bg) ) == nrow(mut_pos_cols) # should be True +table(mut_pos_cols$lab_bg2) +sum( table(mut_pos_cols$lab_bg2) ) == nrow(mut_pos_cols) # should be True +table(mut_pos_cols$lab_fg) +sum( table(mut_pos_cols$lab_fg) ) == nrow(mut_pos_cols) # should be True +# very important! +my_axis_colours = mut_pos_cols$lab_fg +# now clear mut_pos_cols +rm(mut_pos_cols) +########################### !!! only for mcsm_lig +# 4: Filter/subset data +# Lig plots < 10Ang +# Filter the lig plots for Dis_to_lig < 10Ang +########################### +# check range of distances +max(my_df$Dis_lig_Ang) +min(my_df$Dis_lig_Ang) +# subset data to have only values less than 10 Ang +my_df2 = subset(my_df, my_df$Dis_lig_Ang < 10) +# sanity checks +table(my_df2$Dis_lig_Ang<10) +table(my_df2$Dis_lig_Ang>10) +max(my_df2$Dis_lig_Ang) +min(my_df2$Dis_lig_Ang) +# count no of unique positions +length(unique(my_df2$Position)) +#count no of unique mutations +length(unique(my_df2$Mutationinformation)) +# clear variable to avoid confusion +rm(my_df) +#%%%%%%%%%% +# Reassign to keep code below consistent +#%%%%%%%%%% +my_df = my_df2 +########################### +# 2: Plot: Lig scores +########################### +#========================== +# Plot 2: Barplot with scores (unordered) +# corresponds to Lig_outcome +# Stacked Barplot with colours: Lig_outcome @ position coloured by +# stability scores. This is a barplot where each bar corresponds +# to a SNP and is coloured by its corresponding PredAff stability value. +# Normalised values (range between -1 and 1 ) to aid visualisation +# NOTE: since barplot plots discrete values, colour = score, so number of +# colours will be equal to the no. of unique normalised scores +# rather than a continuous scale +# will require generating the colour scale separately. +#============================ +# sanity checks +upos = unique(my_df$Position) +str(my_df$Lig_outcome) +colnames(my_df) +#=========================== +# Data preparation for plots +#=========================== +#!!!!!!!!!!!!!!!!! +# REASSIGNMENT +df <- my_df +rm(my_df) +# sanity checks +# should be a factor +is.factor(df$Lig_outcome); as.factor(df$Lig_outcome) +df$Lig_outcome = as.factor(df$Lig_outcome) +is.factor(df$Lig_outcome); +table(df$Lig_outcome) # should be -1 and 1 -min(df$ratioDUET) -max(df$ratioDUET) +min(df$ratioPredAff) +max(df$ratioPredAff) # sanity checks # very important!!!! -tapply(df$ratioDUET, df$DUET_outcome, min) -#Destabilizing Stabilizing -#-1.0000000 0.01065719 -tapply(df$ratioDUET, df$DUET_outcome, max) -#Destabilizing Stabilizing -#-0.003875969 1.0000000 +tapply(df$ratioPredAff, df$Lig_outcome, min) +tapply(df$ratioPredAff, df$Lig_outcome, max) +# sanity checks +# very important!!!! +tapply(df$ratioPredAff, df$Lig_outcome, min) +tapply(df$ratioPredAff, df$Lig_outcome, max) # check unique values in normalised data -u = unique(df$ratioDUET) # 323 +u = unique(df$ratioPredAff) # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Run this section if rounding is to be used # specify number for rounding n = 3 -df$ratioDUETR = round(df$ratioDUET, n) # 335, 40 -u = unique(df$ratioDUETR) # 287 +df$ratioPredAffR = round(df$ratioPredAff, n) +u = unique(df$ratioPredAffR) # create an extra column called group which contains the "gp name and score" # so colours can be generated for each unique values in this column -my_grp = df$ratioDUETR -df$group <- paste0(df$DUET_outcome, "_", my_grp, sep = "") # 335,41 +my_grp = df$ratioPredAffR +df$group <- paste0(df$Lig_outcome, "_", my_grp, sep = "") # Call the function to create the palette based on the group defined above -colours <- ColourPalleteMulti(df, "DUET_outcome", "my_grp") -my_title = "Protein stability (DUET)" +colours <- ColourPalleteMulti(df, "Lig_outcome", "my_grp") +my_title = "Protein stability (PredAff)" library(ggplot2) # axis label size my_xaxls = 13 +my_title = "Ligand Affinity" +# axis label size +my_xaxls = 13 my_yaxls = 15 # axes text size my_xaxts = 15 @@ -129,9 +211,9 @@ coord_cartesian(xlim = c(1, my_xlim) geom_bar(aes(fill = group), colour = "grey") + scale_fill_manual( values = colours , guide = 'none') + -geom_tile(aes(,-0.8, width = 0.9, height = 0.85) +geom_tile(aes(,-0.8, width = 0.95, height = 0.85) , fill = df$lab_bg) + -geom_tile(aes(,-1.2, width = 0.9, height = -0.2) +geom_tile(aes(,-1.2, width = 0.95, height = -0.2) , fill = df$lab_bg2) + # Here it's important to specify that your axis goes from 1 to max number of levels theme( axis.text.x = element_text(size = my_xats @@ -150,6 +232,9 @@ theme( axis.text.x = element_text(size = my_xats labs(title = my_title , x = "Position" , y = "Frequency") +#======================== +# output plot as svg/png +#======================== class(df$lab_bg) # make this a named vector # define cartesian coord @@ -160,12 +245,234 @@ my_yals = 18 # axes text size my_xats = 14 my_yats = 18 -my_plot_name = "barplot_PS_acoloured.svg" -out_file = paste0(out_dir, "/", my_plot_name); outfile -svg(outfile, width = 26, height = 4) -svg(out_file, width = 26, height = 4) +# set output dir for plots +#getwd() +#setwd("~/git/Data/pyrazinamide/output/plots") +#getwd() +getwd() +setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting") +getwd() +############################################################ +# 1: Installing and loading required packages and functions +############################################################ +source("../Header_TT.R") +source("../barplot_colour_function.R") +############################################################ +# Output dir for plots +############################################################ +out_dir = "~/git/Data/pyrazinamide/output/plots" +############################################################ +# 2: call script the prepares the data with columns containing +# colours for axis labels +############################################################ +source("subcols_axis.R") +# this should return +#mut_pos_cols: 130, 4 +#my_df: 335, 39 +# clear excess variable +# "mut_pos_cols" is just for inspection in case you need to cross check +# position numbers and colours +# open file from deskptop ("sample_axis_cols") for cross checking +table(mut_pos_cols$lab_bg) +sum( table(mut_pos_cols$lab_bg) ) == nrow(mut_pos_cols) # should be True +table(mut_pos_cols$lab_bg2) +sum( table(mut_pos_cols$lab_bg2) ) == nrow(mut_pos_cols) # should be True +table(mut_pos_cols$lab_fg) +sum( table(mut_pos_cols$lab_fg) ) == nrow(mut_pos_cols) # should be True +# very important! +my_axis_colours = mut_pos_cols$lab_fg +# now clear mut_pos_cols +rm(mut_pos_cols) +########################### !!! only for mcsm_lig +# 4: Filter/subset data +# Lig plots < 10Ang +# Filter the lig plots for Dis_to_lig < 10Ang +########################### +# check range of distances +max(my_df$Dis_lig_Ang) +min(my_df$Dis_lig_Ang) +# subset data to have only values less than 10 Ang +my_df2 = subset(my_df, my_df$Dis_lig_Ang < 10) +# sanity checks +table(my_df2$Dis_lig_Ang<10) +table(my_df2$Dis_lig_Ang>10) +max(my_df2$Dis_lig_Ang) +min(my_df2$Dis_lig_Ang) +# count no of unique positions +length(unique(my_df2$Position)) +#count no of unique mutations +length(unique(my_df2$Mutationinformation)) +# clear variable to avoid confusion +rm(my_df) +#%%%%%%%%%% +# Reassign to keep code below consistent +#%%%%%%%%%% +my_df = my_df2 +########################### +# 2: Plot: Lig scores +########################### +#========================== +# Plot 2: Barplot with scores (unordered) +# corresponds to Lig_outcome +# Stacked Barplot with colours: Lig_outcome @ position coloured by +# stability scores. This is a barplot where each bar corresponds +# to a SNP and is coloured by its corresponding PredAff stability value. +# Normalised values (range between -1 and 1 ) to aid visualisation +# NOTE: since barplot plots discrete values, colour = score, so number of +# colours will be equal to the no. of unique normalised scores +# rather than a continuous scale +# will require generating the colour scale separately. +#============================ +# sanity checks +upos = unique(my_df$Position) +str(my_df$Lig_outcome) +colnames(my_df) +#=========================== +# Data preparation for plots +#=========================== +#!!!!!!!!!!!!!!!!! +# REASSIGNMENT +df <- my_df +#!!!!!!!!!!!!!!!!! +rm(my_df) +# sanity checks +# should be a factor +is.factor(df$Lig_outcome); +#FALSE +df$Lig_outcome = as.factor(df$Lig_outcome) +is.factor(df$Lig_outcome); +#TRUE +table(df$Lig_outcome) +# check the range +min(df$ratioPredAff) +max(df$ratioPredAff) +# sanity checks +# very important!!!! +tapply(df$ratioPredAff, df$Lig_outcome, min) +tapply(df$ratioPredAff, df$Lig_outcome, max) +# My colour FUNCTION: based on group and subgroup +# in my case; +# df = df +# group = Lig_outcome +# subgroup = normalised score i.e ratioPredAff +# Prepare data: round off ratioPredAff scores +# round off to 3 significant digits: +# 323 if no rounding is performed: used to generate the original graph +# 287 if rounded to 3 places +# FIXME: check if reducing precicion creates any ML prob +# check unique values in normalised data +u = unique(df$ratioPredAff) +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Run this section if rounding is to be used +# specify number for rounding +n = 3 +df$ratioPredAffR = round(df$ratioPredAff, n) +u = unique(df$ratioPredAffR) +# create an extra column called group which contains the "gp name and score" +# so colours can be generated for each unique values in this column +my_grp = df$ratioPredAffR +df$group <- paste0(df$Lig_outcome, "_", my_grp, sep = "") +# ELSE +# uncomment the below if rounding is not required +#my_grp = df$ratioPredAff +#df$group <- paste0(df$Lig_outcome, "_", my_grp, sep = "") +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#****************** +# generate plot +#****************** +# Call the function to create the palette based on the group defined above +colours <- ColourPalleteMulti(df, "Lig_outcome", "my_grp") +my_title = "Ligand Affinity" +library(ggplot2) +# axis label size +my_xaxls = 13 +my_yaxls = 15 +# axes text size +my_xaxts = 15 +my_yaxts = 15 +# no ordering of x-axis according to frequency +g = ggplot(df, aes(factor(Position, ordered = T))) +g + +geom_bar(aes(fill = group), colour = "grey") + +scale_fill_manual( values = colours +, guide = 'none') + +theme( axis.text.x = element_text(size = my_xaxls +, angle = 90 +, hjust = 1 +, vjust = 0.4) +, axis.text.y = element_text(size = my_yaxls +, angle = 0 +, hjust = 1 +, vjust = 0) +, axis.title.x = element_text(size = my_xaxts) +, axis.title.y = element_text(size = my_yaxts ) ) + +labs(title = my_title +, x = "Position" +, y = "Frequency") +#======================== +# plot with axis colours +#======================== +class(df$lab_bg) +# make this a named vector +# define cartesian coord +my_xlim = length(unique(df$Position)); my_xlim +# axis label size +my_xals = 15 +my_yals = 15 +# axes text size +my_xats = 15 +my_yats = 18 # using geom_tile g = ggplot(df, aes(factor(Position, ordered = T))) +g + +coord_cartesian(xlim = c(1, my_xlim) +, ylim = c(0, 6) +, clip = "off") + +geom_bar(aes(fill = group), colour = "grey") + +scale_fill_manual( values = colours +, guide = 'none') + +geom_tile(aes(,-0.8, width = 0.95, height = 0.85) +, fill = df$lab_bg) + +geom_tile(aes(,-1.2, width = 0.95, height = -0.2) +, fill = df$lab_bg2) + +# Here it's important to specify that your axis goes from 1 to max number of levels +theme( axis.text.x = element_text(size = my_xats +, angle = 90 +, hjust = 1 +, vjust = 0.4 +, colour = my_axis_colours) +, axis.text.y = element_text(size = my_yats +, angle = 0 +, hjust = 1 +, vjust = 0) +, axis.title.x = element_text(size = my_xals) +, axis.title.y = element_text(size = my_yals ) +, axis.ticks.x = element_blank() +) + +labs(title = my_title +, x = "Position" +, y = "Frequency") +#======================== +# output plot as svg/png +#======================== +class(df$lab_bg) +# make this a named vector +# define cartesian coord +my_xlim = length(unique(df$Position)); my_xlim +# axis label size +my_xals = 18 +my_yals = 18 +# axes text size +my_xats = 14 +my_yats = 18 +# set output dir for plots +#getwd() +#setwd("~/git/Data/pyrazinamide/output/plots") +#getwd() +plot_name = "barplot_LIG_acoloured.svg" +my_plot_name = paste0(out_dir, "/", plot_name); my_plot_name +svg(my_plot_name, width = 26, height = 4) +g = ggplot(df, aes(factor(Position, ordered = T))) outFile = g + coord_cartesian(xlim = c(1, my_xlim) , ylim = c(0, 6) @@ -178,9 +485,9 @@ scale_fill_manual( values = colours # , fill = df$lab_bg) + # geom_tile(aes(,-1, width = 0.9, height = 0.3) # , fill = df$lab_bg2) + -geom_tile(aes(,-0.8, width = 0.9, height = 0.85) +geom_tile(aes(,-0.8, width = 0.95, height = 0.85) , fill = df$lab_bg) + -geom_tile(aes(,-1.2, width = 0.9, height = -0.2) +geom_tile(aes(,-1.2, width = 0.95, height = -0.2) , fill = df$lab_bg2) + # Here it's important to specify that your axis goes from 1 to max number of levels theme( axis.text.x = element_text(size = my_xats @@ -201,3 +508,5 @@ labs(title = "" , y = "Frequency") print(outFile) dev.off() +# for sanity and good practice +#rm(df) diff --git a/mcsm_analysis/pyrazinamide/scripts/plotting/barplots_subcolours_aa_PS.R b/mcsm_analysis/pyrazinamide/scripts/plotting/barplots_subcolours_aa_PS.R index 63be70a..78029be 100644 --- a/mcsm_analysis/pyrazinamide/scripts/plotting/barplots_subcolours_aa_PS.R +++ b/mcsm_analysis/pyrazinamide/scripts/plotting/barplots_subcolours_aa_PS.R @@ -196,9 +196,9 @@ g + geom_bar(aes(fill = group), colour = "grey") + scale_fill_manual( values = colours , guide = 'none') + - geom_tile(aes(,-0.8, width = 0.9, height = 0.85) + geom_tile(aes(,-0.8, width = 0.95, height = 0.85) , fill = df$lab_bg) + - geom_tile(aes(,-1.2, width = 0.9, height = -0.2) + geom_tile(aes(,-1.2, width = 0.95, height = -0.2) , fill = df$lab_bg2) + # Here it's important to specify that your axis goes from 1 to max number of levels @@ -261,9 +261,9 @@ outFile = g + # , fill = df$lab_bg) + # geom_tile(aes(,-1, width = 0.9, height = 0.3) # , fill = df$lab_bg2) + - geom_tile(aes(,-0.8, width = 0.9, height = 0.85) + geom_tile(aes(,-0.8, width = 0.95, height = 0.85) , fill = df$lab_bg) + - geom_tile(aes(,-1.2, width = 0.9, height = -0.2) + geom_tile(aes(,-1.2, width = 0.95, height = -0.2) , fill = df$lab_bg2) + # Here it's important to specify that your axis goes from 1 to max number of levels diff --git a/mcsm_analysis/pyrazinamide/scripts/plotting/subcols_axis_LIG.R b/mcsm_analysis/pyrazinamide/scripts/plotting/subcols_axis_LIG.R new file mode 100644 index 0000000..2049c3e --- /dev/null +++ b/mcsm_analysis/pyrazinamide/scripts/plotting/subcols_axis_LIG.R @@ -0,0 +1,208 @@ +getwd() +setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting") +getwd() + +############################################################ +# 1: Installing and loading required packages and functions +############################################################ + +#source("../Header_TT.R") +#source("../barplot_colour_function.R") +#library(tidyverse) + +########################### +#2: Read file: normalised file, output of step 4 mcsm pipeline +########################### +#my_df <- read.csv("../../Data/mcsm_complex1_normalised.csv" +# , row.names = 1 +# , stringsAsFactors = F +# , header = T) + +# call script combining_df +source("../combining_two_df_lig.R") + +#---------------------- PAY ATTENTION +# the above changes the working dir +# from Plotting to Scripts" +#---------------------- PAY ATTENTION + +#========================== +# This will return: + +# df with NA for pyrazinamide: +#merged_df2 +#merged_df2_comp + +# df without NA for pyrazinamide: +#merged_df3 +#merged_df3_comp +#========================== +########################### +# Data to choose: +# We will be using the small dfs +# to generate the coloured axis +########################### + +# uncomment as necessary +#!!!!!!!!!!!!!!!!!!!!!!! +# REASSIGNMENT +my_df = merged_df3 +#my_df = merged_df3_comp +#!!!!!!!!!!!!!!!!!!!!!!! + +# delete variables not required +rm(merged_df2, merged_df2_comp, merged_df3, merged_df3_comp) + +str(my_df) +my_df$Position +c1 = my_df[my_df$Mutationinformation == "A134V",] + +# order my_df by Position +my_df_o = my_df[order(my_df$Position),] +head(my_df_o$Position); tail(my_df_o$Position) + +c2 = my_df_o[my_df_o$Mutationinformation == "A134V",] + +# sanity check +if (sum(table(c1 == c2)) == ncol(my_df)){ + print ("Sanity check passsd") +}else{ + print ("Error!: Please debug your code") +} + +rm(my_df, c1, c2) + +# create a new df with unique position numbers and cols +Position = unique(my_df_o$Position) +Position_cols = as.data.frame(Position) + +head(Position_cols) ; tail(Position_cols) + +# specify active site residues and bg colour +Position = c(49, 51, 57, 71 + , 8, 96, 138 + , 13, 68 + , 103, 137 + , 133, 134) #13 + +lab_bg = rep(c("purple" + , "yellow" + , "cornflowerblue" + , "blue" + , "green"), times = c(4, 3, 2, 2, 2) +) + +# second bg colour for active site residues +#lab_bg2 = rep(c("white" +# , "green" , "white", "green" +# , "white" +# , "white" +# , "white"), times = c(4 +# , 1, 1, 1 +# , 2 +# , 2 +# , 2) +#) + +#%%%%%%%%% +# revised: leave the second box coloured as the first one incase there is no second colour +#%%%%%%%%% +lab_bg2 = rep(c("purple" + , "green", "yellow", "green" + , "cornflowerblue" + , "blue" + , "green"), times = c(4 + , 1, 1, 1 + , 2 + , 2 + , 2)) + +# fg colour for labels for active site residues +lab_fg = rep(c("white" + , "black" + , "black" + , "white" + , "black"), times = c(4, 3, 2, 2, 2)) + +#%%%%%%%%% +# revised: make the purple ones black +# fg colour for labels for active site residues +#%%%%%%%%% +#lab_fg = rep(c("black" +# , "black" +# , "black" +# , "white" +# , "black"), times = c(4, 3, 2, 2, 2)) + +# combined df with active sites, bg and fg colours +aa_cols_ref = data.frame(Position + , lab_bg + , lab_bg2 + , lab_fg + , stringsAsFactors = F) #13, 4 + +str(Position_cols); class(Position_cols) +str(aa_cols_ref); class(aa_cols_ref) + +# since Position is int and numeric in the two dfs resp, +# converting numeric to int for consistency +aa_cols_ref$Position = as.integer(aa_cols_ref$Position) +class(aa_cols_ref$Position) + +#=========== +# Merge 1: merging Positions df (Position_cols) and +# active site cols (aa_cols_ref) +# linking column: "Position" +# This is so you can have colours defined for all 130 positions +#=========== +head(Position_cols$Position); head(aa_cols_ref$Position) + +mut_pos_cols = merge(Position_cols, aa_cols_ref + , by = "Position" + , all.x = TRUE) + +head(mut_pos_cols) +# replace NA's +# :column "lab_bg" with "white" +# : column "lab_fg" with "black" +mut_pos_cols$lab_bg[is.na(mut_pos_cols$lab_bg)] <- "white" +mut_pos_cols$lab_bg2[is.na(mut_pos_cols$lab_bg2)] <- "white" +mut_pos_cols$lab_fg[is.na(mut_pos_cols$lab_fg)] <- "black" +head(mut_pos_cols) + +#=========== +# Merge 2: Merge mut_pos_cols with mcsm df +# Now combined the 130 positions with aa colours with +# the mcsm_data +#=========== +# dfs to merge +df0 = my_df_o +df1 = mut_pos_cols + +# check the column on which merge will be performed +head(df0$Position); tail(df0$Position) +head(df1$Position); tail(df1$Position) + +# should now have 3 extra columns +my_df = merge(df0, df1 + , by = "Position" + , all.x = TRUE) + +# sanity check +my_df[my_df$Position == "49",] +my_df[my_df$Position == "13",] + +my_df$Position + +# clear variables +rm(aa_cols_ref + , df0 + , df1 + , my_df_o + , Position_cols + , lab_bg + , lab_bg2 + , lab_fg + , Position + ) + diff --git a/mcsm_analysis/pyrazinamide/scripts/plotting/subcols_axis.R b/mcsm_analysis/pyrazinamide/scripts/plotting/subcols_axis_PS.R similarity index 94% rename from mcsm_analysis/pyrazinamide/scripts/plotting/subcols_axis.R rename to mcsm_analysis/pyrazinamide/scripts/plotting/subcols_axis_PS.R index c7fc436..37dfe32 100644 --- a/mcsm_analysis/pyrazinamide/scripts/plotting/subcols_axis.R +++ b/mcsm_analysis/pyrazinamide/scripts/plotting/subcols_axis_PS.R @@ -104,32 +104,35 @@ lab_bg = rep(c("purple" # , 2) #) +#%%%%%%%%% # revised: leave the second box coloured as the first one incase there is no second colour +#%%%%%%%%% lab_bg2 = rep(c("purple" - , "green" , "yellow", "green" + , "green", "yellow", "green" , "cornflowerblue" , "blue" , "green"), times = c(4 , 1, 1, 1 , 2 , 2 - , 2) -) + , 2)) # fg colour for labels for active site residues -#lab_fg = rep(c("white" -# , "black" -# , "black" -# , "white" -# , "black"), times = c(4, 3, 2, 2, 2)) - -# revised: make the purple ones black -# fg colour for labels for active site residues -lab_fg = rep(c("black" +lab_fg = rep(c("white" , "black" , "black" , "white" - , "black"), times = c(4, 3, 2, 2, 2)) + , "black"), times = c(4, 3, 2, 2, 2)) + +#%%%%%%%%% +# revised: make the purple ones black +# fg colour for labels for active site residues +#%%%%%%%%% +#lab_fg = rep(c("black" +# , "black" +# , "black" +# , "white" +# , "black"), times = c(4, 3, 2, 2, 2)) # combined df with active sites, bg and fg colours aa_cols_ref = data.frame(Position