From 29022c5462473e7ac8d3805ee51214b31595226a Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 30 Jan 2020 08:25:45 +0000 Subject: [PATCH] saving previous stuff from work --- mcsm_analysis/pyrazinamide/scripts/.Rhistory | 212 ++---------------- .../scripts/plotting/corr_plots_v3_PS.R | 4 +- 2 files changed, 18 insertions(+), 198 deletions(-) diff --git a/mcsm_analysis/pyrazinamide/scripts/.Rhistory b/mcsm_analysis/pyrazinamide/scripts/.Rhistory index 715e7a4..18dd768 100644 --- a/mcsm_analysis/pyrazinamide/scripts/.Rhistory +++ b/mcsm_analysis/pyrazinamide/scripts/.Rhistory @@ -1,203 +1,23 @@ getwd() setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting") getwd() +source("../combining_two_df.R") +source("../Header_TT.R") +getwd() +setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting") +getwd() source("../Header_TT.R") -source("../barplot_colour_function.R") -############################################################ -# Output dir for plots -############################################################ -out_dir = "~/git/Data/pyrazinamide/output/plots" -source("subcols_axis.R") -table(mut_pos_cols$lab_bg) -#blue cornflowerblue green purple white yellow -#2 2 2 4 117 3 -sum( table(mut_pos_cols$lab_bg) ) == nrow(mut_pos_cols) # should be True -table(mut_pos_cols$lab_bg2) -#green white -#2 128 -sum( table(mut_pos_cols$lab_bg2) ) == nrow(mut_pos_cols) # should be True -table(mut_pos_cols$lab_fg) -#black white -#124 6 -sum( table(mut_pos_cols$lab_fg) ) == nrow(mut_pos_cols) # should be True -# very important! -my_axis_colours = mut_pos_cols$lab_fg -# now clear mut_pos_cols -rm(mut_pos_cols) -########################### -# 2: Plot: DUET scores -########################### -#========================== -# Plot 2: Barplot with scores (unordered) -# corresponds to DUET_outcome -# Stacked Barplot with colours: DUET_outcome @ position coloured by -# stability scores. This is a barplot where each bar corresponds -# to a SNP and is coloured by its corresponding DUET stability value. -# Normalised values (range between -1 and 1 ) to aid visualisation -# NOTE: since barplot plots discrete values, colour = score, so number of -# colours will be equal to the no. of unique normalised scores -# rather than a continuous scale -# will require generating the colour scale separately. -#============================ -# sanity checks -upos = unique(my_df$Position) -str(my_df$DUET_outcome) -colnames(my_df) -#=========================== -# Data preparation for plots -#=========================== -#!!!!!!!!!!!!!!!!! +source("../combining_two_df.R") +#<<<<<<<<<<<<<<<<<<<<<<<<< # REASSIGNMENT -df <- my_df -#!!!!!!!!!!!!!!!!! +my_df = merged_df3_comp +# delete variables not required +rm(merged_df2, merged_df2_comp, merged_df3, merged_df3_comp) +# quick checks +colnames(my_df) +str(my_df) +#<<<<<<<<<<<<<<<<<<<<<<<< +# REASSIGNMENT +df = my_df rm(my_df) -# sanity checks -# should be a factor -is.factor(df$DUET_outcome) -#TRUE table(df$DUET_outcome) -#Destabilizing Stabilizing -#288 47 -# should be -1 and 1 -min(df$ratioDUET) -max(df$ratioDUET) -# sanity checks -# very important!!!! -tapply(df$ratioDUET, df$DUET_outcome, min) -#Destabilizing Stabilizing -#-1.0000000 0.01065719 -tapply(df$ratioDUET, df$DUET_outcome, max) -#Destabilizing Stabilizing -#-0.003875969 1.0000000 -# check unique values in normalised data -u = unique(df$ratioDUET) # 323 -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Run this section if rounding is to be used -# specify number for rounding -n = 3 -df$ratioDUETR = round(df$ratioDUET, n) # 335, 40 -u = unique(df$ratioDUETR) # 287 -# create an extra column called group which contains the "gp name and score" -# so colours can be generated for each unique values in this column -my_grp = df$ratioDUETR -df$group <- paste0(df$DUET_outcome, "_", my_grp, sep = "") # 335,41 -# Call the function to create the palette based on the group defined above -colours <- ColourPalleteMulti(df, "DUET_outcome", "my_grp") -my_title = "Protein stability (DUET)" -library(ggplot2) -# axis label size -my_xaxls = 13 -my_yaxls = 15 -# axes text size -my_xaxts = 15 -my_yaxts = 15 -# no ordering of x-axis according to frequency -g = ggplot(df, aes(factor(Position, ordered = T))) -g + -geom_bar(aes(fill = group), colour = "grey") + -scale_fill_manual( values = colours -, guide = 'none') + -theme( axis.text.x = element_text(size = my_xaxls -, angle = 90 -, hjust = 1 -, vjust = 0.4) -, axis.text.y = element_text(size = my_yaxls -, angle = 0 -, hjust = 1 -, vjust = 0) -, axis.title.x = element_text(size = my_xaxts) -, axis.title.y = element_text(size = my_yaxts ) ) + -labs(title = my_title -, x = "Position" -, y = "Frequency") -class(df$lab_bg) -# make this a named vector -# define cartesian coord -my_xlim = length(unique(df$Position)); my_xlim -# axis label size -my_xals = 15 -my_yals = 15 -# axes text size -my_xats = 15 -my_yats = 18 -# using geom_tile -g = ggplot(df, aes(factor(Position, ordered = T))) -g + -coord_cartesian(xlim = c(1, my_xlim) -, ylim = c(0, 6) -, clip = "off") + -geom_bar(aes(fill = group), colour = "grey") + -scale_fill_manual( values = colours -, guide = 'none') + -geom_tile(aes(,-0.8, width = 0.9, height = 0.85) -, fill = df$lab_bg) + -geom_tile(aes(,-1.2, width = 0.9, height = -0.2) -, fill = df$lab_bg2) + -# Here it's important to specify that your axis goes from 1 to max number of levels -theme( axis.text.x = element_text(size = my_xats -, angle = 90 -, hjust = 1 -, vjust = 0.4 -, colour = my_axis_colours) -, axis.text.y = element_text(size = my_yats -, angle = 0 -, hjust = 1 -, vjust = 0) -, axis.title.x = element_text(size = my_xals) -, axis.title.y = element_text(size = my_yals ) -, axis.ticks.x = element_blank() -) + -labs(title = my_title -, x = "Position" -, y = "Frequency") -class(df$lab_bg) -# make this a named vector -# define cartesian coord -my_xlim = length(unique(df$Position)); my_xlim -# axis label size -my_xals = 18 -my_yals = 18 -# axes text size -my_xats = 14 -my_yats = 18 -my_plot_name = "barplot_PS_acoloured.svg" -out_file = paste0(out_dir, "/", my_plot_name); outfile -svg(outfile, width = 26, height = 4) -svg(out_file, width = 26, height = 4) -# using geom_tile -g = ggplot(df, aes(factor(Position, ordered = T))) -outFile = g + -coord_cartesian(xlim = c(1, my_xlim) -, ylim = c(0, 6) -, clip = "off" -) + -geom_bar(aes(fill = group), colour = "grey") + -scale_fill_manual( values = colours -, guide = 'none') + -# geom_tile(aes(,-0.6, width = 0.9, height = 0.7) -# , fill = df$lab_bg) + -# geom_tile(aes(,-1, width = 0.9, height = 0.3) -# , fill = df$lab_bg2) + -geom_tile(aes(,-0.8, width = 0.9, height = 0.85) -, fill = df$lab_bg) + -geom_tile(aes(,-1.2, width = 0.9, height = -0.2) -, fill = df$lab_bg2) + -# Here it's important to specify that your axis goes from 1 to max number of levels -theme( axis.text.x = element_text(size = my_xats -, angle = 90 -, hjust = 1 -, vjust = 0.4 -, colour = my_axis_colours) -, axis.text.y = element_text(size = my_yats -, angle = 0 -, hjust = 1 -, vjust = 0) -, axis.title.x = element_text(size = my_xals) -, axis.title.y = element_text(size = my_yals ) -, axis.ticks.x = element_blank() -) + -labs(title = "" -, x = "Position" -, y = "Frequency") -print(outFile) -dev.off() diff --git a/mcsm_analysis/pyrazinamide/scripts/plotting/corr_plots_v3_PS.R b/mcsm_analysis/pyrazinamide/scripts/plotting/corr_plots_v3_PS.R index f3a507e..0059bca 100644 --- a/mcsm_analysis/pyrazinamide/scripts/plotting/corr_plots_v3_PS.R +++ b/mcsm_analysis/pyrazinamide/scripts/plotting/corr_plots_v3_PS.R @@ -64,10 +64,10 @@ str(my_df) # Data for plots #=================== -#<<<<<<<<<<<<<<<<<<<<<<<< +#!!!!!!!!!!!!!!!!!!!!!!!! # REASSIGNMENT df = my_df -#<<<<<<<<<<<<<<<<<<<<<<<<< +#!!!!!!!!!!!!!!!!!!!!!!!! rm(my_df)