From 15daa6dfc134423824bbec53069d6f5b96ecca31 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Fri, 31 Jan 2020 15:32:32 +0000 Subject: [PATCH] remove .Rhistory --- mcsm_analysis/pyrazinamide/scripts/.Rhistory | 512 ------------------- 1 file changed, 512 deletions(-) delete mode 100644 mcsm_analysis/pyrazinamide/scripts/.Rhistory diff --git a/mcsm_analysis/pyrazinamide/scripts/.Rhistory b/mcsm_analysis/pyrazinamide/scripts/.Rhistory deleted file mode 100644 index efb7b8e..0000000 --- a/mcsm_analysis/pyrazinamide/scripts/.Rhistory +++ /dev/null @@ -1,512 +0,0 @@ -max(my_df2$Dis_lig_Ang) #9.847 -min(my_df2$Dis_lig_Ang) #3.047 -# count no of unique positions -length(unique(my_df2$Position))#47 -#count no of unique mutations -length(unique(my_df2$Mutationinformation)) #110 -#clear variable to avoid confusion -rm(my_df) -#%%%%%%%%%% -# Reassign -#%%%%%%%%%% -my_df = my_df2 -# Stacked Barplot with colours: Lig_outcome @ position coloured by -# stability scores. This is a barplot where each bar corresponds -# to a SNP and is coloured by its corresponding PredAff stability value. -# Normalised values (range between -1 and 1 ) to aid visualisation -# NOTE: since barplot plots discrete values, colour = score, so number of -# colours will be equal to the no. of unique normalised scores -# rather than a continuous scale -# will require generating the colour scale separately. -#============================ -# sanity checks -upos = unique(my_df$Position) -str(my_df$Lig_outcome) -colnames(my_df) -#=========================== -# Data preparation for plots -#=========================== -#!!!!!!!!!!!!!!!!! -# REASSIGNMENT -df <- my_df -rm(my_df) -# sanity checks -# should be a factor -is.factor(df$Lig_outcome) -table(df$Lig_outcome) -# sanity checks -# should be a factor -is.factor(df$Lig_outcome); as.factor(df$Lig_outcome) -# sanity checks -# should be a factor -is.factor(df$Lig_outcome); as.factor(df$Lig_outcome) -#=========================== -# Data preparation for plots -#=========================== -#!!!!!!!!!!!!!!!!! -# REASSIGNMENT -df <- my_df -getwd() -setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting") -getwd() -############################################################ -# 1: Installing and loading required packages and functions -############################################################ -source("../Header_TT.R") -source("../barplot_colour_function.R") -############################################################ -# Output dir for plots -############################################################ -out_dir = "~/git/Data/pyrazinamide/output/plots" -############################################################ -# 2: call script the prepares the data with columns containing -# colours for axis labels -############################################################ -source("subcols_axis.R") -# this should return -#mut_pos_cols: 130, 4 -#my_df: 335, 39 -# clear excess variable -# "mut_pos_cols" is just for inspection in case you need to cross check -# position numbers and colours -# open file from deskptop ("sample_axis_cols") for cross checking -table(mut_pos_cols$lab_bg) -sum( table(mut_pos_cols$lab_bg) ) == nrow(mut_pos_cols) # should be True -table(mut_pos_cols$lab_bg2) -sum( table(mut_pos_cols$lab_bg2) ) == nrow(mut_pos_cols) # should be True -table(mut_pos_cols$lab_fg) -sum( table(mut_pos_cols$lab_fg) ) == nrow(mut_pos_cols) # should be True -# very important! -my_axis_colours = mut_pos_cols$lab_fg -# now clear mut_pos_cols -rm(mut_pos_cols) -########################### !!! only for mcsm_lig -# 4: Filter/subset data -# Lig plots < 10Ang -# Filter the lig plots for Dis_to_lig < 10Ang -########################### -# check range of distances -max(my_df$Dis_lig_Ang) -min(my_df$Dis_lig_Ang) -# subset data to have only values less than 10 Ang -my_df2 = subset(my_df, my_df$Dis_lig_Ang < 10) -# sanity checks -table(my_df2$Dis_lig_Ang<10) -table(my_df2$Dis_lig_Ang>10) -max(my_df2$Dis_lig_Ang) -min(my_df2$Dis_lig_Ang) -# count no of unique positions -length(unique(my_df2$Position)) -#count no of unique mutations -length(unique(my_df2$Mutationinformation)) -# clear variable to avoid confusion -rm(my_df) -#%%%%%%%%%% -# Reassign to keep code below consistent -#%%%%%%%%%% -my_df = my_df2 -########################### -# 2: Plot: Lig scores -########################### -#========================== -# Plot 2: Barplot with scores (unordered) -# corresponds to Lig_outcome -# Stacked Barplot with colours: Lig_outcome @ position coloured by -# stability scores. This is a barplot where each bar corresponds -# to a SNP and is coloured by its corresponding PredAff stability value. -# Normalised values (range between -1 and 1 ) to aid visualisation -# NOTE: since barplot plots discrete values, colour = score, so number of -# colours will be equal to the no. of unique normalised scores -# rather than a continuous scale -# will require generating the colour scale separately. -#============================ -# sanity checks -upos = unique(my_df$Position) -str(my_df$Lig_outcome) -colnames(my_df) -#=========================== -# Data preparation for plots -#=========================== -#!!!!!!!!!!!!!!!!! -# REASSIGNMENT -df <- my_df -rm(my_df) -# sanity checks -# should be a factor -is.factor(df$Lig_outcome); as.factor(df$Lig_outcome) -df$Lig_outcome = as.factor(df$Lig_outcome) -is.factor(df$Lig_outcome); -table(df$Lig_outcome) -# should be -1 and 1 -min(df$ratioPredAff) -max(df$ratioPredAff) -# sanity checks -# very important!!!! -tapply(df$ratioPredAff, df$Lig_outcome, min) -tapply(df$ratioPredAff, df$Lig_outcome, max) -# sanity checks -# very important!!!! -tapply(df$ratioPredAff, df$Lig_outcome, min) -tapply(df$ratioPredAff, df$Lig_outcome, max) -# check unique values in normalised data -u = unique(df$ratioPredAff) -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Run this section if rounding is to be used -# specify number for rounding -n = 3 -df$ratioPredAffR = round(df$ratioPredAff, n) -u = unique(df$ratioPredAffR) -# create an extra column called group which contains the "gp name and score" -# so colours can be generated for each unique values in this column -my_grp = df$ratioPredAffR -df$group <- paste0(df$Lig_outcome, "_", my_grp, sep = "") -# Call the function to create the palette based on the group defined above -colours <- ColourPalleteMulti(df, "Lig_outcome", "my_grp") -my_title = "Protein stability (PredAff)" -library(ggplot2) -# axis label size -my_xaxls = 13 -my_title = "Ligand Affinity" -# axis label size -my_xaxls = 13 -my_yaxls = 15 -# axes text size -my_xaxts = 15 -my_yaxts = 15 -# no ordering of x-axis according to frequency -g = ggplot(df, aes(factor(Position, ordered = T))) -g + -geom_bar(aes(fill = group), colour = "grey") + -scale_fill_manual( values = colours -, guide = 'none') + -theme( axis.text.x = element_text(size = my_xaxls -, angle = 90 -, hjust = 1 -, vjust = 0.4) -, axis.text.y = element_text(size = my_yaxls -, angle = 0 -, hjust = 1 -, vjust = 0) -, axis.title.x = element_text(size = my_xaxts) -, axis.title.y = element_text(size = my_yaxts ) ) + -labs(title = my_title -, x = "Position" -, y = "Frequency") -class(df$lab_bg) -# make this a named vector -# define cartesian coord -my_xlim = length(unique(df$Position)); my_xlim -# axis label size -my_xals = 15 -my_yals = 15 -# axes text size -my_xats = 15 -my_yats = 18 -# using geom_tile -g = ggplot(df, aes(factor(Position, ordered = T))) -g + -coord_cartesian(xlim = c(1, my_xlim) -, ylim = c(0, 6) -, clip = "off") + -geom_bar(aes(fill = group), colour = "grey") + -scale_fill_manual( values = colours -, guide = 'none') + -geom_tile(aes(,-0.8, width = 0.95, height = 0.85) -, fill = df$lab_bg) + -geom_tile(aes(,-1.2, width = 0.95, height = -0.2) -, fill = df$lab_bg2) + -# Here it's important to specify that your axis goes from 1 to max number of levels -theme( axis.text.x = element_text(size = my_xats -, angle = 90 -, hjust = 1 -, vjust = 0.4 -, colour = my_axis_colours) -, axis.text.y = element_text(size = my_yats -, angle = 0 -, hjust = 1 -, vjust = 0) -, axis.title.x = element_text(size = my_xals) -, axis.title.y = element_text(size = my_yals ) -, axis.ticks.x = element_blank() -) + -labs(title = my_title -, x = "Position" -, y = "Frequency") -#======================== -# output plot as svg/png -#======================== -class(df$lab_bg) -# make this a named vector -# define cartesian coord -my_xlim = length(unique(df$Position)); my_xlim -# axis label size -my_xals = 18 -my_yals = 18 -# axes text size -my_xats = 14 -my_yats = 18 -# set output dir for plots -#getwd() -#setwd("~/git/Data/pyrazinamide/output/plots") -#getwd() -getwd() -setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting") -getwd() -############################################################ -# 1: Installing and loading required packages and functions -############################################################ -source("../Header_TT.R") -source("../barplot_colour_function.R") -############################################################ -# Output dir for plots -############################################################ -out_dir = "~/git/Data/pyrazinamide/output/plots" -############################################################ -# 2: call script the prepares the data with columns containing -# colours for axis labels -############################################################ -source("subcols_axis.R") -# this should return -#mut_pos_cols: 130, 4 -#my_df: 335, 39 -# clear excess variable -# "mut_pos_cols" is just for inspection in case you need to cross check -# position numbers and colours -# open file from deskptop ("sample_axis_cols") for cross checking -table(mut_pos_cols$lab_bg) -sum( table(mut_pos_cols$lab_bg) ) == nrow(mut_pos_cols) # should be True -table(mut_pos_cols$lab_bg2) -sum( table(mut_pos_cols$lab_bg2) ) == nrow(mut_pos_cols) # should be True -table(mut_pos_cols$lab_fg) -sum( table(mut_pos_cols$lab_fg) ) == nrow(mut_pos_cols) # should be True -# very important! -my_axis_colours = mut_pos_cols$lab_fg -# now clear mut_pos_cols -rm(mut_pos_cols) -########################### !!! only for mcsm_lig -# 4: Filter/subset data -# Lig plots < 10Ang -# Filter the lig plots for Dis_to_lig < 10Ang -########################### -# check range of distances -max(my_df$Dis_lig_Ang) -min(my_df$Dis_lig_Ang) -# subset data to have only values less than 10 Ang -my_df2 = subset(my_df, my_df$Dis_lig_Ang < 10) -# sanity checks -table(my_df2$Dis_lig_Ang<10) -table(my_df2$Dis_lig_Ang>10) -max(my_df2$Dis_lig_Ang) -min(my_df2$Dis_lig_Ang) -# count no of unique positions -length(unique(my_df2$Position)) -#count no of unique mutations -length(unique(my_df2$Mutationinformation)) -# clear variable to avoid confusion -rm(my_df) -#%%%%%%%%%% -# Reassign to keep code below consistent -#%%%%%%%%%% -my_df = my_df2 -########################### -# 2: Plot: Lig scores -########################### -#========================== -# Plot 2: Barplot with scores (unordered) -# corresponds to Lig_outcome -# Stacked Barplot with colours: Lig_outcome @ position coloured by -# stability scores. This is a barplot where each bar corresponds -# to a SNP and is coloured by its corresponding PredAff stability value. -# Normalised values (range between -1 and 1 ) to aid visualisation -# NOTE: since barplot plots discrete values, colour = score, so number of -# colours will be equal to the no. of unique normalised scores -# rather than a continuous scale -# will require generating the colour scale separately. -#============================ -# sanity checks -upos = unique(my_df$Position) -str(my_df$Lig_outcome) -colnames(my_df) -#=========================== -# Data preparation for plots -#=========================== -#!!!!!!!!!!!!!!!!! -# REASSIGNMENT -df <- my_df -#!!!!!!!!!!!!!!!!! -rm(my_df) -# sanity checks -# should be a factor -is.factor(df$Lig_outcome); -#FALSE -df$Lig_outcome = as.factor(df$Lig_outcome) -is.factor(df$Lig_outcome); -#TRUE -table(df$Lig_outcome) -# check the range -min(df$ratioPredAff) -max(df$ratioPredAff) -# sanity checks -# very important!!!! -tapply(df$ratioPredAff, df$Lig_outcome, min) -tapply(df$ratioPredAff, df$Lig_outcome, max) -# My colour FUNCTION: based on group and subgroup -# in my case; -# df = df -# group = Lig_outcome -# subgroup = normalised score i.e ratioPredAff -# Prepare data: round off ratioPredAff scores -# round off to 3 significant digits: -# 323 if no rounding is performed: used to generate the original graph -# 287 if rounded to 3 places -# FIXME: check if reducing precicion creates any ML prob -# check unique values in normalised data -u = unique(df$ratioPredAff) -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Run this section if rounding is to be used -# specify number for rounding -n = 3 -df$ratioPredAffR = round(df$ratioPredAff, n) -u = unique(df$ratioPredAffR) -# create an extra column called group which contains the "gp name and score" -# so colours can be generated for each unique values in this column -my_grp = df$ratioPredAffR -df$group <- paste0(df$Lig_outcome, "_", my_grp, sep = "") -# ELSE -# uncomment the below if rounding is not required -#my_grp = df$ratioPredAff -#df$group <- paste0(df$Lig_outcome, "_", my_grp, sep = "") -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#****************** -# generate plot -#****************** -# Call the function to create the palette based on the group defined above -colours <- ColourPalleteMulti(df, "Lig_outcome", "my_grp") -my_title = "Ligand Affinity" -library(ggplot2) -# axis label size -my_xaxls = 13 -my_yaxls = 15 -# axes text size -my_xaxts = 15 -my_yaxts = 15 -# no ordering of x-axis according to frequency -g = ggplot(df, aes(factor(Position, ordered = T))) -g + -geom_bar(aes(fill = group), colour = "grey") + -scale_fill_manual( values = colours -, guide = 'none') + -theme( axis.text.x = element_text(size = my_xaxls -, angle = 90 -, hjust = 1 -, vjust = 0.4) -, axis.text.y = element_text(size = my_yaxls -, angle = 0 -, hjust = 1 -, vjust = 0) -, axis.title.x = element_text(size = my_xaxts) -, axis.title.y = element_text(size = my_yaxts ) ) + -labs(title = my_title -, x = "Position" -, y = "Frequency") -#======================== -# plot with axis colours -#======================== -class(df$lab_bg) -# make this a named vector -# define cartesian coord -my_xlim = length(unique(df$Position)); my_xlim -# axis label size -my_xals = 15 -my_yals = 15 -# axes text size -my_xats = 15 -my_yats = 18 -# using geom_tile -g = ggplot(df, aes(factor(Position, ordered = T))) -g + -coord_cartesian(xlim = c(1, my_xlim) -, ylim = c(0, 6) -, clip = "off") + -geom_bar(aes(fill = group), colour = "grey") + -scale_fill_manual( values = colours -, guide = 'none') + -geom_tile(aes(,-0.8, width = 0.95, height = 0.85) -, fill = df$lab_bg) + -geom_tile(aes(,-1.2, width = 0.95, height = -0.2) -, fill = df$lab_bg2) + -# Here it's important to specify that your axis goes from 1 to max number of levels -theme( axis.text.x = element_text(size = my_xats -, angle = 90 -, hjust = 1 -, vjust = 0.4 -, colour = my_axis_colours) -, axis.text.y = element_text(size = my_yats -, angle = 0 -, hjust = 1 -, vjust = 0) -, axis.title.x = element_text(size = my_xals) -, axis.title.y = element_text(size = my_yals ) -, axis.ticks.x = element_blank() -) + -labs(title = my_title -, x = "Position" -, y = "Frequency") -#======================== -# output plot as svg/png -#======================== -class(df$lab_bg) -# make this a named vector -# define cartesian coord -my_xlim = length(unique(df$Position)); my_xlim -# axis label size -my_xals = 18 -my_yals = 18 -# axes text size -my_xats = 14 -my_yats = 18 -# set output dir for plots -#getwd() -#setwd("~/git/Data/pyrazinamide/output/plots") -#getwd() -plot_name = "barplot_LIG_acoloured.svg" -my_plot_name = paste0(out_dir, "/", plot_name); my_plot_name -svg(my_plot_name, width = 26, height = 4) -g = ggplot(df, aes(factor(Position, ordered = T))) -outFile = g + -coord_cartesian(xlim = c(1, my_xlim) -, ylim = c(0, 6) -, clip = "off" -) + -geom_bar(aes(fill = group), colour = "grey") + -scale_fill_manual( values = colours -, guide = 'none') + -# geom_tile(aes(,-0.6, width = 0.9, height = 0.7) -# , fill = df$lab_bg) + -# geom_tile(aes(,-1, width = 0.9, height = 0.3) -# , fill = df$lab_bg2) + -geom_tile(aes(,-0.8, width = 0.95, height = 0.85) -, fill = df$lab_bg) + -geom_tile(aes(,-1.2, width = 0.95, height = -0.2) -, fill = df$lab_bg2) + -# Here it's important to specify that your axis goes from 1 to max number of levels -theme( axis.text.x = element_text(size = my_xats -, angle = 90 -, hjust = 1 -, vjust = 0.4 -, colour = my_axis_colours) -, axis.text.y = element_text(size = my_yats -, angle = 0 -, hjust = 1 -, vjust = 0) -, axis.title.x = element_text(size = my_xals) -, axis.title.y = element_text(size = my_yals ) -, axis.ticks.x = element_blank() -) + -labs(title = "" -, x = "Position" -, y = "Frequency") -print(outFile) -dev.off() -# for sanity and good practice -#rm(df)