From 6934faca1093fc56370e90bdeb0c1885aaffc39c Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 10 Sep 2020 17:53:49 +0100 Subject: [PATCH] saving other_plots.R --- scripts/plotting/combining_dfs_plotting.R | 3 +- scripts/plotting/notes | 10 + scripts/plotting/other_plots.R | 214 ++++++++++++++-------- scripts/plotting/other_plots_data.R | 145 ++++++++------- scripts/plotting/test.R | 96 ---------- 5 files changed, 227 insertions(+), 241 deletions(-) delete mode 100644 scripts/plotting/test.R diff --git a/scripts/plotting/combining_dfs_plotting.R b/scripts/plotting/combining_dfs_plotting.R index 911b0f8..f2589d1 100644 --- a/scripts/plotting/combining_dfs_plotting.R +++ b/scripts/plotting/combining_dfs_plotting.R @@ -319,8 +319,7 @@ all.equal(foo, bar) # clear variables rm(foo, bar, gene_metadata , in_filename_params, infile_params, merging_cols - , in_filename_gene_metadata, infile_gene_metadata - , merged_df2v2, merged_df2v3) + , in_filename_gene_metadata, infile_gene_metadata) #************************* ##################################################################### # Combining: LIG diff --git a/scripts/plotting/notes b/scripts/plotting/notes index 7cc9e0e..5cbe303 100644 --- a/scripts/plotting/notes +++ b/scripts/plotting/notes @@ -64,3 +64,13 @@ table(snpsBYpos_df$snpsBYpos) # with dups 1 2 3 4 5 6 7 39 30 25 23 20 6 2 + + + +#====================== +# dr and other muts count = PS +#====================== +dr_mutations_pyrazinamide : 227 +other_mutations_pyrazinamide : 197 + + diff --git a/scripts/plotting/other_plots.R b/scripts/plotting/other_plots.R index 2ca3868..25e0171 100644 --- a/scripts/plotting/other_plots.R +++ b/scripts/plotting/other_plots.R @@ -18,19 +18,29 @@ source("other_plots_data.R") #======= # output #======= -dr_other_plots_combined = "dr_other_muts.svg" -plot_dr_other_plots_combined = paste0(plotdir,"/", dr_other_plots_combined) +dr_other_combined = "dr_other_muts.svg" +plot_dr_other_combined = paste0(plotdir,"/", dr_other_combined) +dr_other_foldx = "dr_other_muts_foldx.svg" +plot_dr_other_foldx = paste0(plotdir,"/", dr_other_foldx) + +point_legend = "point_legend.svg" +plot_point_legend = paste0(plotdir,"/", point_legend) ######################################################################## # end of data extraction and cleaning for plots # ######################################################################## #my_comparisons <- list( c(dr_muts_col, other_muts_col) ) my_comparisons <- list( c("DM", "OM") ) + +my_ats = 15# axis text size +my_als = 20 # axis label size +my_fls = 20 # facet label size +my_pts = 22 # plot title size + #=========== # Plot1: PS #=========== - my_stat_ps = compare_means(param_value~mutation_info, group.by = "param_type" , data = df_lf_ps, paired = FALSE, p.adjust.method = "BH") @@ -42,22 +52,23 @@ p1 = ggplot(df_lf_ps, aes(x = mutation_info , nrow = 1 , scales = "free_y") + geom_boxplot(fill = "white", outlier.colour = NA - # position = position_dodge(width = 0.9) - , width = 0.5) + + #, position = position_dodge(width = 0.9) + , width = 0.2) + geom_point(position = position_jitterdodge(dodge.width=0.01) + , alpha = 0.5 , show.legend = FALSE , aes(colour = factor(duet_outcome))) + - theme(axis.text.x = element_text(size = 15) - , axis.text.y = element_text(size = 15 + theme(axis.text.x = element_text(size = my_ats) + , axis.text.y = element_text(size = my_ats , angle = 0 , hjust = 1 , vjust = 0) - , axis.title.x = element_text(size = 15) - , axis.title.y = element_text(size = 15) - , plot.title = element_text(size = 20, hjust = 0.5) - , strip.text.x = element_text(size = 15, colour = "black") - , legend.title = element_text(color = "black", size = 20) - , legend.text = element_text(size = 15) + , axis.title.x = element_text(size = my_ats) + , axis.title.y = element_text(size = my_ats) + , plot.title = element_text(size = my_pts , hjust = 0.5) + , strip.text.x = element_text(size = my_fls, colour = "black") + , legend.title = element_text(color = "black", size = my_als) + , legend.text = element_text(size = my_ats) , legend.direction = "vertical") + labs(title = "DUET" , x = "" @@ -68,51 +79,9 @@ p1 = ggplot(df_lf_ps, aes(x = mutation_info #, label = "p.format") , label = "p.signif") p1 - +#=============================================================== #=========== -# Plot 2: Foldx -#=========== -my_stat_foldx = compare_means(param_value~mutation_info, group.by = "param_type" - , data = df_lf_foldx, paired = FALSE, p.adjust.method = "BH") - -y_value = "param_value" - -p2 = ggplot(df_lf_foldx, aes(x = mutation_info - , y = eval(parse(text=y_value)) )) + - facet_wrap(~ param_type - , nrow = 1 - , scales = "free_y") + - geom_boxplot(fill = "white", outlier.colour = NA - # position = position_dodge(width = 0.9) - , width = 0.5) + - geom_point(position = position_jitterdodge(dodge.width=0.01) - , show.legend = FALSE - , aes(colour = factor(foldx_outcome))) + - theme(axis.text.x = element_text(size = 15) - , axis.text.y = element_text(size = 15 - , angle = 0 - , hjust = 1 - , vjust = 0) - , axis.title.x = element_text(size = 15) - , axis.title.y = element_text(size = 15) - , plot.title = element_text(size = 20, hjust = 0.5) - , strip.text.x = element_text(size = 15, colour = "black") - , legend.title = element_text(color = "black", size = 20) - , legend.text = element_text(size = 15) - , legend.direction = "vertical") + - labs(title = "Foldx" - , x = "" - , y = "")+ - stat_compare_means(comparisons = my_comparisons - , method = "wilcox.test" - , paired = FALSE - #, label = "p.format") - , label = "p.signif") -p2 - - -#=========== -# Plot 3: LIG +# Plot 2: LIG #=========== my_stat_lig = compare_means(param_value~mutation_info, group.by = "param_type" @@ -120,28 +89,29 @@ my_stat_lig = compare_means(param_value~mutation_info, group.by = "param_type" y_value = "param_value" -p3 = ggplot(df_lf_lig, aes(x = mutation_info +p2 = ggplot(df_lf_lig, aes(x = mutation_info , y = eval(parse(text=y_value)) )) + facet_wrap(~ param_type , nrow = 1 , scales = "free_y") + geom_boxplot(fill = "white", outlier.colour = NA - # position = position_dodge(width = 0.9) - , width = 0.5) + + #, position = position_dodge(width = 0.9) + , width = 0.2) + geom_point(position = position_jitterdodge(dodge.width=0.01) + , alpha = 0.5 , show.legend = FALSE , aes(colour = factor(ligand_outcome))) + - theme(axis.text.x = element_text(size = 15) - , axis.text.y = element_text(size = 15 + theme(axis.text.x = element_text(size = my_ats) + , axis.text.y = element_text(size = my_ats , angle = 0 , hjust = 1 , vjust = 0) - , axis.title.x = element_text(size = 15) - , axis.title.y = element_text(size = 15) - , plot.title = element_text(size = 20, hjust = 0.5) - , strip.text.x = element_text(size = 15, colour = "black") - , legend.title = element_text(color = "black", size = 20) - , legend.text = element_text(size = 15) + , axis.title.x = element_text(size = my_ats) + , axis.title.y = element_text(size = my_ats) + , plot.title = element_text(size = my_pts , hjust = 0.5) + , strip.text.x = element_text(size = my_fls, colour = "black") + , legend.title = element_text(color = "black", size = my_als) + , legend.text = element_text(size = my_ats) , legend.direction = "vertical") + labs(title = "Ligand Affinity" , x = "" @@ -152,20 +122,108 @@ p3 = ggplot(df_lf_lig, aes(x = mutation_info #, label = "p.format") , label = "p.signif") -p3 +p2 + +#==================================================================== +#=========== +# Plot 3: Foldx +#=========== +my_stat_foldx = compare_means(param_value~mutation_info, group.by = "param_type" + , data = df_lf_foldx, paired = FALSE, p.adjust.method = "BH") + +y_value = "param_value" + +p3 = ggplot(df_lf_foldx, aes(x = mutation_info + , y = eval(parse(text=y_value)) )) + + facet_wrap(~ param_type + , nrow = 1 + , scales = "free_y") + + geom_boxplot(fill = "white", outlier.colour = NA + #, position = position_dodge(width = 0.2) + , width = 0.5) + + geom_point(position = position_jitterdodge(dodge.width=0.01) + , alpha = 0.5 + , show.legend = FALSE + , aes(colour = factor(foldx_outcome))) + + theme(axis.text.x = element_text(size = my_ats) + , axis.text.y = element_text(size = my_ats + , angle = 0 + , hjust = 1 + , vjust = 0) + , axis.title.x = element_text(size = my_ats) + , axis.title.y = element_text(size = my_ats) + , plot.title = element_text(size = my_pts , hjust = 0.5) + , strip.text.x = element_text(size = my_fls, colour = "black") + #, legend.title = element_text(color = "black", size = my_als) + , legend.text = element_text(size = my_ats) + , legend.direction = "vertical" + , legend.title = element_blank()) + + labs(title = "" + , x = "" + , y = "") + + stat_compare_means(comparisons = my_comparisons + , method = "wilcox.test" + , paired = FALSE + #, label = "p.format") + , label = "p.signif") + +p3 + +#==================================================================== +#=========== +# legend +#=========== + +legend = ggplot(df_lf_foldx, aes(x = mutation_info + , y = eval(parse(text=y_value)) )) + + facet_wrap(~ param_type + , nrow = 1 + , scales = "free_y") + + geom_boxplot(fill = "white", outlier.colour = NA + #, position = position_dodge(width = 0.2) + , width = 0.5) + + geom_point(position = position_jitterdodge(dodge.width=0.01) + , aes(colour = factor(foldx_outcome))) + + theme(legend.text = element_text(size = 25) + , legend.direction = "vertical" + , legend.title = element_blank())+ + + guides(color = guide_legend(override.aes = list(size = 6))) + +legend #=========================== # combine #=========================== -#svg(plot_or_combined, width = 32, height = 12) -svg("test.svg", width = 25, height = 12) -#theme_set(theme_gray()) # to preserve default theme +#--------- +# plot 1 +#--------- +cat("Output plot:",plot_dr_other_combined) +svg(plot_dr_other_combined, width = 24, height = 12) -printFile = cowplot::plot_grid(plot_grid(p1, p2, p3 - , nrow = 3 - , align = 'h' - , labels = c("", "", "") - , label_size = 20)) -print(printFile) +#theme_set(theme_gray()) # to preserve default theme +OutPlot = cowplot::plot_grid(p1, p2 + , nrow = 2 + , align = "hv" + , axis = "b" + , labels = c("", "", "") + , label_size = my_als) +print(OutPlot) dev.off() + +#--------- +# plot 2 +#--------- +svg(plot_dr_other_foldx , width = 6, height = 7) +OutPlot2 = p3 +print(OutPlot2) +dev.off() + +#--------- +# plot 3: legend +#--------- +svg(plot_point_legend, width = 6, height = 7) +OutPlot3 = legend +print(OutPlot3) +dev.off() diff --git a/scripts/plotting/other_plots_data.R b/scripts/plotting/other_plots_data.R index a60af1e..1a0d73e 100644 --- a/scripts/plotting/other_plots_data.R +++ b/scripts/plotting/other_plots_data.R @@ -33,7 +33,7 @@ my_max = max(df_ps[,n]); my_max df_ps$foldx_scaled = ifelse(df_ps[,n] < 0 , df_ps[,n]/abs(my_min) , df_ps[,n]/my_max) #335, 15 -#sanity check +# sanity check my_min = min(df_ps$foldx_scaled); my_min my_max = max(df_ps$foldx_scaled); my_max @@ -50,7 +50,6 @@ if ( all(c1 == c2) ){ exit() } - # name tidying df_ps$mutation_info = as.factor(df_ps$mutation_info) df_ps$duet_outcome = as.factor(df_ps$duet_outcome) @@ -60,6 +59,21 @@ df_ps$ligand_outcome = as.factor(df_ps$ligand_outcome) # check table(df_ps$mutation_info) + # further checks to make sure dr and other muts are indeed unique +dr_muts = df_ps[df_ps$mutation_info == dr_muts_col,] +dr_muts_names = unique(dr_muts$mutation) + +other_muts = df_ps[df_ps$mutation_info == other_muts_col,] +other_muts_names = unique(other_muts$mutation) + +if ( table(dr_muts_names%in%other_muts_names)[[1]] == length(dr_muts_names) && + table(other_muts_names%in%dr_muts_names)[[1]] == length(other_muts_names) ){ + cat("PASS: dr and other muts are indeed unique") +}else{ + cat("FAIL: dr adn others muts are NOT unique!") + quit() +} + #%%%%%%%%%%%%%%%%%%% # REASSIGNMENT: LIG #%%%%%%%%%%%%%%%%%%%% @@ -133,78 +147,25 @@ levels(df_lf_ps$mutation_info)[levels(df_lf_ps$mutation_info)==other_muts_col] < levels(df_lf_ps$mutation_info); table(df_lf_ps$mutation_info) ############################################################################ -#=========== -# Data: foldx -#=========== -# keep similar dtypes cols together -cols_to_select_foldx = c("mutationinformation", "mutation", "position", "mutation_info" - , "foldx_outcome" - - , "foldx_scaled" - , "ligand_distance" - , "asa" - , "rsa" - , "rd_values" - , "kd_values") - -df_wf_foldx = df_ps[, cols_to_select_foldx] - -pivot_cols_foldx = cols_to_select_foldx[1:5]; pivot_cols_foldx - -expected_rows_lf_foldx = nrow(df_wf_foldx) * (length(df_wf_foldx) - length(pivot_cols_foldx)) -expected_rows_lf_foldx - -# LF data: foldx -df_lf_foldx = gather(df_wf_foldx, param_type, param_value, foldx_scaled:kd_values, factor_key=TRUE) - -if (nrow(df_lf_foldx) == expected_rows_lf_foldx){ - cat("PASS: long format data created for foldx") -}else{ - cat("FAIL: long format data could not be created for foldx") - exit() -} - -# assign pretty labels: param type -levels(df_lf_foldx$param_type); table(df_lf_foldx$param_type) - -levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="foldx_scaled"] <- "Foldx" -levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="ligand_distance"] <- "Ligand Distance" -levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="asa"] <- "ASA" -levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="rsa"] <- "RSA" -levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="rd_values"] <- "RD" -levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="kd_values"] <- "KD" -# check -levels(df_lf_foldx$param_type); table(df_lf_foldx$param_type) - -# assign pretty labels: mutation_info -levels(df_lf_foldx$mutation_info); table(df_lf_foldx$mutation_info) -sum(table(df_lf_foldx$mutation_info)) == nrow(df_lf_foldx) - -levels(df_lf_foldx$mutation_info)[levels(df_lf_foldx$mutation_info)==dr_muts_col] <- "DM" -levels(df_lf_foldx$mutation_info)[levels(df_lf_foldx$mutation_info)==other_muts_col] <- "OM" -# check -levels(df_lf_foldx$mutation_info); table(df_lf_foldx$mutation_info) - -############################################################################ #=========== # LF data: LIG #=========== # keep similar dtypes cols together cols_to_select_lig = c("mutationinformation", "mutation", "position", "mutation_info" - , "ligand_outcome" - - , "affinity_scaled" - , "ligand_distance" - , "asa" - , "rsa" - , "rd_values" - , "kd_values") + , "ligand_outcome" + + , "affinity_scaled" + , "ligand_distance" + , "asa" + , "rsa" + , "rd_values" + , "kd_values") df_wf_lig = df_lig[, cols_to_select_lig] pivot_cols_lig = cols_to_select_lig[1:5]; pivot_cols_lig - + expected_rows_lf_lig = nrow(df_wf_lig) * (length(df_wf_lig) - length(pivot_cols_lig)) expected_rows_lf_lig @@ -239,7 +200,61 @@ levels(df_lf_lig$mutation_info)[levels(df_lf_lig$mutation_info)==other_muts_col] # check levels(df_lf_lig$mutation_info); table(df_lf_lig$mutation_info) -############################################################################### +############################################################################# +#=========== +# Data: foldx +#=========== +# keep similar dtypes cols together +cols_to_select_foldx = c("mutationinformation", "mutation", "position", "mutation_info" + , "foldx_outcome" + + , "foldx_scaled") + #, "ligand_distance" + #, "asa" + #, "rsa" + #, "rd_values" + #, "kd_values") + + +df_wf_foldx = df_ps[, cols_to_select_foldx] + +pivot_cols_foldx = cols_to_select_foldx[1:5]; pivot_cols_foldx + +expected_rows_lf_foldx = nrow(df_wf_foldx) * (length(df_wf_foldx) - length(pivot_cols_foldx)) +expected_rows_lf_foldx + +# LF data: foldx +df_lf_foldx = gather(df_wf_foldx, param_type, param_value, foldx_scaled, factor_key=TRUE) + +if (nrow(df_lf_foldx) == expected_rows_lf_foldx){ + cat("PASS: long format data created for foldx") +}else{ + cat("FAIL: long format data could not be created for foldx") + exit() +} + +# assign pretty labels: param type +levels(df_lf_foldx$param_type); table(df_lf_foldx$param_type) + +levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="foldx_scaled"] <- "Foldx" +#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="ligand_distance"] <- "Ligand Distance" +#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="asa"] <- "ASA" +#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="rsa"] <- "RSA" +#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="rd_values"] <- "RD" +#levels(df_lf_foldx$param_type)[levels(df_lf_foldx$param_type)=="kd_values"] <- "KD" +# check +levels(df_lf_foldx$param_type); table(df_lf_foldx$param_type) + +# assign pretty labels: mutation_info +levels(df_lf_foldx$mutation_info); table(df_lf_foldx$mutation_info) +sum(table(df_lf_foldx$mutation_info)) == nrow(df_lf_foldx) + +levels(df_lf_foldx$mutation_info)[levels(df_lf_foldx$mutation_info)==dr_muts_col] <- "DM" +levels(df_lf_foldx$mutation_info)[levels(df_lf_foldx$mutation_info)==other_muts_col] <- "OM" +# check +levels(df_lf_foldx$mutation_info); table(df_lf_foldx$mutation_info) + +############################################################################ # clear excess variables rm(cols_to_select_ps, cols_to_select_foldx, cols_to_select_lig diff --git a/scripts/plotting/test.R b/scripts/plotting/test.R deleted file mode 100644 index 8033517..0000000 --- a/scripts/plotting/test.R +++ /dev/null @@ -1,96 +0,0 @@ -setwd("/home/tanu/git/LSHTM_analysis/scripts/plotting") - -source("combining_dfs_plotting.R") - -table(merged_df3$mutation_info) - -# assign foldx -#ddg<0 = "Stabilising" (-ve) -table(merged_df3$ddg < 0) -merged_df3$foldx_outcome = ifelse(merged_df3$ddg < 0, "Stabilising", "Destabilising") - -#=========== -# PS data -#=========== -dr_muts = merged_df3[merged_df3$mutation_info == "dr_mutations_pyrazinamide",] -other_muts = merged_df3[merged_df3$mutation_info == "other_mutations_pyrazinamide",] - -par(mfrow = c(1,1)) -par(mfrow = c(2,6)) - -# mcsm duet -boxplot(dr_muts$duet_scaled, other_muts$duet_scaled, main = "DUET" - #, col = factor(merged_df3$duet_outcome) - ) -wilcox.test(dr_muts$duet_scaled, other_muts$duet_scaled, paired = F) - -# foldx ddg -boxplot(dr_muts$ddg, other_muts$ddg, main = "Foldx") -wilcox.test(dr_muts$ddg, other_muts$ddg, paired = F) - -# rd -boxplot(dr_muts$rd_values, other_muts$rd_values, main = "RD") -wilcox.test(dr_muts$rd_values, other_muts$rd_values) - -# kd -boxplot(dr_muts$kd_values, other_muts$kd_values, main = "KD") -wilcox.test(dr_muts$kd_values, other_muts$kd_values) - -# asa -boxplot(dr_muts$asa, other_muts$asa, main = "ASA") -wilcox.test(dr_muts$asa, other_muts$asa) - -# rsa -boxplot(dr_muts$rsa, other_muts$rsa, main = "RSA") -wilcox.test(dr_muts$rsa, other_muts$rsa) - -#=================================================================== -#========== -# LIG data -#========== -dr_muts_lig = merged_df3_lig[merged_df3_lig$mutation_info == "dr_mutations_pyrazinamide",] -other_muts_lig = merged_df3_lig[merged_df3_lig$mutation_info == "other_mutations_pyrazinamide",] - -# mcsm ligand affinity -boxplot(dr_muts_lig$duet_scaled, other_muts_lig$duet_scaled, main = "Ligand affinity") -wilcox.test(dr_muts_lig$duet_scaled, other_muts_lig$duet_scaled, paired = F) - -# rd -boxplot(dr_muts_lig$rd_values, other_muts_lig$rd_values, main = "RD") -wilcox.test(dr_muts_lig$rd_values, other_muts_lig$rd_values) - -# kd -boxplot(dr_muts_lig$kd_values, other_muts_lig$kd_values, main = "KD") -wilcox.test(dr_muts_lig$kd_values, other_muts_lig$kd_values) - -# asa -boxplot(dr_muts_lig$asa, other_muts_lig$asa, main = "ASA") -wilcox.test(dr_muts_lig$asa, other_muts_lig$asa) - -# rsa -boxplot(dr_muts_lig$rsa, other_muts_lig$rsa, main = "RSA") -wilcox.test(dr_muts_lig$rsa, other_muts_lig$rsa) - -# checking agreement b/w mcsm and foldx -cols_to_select = c("mutationinformation" - , "mutation_info" - , "duet_scaled" - , "ddg" - , "duet_outcome" - , "foldx_outcome") - -merged_df3_short = select(merged_df3, cols_to_select) - -mcsm_foldx = merged_df3_short[which(merged_df3_short$duet_outcome != merged_df3_short$foldx_outcome),] - -mcsm_foldx$sign_comp = ifelse(sign(mcsm_foldx$duet_scaled)==sign(mcsm_foldx$ddg), "PASS", "FAIL") -table(mcsm_foldx$sign_comp) - -# another way of checking -merged_df3$sign_comp = ifelse(sign(merged_df3$duet_scaled)==sign(merged_df3$ddg), "PASS", "FAIL") -table(merged_df3$sign_comp) - -disagreement = table(merged_df3$sign_comp)[2]/nrow(merged_df3)*100 -agreement = 100 - disagreement - -cat("There is", agreement, "% between mcsm and foldx predictions")