# # small_df3 = merged_df3[merged_df3['position'] <= 670 & merged_df3['position'] >= 300,] # # small_df3 = merged_df3[merged_df3['position'] <= 451 & merged_df3['position'] >= 300,] # # # mutable_df3 = cbind(small_df3) # plot_df = cbind(small_df3) source("~/git/LSHTM_analysis/config/gid.R") source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") #outdir="/home/pub/Work/LSHTM/Thesis_Plots/" outdir="~/git/Writing/thesis/tex/images-corrected/gid/" # add "pos_count" position count column merged_df3=merged_df3 %>% dplyr::add_count(position) merged_df3$pos_count=merged_df3$n merged_df3$n=NULL # re-sort the dataframe according to position count sorted_df = cbind(merged_df3) sorted_df = sorted_df %>% arrange(pos_count) # determine some split points # further split up our sorted dataframe into low/mid/high for stability plotting #snpcount_high_df = sorted_df[mid_snpcount:nrow(sorted_df),] #snpcount_mid_df = sorted_df[low_snpcount:mid_snpcount,] #snpcount_low_df = sorted_df[1:low_snpcount,] snpcount_high_df = sorted_df[sorted_df$pos_count>1,] snpcount_low_df = sorted_df[sorted_df$pos_count<2,] low_half_snpcount = round(nrow(snpcount_low_df)/2) snpcount_low_low_df = snpcount_low_df[1:low_half_snpcount,] snpcount_low_high_df = snpcount_low_df[low_half_snpcount:nrow(snpcount_low_df),] #### Put sorted-by-pos_count bp_stability_hmaps here #### #...if they look better :-) low=round(max(merged_df3$position)/3) centre=low*2 # Generate heat bar for whole DF heatbar_df = generate_distance_colour_map(merged_df3) heat_bar_legend = get_legend( ggplot(merged_df3, aes(factor(ligand_distance), y=0)) + theme(legend.direction="horizontal", legend.title = element_text(size=10), #legend.text = element_text(size=10), #legend.key.size = unit(10, "pt") )+ geom_point(aes( colour = ligand_distance, ) ) + scale_colour_gradientn( name="Ligand\nDistance (Å)", colours = magma(5, direction = -1)) ) # Position Tile Legend # STR #00ff00 tile_colour_map = c("STR" = "green", "SAM" = "#2f4f4f" ,"RNA" = "#ff1493" ,"AMP" = "#000080") tile_legend=get_legend( ggplot(tile_map, aes(factor(tile),y=0 , colour=tile_colour , fill=tile_colour))+ geom_tile() + theme(legend.title = element_blank(), legend.direction="horizontal", legend.text = element_text(size=10), legend.key.size = unit(10, "pt")) + scale_colour_manual(name=NULL #, values = tile_map$tile_colour , values=tile_colour_map) + scale_fill_manual(name=NULL #,values=tile_map$tile_colour , values = tile_colour_map) ) # SAV Tile Red/Blue Legend temp_snp_df=merged_df3%>%dplyr::add_count(position) snp_tile_legend = get_legend( ggplot(temp_snp_df, aes(factor(avg_stability_scaled), y=0)) + theme(#legend.title = element_blank(), legend.direction="horizontal", legend.title = element_text(size=10), #legend.text = element_text(size=10), #legend.key.size = unit(10, "pt") )+ geom_point(aes( colour = avg_stability_scaled, ) ) + scale_colour_gradientn( name="Protein Stability\n(ΔΔG Kcal/mol)", colours = rev( c("#007ba4", # rev() because FUCK "#00bfc4", "#ffffff", "#ffd9d0", "#f8766d", "#ae3121") ) ) ) # Logop Chemistry Colours temp_logo_p_snp=cbind(merged_df3) tab_mt = table(temp_logo_p_snp[["mutant_type"]], temp_logo_p_snp[["position"]]) tab_mt = unclass(tab_mt) logoplot_chemistry_legend=get_legend( ggplot() + geom_logo( tab_mt , method = 'custom' , col_scheme = "chemistry" , seq_type = 'aa') + theme( legend.title = element_blank(), legend.direction="horizontal", legend.text = element_text(size=10), legend.key.size = unit(10, "pt") ) ) stab=function(...){bp_stability_hmap( small_df3, p_title = NULL, yvar_colname = 'avg_stability_scaled', stability_colname = "avg_stability_scaled", stability_outcome_colname = "avg_stability_outcome", my_ylab = "", my_xaxls = 6, # x-axis label size my_yaxls = 8, # y-axis label size drug_colour = tile_map$tile_colour[1], lig1_colour = tile_map$tile_colour[2], lig2_colour = tile_map$tile_colour[3], lig3_colour = tile_map$tile_colour[4], y_max_override=max(merged_df3$pos_count), aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3, ... ) } # LOW small_df3 = merged_df3[merged_df3['position'] < low,] mutable_df3 = cbind(small_df3) print(paste0("Plotting LOW Rows: ", nrow(small_df3))) logo_p_snps_low = LogoPlotSnps( mutable_df3, y_lab="", aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3, drug_colour = tile_map$tile_colour[1], lig1_colour = tile_map$tile_colour[2], lig2_colour = tile_map$tile_colour[3], lig3_colour = tile_map$tile_colour[4] ) # needs to be: active_aa_pos AND odds ratio >10 logo_or_low = LogoPlotCustomH( small_df3, y_axis_increment=10, y_lab="", aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3, rm_empty_y=T, drug_colour = tile_map$tile_colour[1], lig1_colour = tile_map$tile_colour[2], lig2_colour = tile_map$tile_colour[3], lig3_colour = tile_map$tile_colour[4] ) stability_low = stab() # MID small_df3 = merged_df3[merged_df3['position'] <= centre & merged_df3['position'] >= low,] mutable_df3 = cbind(small_df3) print(paste0("Plotting MID Rows: ", nrow(small_df3))) logo_p_snps_centre = LogoPlotSnps( mutable_df3, y_lab = "", aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3, drug_colour = tile_map$tile_colour[1], lig1_colour = tile_map$tile_colour[2], lig2_colour = tile_map$tile_colour[3], lig3_colour = tile_map$tile_colour[4] ) logo_or_centre = LogoPlotCustomH( small_df3, y_axis_increment=10, rm_empty_y=T, aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3, drug_colour = tile_map$tile_colour[1], lig1_colour = tile_map$tile_colour[2], lig2_colour = tile_map$tile_colour[3], lig3_colour = tile_map$tile_colour[4] ) stability_centre = stab() # HIGH small_df3 = merged_df3[merged_df3['position'] > centre,] mutable_df3 = cbind(small_df3) print(paste0("Plotting HIGH Rows: ", nrow(small_df3))) logo_p_snps_high = LogoPlotSnps( mutable_df3, y_lab="" , aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3, drug_colour = tile_map$tile_colour[1], lig1_colour = tile_map$tile_colour[2], lig2_colour = tile_map$tile_colour[3], lig3_colour = tile_map$tile_colour[4] ) logo_or_high = LogoPlotCustomH( small_df3, y_axis_increment=10, y_lab="", aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3, rm_empty_y=T, drug_colour = tile_map$tile_colour[1], lig1_colour = tile_map$tile_colour[2], lig2_colour = tile_map$tile_colour[3], lig3_colour = tile_map$tile_colour[4] ) stability_high = stab() #### "sorted" stability plots #### # do stability plots for later assembly stability_high_reorder = bp_stability_hmap( snpcount_high_df, reorder_position = T, p_title = NULL, my_ylab = NULL, yvar_colname = 'avg_stability_scaled', stability_colname = "avg_stability_scaled", stability_outcome_colname = "avg_stability_outcome", my_xaxls = 6, # x-axis label size my_yaxls = 8, # y-axis label size drug_colour = tile_map$tile_colour[1], lig1_colour = tile_map$tile_colour[2], lig2_colour = tile_map$tile_colour[3], lig3_colour = tile_map$tile_colour[4], aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3, y_max_override=max(merged_df3$pos_count) ) stability_centre_reorder = bp_stability_hmap( snpcount_low_high_df, reorder_position = T, p_title = NULL, yvar_colname = 'avg_stability_scaled', stability_colname = "avg_stability_scaled", stability_outcome_colname = "avg_stability_outcome", my_ylab = "", my_xaxls = 6, # x-axis label size my_yaxls = 8, # y-axis label size drug_colour = tile_map$tile_colour[1], lig1_colour = tile_map$tile_colour[2], lig2_colour = tile_map$tile_colour[3], lig3_colour = tile_map$tile_colour[4], aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3, y_max_override=max(merged_df3$pos_count) ) stability_low_reorder = bp_stability_hmap( snpcount_low_low_df, reorder_position = T, p_title = NULL, yvar_colname = 'avg_stability_scaled', stability_colname = "avg_stability_scaled", stability_outcome_colname = "avg_stability_outcome", my_ylab = NULL, my_xaxls = 6, # x-axis label size my_yaxls = 8, # y-axis label size drug_colour = tile_map$tile_colour[1], lig1_colour = tile_map$tile_colour[2], lig2_colour = tile_map$tile_colour[3], lig3_colour = tile_map$tile_colour[4], aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3, y_max_override=max(merged_df3$pos_count) ) jpeg(paste0(outdir,"gid_average_stability-sorted.jpeg"), height = 6, width=11.5, unit="in",res=300) cowplot::plot_grid( cowplot::plot_grid(heat_bar_legend, tile_legend, snp_tile_legend, nrow=1), cowplot::plot_grid(stability_high_reorder, stability_centre_reorder,stability_low_reorder, ncol=1), ncol=1, rel_heights = c(1,12) ) dev.off() jpeg(paste0(outdir,"gid_average_stability-unsorted.jpeg"), height = 6, width=11.5, unit="in",res=300) cowplot::plot_grid( cowplot::plot_grid(heat_bar_legend, tile_legend, snp_tile_legend, nrow=1), cowplot::plot_grid(stability_low, stability_centre, stability_high, ncol=1), ncol=1, rel_heights = c(1,12) ) dev.off() jpeg(paste0(outdir,"gid_snp.jpeg"), height = 6, width=11.5, unit="in",res=300) cowplot::plot_grid( cowplot::plot_grid(heat_bar_legend, tile_legend, logoplot_chemistry_legend, nrow=1), cowplot::plot_grid(logo_p_snps_low, logo_p_snps_centre, logo_p_snps_high, ncol=1, align='hv'), ncol=1, rel_heights = c(1,12) ) dev.off() jpeg(paste0(outdir,"gid_or.jpeg"), height = 6, width=11.5, unit="in",res=300) cowplot::plot_grid( cowplot::plot_grid(heat_bar_legend, tile_legend, logoplot_chemistry_legend, nrow=1), cowplot::plot_grid(logo_or_low, logo_or_centre, logo_or_high, ncol=1, align='hv'), ncol=1, rel_heights = c(1,12) ) dev.off()