# # small_df3 = merged_df3[merged_df3['position'] <= 670 & merged_df3['position'] >= 300,] # # small_df3 = merged_df3[merged_df3['position'] <= 451 & merged_df3['position'] >= 300,] # # # mutable_df3 = cbind(small_df3) # plot_df = cbind(small_df3) source("~/git/LSHTM_analysis/config/embb.R") source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") #outdir="/home/pub/Work/LSHTM/Thesis_Plots/" outdir="~/git/Writing/thesis/tex/images-corrected/embb/" #outdir="~/tmp/embb/" # add "pos_count" position count column merged_df3=merged_df3 %>% dplyr::add_count(position) merged_df3$pos_count=merged_df3$n merged_df3$n=NULL # re-sort the dataframe according to position count sorted_df = cbind(merged_df3) sorted_df = sorted_df %>% arrange(pos_count) # determine some split points # further split up our sorted dataframe into low/mid/high for stability plotting # snpcount_high_df = sorted_df[mid_snpcount:nrow(sorted_df),] # snpcount_mid_df = sorted_df[low_snpcount:mid_snpcount,] # snpcount_low_df = sorted_df[1:low_snpcount,] snpcount_high_df = sorted_df[sorted_df$pos_count>1,] snpcount_low_df = sorted_df[sorted_df$pos_count<2,] low_half_snpcount = round(nrow(snpcount_low_df)/2) snpcount_low_low_df = snpcount_low_df[1:low_half_snpcount,] snpcount_low_high_df = snpcount_low_df[low_half_snpcount:nrow(snpcount_low_df),] # do stability plots for later assembly stability_high = bp_stability_hmap(snpcount_high_df, reorder_position = T, p_title = NULL, my_ylab = "", yvar_colname = 'avg_stability_scaled', stability_colname = "avg_stability_scaled", stability_outcome_colname = "avg_stability_outcome", my_xaxls = 6, # x-axis label size my_yaxls = 8, # y-axis label size aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3 ) stability_centre = bp_stability_hmap(snpcount_low_high_df, reorder_position = T, p_title = NULL, yvar_colname = 'avg_stability_scaled', stability_colname = "avg_stability_scaled", stability_outcome_colname = "avg_stability_outcome", my_ylab = "SAV Count", y_max_override = max(snpcount_high_df$pos_count), my_xaxls = 6, # x-axis label size my_yaxls = 8, # y-axis label size aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3 ) stability_low = bp_stability_hmap(snpcount_low_low_df, reorder_position = T, p_title = NULL, yvar_colname = 'avg_stability_scaled', stability_colname = "avg_stability_scaled", stability_outcome_colname = "avg_stability_outcome", my_ylab = "", y_max_override = max(snpcount_high_df$pos_count), my_xaxls = 6, # x-axis label size my_yaxls = 8, # y-axis label size aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3, x_label="Wild-Type Position" ) low=round(max(merged_df3$position)/3) centre=low*2 # Generate heat bar for whole DF heatbar_df = generate_distance_colour_map(merged_df3) heat_bar_legend = get_legend( ggplot(merged_df3, aes(factor(ligand_distance), y=0)) + theme(legend.direction="horizontal", legend.title = element_text(size=10), #legend.text = element_text(size=10), #legend.key.size = unit(10, "pt") )+ geom_point(aes( colour = ligand_distance, ) ) + scale_colour_gradientn(name="Ligand\nDistance (Å)", colours = magma(5, direction = -1)) ) # Position Tile Legend tile_colour_map = c("EMB" = "green" ,"DPA" = "darkslategrey" , "CDL" = "navyblue" , "Ca" = "purple") tile_legend=get_legend( ggplot(tile_map, aes(factor(tile),y=0 , colour=tile_colour , fill=tile_colour))+ geom_tile() + theme( legend.direction="horizontal", legend.text = element_text(size=10), legend.key.size = unit(10, "pt")) + scale_colour_manual(name="Active Site:" , values=tile_colour_map) + scale_fill_manual(name="Active Site:" , values = tile_colour_map) ) # SAV Tile Red/Blue Legend temp_snp_df=merged_df3%>%dplyr::add_count(position) snp_tile_legend = get_legend( ggplot(temp_snp_df, aes(factor(avg_stability_scaled), y=0)) + theme(#legend.title = element_blank(), legend.direction="horizontal", legend.title = element_text(size=10), #legend.text = element_text(size=10), #legend.key.size = unit(10, "pt") )+ geom_point(aes( colour = avg_stability_scaled, ) ) + scale_colour_gradientn(name="Protein Stability\n(ΔΔG Kcal/mol)", colours = rev(c("#007ba4", # rev() because FUCK "#00bfc4", "#ffffff", "#ffd9d0", "#f8766d", "#ae3121"))) ) # Logop Chemistry Colours temp_logo_p_snp=cbind(merged_df3) tab_mt = table(temp_logo_p_snp[["mutant_type"]], temp_logo_p_snp[["position"]]) tab_mt = unclass(tab_mt) logoplot_chemistry_legend=get_legend( ggplot() + geom_logo(tab_mt , method = 'custom' , col_scheme = "chemistry" , seq_type = 'aa') + theme( legend.title = element_blank(), legend.direction="horizontal", legend.text = element_text(size=10), legend.key.size = unit(10, "pt")) ) # LOW small_df3 = merged_df3[merged_df3['position'] < low,] mutable_df3 = cbind(small_df3) print(paste0("Plotting LOW Rows: ", nrow(small_df3))) logo_p_snps_low = LogoPlotSnps(mutable_df3, y_lab="SAV Diversity", aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3 ) # needs to be: active_aa_pos AND odds ratio >10 logo_or_low = LogoPlotCustomH(small_df3, y_axis_increment=10, y_lab="Odds Ratio", rm_empty_y=T, aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3 ) # MID small_df3 = merged_df3[merged_df3['position'] <= centre & merged_df3['position'] >= low,] mutable_df3 = cbind(small_df3) print(paste0("Plotting MID Rows: ", nrow(small_df3))) logo_p_snps_centre = LogoPlotSnps( mutable_df3, aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3, y_lab = "SAV Diversity" ) logo_or_centre = LogoPlotCustomH(small_df3, y_axis_increment=10, y_lab = "Odds Ratio", y_axis_log = FALSE, rm_empty_y=T, aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3 ) # HIGH small_df3 = merged_df3[merged_df3['position'] > centre,] mutable_df3 = cbind(small_df3) print(paste0("Plotting HIGH Rows: ", nrow(small_df3))) logo_p_snps_high = LogoPlotSnps(mutable_df3, y_lab="SAV Diversity", x_lab = "Position", aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3 ) + annotate(geom="text", label="Wild-Type Position",x=0.5,y=0.025, size=3) logo_or_high = LogoPlotCustomH(small_df3, y_axis_increment=10, y_lab="Odds Ratio", rm_empty_y=T, aa_pos_drug=aa_pos_drug, active_aa_pos=active_aa_pos, aa_pos_lig1=aa_pos_lig1, aa_pos_lig2=aa_pos_lig2, aa_pos_lig3=aa_pos_lig3 )+ annotate(geom="text", label="Wild-Type Position",x=0.5,y=0.025, size=3) jpeg(paste0(outdir,"embb_average_stability.jpeg"), height = 8.25, width=11.5, unit="in",res=300) cowplot::plot_grid( cowplot::plot_grid(heat_bar_legend, tile_legend, snp_tile_legend, nrow=1), cowplot::plot_grid(stability_high, stability_centre, stability_low,ncol=1), ncol=1, rel_heights = c(1,12) ) dev.off() jpeg(paste0(outdir,"embb_snp.jpeg"), height = 8.25, width=11.5, unit="in",res=300) cowplot::plot_grid( cowplot::plot_grid(heat_bar_legend, tile_legend, logoplot_chemistry_legend, nrow=1), cowplot::plot_grid(logo_p_snps_low, logo_p_snps_centre, logo_p_snps_high, ncol=1, align='hv'), ncol=1, rel_heights = c(1,12) ) dev.off() jpeg(paste0(outdir,"embb_or.jpeg"), height = 8.25, width=11.5, unit="in",res=300) cowplot::plot_grid( cowplot::plot_grid(heat_bar_legend, tile_legend, logoplot_chemistry_legend, nrow=1), cowplot::plot_grid(logo_or_low, logo_or_centre, logo_or_high, ncol=1, align='hv'), ncol=1, rel_heights = c(1,12) ) dev.off()