378 lines
11 KiB
R
378 lines
11 KiB
R
#
|
|
# small_df3 = merged_df3[merged_df3['position'] <= 670 & merged_df3['position'] >= 300,]
|
|
#
|
|
# small_df3 = merged_df3[merged_df3['position'] <= 451 & merged_df3['position'] >= 300,]
|
|
#
|
|
#
|
|
# mutable_df3 = cbind(small_df3)
|
|
# plot_df = cbind(small_df3)
|
|
|
|
source("~/git/LSHTM_analysis/config/alr.R")
|
|
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
|
#outdir="/home/pub/Work/LSHTM/Thesis_Plots/"
|
|
outdir="~/git/Writing/thesis/tex/images-corrected/alr/"
|
|
|
|
|
|
# add "pos_count" position count column
|
|
merged_df3=merged_df3 %>% dplyr::add_count(position)
|
|
merged_df3$pos_count=merged_df3$n
|
|
merged_df3$n=NULL
|
|
|
|
# re-sort the dataframe according to position count
|
|
sorted_df = cbind(merged_df3)
|
|
sorted_df = sorted_df %>% arrange(pos_count)
|
|
|
|
# determine some split points
|
|
|
|
# further split up our sorted dataframe into low/mid/high for stability plotting
|
|
#snpcount_high_df = sorted_df[mid_snpcount:nrow(sorted_df),]
|
|
#snpcount_mid_df = sorted_df[low_snpcount:mid_snpcount,]
|
|
#snpcount_low_df = sorted_df[1:low_snpcount,]
|
|
|
|
snpcount_high_df = sorted_df[sorted_df$pos_count>1,]
|
|
snpcount_low_df = sorted_df[sorted_df$pos_count<2,]
|
|
|
|
low_half_snpcount = round(nrow(snpcount_low_df)/2)
|
|
|
|
snpcount_low_low_df = snpcount_low_df[1:low_half_snpcount,]
|
|
snpcount_low_high_df = snpcount_low_df[low_half_snpcount:nrow(snpcount_low_df),]
|
|
|
|
#### Put sorted-by-pos_count bp_stability_hmaps here ####
|
|
#...if they look better :-)
|
|
|
|
length(merged_df3$position)
|
|
|
|
# manually picked numbers due to irregular position distribution
|
|
low = 170
|
|
centre=300
|
|
#low=round(max(merged_df3$position)/3)
|
|
#centre=low*2
|
|
|
|
# Generate heat bar for whole DF
|
|
heatbar_df = generate_distance_colour_map(merged_df3)
|
|
|
|
heat_bar_legend = get_legend(
|
|
ggplot(merged_df3, aes(factor(ligand_distance), y=0)) +
|
|
theme(legend.direction="horizontal",
|
|
legend.title = element_text(size=10),
|
|
#legend.text = element_text(size=10),
|
|
#legend.key.size = unit(10, "pt")
|
|
)+
|
|
geom_point(aes(
|
|
colour = ligand_distance,
|
|
|
|
)
|
|
) +
|
|
scale_colour_gradientn(
|
|
name="Ligand\nDistance (Å)",
|
|
colours = magma(5, direction = -1))
|
|
)
|
|
# Position Tile Legend
|
|
# STR #00ff00
|
|
tile_colour_map = c("DCS" = "#00ff00"
|
|
,"PLP" = "#000080")
|
|
|
|
tile_legend=get_legend(
|
|
ggplot(tile_map, aes(factor(tile),y=0
|
|
, colour=tile_colour
|
|
, fill=tile_colour))+
|
|
geom_tile() +
|
|
theme(
|
|
legend.direction="horizontal",
|
|
legend.text = element_text(size=10),
|
|
legend.key.size = unit(10, "pt")) +
|
|
scale_colour_manual(name="Active Site:"
|
|
, values=tile_colour_map) +
|
|
scale_fill_manual(name="Active Site:"
|
|
, values = tile_colour_map)
|
|
)
|
|
|
|
# SAV Tile Red/Blue Legend
|
|
|
|
temp_snp_df=merged_df3%>%dplyr::add_count(position)
|
|
|
|
snp_tile_legend = get_legend(
|
|
ggplot(temp_snp_df, aes(factor(avg_stability_scaled), y=0)) +
|
|
theme(#legend.title = element_blank(),
|
|
legend.direction="horizontal",
|
|
legend.title = element_text(size=10),
|
|
#legend.text = element_text(size=10),
|
|
#legend.key.size = unit(10, "pt")
|
|
)+
|
|
geom_point(aes(
|
|
colour = avg_stability_scaled,
|
|
|
|
)
|
|
) +
|
|
scale_colour_gradientn(
|
|
name="Protein Stability\n(ΔΔG Kcal/mol)", colours = rev(
|
|
c("#007ba4", # rev() because FUCK
|
|
"#00bfc4",
|
|
"#ffffff",
|
|
"#ffd9d0",
|
|
"#f8766d",
|
|
"#ae3121")
|
|
)
|
|
)
|
|
)
|
|
# Logop Chemistry Colours
|
|
temp_logo_p_snp=cbind(merged_df3)
|
|
tab_mt = table(temp_logo_p_snp[["mutant_type"]], temp_logo_p_snp[["position"]])
|
|
tab_mt = unclass(tab_mt)
|
|
logoplot_chemistry_legend=get_legend(
|
|
ggplot() +
|
|
geom_logo(
|
|
tab_mt
|
|
, method = 'custom'
|
|
, col_scheme = "chemistry"
|
|
, seq_type = 'aa') + theme(
|
|
legend.title = element_blank(),
|
|
legend.direction="horizontal",
|
|
legend.text = element_text(size=10),
|
|
legend.key.size = unit(10, "pt")
|
|
)
|
|
)
|
|
|
|
stab=function(...){bp_stability_hmap(
|
|
small_df3,
|
|
p_title = NULL,
|
|
yvar_colname = 'avg_stability_scaled',
|
|
stability_colname = "avg_stability_scaled",
|
|
stability_outcome_colname = "avg_stability_outcome",
|
|
my_ylab = "",
|
|
my_xaxls = 6, # x-axis label size
|
|
my_yaxls = 8, # y-axis label size
|
|
drug_colour = tile_map$tile_colour[1],
|
|
lig1_colour = tile_map$tile_colour[2],
|
|
lig2_colour = tile_map$tile_colour[3],
|
|
lig3_colour = tile_map$tile_colour[4],
|
|
y_max_override=max(merged_df3$pos_count),
|
|
aa_pos_drug=aa_pos_drug,
|
|
active_aa_pos=active_aa_pos,
|
|
aa_pos_lig1=aa_pos_lig1,
|
|
aa_pos_lig2=aa_pos_lig2,
|
|
aa_pos_lig3=aa_pos_lig3,
|
|
...
|
|
)
|
|
}
|
|
|
|
|
|
|
|
# LOW
|
|
small_df3 = merged_df3[merged_df3['position'] < low,]
|
|
mutable_df3 = cbind(small_df3)
|
|
print(paste0("Plotting LOW Rows: ", nrow(small_df3)))
|
|
|
|
|
|
logo_p_snps_low = LogoPlotSnps(
|
|
mutable_df3,
|
|
y_lab="SAV Diversity",
|
|
aa_pos_drug=aa_pos_drug,
|
|
active_aa_pos=active_aa_pos,
|
|
aa_pos_lig1=aa_pos_lig1,
|
|
aa_pos_lig2=aa_pos_lig2,
|
|
aa_pos_lig3=aa_pos_lig3,
|
|
drug_colour = tile_map$tile_colour[1],
|
|
lig1_colour = tile_map$tile_colour[2],
|
|
lig2_colour = tile_map$tile_colour[3],
|
|
lig3_colour = tile_map$tile_colour[4]
|
|
)
|
|
|
|
# needs to be: active_aa_pos AND odds ratio >10
|
|
logo_or_low = LogoPlotCustomH(
|
|
small_df3,
|
|
y_axis_increment=10,
|
|
y_lab="",
|
|
aa_pos_drug=aa_pos_drug,
|
|
active_aa_pos=active_aa_pos,
|
|
aa_pos_lig1=aa_pos_lig1,
|
|
aa_pos_lig2=aa_pos_lig2,
|
|
aa_pos_lig3=aa_pos_lig3,
|
|
rm_empty_y=FALSE,
|
|
drug_colour = tile_map$tile_colour[1],
|
|
lig1_colour = tile_map$tile_colour[2],
|
|
lig2_colour = tile_map$tile_colour[3],
|
|
lig3_colour = tile_map$tile_colour[4]
|
|
)
|
|
|
|
stability_low = stab()+ annotate(geom="text", label="Wild-Type Position",x=0.5,y=0.025, size=3)
|
|
|
|
|
|
# MID
|
|
small_df3 = merged_df3[merged_df3['position'] <= centre & merged_df3['position'] >= low,]
|
|
mutable_df3 = cbind(small_df3)
|
|
print(paste0("Plotting MID Rows: ", nrow(small_df3)))
|
|
|
|
logo_p_snps_centre = LogoPlotSnps(
|
|
mutable_df3,
|
|
y_lab = "SAV Diversity",
|
|
aa_pos_drug=aa_pos_drug,
|
|
active_aa_pos=active_aa_pos,
|
|
aa_pos_lig1=aa_pos_lig1,
|
|
aa_pos_lig2=aa_pos_lig2,
|
|
aa_pos_lig3=aa_pos_lig3,
|
|
drug_colour = tile_map$tile_colour[1],
|
|
lig1_colour = tile_map$tile_colour[2],
|
|
lig2_colour = tile_map$tile_colour[3],
|
|
lig3_colour = tile_map$tile_colour[4]
|
|
)
|
|
|
|
logo_or_centre = LogoPlotCustomH(
|
|
small_df3, y_axis_increment=10,
|
|
rm_empty_y=FALSE,
|
|
aa_pos_drug=aa_pos_drug,
|
|
active_aa_pos=active_aa_pos,
|
|
aa_pos_lig1=aa_pos_lig1,
|
|
aa_pos_lig2=aa_pos_lig2,
|
|
aa_pos_lig3=aa_pos_lig3,
|
|
drug_colour = tile_map$tile_colour[1],
|
|
lig1_colour = tile_map$tile_colour[2],
|
|
lig2_colour = tile_map$tile_colour[3],
|
|
lig3_colour = tile_map$tile_colour[4]
|
|
)
|
|
|
|
stability_centre = stab()
|
|
|
|
# HIGH
|
|
small_df3 = merged_df3[merged_df3['position'] > centre,]
|
|
mutable_df3 = cbind(small_df3)
|
|
print(paste0("Plotting HIGH Rows: ", nrow(small_df3)))
|
|
|
|
|
|
logo_p_snps_high = LogoPlotSnps(
|
|
mutable_df3,
|
|
y_lab="SAV Diversity" ,
|
|
aa_pos_drug=aa_pos_drug,
|
|
active_aa_pos=active_aa_pos,
|
|
aa_pos_lig1=aa_pos_lig1,
|
|
aa_pos_lig2=aa_pos_lig2,
|
|
aa_pos_lig3=aa_pos_lig3,
|
|
drug_colour = tile_map$tile_colour[1],
|
|
lig1_colour = tile_map$tile_colour[2],
|
|
lig2_colour = tile_map$tile_colour[3],
|
|
lig3_colour = tile_map$tile_colour[4]
|
|
)+ annotate(geom="text", label="Wild-Type Position",x=0.5,y=0.025, size=3)
|
|
|
|
logo_or_high = LogoPlotCustomH(
|
|
small_df3,
|
|
y_axis_increment=10,
|
|
y_lab="",
|
|
aa_pos_drug=aa_pos_drug,
|
|
active_aa_pos=active_aa_pos,
|
|
aa_pos_lig1=aa_pos_lig1,
|
|
aa_pos_lig2=aa_pos_lig2,
|
|
aa_pos_lig3=aa_pos_lig3,
|
|
rm_empty_y=FALSE,
|
|
drug_colour = tile_map$tile_colour[1],
|
|
lig1_colour = tile_map$tile_colour[2],
|
|
lig2_colour = tile_map$tile_colour[3],
|
|
lig3_colour = tile_map$tile_colour[4]
|
|
)+ annotate(geom="text", label="Wild-Type Position",x=0.5,y=0.025, size=3)
|
|
|
|
|
|
stability_high = stab()+ annotate(geom="text", label="Wild-Type Position",x=0.5,y=0.025, size=3)
|
|
#### "sorted" stability plots ####
|
|
|
|
# do stability plots for later assembly
|
|
stability_high_reorder = bp_stability_hmap(
|
|
snpcount_high_df,
|
|
reorder_position = T,
|
|
p_title = NULL,
|
|
my_ylab = NULL,
|
|
yvar_colname = 'avg_stability_scaled',
|
|
stability_colname = "avg_stability_scaled",
|
|
stability_outcome_colname = "avg_stability_outcome",
|
|
my_xaxls = 6, # x-axis label size
|
|
my_yaxls = 8, # y-axis label size
|
|
aa_pos_drug=aa_pos_drug,
|
|
active_aa_pos=active_aa_pos,
|
|
aa_pos_lig1=aa_pos_lig1,
|
|
aa_pos_lig2=aa_pos_lig2,
|
|
aa_pos_lig3=aa_pos_lig3,
|
|
drug_colour = tile_map$tile_colour[1],
|
|
lig1_colour = tile_map$tile_colour[2],
|
|
lig2_colour = tile_map$tile_colour[3],
|
|
lig3_colour = tile_map$tile_colour[4],
|
|
x_label="Wild-Type Position"
|
|
|
|
)
|
|
stability_centre_reorder = bp_stability_hmap(
|
|
snpcount_low_high_df,
|
|
reorder_position = T,
|
|
p_title = NULL,
|
|
yvar_colname = 'avg_stability_scaled',
|
|
stability_colname = "avg_stability_scaled",
|
|
stability_outcome_colname = "avg_stability_outcome",
|
|
my_ylab = "",
|
|
y_max_override = max(snpcount_high_df$pos_count),
|
|
my_xaxls = 6, # x-axis label size
|
|
my_yaxls = 8, # y-axis label size
|
|
aa_pos_drug=aa_pos_drug,
|
|
active_aa_pos=active_aa_pos,
|
|
aa_pos_lig1=aa_pos_lig1,
|
|
aa_pos_lig2=aa_pos_lig2,
|
|
aa_pos_lig3=aa_pos_lig3,
|
|
drug_colour = tile_map$tile_colour[1],
|
|
lig1_colour = tile_map$tile_colour[2],
|
|
lig2_colour = tile_map$tile_colour[3],
|
|
lig3_colour = tile_map$tile_colour[4]
|
|
)
|
|
|
|
stability_low_reorder = bp_stability_hmap(
|
|
snpcount_low_low_df,
|
|
reorder_position = T,
|
|
p_title = NULL,
|
|
yvar_colname = 'avg_stability_scaled',
|
|
stability_colname = "avg_stability_scaled",
|
|
stability_outcome_colname = "avg_stability_outcome",
|
|
my_ylab = NULL,
|
|
y_max_override = max(snpcount_high_df$pos_count),
|
|
my_xaxls = 6, # x-axis label size
|
|
my_yaxls = 8, # y-axis label size
|
|
aa_pos_drug=aa_pos_drug,
|
|
active_aa_pos=active_aa_pos,
|
|
aa_pos_lig1=aa_pos_lig1,
|
|
aa_pos_lig2=aa_pos_lig2,
|
|
aa_pos_lig3=aa_pos_lig3,
|
|
drug_colour = tile_map$tile_colour[1],
|
|
lig1_colour = tile_map$tile_colour[2],
|
|
lig2_colour = tile_map$tile_colour[3],
|
|
lig3_colour = tile_map$tile_colour[4]
|
|
|
|
)
|
|
|
|
jpeg(paste0(outdir,"alr_average_stability-sorted.jpeg"), height = 6, width=11.5, unit="in",res=300)
|
|
cowplot::plot_grid(
|
|
cowplot::plot_grid(heat_bar_legend, tile_legend, snp_tile_legend, nrow=1),
|
|
cowplot::plot_grid(stability_high_reorder, stability_centre_reorder, stability_low_reorder,ncol=1),
|
|
ncol=1,
|
|
rel_heights = c(1,12)
|
|
)
|
|
dev.off()
|
|
|
|
jpeg(paste0(outdir,"alr_average_stability-unsorted.jpeg"), height = 6, width=11.5, unit="in",res=300)
|
|
cowplot::plot_grid(
|
|
cowplot::plot_grid(heat_bar_legend, tile_legend, snp_tile_legend, nrow=1),
|
|
cowplot::plot_grid(stability_high, stability_centre, stability_low,ncol=1),
|
|
ncol=1,
|
|
rel_heights = c(1,12)
|
|
)
|
|
dev.off()
|
|
|
|
jpeg(paste0(outdir,"alr_snp.jpeg"), height = 6, width=11.5, unit="in",res=300)
|
|
cowplot::plot_grid(
|
|
cowplot::plot_grid(heat_bar_legend, tile_legend, logoplot_chemistry_legend, nrow=1),
|
|
cowplot::plot_grid(logo_p_snps_low, logo_p_snps_centre, logo_p_snps_high, ncol=1, align='hv'),
|
|
ncol=1,
|
|
rel_heights = c(1,12)
|
|
)
|
|
dev.off()
|
|
|
|
jpeg(paste0(outdir,"alr_or.jpeg"), height = 6, width=11.5, unit="in",res=300)
|
|
cowplot::plot_grid(
|
|
cowplot::plot_grid(heat_bar_legend, tile_legend, logoplot_chemistry_legend, nrow=1),
|
|
cowplot::plot_grid(logo_or_low, logo_or_centre, logo_or_high, ncol=1, align='hv'),
|
|
ncol=1,
|
|
rel_heights = c(1,12)
|
|
)
|
|
dev.off()
|