LSHTM_analysis/scripts/functions/wideP_consurf.R
2022-08-10 11:06:13 +01:00

548 lines
No EOL
21 KiB
R

#!/usr/bin/env Rscript
#########################################################
# TASK: function for wide plot
#with consurf score and error bars
#position numbers coloured by
# - ligand distance
# - active site residues
#########################################################
#==========================================================
# wideP():
# input args
#==========================================================
wideP_consurf2 <- function(plot_df
, aa_pos_drug = NULL
, aa_pos_lig1 = NULL
, aa_pos_lig2 = NULL
, aa_pos_lig3 = NULL
, active_aa_pos = NULL
, xvar_colname = "position"
, yvar_colname = "consurf_score"
, yvar_colourN_colname = "consurf_colour_rev" # num from 0-1
, plot_error_bars = T
, upper_EB_colname = "consurf_ci_upper"
, lower_EB_colname = "consurf_ci_lower"
, plot_type = "point" # default is point
, point_colours = consurf_bp_colours
, p_size = 2
, leg_title1 = ""
, leg_labels = c("0": "Insufficient Data"
, "1": "Variable"
, "2", "3", "4", "5", "6", "7", "8"
, "9": "Conserved")
, panel_col = "grey"
, panel_col_fill = "grey"
# axes title and label sizes
, x_axls = 12 # x-axis label size
, y_axls = 15 # y-axis label size
, x_axts = 12 # x-axis text size
, y_axts = 12 # y-axis text size
, default_xtc = "black" # x-axis text colour
, ptitle = ""
, xlab = ""
, ylab = ""
, pts = 20
# plot margins
, t_margin = 0.5
, r_margin = 0.5
, b_margin = 1
, l_margin = 1
, unit_margin = "cm"
# Custom 1: x-axis: text colour
, xtext_colour_aa = F
, xtext_colour_aa1 = active_aa_pos
, xtext_colour_aa2 = aa_pos_drug
, xtext_colours = c("purple", "brown", "black")
# Custom 2: x-axis: geom tiles ~ lig distance
, annotate_ligand_distance = T
, leg_title2 = "Ligand Distance"
, lig_dist_colname = LigDist_colname # from globals
, lig_dist_colours = c("green", "yellow", "orange", "red")
, tpos0 = 0 # 0 is a magic number that does my sensible default
, tW0 = 1
, tH0 = 0.3
# Custom 3: x-axis: geom tiles ~ active sites and ligand
, annotate_active_sites = T
, drug_aa_colour = "purple"
, active_aa_colour = "brown"
, aa_colour_lig1 = "blue"
, tpos1 = 0
, aa_colour_lig2 = "cyan"
, tpos2 = 0
, aa_colour_lig3 = "cornflowerblue"
, tpos3 = 0
, default_gt_clr = "white"
, build_plot_df=FALSE
, debug=FALSE
){
if(missing(point_colours)){
temp_cols = colorRampPalette(c("seagreen", "sienna3"))(30)
point_colours = temp_cols
}else{
point_colours = point_colours
}
###############################
# custom 1: x-axis text colour
##############################
if (xtext_colour_aa){
positionF <- levels(as.factor(plot_df[[xvar_colname]]))
length(positionF)
aa_pos_colours = ifelse(positionF%in%xtext_colour_aa1, xtext_colours[1]
, ifelse(positionF%in%xtext_colour_aa2
, xtext_colours[2]
, xtext_colours[3]))
}else{
aa_pos_colours = default_xtc
}
################################################
# Custom 2: x-axis geom tiles ~ lig distance
################################################
#=========================
# Build data with colours
# ~ ligand distance
#=========================
if (annotate_ligand_distance){
cat("\nAnnotating x-axis ~", lig_dist_colname, "requested...")
#-------------------------------------
# round column values: to colour by
#--------------------------------------
#plot_df = plot_df[order(plot_df[[lig_dist_colname]]),]
plot_df['lig_distR'] = round(plot_df[[lig_dist_colname]], digits = 0)
head(plot_df['lig_distR'])
#-------------------------------------
# ligand distance range, min, max, etc
#--------------------------------------
lig_min = min(round(plot_df[[lig_dist_colname]]), na.rm = T); lig_min
lig_max = max(round(plot_df[[lig_dist_colname]]), na.rm = T); lig_max
lig_mean = round(mean(round(plot_df[[lig_dist_colname]]), na.rm = T)); lig_mean
#-------------------------------------
# Create mapping colour key
#--------------------------------------
# sorting removes NA, so that n_colours == length(ligD_valsR)
n_colours = length(sort(unique(round(plot_df[[lig_dist_colname]], digits = 0)))); n_colours
lig_cols = colorRampPalette(lig_dist_colours)(n_colours); lig_cols
ligD_valsR = sort(unique(round(plot_df[[lig_dist_colname]], digits = 0))); ligD_valsR
length(ligD_valsR)
if (n_colours == length(ligD_valsR)) {
cat("\nStarting: mapping b/w"
, lig_dist_colname
, "and colours")
}else{
cat("\nCannot start mapping b/w", lig_dist_colname, "and colours..."
, "\nLength mismatch:"
, "No. of colours: ", n_colours
, "\nValues to map:", length(ligD_valsR))
}
ligDcolKey <- data.frame(ligD_colours = lig_cols
, lig_distR = ligD_valsR); ligDcolKey
#names(ligDcolKey)
cat("\nSuccessful: Mapping b/w", lig_dist_colname, "and colours")
#-------------------------------------
# merge colour key with plot_df
#--------------------------------------
plot_df = merge(plot_df, ligDcolKey, by = 'lig_distR')
plot_df_check = as.data.frame(cbind(position = plot_df[[xvar_colname]]
, ligD = plot_df[[lig_dist_colname]]
, ligDR = plot_df$lig_distR
, ligD_cols = plot_df$ligD_colours))
}
###############################################
# Custom 3: x-axis geom tiles ~ active sites
################################################
#==========================
# Build Data with colours
# ~ on active sites
#==========================
aa_colour_colname = "bg_all"
aa_colour_colname1 = "col_bg1"
aa_colour_colname2 = "col_bg2"
aa_colour_colname3 = "col_bg3"
if (build_plot_df) {
if(annotate_active_sites) {
cat("\nAnnotation for xvar requested. Building colours for annotation...")
#--------------------------------------------------
# column colour 0: Active site + drug binding sites
#--------------------------------------------------
plot_df[[aa_colour_colname]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_drug
, drug_aa_colour
, ifelse(plot_df[[xvar_colname]]%in%active_aa_pos
, active_aa_colour, default_gt_clr ))
plot_df[[aa_colour_colname]]
cat("\nColumn created 'bg_all':", length(plot_df[[aa_colour_colname]]))
#------------------------------------------------
# column colour 1: Ligand 1 + drug binding sites
#------------------------------------------------
cat("\nAssigning colours to drug binding and ligand-1 binding residues")
plot_df[[aa_colour_colname1]] = plot_df[[aa_colour_colname]]
plot_df[[aa_colour_colname1]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_lig1
, aa_colour_lig1, plot_df[[aa_colour_colname]])
#------------------------------------------------
# column colour 2: Ligand 2
#------------------------------------------------
plot_df[[aa_colour_colname2]] = plot_df[[aa_colour_colname1]]
plot_df[[aa_colour_colname2]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_lig2
, aa_colour_lig2, plot_df[[aa_colour_colname1]])
#------------------------------------------------
# column colour 3: Ligand 3
#------------------------------------------------
plot_df[[aa_colour_colname3]] = plot_df[[aa_colour_colname2]]
plot_df[[aa_colour_colname3]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_lig3
, aa_colour_lig3, plot_df[[aa_colour_colname2]])
}
} else {
# set these to the string "DUMMY" so that the build-up-the-tiles bit works
aa_pos_drug = "DUMMY"
aa_pos_lig1 = "DUMMY"
active_aa_pos = "DUMMY"
if (aa_colour_colname2 %in% colnames(merged_df3)) {
aa_pos_lig2 = "DUMMY"
if (aa_colour_colname3 %in% colnames(merged_df3)) {
aa_pos_lig2 = "DUMMY"
}
}
}
###################
# start plot
###################
#-------------------
# x and y axis
# range, scale, etc
#-------------------
my_xlim = length(unique(plot_df[[xvar_colname]])); my_xlim
ymin = min(plot_df[[yvar_colname]]); ymin
ymax = max(plot_df[[yvar_colname]]); ymax
g = ggplot(plot_df, aes_string(x = sprintf("factor(%s)", xvar_colname)
, y = yvar_colname
, colour = sprintf("factor(%s)", yvar_colourN_colname)
))
#"if SPECIAL do SPECIAL THING, otherwise do NORMAL THING"
if (plot_type == "bar"){
cat('\ndoing bar plot')
g0 = g +
geom_bar(stat = "identity")
}
else{
g0 = g +
coord_cartesian(xlim = c(1, my_xlim)
, ylim = c(ymin, ymax)
, clip = "off") +
geom_point(size = p_size) +
scale_colour_manual(values = point_colours)
}
if (plot_error_bars){
cat('\nplotting error bars')
g0 = g0 +
geom_errorbar(aes(ymin = eval(parse(text = lower_EB_colname))
, ymax = eval(parse(text = upper_EB_colname))
))
}else{
g0 = g0
}
#---------------------
# add axis formatting
#---------------------
g1 = g0 + theme( axis.text.x = element_text(size = x_axts
, angle = 90
, hjust = 1
, vjust = 0.4
, face = "bold"
, colour = aa_pos_colours)
, axis.text.y = element_text(size = y_axts
, angle = 0
, hjust = 1
, vjust = 0)
, axis.title.x = element_text(size = x_axls)
, axis.title.y = element_text(size = y_axls )
, panel.background = element_rect(fill = panel_col_fill, color = "transparent")
, plot.background = element_rect(fill = panel_col_fill, color = "transparent")
, panel.grid = element_blank()
#, panel.grid.major = element_line(color = "black")
#, panel.grid.minor = element_line(color = "black")
, plot.title = element_text(size = pts
, hjust = 0.5)
, plot.margin = margin(t = t_margin
, r = r_margin
, b = b_margin
, l = l_margin
, unit = unit_margin))+
#guides(colour = guide_legend(title = "ConsurfXXXX")) +
labs(title = ptitle
, x = xlab
, y = ylab)
#------------------
#Extract legend1
#------------------
# yayy
g1_leg = ggplot(plot_df
, aes_string(
x = sprintf("factor(%s)"
, xvar_colname))
, theme(
panel.background = element_rect(fill = panel_col_fill, color = "transparent")
, plot.background = element_rect(fill = panel_col_fill, color = "transparent")
))
g1_leg = g1_leg + geom_bar(); g1_leg
g1_leg = g1_leg + geom_bar(aes_string(fill = sprintf("factor(%s)"
, yvar_colourN_colname)))
g1_leg = g1_leg + scale_fill_manual(values = consurf_bp_colours , name = leg_title1)
g1_leg
legend1 = get_legend(g1_leg)
#####################################################
#============================================
# x-axis: geom_tiles ~ ligand distance
#============================================
#-------
# plot
#-------
if(annotate_ligand_distance){ # 0 is a magic number that does my sensible default
if (tpos0 == 0){
tpos0 = ymin-0.5
}
if (tpos1 == 0){
tpos1 = ymin-0.65
}
if (tpos2 == 0){
tpos2 = ymin-0.75
}
if (tpos3 == 0){
tpos3 = ymin-0.85
}
cat("\nColouring x-axis aa based on", lig_dist_colname
, "\nNo. of colours:", n_colours)
g2 = g1 + geom_tile(aes(, tpos0
, width = tW0
, height = tH0)
, fill = plot_df$ligD_colours
, colour = plot_df$ligD_colours
, linetype = "blank")
#cat("Nrows of plot df", length(plot_df$ligD_colours))
out = g2
cat('\nOut is g2')
}else{
cat('\nOut is g1')
out = g1
}
#==============================================
# x-axis: geom_tiles ~ active sites and others
#==============================================
if(annotate_active_sites){
cat('\nDrug Positions: ',aa_pos_drug)
cat ('\nLig 1 Positions:', aa_pos_lig1)
cat ('\nLig 2 Positions:', aa_pos_lig2)
cat ('\nLig 2 Positions:', aa_pos_lig3)
#tpos = 0
#tW = 1
#tH = 0.2
#---------------------
# Add2plot: 3 ligands
#---------------------
if (all(!is.null(active_aa_pos) &&
!is.null(aa_pos_drug) &&
!is.null(aa_pos_lig1) &&
!is.null(aa_pos_lig2) &&
!is.null(aa_pos_lig3))) {
#if (debug){
cat("\n\nAnnotating xvar with active, drug binding, and Lig 1 & 2 & 3 sites")
#cat("\nCreating column colours, column name:", aa_colour_colname3)
cat("\nDoing Plot with 3 ligands")
#}
out = out + geom_tile(aes(,tpos3
, width = tW0
, height = tH0 )
, fill = plot_df[[aa_colour_colname3]]
, colour = plot_df[[aa_colour_colname3]]
, linetype = "solid") +
geom_tile(aes(, tpos2
, width = tW0
, height = tH0 )
, fill = plot_df[[aa_colour_colname2]]
, colour = plot_df[[aa_colour_colname2]]
, linetype = "solid")+
geom_tile(aes(, tpos1
, width = tW0
, height = tH0)
, fill = plot_df[[aa_colour_colname1]]
, colour = plot_df[[aa_colour_colname1]]
, linetype = "solid")
#if (debug){
cat("\nDone Plot with 3 ligands")
#}
} else {cat("\n3 Lig Plot condition FAIL")}
#---------------------
# Add2plot: 2 ligands
#---------------------
if (all(!is.null(active_aa_pos) &&
!is.null(aa_pos_drug) &&
!is.null(aa_pos_lig1) &&
!is.null(aa_pos_lig2) &&
is.null(aa_pos_lig3))) {
#if (debug){
cat("\n\nAnnotating xvar with active, drug binding, and Lig 1&2 sites")
cat("\nCreating column colours, column name:", aa_colour_colname2)
cat("\nDoing Plot with 2 ligands")
#}
out = out +
geom_tile(aes(, tpos2
, width = tW0
, height = tH0)
, fill = plot_df[[aa_colour_colname2]]
, colour = plot_df[[aa_colour_colname2]]
, linetype = "solid")+
geom_tile(aes(, tpos1
, width = tW0
, height = tH0)
, fill = plot_df[[aa_colour_colname1]]
, colour = plot_df[[aa_colour_colname1]]
, linetype = "solid")
#if (debug){
cat("\nDone Plot with 2 ligands")
#}
} else {cat("\n2 Lig Plot condition FAIL")}
#---------------------
# Add2plot: 1 ligand
#---------------------
if (all(!is.null(active_aa_pos) &&
!is.null(aa_pos_drug) &&
!is.null(aa_pos_lig1) && is.null(aa_pos_lig2) && is.null(aa_pos_lig3))) {
#if (debug){
cat("\n\nAnnotating xvar with active, drug binding, and Lig 1 sites")
cat("\nCreating column colours, column name:", aa_colour_colname1)
cat("\nDoing Plot with 1 ligands")
#}
out = out +
geom_tile(aes(, tpos1
, width = tW0
, height = tH0)
, fill = plot_df[[aa_colour_colname1]]
, colour = plot_df[[aa_colour_colname1]]
, linetype = "solid")
cat("\nDone Plot with 1 ligand")
} else {cat("\n1 Lig Plot condition FAIL")}
#-----------------------------------
# Add2plot:NO ligands
# No Ligs: Just drug and active site
# DEFAULT: annotate_active_sites == TRUE
#----------------------------------
if (all(!is.null(active_aa_pos) &&
!is.null(aa_pos_drug) &&
is.null(aa_pos_lig1) &&
is.null(aa_pos_lig2) &&
is.null(aa_pos_lig3))) {
#if (debug){
cat("\n\nAnnotating xvar with active and drug binding sites")
cat("\nCreating column colours, column name:", aa_colour_colname)
cat("\nDoing Plot with 0 ligands: active and drug site only")
#}
out = out + geom_tile(aes(, tpos3
, width = tW0
, height = tH0)
, fill = plot_df[[aa_colour_colname]]
, colour = plot_df[[aa_colour_colname]]
, linetype = "solid")
#if (debug){
cat("\nDone Plot with: Active and Drug sites")
}
#}
} else {
cat("\nNo annotation for additional ligands on xvar requested")
}
if (annotate_ligand_distance){
cat('\nOutput: Plot + distance heat-bar + other stuff')
legs = cowplot::plot_grid(legend1
, generate_distance_legend(plot_df)
, ncol = 1
, align = "hv"
, rel_heights = c(2/4,3/4))
out2 = cowplot::plot_grid( out + theme(legend.position = "none")
, legs
, ncol = 2
, align = "hv"
, rel_widths = c(9/10, 0.4/10)
)
}else{
out2 = cowplot::plot_grid( out + theme(legend.position = "none")
, legend1
, ncol = 2
, align = "hv"
, rel_widths = c(9/10, 0.5/10)
)
}
return(out2)
#return(out2)
}
#
# wideP_consurf2(
# merged_df3,
# annotate_active_sites = TRUE,
# active_aa_pos = active_aa_pos,
# aa_pos_drug = aa_pos_drug,
# aa_pos_lig1 = aa_pos_lig1,
# aa_pos_lig2 = aa_pos_lig2,
# aa_pos_lig3 = aa_pos_lig3,
# debug=T)