This commit is contained in:
Tanushree Tunstall 2022-08-14 12:17:36 +01:00
parent c09d7530c9
commit 6f354ab390
10 changed files with 833 additions and 285 deletions

0
scripts/dm_om_data.R Normal file
View file

View file

@ -40,12 +40,12 @@ bp_stability_hmap <- function(plot_df = merged_df3
#, bar_col_colname = "group"
, stability_colname = "duet_scaled" # Only here so that you can do function(df)
, stability_outcome_colname = "duet_outcome" # Only here so that you can do function(df)
, p_title = "DUMMY TITLE" # Only here so that you can do function(df)
, my_xaxls = 12 # x-axis label size
, my_yaxls = 20 # y-axis label size
, my_xaxts = 18 # x-axis text size
, my_yaxts = 20 # y-axis text size
, my_pts = 20 # plot-title size
, p_title = "DUMMY TITLE", # Only here so that you can do function(df)
my_xaxls = 6, # x-axis label size
my_yaxls = 6, # y-axis label size
my_xaxts = 9, # x-axis text size
my_yaxts = 10, # y-axis text size
my_pts = 10 # plot-title size
, my_xlab = "Position"
, my_ylab = "No. of nsSNPs"
@ -68,7 +68,7 @@ bp_stability_hmap <- function(plot_df = merged_df3
# Build data with colours
# ~ ligand distance
#=========================
plot_df = generate_distance_colour_map(plot_df, debug=TRUE)
# plot_df = generate_distance_colour_map(plot_df, debug=TRUE)
# order the df by position and ensure it is a factor
plot_df = plot_df[order(plot_df[[xvar_colname]]), ]
@ -104,7 +104,7 @@ bp_stability_hmap <- function(plot_df = merged_df3
# , ordered = T)
)) +
geom_bar(aes(fill = group)
, colour = "grey") +
, colour = "grey", size=0.125) +
scale_fill_manual( values = subcols_bp_hmap
, guide = "none") +
@ -120,11 +120,12 @@ bp_stability_hmap <- function(plot_df = merged_df3
, hjust = 1
, vjust = 0)
, axis.title.x = element_blank()
, axis.ticks = element_blank()
#, axis.title.x = element_text(size = my_xaxts)
, axis.title.y = element_text(size = my_yaxts )
, plot.title = element_text(size = my_pts
, hjust = 0.5)
, panel.grid = element_blank()
# , panel.grid = element_blank()
, panel.background = element_rect(fill = "transparent", colour=NA)
) +
@ -132,25 +133,20 @@ bp_stability_hmap <- function(plot_df = merged_df3
, x = my_xlab
, y = my_ylab),
NULL,
ggplot(plot_df,
aes(x=factor(position), # THIS STUPID FUCKING FACTOR THING
)
) +
geom_tile(aes(y=0),
fill=plot_df$ligD_colours) +
scale_x_discrete("Position", labels=factor(plot_df$position)) +
theme_nothing() +
theme(plot.background = element_rect(fill = "transparent", colour=NA),
plot.margin = margin(t=0,b=0)) +
labs(x = NULL, y = NULL), #end of distance-heat-bar
NULL,
position_annotation(plot_df),
position_annotation(plot_df,
aa_pos_drug=aa_pos_drug,
active_aa_pos=active_aa_pos,
aa_pos_lig1=aa_pos_lig1,
aa_pos_lig2=aa_pos_lig2,
aa_pos_lig3=aa_pos_lig3
)
,
#generate_distance_legend(plot_df),
ncol = 1,
align = "v",
rel_heights = c(10,-0.1,1,-0.1,1)
rel_heights = c(10,-0.1,1)
#rel_widths = c(9/10, 0.4/10)
)
}
#bp_stability_hmap(small_df3)
#bp_stability_hmap(merged_df3)

View file

@ -3,7 +3,7 @@ generate_distance_colour_map = function(plot_df,
xvar_colname = "position",
lig_dist_colname = "ligand_distance",
#lig_dist_colours = c("green", "yellow", "orange", "red"),
lig_dist_colours = c("tan", "black"),
lig_dist_colours = c("green", "yellow", "magenta"),
debug = TRUE
)
{
@ -75,9 +75,9 @@ generate_distance_legend = function(plot_df,
geom_tile(aes(fill = .data[[lig_dist_colname]])
, colour = "white") +
scale_fill_gradient2(midpoint = lig_mean
, low = "tan"
, mid = "grey50"
, high = "black"
, low = "green"
, mid = "yellow"
, high = "magenta"
, breaks = labels
, limits = c(lig_min, lig_max)
, labels = labelsD

View file

@ -250,8 +250,16 @@ LogoPlotCustomH <- function(plot_df
#scale_x_discrete(x_lab, labels=factor(unique_colour_map$position)) +
scale_color_manual(values=unique_colour_map$ligD_colours) +
scale_fill_manual(values=unique_colour_map$ligD_colours) +
labs(y = NULL), NULL,
position_annotation(plot_df, bg=theme_bgc),
labs(y = NULL),
NULL,
position_annotation(plot_df,
bg = theme_bgc,
aa_pos_drug=aa_pos_drug,
active_aa_pos=active_aa_pos,
aa_pos_lig1=aa_pos_lig1,
aa_pos_lig2=aa_pos_lig2,
aa_pos_lig3=aa_pos_lig3
),
ncol=1, align='v', rel_heights = c(16,0,1,0,1)
)

View file

@ -1,7 +1,7 @@
########################a###########################################################
# Input:
# Data
# plot_df: merged_df3 containing the OR column to use as y-axis or any other relevant column
# mutable_df: merged_df3 containing the OR column to use as y-axis or any other relevant column
# x_axis_colname = "position"
# symbol_mut_colname = "mutant_type"
@ -38,16 +38,16 @@ LogoPlotSnps <- function(plot_df
, my_logo_col = "chemistry"
, x_lab = "Position"
, y_lab = "Count"
, x_ats = 14 # text size
, x_ats = 7 # text size
, x_tangle = 90 # text angle
, y_ats = 22
, y_ats = 10
, y_tangle = 0
, x_tts = 20 # title size
, y_tts = 23
, x_tts = 10 # title size
, y_tts = 10
, leg_pos = "none" # can be top, left, right and bottom or c(0.8, 0.9)
, leg_dir = "horizontal" #can be vertical or horizontal
, leg_ts = 20 # leg text size
, leg_tts = 16 # leg title size
, leg_ts = 10 # leg text size
, leg_tts = 8 # leg title size
, tpos0 = 0 # 0 is a magic number that does my sensible default
, tW0 = 1
, tH0 = 0.2
@ -56,6 +56,7 @@ LogoPlotSnps <- function(plot_df
)
{
mutable_df=cbind(plot_df)
# handle funky omit_snp_count. DOES NOT WORK YET
if (class(omit_snp_count) != "numeric"){
omit_snp_count <- as.numeric(unlist(str_extract_all(omit_snp_count, regex("[0-9]+"))))
@ -65,51 +66,51 @@ LogoPlotSnps <- function(plot_df
############################################
# Generate "ligand distance" colour map
plot_df = generate_distance_colour_map(plot_df, debug=TRUE)
unique_colour_map = unique(plot_df[,c("position","ligD_colours")])
unique_colour_map = unique_colour_map[order(unique_colour_map$position), ]
rownames(unique_colour_map) = unique_colour_map$position
unique_colour_map2 = unique_colour_map
unique_colour_map2$position=as.factor(unique_colour_map2$position)
unique_colour_map2$ligD_colours = as.factor(unique_colour_map2$ligD_colours)
# mutable_df = generate_distance_colour_map(mutable_df, debug=TRUE)
# unique_colour_map = unique(mutable_df[,c("position","ligD_colours")])
# unique_colour_map = unique_colour_map[order(unique_colour_map$position), ]
# rownames(unique_colour_map) = unique_colour_map$position
# unique_colour_map2 = unique_colour_map
# unique_colour_map2$position=as.factor(unique_colour_map2$position)
# unique_colour_map2$ligD_colours = as.factor(unique_colour_map2$ligD_colours)
#
setDT(plot_df)[, mut_pos_occurrence := .N, by = .(eval(parse(text=x_axis_colname)))]
setDT(mutable_df)[, mut_pos_occurrence := .N, by = .(eval(parse(text=x_axis_colname)))]
if (debug) {
table(plot_df[[x_axis_colname]])
table(plot_df$mut_pos_occurrence)
table(mutable_df[[x_axis_colname]])
table(mutable_df$mut_pos_occurrence)
}
max_mut = max(table(plot_df[[x_axis_colname]]))
max_mut = max(table(mutable_df[[x_axis_colname]]))
# Subset Data as specified by user
cat("\nDisplaying nsSNP position frequency:\n")
print(table(plot_df$mut_pos_occurrence))
print(table(mutable_df$mut_pos_occurrence))
if ( (length(omit_snp_count) ==1) && (omit_snp_count == 0) ){
my_data_snp = plot_df
my_data_snp = mutable_df
u = unique(my_data_snp[[x_axis_colname]])
max_mult_mut = max(table(my_data_snp[[x_axis_colname]]))
if (debug) {
cat("\nNo filtering requested:"
, "\nTotal no. of nsSNPs:", sum(table(plot_df$mut_pos_occurrence))
, "\nTotal no. of nsSNPs omitted:", sum(table(plot_df$mut_pos_occurrence)[omit_snp_count])
, "\nTotal no. of nsSNPs:", sum(table(mutable_df$mut_pos_occurrence))
, "\nTotal no. of nsSNPs omitted:", sum(table(mutable_df$mut_pos_occurrence)[omit_snp_count])
, "\nDim of data:", dim(my_data_snp)
, "\nNo. of positions:", length(u)
, "\nMax no. of muts at any position:", max_mult_mut)
}
} else {
my_data_snp = subset(plot_df, !(mut_pos_occurrence%in%omit_snp_count) )
my_data_snp = subset(mutable_df, !(mut_pos_occurrence%in%omit_snp_count) )
exp_nrows = sum(table(plot_df$mut_pos_occurrence)) - sum(table(plot_df$mut_pos_occurrence)[omit_snp_count])
exp_nrows = sum(table(mutable_df$mut_pos_occurrence)) - sum(table(mutable_df$mut_pos_occurrence)[omit_snp_count])
got_rows = sum(table(my_data_snp$mut_pos_occurrence))
u = unique(my_data_snp[[x_axis_colname]])
max_mult_mut = max(table(my_data_snp[[x_axis_colname]]))
if (debug) {
if (got_rows == exp_nrows) {
cat("\nPass: Position with the stated nsSNP frequency filtered:", omit_snp_count
, "\nTotal no. of nsSNPs:", sum(table(plot_df$mut_pos_occurrence))
, "\nTotal no. of nsSNPs omitted:", sum(table(plot_df$mut_pos_occurrence)[omit_snp_count])
, "\nTotal no. of nsSNPs:", sum(table(mutable_df$mut_pos_occurrence))
, "\nTotal no. of nsSNPs omitted:", sum(table(mutable_df$mut_pos_occurrence)[omit_snp_count])
, "\nDim of subsetted data:", dim(my_data_snp)
, "\nNo. of positions:", length(u)
, "\nMax no. of muts at any position:", max_mult_mut)
@ -145,7 +146,7 @@ LogoPlotSnps <- function(plot_df
if (is.matrix(tab_mt)){
if (debug) {
cat("\nCreating mutant matrix..."
#, "\nRownames of mutant matrix:", rownames(tab_mt)
#, "\nRowna mes of mutant matrix:", rownames(tab_mt)
#, "\nColnames of mutant matrix:", colnames(tab_mt)
)
}
@ -211,114 +212,95 @@ LogoPlotSnps <- function(plot_df
#####################################
# Generating logo plots for nsSNPs
#####################################
cowplot::plot_grid(
#-------------------
# Mutant logo plot
#-------------------
ggseqlogo(tab_mt
, method = 'custom'
, col_scheme = my_logo_col
, seq_type = 'aa') +
scale_x_continuous(breaks = 1:ncol(tab_mt)
, expand = c(0.01,0)
, labels = colnames(tab_mt))+
scale_y_continuous(breaks = 0:(max_mult_mut-1)
, labels = c(1:max_mult_mut)
, limits = c(0, max_mult_mut)) +
ylab(y_lab) +
theme(text=element_text(family="FreeSans")
, legend.position = leg_pos
, legend.direction = leg_dir
, legend.title = element_text(size = leg_tts
, colour = ytt_col)
, legend.text = element_text(size = leg_ts)
, axis.text.x = element_text(size = x_ats
, angle = x_tangle
, hjust = 1
, vjust = 0.4
, colour = xfont_bgc)
, axis.text.y = element_text(size = y_ats
, angle = y_tangle
, hjust = 1
, vjust = -1.0
, colour = yfont_bgc)
, axis.title.x = element_text(size = x_tts
, colour = xtt_col)
, axis.title.y = element_text(size = y_tts
, colour = ytt_col)
, plot.background = element_rect(fill = theme_bgc, colour=NA)
),
ggseqlogo(tab_wt
, method = 'custom'
, col_scheme = my_logo_col
, seq_type = 'aa') +
scale_x_continuous(breaks = 1:ncol(tab_wt)
, expand = c(0.01,0)
, labels = as.factor(colnames(tab_wt))) +
theme(text = element_text(family="FreeSans")
, legend.position = "none"
, axis.text.x = element_blank()
, axis.text.y = element_blank()
, axis.title.x = element_blank()
, axis.title.y = element_blank()
, plot.background = element_rect(fill = theme_bgc, colour=NA)
) +
labs(x=NULL, y=NULL),
ggplot(
data=unique_colour_map2,
aes(
x=factor(position), 0 # heat-mapped distance tiles along the bot
, fill = position
, colour = position
, linetype = "blank"
)
#-------------------
# Mutant logo plot
#-------------------
logo_top =ggseqlogo(tab_mt
, method = 'custom'
, col_scheme = my_logo_col
, seq_type = 'aa') +
scale_x_continuous(breaks = 1:ncol(tab_mt)
, expand = c(0.01,0)
, labels = colnames(tab_mt))+
scale_y_continuous(breaks = 0:(max_mult_mut-1)
, labels = c(1:max_mult_mut)
, limits = c(0, max_mult_mut)) +
ylab(y_lab) +
theme(text=element_text(family="FreeSans")
, legend.position = leg_pos
, legend.direction = leg_dir
, legend.title = element_text(size = leg_tts
, colour = ytt_col)
, legend.text = element_text(size = leg_ts)
, axis.text.x = element_text(size = x_ats
, angle = x_tangle
#, hjust = 1
#, vjust = 0.4
, colour = xfont_bgc)
, axis.text.y = element_text(size = y_ats
, angle = y_tangle
, hjust = 1
, vjust = -1.0
, colour = yfont_bgc)
# , axis.title.x = element_text(size = x_tts
# , colour = xtt_col)
, axis.title.x = element_blank()
, axis.title.y = element_text(size = y_tts
, colour = ytt_col)
, plot.background = element_rect(fill = theme_bgc, colour=NA)
)
logo_bottom = ggseqlogo(tab_wt
, method = 'custom'
, col_scheme = my_logo_col
, seq_type = 'aa') +
scale_x_continuous(breaks = 1:ncol(tab_wt)
, expand = c(0.01,0)
, labels = as.factor(colnames(tab_wt))) +
theme(text = element_text(family="FreeSans")
, legend.position = "none"
, axis.text.x = element_blank()
, axis.text.y = element_blank()
, axis.title.x = element_blank()
, axis.title.y = element_blank()
, plot.background = element_rect(fill = theme_bgc, colour=NA)
) +
geom_tile() +
theme(
axis.text.x = element_blank()
, axis.ticks.x = element_blank()
# axis.text.x = element_text(size = x_ats
# , angle = x_tangle
# , hjust = 1
# , vjust = 0.4
# , colour = xfont_bgc)
, axis.text.y = element_blank()
, axis.ticks.y = element_blank()
, axis.title.x = element_blank()
# , axis.title.x = element_text(size = x_tts
# , colour = xtt_col)
# , axis.title.y = element_text(size = y_tts
# , colour = ytt_col)
# , legend.title = element_text(size = leg_tts
# , colour = ytt_col)
, legend.text = element_text(size = leg_ts)
, legend.position = leg_pos
, legend.direction = leg_dir
, plot.background = element_rect(fill = theme_bgc, colour=NA)
, plot.margin = margin(t=0)
, panel.grid=element_blank()
, panel.background = element_rect(fill = theme_bgc, colour=NA)
) +
scale_x_discrete(x_lab, labels=unique_colour_map$position) +
#scale_x_discrete(x_lab, labels=factor(unique_colour_map$position)) +
scale_color_manual(values=unique_colour_map$ligD_colours) +
scale_fill_manual(values=unique_colour_map$ligD_colours) +
labs(y = NULL)
, NULL
, position_annotation(plot_df, bg=theme_bgc)
, ncol=1
, align = "v"
, axis='lr'
, rel_heights = c(7/10, 2/7,1/7, -0.1, 0.5/7))
labs(x=NULL, y=NULL)
anno_bar = position_annotation(plot_df,
bg = theme_bgc,
aa_pos_drug=aa_pos_drug,
active_aa_pos=active_aa_pos,
aa_pos_lig1=aa_pos_lig1,
aa_pos_lig2=aa_pos_lig2,
aa_pos_lig3=aa_pos_lig3)
aligned=align_plots(logo_top, logo_bottom, anno_bar, align='v', axis='lr')
cowplot::plot_grid(
aligned[[1]], aligned[[2]], aligned[[3]],
ncol=1,
#align = "v",
rel_heights = c(7, 1,1),
rel_widths = c(1,1,0.75)
)
# cowplot::plot_grid(
# logo_top,
# #NULL,
# logo_bottom,
# #NULL,
# anno_bar,
# ncol=1,
# align = "v",
# rel_heights = c(7, 1,1)
# )
# top logo, bottom logo, heat bar, NULL, position annotation
#------------------
# Wild logo plot
#------------------
}
#LogoPlotSnps(small_df3)
#LogoPlotSnps(mutable_df3)

View file

@ -1,7 +1,15 @@
# position_annotation takes a Data Frame (df) and returns a ggplot object.
#
# This plots position tiles for the (up to) three ligands as well as drug
position_annotation=function(plot_df, bg="transparent"){
position_annotation=function(plot_df,
bg="transparent",
aa_pos_drug=1:100,
active_aa_pos=1:100,
aa_pos_lig1=1:100,
aa_pos_lig2=1:100,
aa_pos_lig3=1:100
)
{
x_ats = 12
x_tangle = 90
x_tts = 20
@ -13,48 +21,117 @@ position_annotation=function(plot_df, bg="transparent"){
leg_tts = 16
leg_pos = "none"
# plot_df=plot_df[order(plot_df$ligand_distance),]
#
# plot_df$position = factor(plot_df$position)
#plot_df = generate_distance_colour_map(plot_df, debug=TRUE)
# plot_df$col_aa = ifelse(plot_df[["position"]]%in%active_aa_pos,
# "brown", "transparent")
plot_df$col_aa = ifelse(plot_df[["position"]]%in%active_aa_pos,
"transparent", "transparent")
ggplot(plot_df,
aes(x=factor(position), # THIS STUPID FUCKING FACTOR THING
)
) +
geom_tile(aes(y=0, fill= bg_all, colour = bg_all)
plot_df$bg_all = plot_df$col_aa
plot_df$bg_all = ifelse(plot_df[["position"]]%in%aa_pos_drug,
"green", plot_df$bg_all)
plot_df$col_bg1 = plot_df$bg_all
plot_df$col_bg1 = ifelse(plot_df[["position"]]%in%aa_pos_lig1,
"slategrey", plot_df$col_bg1)
plot_df$col_bg2 = plot_df$col_bg1
plot_df$col_bg2 = ifelse(plot_df[["position"]]%in%aa_pos_lig2,
"navyblue", plot_df$col_bg2)
plot_df$col_bg3 = plot_df$col_bg2
plot_df$col_bg3 = ifelse(plot_df[["position"]]%in%aa_pos_lig3
, "purple", plot_df$col_bg3)
plot_df = generate_distance_colour_map(plot_df, debug=TRUE)
cowplot::plot_grid(
ggplot(plot_df,
aes(x=factor(position), # THIS STUPID FUCKING FACTOR THING
)
) +
geom_tile(aes(y=1, fill= col_bg1, colour = col_bg1)
geom_tile(aes(y=0),
fill=plot_df$ligD_colours) +
#scale_x_discrete("Position", labels=factor(plot_df$position)) +
#theme_nothing() +
theme(plot.background = element_rect(fill = "transparent", colour=NA),
plot.margin = margin(t=0,b=0),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
panel.grid = element_blank(),
panel.background = element_rect(fill = "transparent", colour=NA),
) +
labs(x = NULL, y = NULL), #end of distance-heat-bar
#NULL,
ggplot(plot_df,
aes(x=factor(position), # THIS STUPID FUCKING FACTOR THING
#reorder(ligand_distance)
)
) +
geom_tile(aes(y=2, fill= col_bg2, colour = col_bg2)
) +
geom_tile(aes(y=3, fill= col_bg3, colour = col_bg3)
) +
scale_x_discrete("Position", labels=factor(plot_df$position)) +
scale_color_manual(values = c(
"brown"="brown",
"green"="green",
"transparent"="transparent",
"slategrey"="slategrey",
"navyblue"="navyblue",
"purple"="purple"
),
expand=c(0,0)
) +
scale_fill_manual(values = c(
"brown"="brown",
"green"="green",
"transparent"="transparent",
"slategrey"="slategrey",
"navyblue"="navyblue",
"purple"="purple"
),
expand=c(0,0)
) +
#scale_x_continuous(expand=c(0,0)) +
#scale_y_continuous(expand=c(0,0)) +
theme_nothing() +
theme(plot.background = element_rect(fill = bg, colour=NA),
plot.margin = margin(t=0,b=0)) +
labs(x = NULL, y = NULL)
# geom_tile(aes(y = 0, fill = col_aa, colour = col_aa)
# ) +
geom_tile(aes(y = 1, fill = bg_all, colour = bg_all)
) +
geom_tile(aes(y = 2, fill = col_bg1, colour = col_bg1)
) +
geom_tile(aes(y = 3, fill = col_bg2, colour = col_bg2)
) +
geom_tile(aes(y = 4, fill = col_bg3, colour = col_bg3)
) +
#scale_x_discrete("Position", labels=factor(plot_df$position)) +
scale_color_manual(values = c(
"brown"="brown",
"green"="#00ff00",
"transparent"="transparent",
"slategrey"="#2f4f4f",
"navyblue"="#000080",
"purple"="#a020f0"
),
expand=c(0,0)
) +
scale_fill_manual(values = c(
"brown"="brown",
"green"="#00ff00",
"transparent"="transparent",
"slategrey"="#2f4f4f",
"navyblue"="#000080",
"purple"="#a020f0"
),
expand=c(0,0)
) +
#scale_x_continuous(expand=c(0,0)) +
#scale_y_continuous(expand=c(0,0)) +
theme_nothing() +
theme(plot.background = element_rect(fill = bg, colour=NA),
plot.margin = margin(t=0,b=0)) +
labs(x = NULL, y = NULL),
ncol=1,
rel_heights = c(1,
#-0.1,
1)
)
}
position_annotation(merged_df3,
aa_pos_drug=aa_pos_drug,
active_aa_pos=active_aa_pos,
aa_pos_lig1=aa_pos_lig1,
aa_pos_lig2=aa_pos_lig2,
aa_pos_lig3=aa_pos_lig3
)
#
# # proof that you can use this function to pass arbitrary lists of numbers :-)
# position_annotation(merged_df3,
# aa_pos_drug=1:1000,
# active_aa_pos=1:1000,
# aa_pos_lig1=1:1000,
# aa_pos_lig2=1:1000,
# aa_pos_lig3=1:1000
# )

View file

@ -13,12 +13,6 @@
# input args
#==========================================================
wideP_consurf3 <- function(plot_df
, aa_pos_drug = NULL
, aa_pos_lig1 = NULL
, aa_pos_lig2 = NULL
, aa_pos_lig3 = NULL
, active_aa_pos = NULL
, xvar_colname = "position"
, yvar_colname = "consurf_score"
, yvar_colourN_colname = "consurf_colour_rev" # num from 0-1
@ -65,26 +59,13 @@ wideP_consurf3 <- function(plot_df
, annotate_ligand_distance = T
, leg_title2 = "Ligand Distance"
, lig_dist_colname = LigDist_colname # from globals
, lig_dist_colours = c("green", "yellow", "orange", "red")
, lig_dist_colours = c("tan", "black")
, tpos0 = 0 # 0 is a magic number that does my sensible default
, tW0 = 1
, tH0 = 0.3
# Custom 3: x-axis: geom tiles ~ active sites and ligand
, annotate_active_sites = T
, drug_aa_colour = "purple"
, active_aa_colour = "brown"
, aa_colour_lig1 = "blue"
, tpos1 = 0
, aa_colour_lig2 = "cyan"
, tpos2 = 0
, aa_colour_lig3 = "cornflowerblue"
, tpos3 = 0
, default_gt_clr = "white"
, build_plot_df=FALSE
, debug=FALSE
@ -174,68 +155,6 @@ wideP_consurf3 <- function(plot_df
, ligD_cols = plot_df$ligD_colours))
}
###############################################
# Custom 3: x-axis geom tiles ~ active sites
################################################
#==========================
# Build Data with colours
# ~ on active sites
#==========================
aa_colour_colname = "bg_all"
aa_colour_colname1 = "col_bg1"
aa_colour_colname2 = "col_bg2"
aa_colour_colname3 = "col_bg3"
if (build_plot_df) {
if(annotate_active_sites) {
cat("\nAnnotation for xvar requested. Building colours for annotation...")
#--------------------------------------------------
# column colour 0: Active site + drug binding sites
#--------------------------------------------------
plot_df[[aa_colour_colname]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_drug
, drug_aa_colour
, ifelse(plot_df[[xvar_colname]]%in%active_aa_pos
, active_aa_colour, default_gt_clr ))
plot_df[[aa_colour_colname]]
cat("\nColumn created 'bg_all':", length(plot_df[[aa_colour_colname]]))
#------------------------------------------------
# column colour 1: Ligand 1 + drug binding sites
#------------------------------------------------
cat("\nAssigning colours to drug binding and ligand-1 binding residues")
plot_df[[aa_colour_colname1]] = plot_df[[aa_colour_colname]]
plot_df[[aa_colour_colname1]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_lig1
, aa_colour_lig1, plot_df[[aa_colour_colname]])
#------------------------------------------------
# column colour 2: Ligand 2
#------------------------------------------------
plot_df[[aa_colour_colname2]] = plot_df[[aa_colour_colname1]]
plot_df[[aa_colour_colname2]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_lig2
, aa_colour_lig2, plot_df[[aa_colour_colname1]])
#------------------------------------------------
# column colour 3: Ligand 3
#------------------------------------------------
plot_df[[aa_colour_colname3]] = plot_df[[aa_colour_colname2]]
plot_df[[aa_colour_colname3]] = ifelse(plot_df[[xvar_colname]]%in%aa_pos_lig3
, aa_colour_lig3, plot_df[[aa_colour_colname2]])
}
} else {
# set these to the string "DUMMY" so that the build-up-the-tiles bit works
aa_pos_drug = "DUMMY"
aa_pos_lig1 = "DUMMY"
active_aa_pos = "DUMMY"
if (aa_colour_colname2 %in% colnames(merged_df3)) {
aa_pos_lig2 = "DUMMY"
if (aa_colour_colname3 %in% colnames(merged_df3)) {
aa_pos_lig2 = "DUMMY"
}
}
}
###################
# start plot
###################
@ -355,7 +274,14 @@ wideP_consurf3 <- function(plot_df
plot.margin = margin(t=0,b=0)) +
labs(x = NULL, y = NULL), #end of distance-heat-bar
NULL,
position_annotation(plot_df, bg = panel_col),
position_annotation(plot_df,
bg = panel_col,
aa_pos_drug=aa_pos_drug,
active_aa_pos=active_aa_pos,
aa_pos_lig1=aa_pos_lig1,
aa_pos_lig2=aa_pos_lig2,
aa_pos_lig3=aa_pos_lig3
),
ncol=1,
align='v',
axis='lr',

View file

@ -0,0 +1,366 @@
#!/usr/bin/env Rscript
#source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
# get plotting dfs
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
####################################################
# ggpairs wrapper
#
# Builds a pairs matrix for every column of plot_df except the last one.
# Points are coloured by drug susceptibility, derived from plot_df$dst_mode
# (0 -> "S", any other non-missing value -> "R").
# The callers in this script pass a data frame whose last column is dst_mode,
# which is why the last column is left out of the plotted columns.
#
# Args:
#   plot_df: data frame with a dst_mode column.
# Returns:
#   the ggpairs object with manual colour/fill scales and a bold 12pt text theme.
my_gg_pairs=function(plot_df){
  # Upper triangle: Spearman correlation, titled with rho in black text
  upper_panels = list(
    continuous = wrap('cor',
                      method = "spearman",
                      title="ρ",
                      digits=2,
                      title_args=c(colour="black"))
  )

  # Lower triangle: small semi-transparent points (or dot plots for mixed pairs)
  lower_panels = list(
    continuous = wrap("points", alpha = 0.7, size=0.5),
    combo = wrap("dot", alpha = 0.7, size=0.5)
  )

  # The mapping expression is kept exactly as-is so labels derived from it do not change
  point_mapping = aes(colour = factor(ifelse(plot_df$dst_mode==0, "S", "R")), alpha = 0.5)

  last_plotted_col = ncol(plot_df)-1

  pairs_matrix = ggpairs(plot_df,
                         mapping = point_mapping,
                         columns = 1:last_plotted_col,
                         upper = upper_panels,
                         lower = lower_panels,
                         title="Stability")

  # NOTE(review): unnamed manual values are presumably matched to the factor
  # levels in order ("R" -> red, "S" -> blue when both are present) - confirm
  pairs_matrix +
    scale_colour_manual(values = c("red", "blue")) +
    scale_fill_manual(values = c("red", "blue")) +
    theme(
      text = element_text(size=12, face="bold")
    )
}
#=======
# output
#=======
# Directory that receives the SVG figures.
# `gene` is presumably set by the sourced config file - TODO confirm.
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")

#=======
# Input
#=======
merged_df3 = as.data.frame(merged_df3)

corr_plotdf = corr_data_extract(merged_df3
                                , gene = gene
                                , drug = drug
                                , extract_scaled_cols = FALSE)
colnames(corr_plotdf)

# Sanity check against the dashboard data frame.
# identical() is used instead of all(a == b): `==` silently recycles when the
# two name vectors have different lengths and could report a false PASS.
# corr_df_m3_f is expected to come from get_plotting_dfs.R - TODO confirm.
if (identical(colnames(corr_df_m3_f), colnames(corr_plotdf))){
  cat("PASS: corr plot colnames match for dashboard")
}else{
  stop("Abort: corr plot colnames DO NOT match for dashboard")
}
#corr_plotdf = corr_df_m3_f #for downstream code

# Every column whose name contains "Dist" (affinity/interface distances)
aff_dist_cols = grep("Dist", colnames(corr_plotdf), value = TRUE)
aff_dist_cols

# Columns shared by every correlation plot below
static_cols = c("Log10(MAF)"
                , "Log10(OR)"
                #, "-Log10(P)"
)
#================
# stability
#================
# Stability predictors plus every distance column; dst_mode must stay last
# among the plotted columns because it is used for colouring, not correlation.
corr_ps_colnames = c(static_cols
                     , "DUET"
                     , "FoldX"
                     , "DeepDDG"
                     , "Dynamut2"
                     , aff_dist_cols
                     , "dst_mode")

if (all(corr_ps_colnames%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for correlation")
}else{
  stop("Abort: all colnames DO NOT exist for correlation")
}

corr_df_ps = corr_plotdf[, corr_ps_colnames]

# The OR column is called "Log10(OR)" (see static_cols). The previous
# `$`Log(OR)`` lookup returned NULL, so the NA count was always 0.
complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps[["Log10(OR)"]]))
cat("\nComplete muts for stability for", gene, ":", complete_obs_ps)

# Correlate every column to the left of the colouring column
color_coln = which(colnames(corr_df_ps) == "dst_mode")
#end = which(colnames(corr_df_ps) == drug)
#ncol_omit = 2
#corr_end = end-ncol_omit
corr_end = color_coln-1

#------------------------
# Output: stability corrP
#------------------------
corr_psP = paste0(outdir_images
                  ,tolower(gene)
                  ,"_corr_stability.svg" )

cat("Corr plot stability with coloured dots:", corr_psP)
svg(corr_psP, width = 15, height = 15)

my_corr_pairs(corr_data_all = corr_df_ps
              , corr_cols = colnames(corr_df_ps[1:corr_end])
              , corr_method = "spearman"
              , colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode"
              , categ_colour = c("red", "blue")
              , density_show = FALSE
              , hist_col = "coral4"
              , dot_size = 1.6
              , ats = 1.5
              , corr_lab_size =2.5
              , corr_value_size = 1)

dev.off()
#===============
# CONSERVATION
#==============
# Conservation predictors plus every distance column. dst_mode is the colouring
# column; the drug column sits after it and is therefore never correlated.
corr_conservation_cols = c( static_cols
                            , "ConSurf"
                            , "SNAP2"
                            , "PROVEAN"
                            , aff_dist_cols
                            , "dst_mode"
                            , drug)

if (all(corr_conservation_cols%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for ConSurf-correlation")
}else{
  stop("Abort: all colnames DO NOT exist for ConSurf-correlation")
}

corr_df_cons = corr_plotdf[, corr_conservation_cols]

# The OR column is called "Log10(OR)" (see static_cols). The previous
# `$`Log(OR)`` lookup returned NULL, so the NA count was always 0.
complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons[["Log10(OR)"]]))
cat("\nComplete muts for Conservation for", gene, ":", complete_obs_cons)

# Correlate every column to the left of the colouring column
color_coln = which(colnames(corr_df_cons) == "dst_mode")
# end = which(colnames(corr_df_cons) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
corr_end = color_coln-1

#---------------------------
# Output: Conservation corrP
#----------------------------
corr_consP = paste0(outdir_images
                    ,tolower(gene)
                    ,"_corr_conservation.svg" )

cat("Corr plot conservation coloured dots:", corr_consP)
svg(corr_consP, width = 10, height = 10)

my_corr_pairs(corr_data_all = corr_df_cons
              , corr_cols = colnames(corr_df_cons[1:corr_end])
              , corr_method = "spearman"
              , colour_categ_col = colnames(corr_df_cons[color_coln]) #"dst_mode"
              , categ_colour = c("red", "blue")
              , density_show = FALSE
              , hist_col = "coral4"
              , dot_size =1.1
              , ats = 1.5
              , corr_lab_size = 1.8
              , corr_value_size = 1)

dev.off()
#####################################################
#DistCutOff = 10
#LigDist_colname # = "ligand_distance" # from globals
#ppi2Dist_colname = "interface_dist"
#naDist_colname = "TBC"
#####################################################
#================
# ligand affinity
#================
# Correlation plot restricted to mutations closer than DistCutOff to the ligand.
# NOTE(review): DistCutOff is not assigned in this script (the assignment above
# is commented out); it presumably comes from the sourced config - confirm.
corr_lig_colnames = c(static_cols
                      , "mCSM-lig"
                      , "mmCSM-lig"
                      , "dst_mode")
#, drug)

# "Lig-Dist" is needed for the row filter even though it is not plotted
if (all(c(corr_lig_colnames, "Lig-Dist")%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for Lig-correlation")
}else{
  stop("Abort: all colnames DO NOT exist for Lig-correlation")
}

# Filter rows first, THEN select columns from the filtered frame. Previously the
# columns were taken from the unfiltered corr_plotdf, which silently threw the
# distance cut-off away. which() also drops rows with a missing distance.
corr_df_lig = corr_plotdf[which(corr_plotdf[["Lig-Dist"]] < DistCutOff), ]
corr_df_lig = corr_df_lig[, corr_lig_colnames]

# The OR column is called "Log10(OR)" (see static_cols). The previous
# `$`Log(OR)`` lookup returned NULL, so the NA count was always 0.
complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig[["Log10(OR)"]]))
cat("\nComplete muts for lig affinity for", gene, ":", complete_obs_lig)

# Correlate every column to the left of the colouring column
color_coln = which(colnames(corr_df_lig) == "dst_mode")
# end = which(colnames(corr_df_lig) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
corr_end = color_coln-1

#------------------------
# Output: ligand corrP
#------------------------
corr_ligP = paste0(outdir_images
                   ,tolower(gene)
                   ,"_corr_lig.svg" )

cat("Corr plot affinity with coloured dots:", corr_ligP)
svg(corr_ligP, width = 10, height = 10)

my_corr_pairs(corr_data_all = corr_df_lig
              , corr_cols = colnames(corr_df_lig[1:corr_end])
              , corr_method = "spearman"
              , colour_categ_col = colnames(corr_df_lig[color_coln]) #"dst_mode"
              , categ_colour = c("red", "blue")
              , density_show = FALSE
              , hist_col = "coral4"
              , dot_size = 2
              , ats = 1.5
              , corr_lab_size =3
              , corr_value_size = 1)

dev.off()
####################################################
#================
# ppi2 affinity
#================
# Only produced for genes listed in geneL_ppi2. Restricted to mutations closer
# than DistCutOff to the protein-protein interface.
# NOTE(review): DistCutOff and geneL_ppi2 are not assigned in this script; they
# presumably come from the sourced config - confirm.
if (tolower(gene)%in%geneL_ppi2){

  corr_ppi2_colnames = c(static_cols
                         , "mCSM-PPI2"
                         , "dst_mode"
                         , drug)

  # "PPI-Dist" is needed for the row filter even though it is not plotted
  if (all(c(corr_ppi2_colnames, "PPI-Dist")%in%colnames(corr_plotdf))){
    cat("PASS: all colnames exist for mcsm-ppi2 correlation")
  }else{
    stop("Abort: all colnames DO NOT exist for mcsm-ppi2 correlation")
  }

  # Filter rows first, THEN select columns from the filtered frame. Previously
  # the columns were taken from the unfiltered corr_plotdf, which silently threw
  # the distance cut-off away. which() also drops rows with a missing distance.
  corr_df_ppi2 = corr_plotdf[which(corr_plotdf[["PPI-Dist"]] < DistCutOff), ]
  corr_df_ppi2 = corr_df_ppi2[, corr_ppi2_colnames]

  # The OR column is called "Log10(OR)" (see static_cols). The previous
  # `$`Log(OR)`` lookup returned NULL, so the NA count was always 0.
  complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2[["Log10(OR)"]]))
  cat("\nComplete muts for ppi2 affinity for", gene, ":", complete_obs_ppi2)

  # Correlate every column to the left of the colouring column
  color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
  # end = which(colnames(corr_df_ppi2) == drug)
  # ncol_omit = 2
  # corr_end = end-ncol_omit
  corr_end = color_coln-1

  #------------------------
  # Output: ppi2 corrP
  #------------------------
  corr_ppi2P = paste0(outdir_images
                      ,tolower(gene)
                      ,"_corr_ppi2.svg" )

  cat("Corr plot ppi2 with coloured dots:", corr_ppi2P)
  svg(corr_ppi2P, width = 10, height = 10)

  my_corr_pairs(corr_data_all = corr_df_ppi2
                , corr_cols = colnames(corr_df_ppi2[1:corr_end])
                , corr_method = "spearman"
                , colour_categ_col = colnames(corr_df_ppi2[color_coln]) #"dst_mode"
                , categ_colour = c("red", "blue")
                , density_show = FALSE
                , hist_col = "coral4"
                , dot_size = 2
                , ats = 1.5
                , corr_lab_size = 3
                , corr_value_size = 1)

  dev.off()
}
# FIXME: ADD distance
#==================
# mCSM-NA affinity
#==================
# Only produced for genes listed in geneL_na.
# NOTE(review): DistCutOff and geneL_na are not assigned in this script; they
# presumably come from the sourced config - confirm.
if (tolower(gene)%in%geneL_na){

  corr_na_colnames = c(static_cols
                       , "mCSM-NA"
                       , "dst_mode"
                       , drug)

  if (all(corr_na_colnames%in%colnames(corr_plotdf))){
    cat("PASS: all colnames exist for mcsm-NA-correlation")
  }else{
    stop("Abort: all colnames DO NOT exist for mcsm-NA-correlation")
  }

  # Start from corr_plotdf: the previous code indexed corr_df_na before this
  # script had assigned it, and then discarded the filtered result anyway.
  corr_df_na = corr_plotdf

  # Apply the distance cut-off only when the distance column is available
  # (see the FIXME above); otherwise fall back to all mutations, loudly.
  if ("NA-Dist" %in% colnames(corr_df_na)) {
    corr_df_na = corr_df_na[which(corr_df_na[["NA-Dist"]] < DistCutOff), ]
  } else {
    warning("'NA-Dist' column not found: plotting all mutations without a distance cut-off"
            , call. = FALSE)
  }
  corr_df_na = corr_df_na[, corr_na_colnames]

  # The OR column is called "Log10(OR)" (see static_cols). The previous
  # `$`Log(OR)`` lookup returned NULL, so the NA count was always 0.
  complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na[["Log10(OR)"]]))
  cat("\nComplete muts for NA affinity for", gene, ":", complete_obs_na)

  # Correlate every column to the left of the colouring column
  color_coln = which(colnames(corr_df_na) == "dst_mode")
  # end = which(colnames(corr_df_na) == drug)
  # ncol_omit = 2
  # corr_end = end-ncol_omit
  corr_end = color_coln-1

  #------------------------
  # Output: mCSM-NA corrP
  #------------------------
  corr_naP = paste0(outdir_images
                    ,tolower(gene)
                    ,"_corr_na.svg" )

  cat("Corr plot mCSM-NA with coloured dots:", corr_naP)
  svg(corr_naP, width = 10, height = 10)

  my_corr_pairs(corr_data_all = corr_df_na
                , corr_cols = colnames(corr_df_na[1:corr_end])
                , corr_method = "spearman"
                , colour_categ_col = colnames(corr_df_na[color_coln]) #"dst_mode"
                , categ_colour = c("red", "blue")
                , density_show = FALSE
                , hist_col = "coral4"
                , dot_size = 2
                , ats = 1.5
                , corr_lab_size = 3
                , corr_value_size = 1)

  dev.off()
}
####################################################
#===============
#ggpairs:
#================
# Draft ggpairs renderings of the stability correlation data (SVG and PNG).
#corr_df_ps$dst_mode = ifelse(corr_df_cons$dst_mode=="1", "R", "S")
corr_plotting_df = corr_df_ps

# svg() has no `units`/`res` arguments (passing them is an "unused argument"
# error); its width/height are always given in inches.
svg('~/tmp/foo.svg',
    width=10,
    height=10)
# print() explicitly so the plot is drawn even when this file is source()d,
# where top-level values are not auto-printed.
print(my_gg_pairs(corr_plotting_df))
dev.off()

# png() does need units/res to interpret width/height as inches at 300 dpi
png('~/tmp/foo.png',
    width=10,
    height=10,
    units="in",
    res=300)
print(my_gg_pairs(corr_plotting_df))
dev.off()
#

View file

@ -0,0 +1,49 @@
# Tweak for layout, fonts, and text sizes.
#svg('~/tmp/foo.svg', width=10, height=10, )
# Set the width/height to inches for print. 300 dpi is reasonably ok for "draft"
# output. To raise quality while preserving sanity, increase 'res' and
# DO NOT alter font/point/line sizes
#- [X] Black text for "Corr:" or replace with Rho symbol
#- [X] 0/1 == R/S
#- [X] "rho" symbol instead of "Corr:" text
#- [X] Dot size a bit smaller
#- [X] Plot lines slightly thinner
#
#
# Build the plot object BEFORE opening the device, so an error while
# constructing it cannot leave a PNG device open.
corr_plotting_df = corr_df_ps
corr_pairs_plot = ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
                          upper = list(continuous = wrap('cor',
                                                         method = "spearman",
                                                         title="ρ",
                                                         digits=2,
                                                         title_args=c(colour="black")
                                                         )
                                       ),
                          lower = list(
                            continuous = wrap("points", alpha = 0.7, size=0.5),
                            combo = wrap("dot", alpha = 0.7, size=0.5)
                          ),
                          # dst_mode == 0 is labelled "S", anything else "R"
                          aes(colour = factor(ifelse(corr_plotting_df$dst_mode==0, "S", "R")), alpha = 0.5),
                          title="Stability") +
  scale_colour_manual(values = c("red", "blue")) +
  scale_fill_manual(values = c("red", "blue")) +
  theme(
    text = element_text(size=12, face="bold")
  )
#
png('~/tmp/foo.png',
    width=10,
    height=10,
    units="in",
    res=300)
# Explicit print(): a ggmatrix is only drawn by its print method, and
# top-level auto-printing is skipped when the script is source()d.
print(corr_pairs_plot)
dev.off()
#Check all plots with LSHTM_analysis/scripts/plotting/plotting_colnames.R

View file

@ -0,0 +1,144 @@
source("~/git/LSHTM_analysis/config/embb.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# Build a ggpairs() scatter/correlation matrix for `plot_df`.
#
# plot_df: data frame; every column except the last one is plotted, and the
#          `dst_mode` column drives the point colour (0 -> "S", otherwise "R").
# Returns the ggpairs plot object with manual red/blue colour and fill scales
# and a bold size-12 text theme; nothing is drawn until it is printed.
my_gg_pairs <- function(plot_df) {
  # all columns but the last
  pair_cols <- 1:(ncol(plot_df) - 1)

  # upper triangle: Spearman correlation, labelled with a black "ρ"
  upper_panels <- list(
    continuous = wrap("cor",
                      method = "spearman",
                      title = "ρ",
                      digits = 2,
                      justify_labels = "left",
                      title_args = c(colour = "black"))
  )

  # lower triangle: small, semi-transparent points / dot plots
  lower_panels <- list(
    continuous = wrap("points", alpha = 0.7, size = 0.5),
    combo = wrap("dot", alpha = 0.7, size = 0.5)
  )

  pairs_plot <- ggpairs(
    plot_df,
    mapping = aes(colour = factor(ifelse(plot_df$dst_mode == 0, "S", "R")),
                  alpha = 0.5),
    columns = pair_cols,
    upper = upper_panels,
    lower = lower_panels,
    title = "Stability"
  )

  pairs_plot +
    scale_colour_manual(values = c("red", "blue")) +
    scale_fill_manual(values = c("red", "blue")) +
    theme(text = element_text(size = 12, face = "bold"))
}
# Distance threshold used by the "<DistCutOff" row filters further down.
DistCutOff <- 10

# Work on a plain data.frame copy of the merged data.
merged_df3 <- as.data.frame(merged_df3)

# Unscaled columns for the correlation plots of this gene/drug.
corr_plotdf <- corr_data_extract(
  merged_df3,
  gene = gene,
  drug = drug,
  extract_scaled_cols = FALSE
)

# Every column whose name contains "Dist".
aff_dist_cols <- grep("Dist", colnames(corr_plotdf), value = TRUE)
# Columns shared by every correlation data frame in this script.
static_cols = c("Log10(MAF)"
                , "Log10(OR)")

# Stability predictors + distance columns; "dst_mode" must stay last because
# my_gg_pairs() plots every column except the last one.
corr_ps_colnames = c(static_cols
                     , "DUET"
                     , "FoldX"
                     , "DeepDDG"
                     , "Dynamut2"
                     , aff_dist_cols
                     , "dst_mode")

corr_df_ps = corr_plotdf[, corr_ps_colnames]

# Rows with an odds ratio. The column is "Log10(OR)" (see static_cols); the
# old `Log(OR)` lookup matched nothing, returned NULL and made this count
# always equal nrow(corr_df_ps).
complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps[["Log10(OR)"]]))

color_coln = which(colnames(corr_df_ps) == "dst_mode")
corr_end = color_coln - 1

# Plot #1
plot_corr_df_ps = my_gg_pairs(corr_df_ps)
# Conservation predictors + distance columns; "dst_mode" stays last because
# my_gg_pairs() plots every column except the last one.
corr_conservation_cols = c(static_cols
                           , "ConSurf"
                           , "SNAP2"
                           , "PROVEAN"
                           , aff_dist_cols
                           , "dst_mode"
                           )

corr_df_cons = corr_plotdf[, corr_conservation_cols]

# Rows with an odds ratio. The column is named "Log10(OR)"; the old
# `Log(OR)` lookup matched nothing and the count always equalled nrow().
complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons[["Log10(OR)"]]))

color_coln = which(colnames(corr_df_cons) == "dst_mode")
corr_end = color_coln - 1

# Plot #2
#my_gg_pairs(corr_df_cons)
plot_corr_df_cons = my_gg_pairs(corr_df_cons)
# Ligand affinity: keep only mutations closer than DistCutOff to the ligand.
corr_lig_colnames = c(static_cols
                      , "mCSM-lig"
                      , "mmCSM-lig"
                      , "dst_mode")

# Fail loudly if the distance column is absent (the old code errored too).
stopifnot("Lig-Dist" %in% colnames(corr_plotdf))

# The old code filtered by distance and then rebuilt corr_df_lig from the
# unfiltered corr_plotdf, silently discarding the cutoff. Rows and columns
# are now selected in one step. which() drops rows whose distance is NA
# instead of turning them into all-NA rows.
lig_near_rows = which(corr_plotdf[["Lig-Dist"]] < DistCutOff)
corr_df_lig = corr_plotdf[lig_near_rows, corr_lig_colnames]

# Rows with an odds ratio. The column is named "Log10(OR)"; the old
# `Log(OR)` lookup matched nothing and the count always equalled nrow().
complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig[["Log10(OR)"]]))

color_coln = which(colnames(corr_df_lig) == "dst_mode")
corr_end = color_coln - 1

# Plot #3
#my_gg_pairs(corr_df_lig)
plot_corr_df_lig = my_gg_pairs(corr_df_lig)
# PPI affinity: keep only mutations closer than DistCutOff to the interface.
corr_ppi2_colnames = c(static_cols
                       , "mCSM-PPI2"
                       , "dst_mode"
                       )

# Fail loudly if the distance column is absent (the old code errored too).
stopifnot("PPI-Dist" %in% colnames(corr_plotdf))

# The old code filtered by distance and then rebuilt corr_df_ppi2 from the
# unfiltered corr_plotdf, silently discarding the cutoff. Rows and columns
# are now selected in one step. which() drops rows whose distance is NA
# instead of turning them into all-NA rows.
ppi2_near_rows = which(corr_plotdf[["PPI-Dist"]] < DistCutOff)
corr_df_ppi2 = corr_plotdf[ppi2_near_rows, corr_ppi2_colnames]

# Rows with an odds ratio. The column is named "Log10(OR)"; the old
# `Log(OR)` lookup matched nothing and the count always equalled nrow().
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2[["Log10(OR)"]]))

color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
corr_end = color_coln - 1

# NOTE: DELETE LOG OR FROM CORRELATION PLOTS!!!!!
# NOTE: ALSO MAYBE DELETE DISTANCES AS WELL
# NOTE: http://ggobi.github.io/ggally/reference/ggally_cor.html
# "***" if the p-value is < 0.001
# "**" if the p-value is < 0.01
# "*" if the p-value is < 0.05
# "." if the p-value is < 0.10
# "" otherwise
#
# Plot #4
#my_gg_pairs(corr_df_ppi2)
plot_corr_df_ppi2 = my_gg_pairs(corr_df_ppi2)
# corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,]
# corr_na_colnames = c(static_cols
# , "mCSM-NA"
# , "dst_mode"
# )
#
# corr_df_na = corr_plotdf[, corr_na_colnames]
# complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na$`Log(OR)`))
# color_coln = which(colnames(corr_df_na) == "dst_mode")
# corr_end = color_coln-1
#
# # Plot #5
# #my_gg_pairs(corr_df_na)
# plot_corr_df_na = my_gg_pairs(corr_df_na)
# Arrange the four correlation matrices on a 2 x 2 grid.
# rel_heights takes ONE value per grid row. The previous
# `rel_heights = 7,7,3,3` set rel_heights to 7 and passed the remaining
# numbers through `...` as extra "plots". Top row (stability, conservation)
# gets weight 7, bottom row (ligand, PPI) gets weight 3.
cowplot::plot_grid(
  ggmatrix_gtable(plot_corr_df_ps), ggmatrix_gtable(plot_corr_df_cons),
  ggmatrix_gtable(plot_corr_df_lig), ggmatrix_gtable(plot_corr_df_ppi2),
  nrow = 2, ncol = 2, rel_heights = c(7, 3)
)