generated ggpairs plots finally

This commit is contained in:
Tanushree Tunstall 2022-08-15 19:05:22 +01:00
parent b68841b337
commit a3e5283a9b
11 changed files with 657 additions and 939 deletions

View file

@ -38,7 +38,7 @@ source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
class(merged_df3)
merged_df3 = as.data.frame(merged_df3)
class(df3)
class(merged_df3)
head(merged_df3$pos_count)
nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count"); nc_pc_CHANGE
@ -198,10 +198,10 @@ rects <- data.frame(x = 1:6,
)
rects
rects$text = c("-ve Lig affinty"
, "+ve Lig affinity"
, "+ve PPI2 affinity"
, "-ve PPI2 affinity"
rects$text = c("-ve Lig"
, "+ve Lig"
, "+ve PPI2"
, "-ve PPI2"
, "+ve stability"
, "-ve stability")
@ -221,7 +221,7 @@ peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_l
coord_flip()+ scale_x_reverse() +
# theme_void() # remove any axis markings
theme_nothing() # remove any axis markings
peP
peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
geom_tile() + # make square tiles
@ -229,7 +229,7 @@ peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_
scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
coord_fixed() + # make sure tiles are square
theme_nothing() # remove any axis markings
peP2
# ------------------------------
# bp site site count: ALL
@ -252,24 +252,24 @@ posC_all = site_snp_count_bp(plotdf = df3
#------------------------------
# barplot for sensitivity:
#------------------------------
sensP = stability_count_bp(plotdf = df3
, df_colname = "sensitivity"
#, leg_title = "mCSM-ppi2"
#, label_categories = labels_ppi2
#, bp_plot_title = paste(common_bp_title, "PP-interface")
, yaxis_title = "Number of nsSNPs"
, leg_position = "none"
, subtitle_text = "Sensitivity"
, bar_fill_values = c("red", "blue")
, subtitle_colour= "black"
, sts = 10
, lts = 8
, ats = 8
, als =8
, ltis = 11
, geom_ls =2
)
# sensP = stability_count_bp(plotdf = df3
# , df_colname = "sensitivity"
# #, leg_title = "mCSM-ppi2"
# #, label_categories = labels_ppi2
# #, bp_plot_title = paste(common_bp_title, "PP-interface")
#
# , yaxis_title = "Number of nsSNPs"
# , leg_position = "none"
# , subtitle_text = "Sensitivity"
# , bar_fill_values = c("red", "blue")
# , subtitle_colour= "black"
# , sts = 10
# , lts = 8
# , ats = 8
# , als =8
# , ltis = 11
# , geom_ls =2
# )
consurfP = stability_count_bp(plotdf = df3
@ -290,3 +290,95 @@ consurfP = stability_count_bp(plotdf = df3
consurfP
####################
# Sensitivity count
####################
# Tally mutations per sensitivity class; printed as a quick sanity check.
sens_counts = table(df3$sensitivity)
sens_counts

# One row per tile: display name, fill colour and mutation count.
# NOTE(review): counts are picked by position, so this assumes the first
# table level is the resistant class and the second the sensitive one -- confirm.
rect_sens = data.frame(
  mutation_class = c("Resistant", "Sensitive"),
  tile_colour = c("red", "blue"),
  numbers = c(sens_counts[[1]], sens_counts[[2]])
)

# Two square tiles, each carrying an "n=<count>" label.
sensP = ggplot(
  rect_sens,
  aes(mutation_class, y = 0, fill = tile_colour, label = paste0("n=", numbers))
) +
  # unit-sized tiles so coord_fixed() renders them as squares
  geom_tile(width = 1, height = 1) +
  # black text on a semi-transparent white label box
  geom_label(color = "black", size = 1.7, fill = "white", alpha = 0.7) +
  # tile_colour already holds literal colour names; use them verbatim, no legend
  scale_fill_identity(guide = "none") +
  coord_fixed() +
  #coord_flip()+ scale_x_reverse() +
  # theme_void() # alternative way to strip axis markings
  theme_nothing() # strip all axis markings
sensP
# sensP2 = sensP +
# coord_flip() + scale_x_reverse()
# sensP2
##############################
# FIXME for other genes: ATTEMPTED to derive numbers
##############################
#
# table(str_df_short$pe_effect_outcome)
# # extract the numbers
# DD_lig_n = table(str_df_short$pe_effect_outcome)[[1]]
# SS_lig_n = 0
# DD_ppi2_n = table(str_df_short$pe_effect_outcome)[[2]]
# SS_ppi2_n = table(str_df_short$pe_effect_outcome)[[4]]
# DD_stability_n = table(str_df_short$pe_effect_outcome)[[3]]
# SS_stability_n = table(str_df_short$pe_effect_outcome)[[5]]
#
# nums = c(DD_lig_n, SS_lig_n,DD_ppi2_n,SS_ppi2_n, DD_stability_n, SS_stability_n )
#
# rect_pe = data.frame(x = 1:6
# , pe_effect_type=c("-ve Lig aff"
# , "+ve Lig aff"
# , "-ve PPI2 aff"
# , " +ve PPI2 aff"
# , "-ve stability"
# , "+ve stability")
#
# , tile_colour =c("#ffd700" #gold
# ,"#f0e68c" # khaki
# , "#ff1493" #deeppink
# , "#da70d6" #orchid
# , "#F8766D" # Sred
# , "#00BFC4") #Sblue
# # , numbers = c(DD_lig_n
# # , SS_lig_n
# # , DD_ppi2_n
# # , SS_ppi2_n
# # , DD_stability_n
# # , SS_stability_n )
# , numbers = nums
# )
#
# rect_pe$num_labels = paste0("n=", rect_pe$numbers)
# rect_pe
#
# # create plot
# peP = ggplot(rect_pe, aes(x=pe_effect_type , y = 0, fill = tile_colour
# , label = paste0(pe_effect_type,"\n", num_labels))) +
# geom_tile(width = 1, height = 1) + # make square tiles
# geom_text(color = "black", size = 1.7) + # add white text in the middle
# scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
# coord_fixed() + # make sure tiles are square
# coord_flip()+ scale_x_reverse() +
# # theme_void() # remove any axis markings
# theme_nothing() # remove any axis markings
# peP
#
# peP2 = ggplot(rect_pe, aes(x=pe_effect_type, y = 0, fill = tile_colour
# , label = paste0(pe_effect_type,"\n", num_labels))) +
# geom_tile() +
# geom_text(color = "black", size = 1.6) +
# scale_fill_identity(guide = "none") +
# coord_fixed() +
# theme_nothing()
# peP2

View file

@ -4,7 +4,6 @@ posC_lig
ppi2P
posC_ppi2
peP
pe_allCL
theme_georgia <- function(...) {
@ -22,12 +21,127 @@ common_legend_outcome = get_legend(mLigP +
guides(color = guide_legend(nrow = 1)) +
theme(legend.position = "top"))
###############################################################
# ###############################################################
# #================================
# # Lig Affinity: outcome + site
# #================================
# ligT = paste0(common_bp_title, " ligand")
# lig_affT = ggdraw() +
# draw_label(
# ligT,
# fontfamily = title_theme$family,
# fontface = title_theme$face,
# #size = title_theme$size
# size = 8
# )
# #-------------
# # Outplot
# #-------------
# ligaffP = paste0(outdir_images
# ,tolower(gene)
# ,"_lig_oc.png")
#
# #svg(affP, width = 20, height = 5.5)
# print(paste0("plot filename:", ligaffP))
# png(ligaffP, units = "in", width = 6, height = 4, res = 300 )
# cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome,
# nrow = 2,
# rel_heights = c(1,1)
# ),
# cowplot::plot_grid(mLigP, mmLigP, posC_lig
# , nrow = 1
# #, labels = c("A", "B", "C","D")
# , rel_widths = c(1,1,1.8)
# , align = "h"),
# nrow = 2,
# labels = c("A", ""),
# label_size = 12,
# rel_heights = c(1,8))
# dev.off()
# #############################################################
# #================================
# # PPI2 Affinity: outcome + site
# #================================
# ppi2T = paste0(common_bp_title, " PP-interface")
# ppi2_affT = ggdraw() +
# draw_label(
# ppi2T,
# fontfamily = title_theme$family,
# fontface = title_theme$face,
# #size = title_theme$size
# size = 8
# )
#
#
# #-------------
# # Outplot: PPI2
# #-------------
# ppiaffP = paste0(outdir_images
# ,tolower(gene)
# ,"_ppi2_oc.png")
#
# #svg(affP, width = 20, height = 5.5)
# print(paste0("plot filename:", ppiaffP))
# png(ppiaffP, units = "in", width = 6, height = 4, res = 300 )
#
#
# cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome,
# nrow = 2,
# rel_heights = c(1,1)),
# cowplot::plot_grid(ppi2P, posC_ppi2
# , nrow = 1
# , rel_widths = c(1.2,1.8)
# , align = "h"
# , label_size = my_label_size),
# nrow = 2,
# labels = c("B", ""),
# label_size = 12,
# rel_heights = c(1,8)
# )
#
# dev.off()
# #############################################################
#peP # pe counts
#================================
# Lig Affinity: outcome + site
# PE + All position count
#================================
# peT_allT = ggdraw() +
# draw_label(
# paste0("All mutation sites"),
# fontfamily = title_theme$family,
# fontface = title_theme$face,
# #size = title_theme$size
# size = 8
# )
# #------------------------
# # Outplot: lig+ppi2+pe
# #------------------------
# pe_allCL = paste0(outdir_images
# ,tolower(gene)
# ,"_pe_oc.png")
#
# #svg(affP, width = 20, height = 5.5)
# print(paste0("plot filename:", pe_allCL))
# png(pe_allCL, units = "in", width = 6, height = 4, res = 300 )
#
#
# cowplot::plot_grid(peT_allT,
# cowplot::plot_grid(peP, posC_all
# , nrow = 1
# , rel_widths = c(1, 2)
# , align = "h"),
# nrow = 2,
# labels = c("C", "", ""),
# label_size = 12,
# rel_heights = c(1,8))
#
# dev.off()
#===========================================
# COMBINE ALL three
#==========================================
ligT = paste0(common_bp_title, " ligand")
lig_affT = ggdraw() +
lig_affT = ggdraw() +
draw_label(
ligT,
fontfamily = title_theme$family,
@ -36,113 +150,6 @@ lig_affT = ggdraw() +
size = 8
)
#-------------
# Outplot
#-------------
ligaffP = paste0(outdir_images
,tolower(gene)
,"_lig_oc.png")
#svg(affP, width = 20, height = 5.5)
print(paste0("plot filename:", ligaffP))
png(ligaffP, units = "in", width = 6, height = 4, res = 300 )
cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome,
nrow = 2,
rel_heights = c(1,1)
),
cowplot::plot_grid(mLigP, mmLigP, posC_lig
, nrow = 1
#, labels = c("A", "B", "C","D")
, rel_widths = c(1,1,1.8)
, align = "h"),
nrow = 2,
labels = c("A", ""),
label_size = 12,
rel_heights = c(1,8))
dev.off()
#############################################################
#================================
# PPI2 Affinity: outcome + site
#================================
ppi2T = paste0(common_bp_title, " PP-interface")
ppi2_affT = ggdraw() +
draw_label(
ppi2T,
fontfamily = title_theme$family,
fontface = title_theme$face,
#size = title_theme$size
size = 8
)
#-------------
# Outplot: PPI2
#-------------
ppiaffP = paste0(outdir_images
,tolower(gene)
,"_ppi2_oc.png")
#svg(affP, width = 20, height = 5.5)
print(paste0("plot filename:", ppiaffP))
png(ppiaffP, units = "in", width = 6, height = 4, res = 300 )
cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome,
nrow = 2,
rel_heights = c(1,1)),
cowplot::plot_grid(ppi2P, posC_ppi2
, nrow = 1
, rel_widths = c(1.2,1.8)
, align = "h"
, label_size = my_label_size),
nrow = 2,
labels = c("B", ""),
label_size = 12,
rel_heights = c(1,8)
)
dev.off()
#############################################################
peP # pe counts
#================================
# PE + All position count
#================================
peT_allT = ggdraw() +
draw_label(
paste0("All mutation sites"),
fontfamily = title_theme$family,
fontface = title_theme$face,
#size = title_theme$size
size = 8
)
#-------------
# Outplot: PPI2
#-------------
pe_allCL = paste0(outdir_images
,tolower(gene)
,"_pe_oc.png")
#svg(affP, width = 20, height = 5.5)
print(paste0("plot filename:", pe_allCL))
png(pe_allCL, units = "in", width = 6, height = 4, res = 300 )
cowplot::plot_grid(peT_allT,
cowplot::plot_grid(peP, posC_all
, nrow = 1
, rel_widths = c(1, 2)
, align = "h"),
nrow = 2,
labels = c("C", "", ""),
label_size = 12,
rel_heights = c(1,8))
dev.off()
#===========================================
# COMBINE ALL three
#==========================================
p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome, nrow=2),
cowplot::plot_grid(mLigP, mmLigP, posC_lig
, nrow = 1
@ -152,8 +159,18 @@ p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome, nrow=
rel_heights = c(1,8)
)
#p1
###########################################################
ppi2T = paste0(common_bp_title, " PP-interface")
ppi2_affT = ggdraw() +
draw_label(
ppi2T,
fontfamily = title_theme$family,
fontface = title_theme$face,
#size = title_theme$size
size = 8
)
###########################################################
p2 = cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome, nrow=2),
cowplot::plot_grid(ppi2P, posC_ppi2
, nrow = 1
@ -162,7 +179,17 @@ p2 = cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome, nro
nrow = 2,
rel_heights = c(1,8)
)
#p2
###########################################################
# PE + All position count
peT_allT = ggdraw() +
draw_label(
paste0("All mutation sites"),
fontfamily = title_theme$family,
fontface = title_theme$face,
#size = title_theme$size
size = 8
)
p3 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
, rel_widths = c(1,3),axis = "lr"),
@ -174,16 +201,14 @@ p3 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
axis = "lr",
rel_heights = c(1,8)
),
rel_heights = c(1,10),
rel_heights = c(1,18),
nrow = 2,axis = "lr")
p3
#===============
# Final combine
#===============
w = 11.75
h = 3.7
w = 11.79
h = 3.5
mut_impact_CLP = paste0(outdir_images
,tolower(gene)
,"_mut_impactCLP.png")
@ -229,9 +254,21 @@ conCLP = paste0(outdir_images
,tolower(gene)
,"_consurf_BP.png")
print(paste0("plot filename:", sens_conP))
png(sens_conP, units = "in", width = w, height = h, res = 300 )
print(paste0("plot filename:", conCLP))
png(conCLP, units = "in", width = w, height = h, res = 300 )
consurfP
dev.off()
#================================
# Sensitivity numbers: geom_tile
#================================
# Write the sensitivity-count tile plot (sensP) to a 1x1 inch PNG at 300 dpi.
sensCLP = paste0(outdir_images
, tolower(gene)
, "_sensN_tile.png")
print(paste0("plot filename:", sensCLP))
png(sensCLP, units = "in", width = 1, height = 1, res = 300 )
# ggplot objects are only drawn when printed. Printing explicitly keeps the
# PNG from coming out blank when this script is run through source(), where
# top-level values are not auto-printed.
print(sensP)
dev.off()

View file

@ -1,182 +0,0 @@
colnames(str_df_short)
table(str_df_short$effect_type)
table(str_df_short$effect_sign)
str(str_df_short)
str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS")
table(str_df_short$pe_outcome )
table(str_df_short$effect_sign)
affcols = c("affinity_scaled", "mmcsm_lig_scaled")
ppi2_cols = c("mcsm_ppi2_scaled")
#lig
table(str_df_short$effect_type)
str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols
, "affinity"
, str_df_short$effect_type)
table(str_df_short$effect_grouped)
#ppi2
str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols
, "ppi2"
, str_df_short$effect_grouped)
table(str_df_short$effect_grouped)
#stability
str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("affinity", "ppi2")
, "stability"
, str_df_short$effect_grouped)
table(str_df_short$effect_grouped)
# create a sign as well
str_df_short$effect_outcome = paste0(str_df_short$pe_outcome
, str_df_short$effect_grouped)
table(str_df_short$effect_outcome)
# Colour for each <outcome><effect group> label built in effect_outcome
# (pe_outcome "DD"/"SS" pasted onto the grouped effect type).
# The "DDstability" key previously carried a trailing space and so could never
# match a label.
pe_colour_map2 = c( "DDaffinity" = "#ffd700" # gold
, "SSaffinity" = "#f0e68c" # khaki
, "DDppi2" = "#ff1493" # deeppink
, "SSppi2" = "#da70d6" # orchid
, "DDstability" = "#ae301e"
, "SSstability" = "#007d85"
)
# Derive the per-row colour from the single map above instead of repeating the
# codes in a case_when() (which had dropped "SSaffinity"). Labels missing from
# the map keep the previous 'ns' fallback.
str_df_short$effect_colours = unname(pe_colour_map2[as.character(str_df_short$effect_outcome)])
str_df_short$effect_colours[is.na(str_df_short$effect_colours)] = 'ns'
"#F8766D" #red
"#00BFC4" #blue
table(str_df_short$effect_colours)
###########################################
ggplot(str_df_short
, aes( x=effect_grouped
, fill = effect_colours)) +
geom_bar() +
scale_fill_manual(values = str_df_short$effect_colours)
# Hard-coded 2 x 3 table of counts; each vector is one column, ordered to
# match the row names assigned below (Destabilising, Stabilising).
first_col = c(38, 0)
second_col = c(9, 22)
third_col = c(681, 108)
# Build the frame from the vectors defined above (the call previously
# referenced non-existent first_row/second_row/third_row objects and errored).
thing_df = data.frame(first_col, second_col, third_col)
rownames(thing_df) = c("Destabilising","Stabilising")
thing_df
###############################################
rect_colour_map = c("EMB" = "green"
,"DSL" = "slategrey"
, "CDL" = "navyblue"
, "Ca" = "purple")
rects <- data.frame(x = 1:6,
colors = c("#ffd700" #gold
, "#f0e68c" #khaki
, "#da70d6"# orchid
, "#ff1493"# deeppink
, "#00BFC4" #, "#007d85" #blue
, "#F8766D" )# red,
)
rects
rects$text = c("-ve Lig affinty"
, "+ve Lig affinity"
, "+ve PPI2 affinity"
, "-ve PPI2 affinity"
, "+ve stability"
, "-ve stability")
rects$numbers = c(38, 0, 22, 9, 108, 681)
rects$num_labels = paste0("n=", rects$numbers)
rects
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
png(paste0(outdir_images, "test.png")
, width = 0.5
, height = 2.5
, units = "in", res = 300)
ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
geom_tile(width = 1, height = 1) + # make square tiles
geom_text(color = "black", size = 1.5) + # add white text in the middle
scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
coord_fixed() + # make sure tiles are square
coord_flip()+ scale_x_reverse() +
# theme_void() # remove any axis markings
theme_nothing() # remove any axis markings
dev.off()
##########################################################
tile_map=data.frame(tile=c("EMB","DSL","CDL","Ca")
,tile_colour =c("green","darkslategrey","navyblue","purple"))
# great
tile_colour_map = c("EMB" = "green"
,"DSL" = "darkslategrey"
, "CDL" = "navyblue"
, "Ca" = "purple")
tile_legend=get_legend(
ggplot(tile_map, aes(factor(tile),y=0
, colour=tile_colour
, fill=tile_colour))+
geom_tile() +
theme(legend.direction="horizontal") +
scale_colour_manual(name=NULL
#, values = tile_map$tile_colour
, values=tile_colour_map) +
scale_fill_manual(name=NULL
#,values=tile_map$tile_colour
, values = tile_colour_map)
)
#############################################################
###############################################
library(ggplot2)
library(viridis)
library(hrbrthemes)
ggplot(str_df_short, aes(fill=effect_colours,x=effect_type)) +
geom_bar() +
scale_fill_viridis(discrete = T) +
ggtitle("Studying 4 species..")
####################################################

View file

@ -1,366 +0,0 @@
#!/usr/bin/env Rscript
#source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
# get plottting dfs
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
####################################################
# ggpairs wrapper
# Pairwise-plot wrapper around ggpairs().
#
# plot_df: data frame whose LAST column is excluded from the plotted columns
#          and which must contain a `dst_mode` column; rows with
#          dst_mode == 0 are labelled "S", all others "R", and that label
#          drives the colour aesthetic.
# Returns the ggpairs object (Spearman correlations in the upper panels,
# small semi-transparent points/dots in the lower panels).
my_gg_pairs=function(plot_df){
  ggpairs(plot_df, columns = 1:(ncol(plot_df)-1),
          upper = list(continuous = wrap('cor',
                                         method = "spearman",
                                         title="ρ",
                                         digits=2,
                                         title_args=c(colour="black")
                                         )
                       ),
          lower = list(
            continuous = wrap("points", alpha = 0.7, size=0.5),
            combo = wrap("dot", alpha = 0.7, size=0.5)
          ),
          # Refer to the column by bare name so the aesthetic is evaluated
          # inside the data each panel receives; `plot_df$dst_mode` bypasses
          # ggplot2's data masking and is flagged by ggplot2 as discouraged.
          aes(colour = factor(ifelse(dst_mode==0, "S", "R")), alpha = 0.5),
          title="Stability") +
    scale_colour_manual(values = c("red", "blue")) +
    scale_fill_manual(values = c("red", "blue")) +
    theme(
      text = element_text(size=12, face="bold")
    )
}
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
#=======
# Input
#=======
merged_df3 = as.data.frame(merged_df3)
corr_plotdf = corr_data_extract(merged_df3
, gene = gene
, drug = drug
, extract_scaled_cols = F)
colnames(corr_plotdf)
if (all(colnames(corr_df_m3_f) == colnames(corr_plotdf))){
cat("PASS: corr plot colnames match for dashboard")
}else{
stop("Abort: corr plot colnames DO NOT match for dashboard")
}
#corr_plotdf = corr_df_m3_f #for downstream code
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
aff_dist_cols
static_cols = c("Log10(MAF)"
, "Log10(OR)"
#, "-Log10(P)"
)
#================
# stability
#================
#affinity_dist_colnames# lIg DIst and ppi Di
corr_ps_colnames = c(static_cols
, "DUET"
, "FoldX"
, "DeepDDG"
, "Dynamut2"
, aff_dist_cols
, "dst_mode")
if (all(corr_ps_colnames%in%colnames(corr_plotdf))){
cat("PASS: all colnames exist for correlation")
}else{
stop("Abort: all colnames DO NOT exist for correlation")
}
# Keep only the stability-related columns for the correlation plot.
corr_df_ps = corr_plotdf[, corr_ps_colnames]
# Rows with a non-missing odds ratio. The column is named "Log10(OR)" (see
# static_cols); the old `Log(OR)` lookup returned NULL, so nothing was ever
# subtracted and the count always equalled nrow().
complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps[["Log10(OR)"]]))
cat("\nComplete muts for stability for", gene, ":", complete_obs_ps)
color_coln = which(colnames(corr_df_ps) == "dst_mode")
#end = which(colnames(corr_df_ps) == drug)
#ncol_omit = 2
#corr_end = end-ncol_omit
corr_end = color_coln-1
#------------------------
# Output: stability corrP
#------------------------
corr_psP = paste0(outdir_images
,tolower(gene)
,"_corr_stability.svg" )
cat("Corr plot stability with coloured dots:", corr_psP)
svg(corr_psP, width = 15, height = 15)
my_corr_pairs(corr_data_all = corr_df_ps
, corr_cols = colnames(corr_df_ps[1:corr_end])
, corr_method = "spearman"
, colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue")
, density_show = F
, hist_col = "coral4"
, dot_size = 1.6
, ats = 1.5
, corr_lab_size =2.5
, corr_value_size = 1)
dev.off()
#===============
# CONSERVATION
#==============
corr_conservation_cols = c( static_cols
, "ConSurf"
, "SNAP2"
, "PROVEAN"
, aff_dist_cols
, "dst_mode"
, drug)
if (all(corr_conservation_cols%in%colnames(corr_plotdf))){
cat("PASS: all colnames exist for ConSurf-correlation")
}else{
stop("Abort: all colnames DO NOT exist for ConSurf-correlation")
}
# Keep only the conservation-related columns for the correlation plot.
corr_df_cons = corr_plotdf[, corr_conservation_cols]
# Rows with a non-missing odds ratio. The column is named "Log10(OR)" (see
# static_cols); the old `Log(OR)` lookup returned NULL, so nothing was ever
# subtracted and the count always equalled nrow().
complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons[["Log10(OR)"]]))
cat("\nComplete muts for Conservation for", gene, ":", complete_obs_cons)
color_coln = which(colnames(corr_df_cons) == "dst_mode")
# end = which(colnames(corr_df_cons) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
corr_end = color_coln-1
#---------------------------
# Output: Conservation corrP
#----------------------------
corr_consP = paste0(outdir_images
,tolower(gene)
,"_corr_conservation.svg" )
cat("Corr plot conservation coloured dots:", corr_consP)
svg(corr_consP, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_cons
, corr_cols = colnames(corr_df_cons[1:corr_end])
, corr_method = "spearman"
, colour_categ_col = colnames(corr_df_cons[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue")
, density_show = F
, hist_col = "coral4"
, dot_size =1.1
, ats = 1.5
, corr_lab_size = 1.8
, corr_value_size = 1)
dev.off()
#####################################################
#DistCutOff = 10
#LigDist_colname # = "ligand_distance" # from globals
#ppi2Dist_colname = "interface_dist"
#naDist_colname = "TBC"
#####################################################
#================
# ligand affinity
#================
# Restrict to mutations closer to the ligand than DistCutOff.
# which() drops rows whose distance is NA instead of turning them into
# all-NA rows.
corr_df_lig = corr_plotdf[which(corr_plotdf[["Lig-Dist"]] < DistCutOff), ]
corr_lig_colnames = c(static_cols
, "mCSM-lig"
, "mmCSM-lig"
, "dst_mode")
#, drug)
if (all(corr_lig_colnames%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for Lig-correlation")
}else{
  stop("Abort: all colnames DO NOT exist for Lig-correlation")
}
# Select columns from the distance-filtered frame. Previously this line
# re-subset corr_plotdf and silently discarded the distance filter above.
corr_df_lig = corr_df_lig[, corr_lig_colnames]
# Column is "Log10(OR)"; the old `Log(OR)` lookup returned NULL so the count
# always equalled nrow().
complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig[["Log10(OR)"]]))
cat("\nComplete muts for lig affinity for", gene, ":", complete_obs_lig)
color_coln = which(colnames(corr_df_lig) == "dst_mode")
# end = which(colnames(corr_df_lig) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
corr_end = color_coln-1
#------------------------
# Output: ligand corrP
#------------------------
corr_ligP = paste0(outdir_images
,tolower(gene)
,"_corr_lig.svg" )
cat("Corr plot affinity with coloured dots:", corr_ligP)
svg(corr_ligP, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_lig
, corr_cols = colnames(corr_df_lig[1:corr_end])
, corr_method = "spearman"
, colour_categ_col = colnames(corr_df_lig[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue")
, density_show = F
, hist_col = "coral4"
, dot_size = 2
, ats = 1.5
, corr_lab_size =3
, corr_value_size = 1)
dev.off()
####################################################
#================
# ppi2 affinity
#================
if (tolower(gene)%in%geneL_ppi2){
# Restrict to mutations closer to the PP-interface than DistCutOff.
# which() drops rows whose distance is NA instead of turning them into
# all-NA rows.
corr_df_ppi2 = corr_plotdf[which(corr_plotdf[["PPI-Dist"]] < DistCutOff), ]
corr_ppi2_colnames = c(static_cols
, "mCSM-PPI2"
, "dst_mode"
, drug)
if (all(corr_ppi2_colnames%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for mcsm-ppi2 correlation")
}else{
  stop("Abort: all colnames DO NOT exist for mcsm-ppi2 correlation")
}
# Select columns from the distance-filtered frame. Previously this line
# re-subset corr_plotdf and silently discarded the distance filter above.
corr_df_ppi2 = corr_df_ppi2[, corr_ppi2_colnames]
# Column is "Log10(OR)"; the old `Log(OR)` lookup returned NULL so the count
# always equalled nrow().
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2[["Log10(OR)"]]))
cat("\nComplete muts for ppi2 affinity for", gene, ":", complete_obs_ppi2)
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
# end = which(colnames(corr_df_ppi2) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
corr_end = color_coln-1
#------------------------
# Output: ppi2 corrP
#------------------------
corr_ppi2P = paste0(outdir_images
,tolower(gene)
,"_corr_ppi2.svg" )
cat("Corr plot ppi2 with coloured dots:", corr_ppi2P)
svg(corr_ppi2P, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_ppi2
, corr_cols = colnames(corr_df_ppi2[1:corr_end])
, corr_method = "spearman"
, colour_categ_col = colnames(corr_df_ppi2[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue")
, density_show = F
, hist_col = "coral4"
, dot_size = 2
, ats = 1.5
, corr_lab_size = 3
, corr_value_size = 1)
dev.off()
}
# FIXME: ADD distance
#==================
# mCSSM-NA affinity
#==================
#================
# NA affinity
#================
if (tolower(gene)%in%geneL_na){
# Restrict to mutations closer to the nucleic acid than DistCutOff.
# The filter now starts from corr_plotdf: corr_df_na was previously read here
# before it had been created. which() drops rows whose distance is NA.
corr_df_na = corr_plotdf[which(corr_plotdf[["NA-Dist"]] < DistCutOff), ]
corr_na_colnames = c(static_cols
, "mCSM-NA"
, "dst_mode"
, drug)
if (all(corr_na_colnames%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for mcsm-NA-correlation")
}else{
  stop("Abort: all colnames DO NOT exist for mcsm-NA-correlation")
}
# Select columns from the distance-filtered frame rather than from
# corr_plotdf, so the distance filter above is kept.
corr_df_na = corr_df_na[, corr_na_colnames]
# Column is "Log10(OR)"; the old `Log(OR)` lookup returned NULL so the count
# always equalled nrow().
complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na[["Log10(OR)"]]))
cat("\nComplete muts for NA affinity for", gene, ":", complete_obs_na)
color_coln = which(colnames(corr_df_na) == "dst_mode")
# end = which(colnames(corr_df_na) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
corr_end = color_coln-1
#------------------------
# Output: mCSM-NA corrP
#------------------------
corr_naP = paste0(outdir_images
,tolower(gene)
,"_corr_na.svg" )
cat("Corr plot mCSM-NA with coloured dots:", corr_naP)
svg(corr_naP, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_na
, corr_cols = colnames(corr_df_na[1:corr_end])
, corr_method = "spearman"
, colour_categ_col = colnames(corr_df_na[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue")
, density_show = F
, hist_col = "coral4"
, dot_size = 2
, ats = 1.5
, corr_lab_size = 3
, corr_value_size = 1)
dev.off()
}
####################################################
#===============
#ggpairs:
#================
#corr_df_ps$dst_mode = ifelse(corr_df_cons$dst_mode=="1", "R", "S")
# Render the stability ggpairs plot to scratch SVG and PNG files.
corr_plotting_df = corr_df_ps

# svg() sizes are always in inches and the device has no `units` or `res`
# arguments; passing them made the call fail with "unused arguments".
svg('~/tmp/foo.svg',
    width=10,
    height=10)
# print() explicitly so the plot is drawn even when the script is source()d.
print(my_gg_pairs(corr_plotting_df))
dev.off()

png('~/tmp/foo.png',
    width=10,
    height=10,
    units="in",
    res=300)
print(my_gg_pairs(corr_plotting_df))
dev.off()
#

View file

@ -21,12 +21,19 @@ png('~/tmp/foo.png',
units="in",
res=300)
#
corr_plotting_df = corr_df_ps
ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
#corr_plotting_df = corr_df_ps
colnames(corr_plotdf)
corr_plotting_df = subset(corr_plotdf, select = -c(ethambutol,`Log10(OR)`,`-Log10(P)`, ASA, RSA, KD, RD
, FoldX
, DeepDDG
, Dynamut2 ))
colnames(corr_plotting_df)
#ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)),
upper = list(continuous = wrap('cor',
method = "spearman",
use = "pairwise.complete.obs",
title="ρ",
digits=2,
title_args=c(colour="black")
@ -36,7 +43,7 @@ ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
continuous = wrap("points", alpha = 0.7, size=0.5),
combo = wrap("dot", alpha = 0.7, size=0.5)
),
aes(colour = factor(ifelse(corr_plotting_df$dst_mode==0, "S", "R")), alpha = 0.5),
aes(colour = factor(ifelse(dst_mode==0, "S", "R")), alpha = 0.5),
title="Stability") +
scale_colour_manual(values = c("red", "blue")) +

View file

@ -1,51 +1,88 @@
source("~/git/LSHTM_analysis/config/embb.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
my_gg_pairs=function(plot_df){
my_gg_pairs=function(plot_df, plot_title
, tt_args_size = 2.5
, gp_args_size = 2.5){
ggpairs(plot_df,
columns = 1:(ncol(plot_df)-1),
upper = list(
continuous = wrap('cor',
continuous = wrap('cor', # ggally_cor()
method = "spearman",
use = "pairwise.complete.obs",
title="ρ",
digits=2,
justify_labels = "left",
title_args=c(colour="black")
justify_labels = "centre",
#title_args=c(colour="black"),
title_args=c(size=tt_args_size),#2.5
group_args=c(size=gp_args_size)#2.5
)
),
lower = list(
continuous = wrap("points",
alpha = 0.7,
size=0.5),
size=0.125),
combo = wrap("dot",
alpha = 0.7,
size=0.5)
size=0.125)
),
aes(colour = factor(ifelse(plot_df$dst_mode==0,
aes(colour = factor(ifelse(dst_mode==0,
"S",
"R") ),
alpha = 0.5),
title="Stability") +
title=plot_title) +
scale_colour_manual(values = c("red", "blue")) +
scale_fill_manual(values = c("red", "blue")) +
theme(text = element_text(size=12,
face="bold") )
scale_fill_manual(values = c("red", "blue")) #+
# theme(text = element_text(size=7,
# face="bold"))
}
DistCutOff = 10
###########################################################################
geneL_normal = c("pnca")
geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
merged_df3 = as.data.frame(merged_df3)
corr_plotdf = corr_data_extract(merged_df3
, gene = gene
, drug = drug
, extract_scaled_cols = F)
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
static_cols = c("Log10(MAF)"
, "Log10(OR)")
static_cols = c("Log10(MAF)")
#, "Log10(OR)")
############################################################
#=============================================
# Creating masked df for affinity data
#=============================================
corr_affinity_df = corr_plotdf
#----------------------
# Mask affinity columns
#-----------------------
corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mCSM-lig"]=0
corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mmCSM-lig"]=0
if (tolower(gene)%in%geneL_ppi2){
corr_affinity_df[corr_affinity_df["PPI-Dist"]>DistCutOff,"mCSM-PPI2"]=0
}
# if (tolower(gene)%in%geneL_na){
# corr_affinity_df[corr_affinity_df["NA-Dist"]>DistCutOff,"mCSM-NA"]=0
# }
# count 0
#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100
unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0)
unmasked_vals
##########################################################
#================
# Stability
#================
corr_ps_colnames = c(static_cols
, "DUET"
, "FoldX"
@ -54,14 +91,13 @@ corr_ps_colnames = c(static_cols
, aff_dist_cols
, "dst_mode")
corr_df_ps = corr_plotdf[, corr_ps_colnames]
complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps$`Log(OR)`))
color_coln = which(colnames(corr_df_ps) == "dst_mode")
corr_end = color_coln-1
# Plot #1
plot_corr_df_ps = my_gg_pairs(corr_df_ps)
plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability features")
##########################################################
#================
# Conservation
#================
corr_conservation_cols = c( static_cols
, "ConSurf"
, "SNAP2"
@ -71,74 +107,66 @@ corr_conservation_cols = c( static_cols
)
corr_df_cons = corr_plotdf[, corr_conservation_cols]
complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons$`Log(OR)`))
color_coln = which(colnames(corr_df_cons) == "dst_mode")
corr_end = color_coln-1
# Plot #2
plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation features")
#my_gg_pairs(corr_df_cons)
plot_corr_df_cons = my_gg_pairs(corr_df_cons)
##########################################################
#================
# Affinity: lig, ppi and na as applicable
#================
#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
# Ligand affinity columns that are used for every gene
common_aff_colnames = c("mCSM-lig"
, "mmCSM-lig")
# Choose the affinity columns by gene list membership: the common columns
# alone for geneL_normal, plus "mCSM-PPI2" for geneL_ppi2.
# NOTE(review): aff_colnames is only assigned inside these if-blocks, so a
# gene that is in none of the lists leaves it unset -- confirm the lists
# cover every gene.
if (tolower(gene)%in%geneL_normal){
aff_colnames = common_aff_colnames
}
if (tolower(gene)%in%geneL_ppi2){
aff_colnames = c(common_aff_colnames, "mCSM-PPI2")
}
# NOTE(review): the corr_df_lig / corr_lig_colnames statements in this section
# are interleaved with the aff_colnames logic and look like the ligand-only
# version of the code. The row-filtered corr_df_lig assigned here is
# overwritten by the column subset further down -- confirm what is still live.
corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
corr_lig_colnames = c(static_cols
, "mCSM-lig"
, "mmCSM-lig"
, "dst_mode")
# Genes in geneL_na get "mCSM-NA" added to the common columns
if (tolower(gene)%in%geneL_na){
aff_colnames = c(common_aff_colnames, "mCSM-NA")
}
corr_df_lig = corr_plotdf[, corr_lig_colnames]
# building final affinity colnames for correlation
corr_aff_colnames = c(static_cols
, aff_colnames
, "dst_mode") # imp
# Columns are taken from corr_affinity_df here, not from corr_plotdf
corr_df_aff = corr_affinity_df[, corr_aff_colnames]
colnames(corr_df_aff)
# Number of rows of corr_df_lig whose Log(OR) is not NA
complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig$`Log(OR)`))
# Index of the "dst_mode" column in corr_df_lig, and of the column before it
color_coln = which(colnames(corr_df_lig) == "dst_mode")
corr_end = color_coln-1
# Plot #3
plot_corr_df_aff = my_gg_pairs(corr_df_aff, plot_title="Affinity features", tt_args_size = 4, gp_args_size =4)
#my_gg_pairs(corr_df_lig)
plot_corr_df_lig = my_gg_pairs(corr_df_lig)
#=============
# combine
#=============
# Data for a PPI2-only correlation panel.
# NOTE(review): the row filter on "PPI-Dist" below is discarded, because
# corr_df_ppi2 is reassigned from the full corr_plotdf a few lines later --
# confirm whether the filter is meant to apply.
corr_df_ppi2 = corr_plotdf[corr_plotdf["PPI-Dist"]<DistCutOff,]
corr_ppi2_colnames = c(static_cols
, "mCSM-PPI2"
, "dst_mode"
)
# Column subset of corr_plotdf (all rows)
corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames]
# Number of rows whose Log(OR) is not NA
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2$`Log(OR)`))
# Index of the "dst_mode" column, and of the column immediately before it
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
corr_end = color_coln-1
#png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
# Open a PNG device for the combined figure; the file name is
# outdir_images + lower-cased gene + "_CorrAB.png"
png(paste0(outdir_images
,tolower(gene)
,"_CorrAB.png"), height = 6, width=11.75, unit="in",res=300)
# NOTE: DELETE LOG OR FROM CORRELATION PLOTS!!!!!
# NOTE: ALSO MAYBE DELETE DISTANCES AS WELL
# NOTE: http://ggobi.github.io/ggally/reference/ggally_cor.html
# "***" if the p-value is < 0.001
# "**" if the p-value is < 0.01
# "*" if the p-value is < 0.05
# "." if the p-value is < 0.10
# "" otherwise
#
# Plot #4
#my_gg_pairs(corr_df_ppi2)
plot_corr_df_ppi2 = my_gg_pairs(corr_df_ppi2)
# corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,]
# corr_na_colnames = c(static_cols
# , "mCSM-NA"
# , "dst_mode"
# )
#
# corr_df_na = corr_plotdf[, corr_na_colnames]
# complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na$`Log(OR)`))
# color_coln = which(colnames(corr_df_na) == "dst_mode")
# corr_end = color_coln-1
#
# # Plot #5
# #my_gg_pairs(corr_df_na)
# plot_corr_df_na = my_gg_pairs(corr_df_na)
# NOTE(review): this opens a second PNG device while the "_CorrAB.png" device
# above is still open, and only one dev.off() follows the two plot_grid calls.
# This png()/plot_grid pair looks like the earlier version of the output code
# -- confirm and remove it if so.
# NOTE(review): in "rel_heights = 7,7,3,3" only the first 7 is bound to
# rel_heights; the remaining 7, 3, 3 are passed as separate arguments. A
# vector, c(7,7,3,3), was presumably intended.
png("/tmp/gg_pairs_all.png", height = 8, width=11.75, unit="in",res=300)
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),ggmatrix_gtable(plot_corr_df_cons),
ggmatrix_gtable(plot_corr_df_lig),ggmatrix_gtable(plot_corr_df_ppi2),
nrow=2, ncol=2, rel_heights = 7,7,3,3)
# Stability and conservation panels in a single row, auto-labelled
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
ggmatrix_gtable(plot_corr_df_cons),
# ggmatrix_gtable(plot_corr_df_aff),
# nrow=1, ncol=3, rel_heights = 7,7,3
nrow=1,
#rel_heights = 1,1
labels = "AUTO",
label_size = 12)
dev.off()
# affinity corr
#png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300)
# The affinity panel goes to its own file, "<gene>_CorrC.png", labelled "C"
png(paste0(outdir_images
,tolower(gene)
,"_CorrC.png"), height =7, width=7, unit="in",res=300)
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff),
labels = "C",
label_size = 12)
dev.off()

View file

@ -154,6 +154,11 @@ for (i in unique(str_df$position) ){
# Remove the first ".<digits>" run from every effect_type value
str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values
colnames(str_df)
# check
# Rows for a handful of positions, kept for manual inspection
str_df_check = str_df[str_df$position%in%c(24, 32,160, 303, 334),]
table(str_df$effect_type)
#================
# for Plots
#================
# NOTE(review): the line starting with "@ -161,9 +166,56 @@" is a diff hunk
# header, not R code; columns between "sensitivity" and "effect_type" are not
# visible here.
@ -161,9 +166,56 @@ str_df_short = str_df[, c("mutationinformation","position","sensitivity"
, "effect_type"
, "effect_sign")]
# check
# NOTE(review): the next two statements repeat the check above verbatim; this
# looks like the pre-move copy of the same lines -- confirm and drop one.
str_df_check = str_df[str_df$position%in%c(24, 32,160, 303, 334),]
table(str_df$effect_type)
# Frequency tables and structure of the reduced data frame
table(str_df_short$effect_type)
table(str_df_short$effect_sign)
str(str_df_short)
# Outcome label from the sign of the effect: "SS" when effect_sign is
# >= 0, "DD" when it is negative
str_df_short$pe_outcome <- ifelse(str_df_short$effect_sign >= 0, "SS", "DD")
table(str_df_short$pe_outcome )
table(str_df_short$effect_sign)

#==============
# Collapse effect_type into three groups:
# lig, ppi2, stability (nuc. acid currently disabled)
#==============
affcols <- c("affinity_scaled", "mmcsm_lig_scaled")
ppi2_cols <- c("mcsm_ppi2_scaled")
#nuc_na_cols = c("mcsm_a_scaled")

# Step 1 (lig): both ligand affinity columns become "lig"; every other
# value is carried over unchanged for now
table(str_df_short$effect_type)
str_df_short$effect_grouped <- ifelse(
  str_df_short$effect_type %in% affcols,
  "lig",
  str_df_short$effect_type
)
table(str_df_short$effect_grouped)

# Step 2 (ppi2): the PPI2 column becomes "ppi2"
str_df_short$effect_grouped <- ifelse(
  str_df_short$effect_grouped %in% ppi2_cols,
  "ppi2",
  str_df_short$effect_grouped
)
table(str_df_short$effect_grouped)

# Step 3 (stability): whatever is neither "lig" nor "ppi2" by now is
# labelled "stability"
str_df_short$effect_grouped <- ifelse(
  str_df_short$effect_grouped %in% c("lig", "ppi2"),
  str_df_short$effect_grouped,
  "stability"
)
table(str_df_short$effect_grouped)

# Combined label "<pe_outcome>_<effect_grouped>"
str_df_short$pe_effect_outcome <- paste0(
  str_df_short$pe_outcome, "_", str_df_short$effect_grouped
)
table(str_df_short$pe_effect_outcome)
#####################################################################
# Chimera: for colouring
####################################################################
#-------------------------------------
# get df with unique position