LSHTM_analysis/scripts/plotting/plotting_thesis/gg_pairs_all.R
2022-08-14 12:17:42 +01:00

144 lines
4.5 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

source("~/git/LSHTM_analysis/config/embb.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# Draw a GGally pairs plot for one correlation data frame.
#
# plot_df: data frame whose LAST column is the grouping column `dst_mode`;
#          every column before it is plotted. Rows with dst_mode == 0 are
#          labelled "S", all others "R".
# Returns the ggmatrix object built by ggpairs() with manual colour/fill
# scales and a bold 12pt text theme added.
my_gg_pairs = function(plot_df) {

  # Everything except the final (grouping) column goes into the matrix.
  last_plotted_col = ncol(plot_df) - 1

  # Upper triangle: Spearman correlation, printed to 2 digits.
  upper_panels = list(
    continuous = wrap("cor",
                      method = "spearman",
                      title = "ρ",
                      digits = 2,
                      justify_labels = "left",
                      title_args = c(colour = "black"))
  )

  # Lower triangle: small, semi-transparent points / dots.
  lower_panels = list(
    continuous = wrap("points", alpha = 0.7, size = 0.5),
    combo      = wrap("dot",    alpha = 0.7, size = 0.5)
  )

  pairs_plot = ggpairs(plot_df,
                       mapping = aes(colour = factor(ifelse(plot_df$dst_mode==0,
                                                            "S",
                                                            "R") ),
                                     alpha = 0.5),
                       columns = 1:last_plotted_col,
                       title   = "Stability",
                       upper   = upper_panels,
                       lower   = lower_panels)

  # Factor levels sort alphabetically ("R", "S"), so "R" is red and "S" blue.
  group_colours = c("red", "blue")

  pairs_plot +
    scale_colour_manual(values = group_colours) +
    scale_fill_manual(values = group_colours) +
    theme(text = element_text(size = 12, face = "bold"))
}
#===========================================================================
# Shared inputs for the correlation pair plots + Plot #1 (stability)
#===========================================================================

# Cut-off compared against the "Lig-Dist" / "PPI-Dist" columns further down.
# NOTE(review): units are not visible in this script (presumably Angstroms).
DistCutOff = 10

# merged_df3, gene, drug and corr_data_extract() are not defined in this
# script; presumably they come from the files source()d at the top.
merged_df3 = as.data.frame(merged_df3)

corr_plotdf = corr_data_extract(merged_df3
                                , gene = gene
                                , drug = drug
                                , extract_scaled_cols = FALSE)

# Every column whose name contains "Dist".
aff_dist_cols = grep("Dist", colnames(corr_plotdf), value = TRUE)

# Columns shared by every plot below.
static_cols = c("Log10(MAF)"
                , "Log10(OR)")

corr_ps_colnames = c(static_cols
                     , "DUET"
                     , "FoldX"
                     , "DeepDDG"
                     , "Dynamut2"
                     , aff_dist_cols
                     , "dst_mode")

corr_df_ps = corr_plotdf[, corr_ps_colnames]

# Rows with a non-missing odds ratio. The column is "Log10(OR)" (see
# static_cols); the previous `Log(OR)` lookup matched no column, returned
# NULL, and therefore always counted zero missing values.
complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps[["Log10(OR)"]]))

# Position of the grouping column and of the last column before it.
color_coln = which(colnames(corr_df_ps) == "dst_mode")
corr_end = color_coln - 1

# Plot #1
plot_corr_df_ps = my_gg_pairs(corr_df_ps)
#===========================================================================
# Plot #2: conservation scores
#===========================================================================
corr_conservation_cols = c(static_cols
                           , "ConSurf"
                           , "SNAP2"
                           , "PROVEAN"
                           , aff_dist_cols
                           , "dst_mode")

corr_df_cons = corr_plotdf[, corr_conservation_cols]

# Rows with a non-missing odds ratio. The column is "Log10(OR)" (see
# static_cols); the previous `Log(OR)` lookup matched no column, returned
# NULL, and therefore always counted zero missing values.
complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons[["Log10(OR)"]]))

# Position of the grouping column and of the last column before it.
color_coln = which(colnames(corr_df_cons) == "dst_mode")
corr_end = color_coln - 1

# Plot #2
plot_corr_df_cons = my_gg_pairs(corr_df_cons)
#===========================================================================
# Plot #3: ligand affinity, restricted to rows with Lig-Dist < DistCutOff
#===========================================================================
corr_lig_colnames = c(static_cols
                      , "mCSM-lig"
                      , "mmCSM-lig"
                      , "dst_mode")

# Fail loudly if the distance column is absent instead of silently
# producing an empty data frame.
stopifnot("Lig-Dist" %in% colnames(corr_plotdf))

# which() drops NA distances; a logical index containing NA would insert
# all-NA rows into the result.
lig_rows = which(corr_plotdf[["Lig-Dist"]] < DistCutOff)

# Select rows and columns in one step. Previously the distance filter was
# computed and then overwritten by re-subsetting the unfiltered corr_plotdf,
# so the cut-off never took effect.
corr_df_lig = corr_plotdf[lig_rows, corr_lig_colnames]

# Rows with a non-missing odds ratio. The column is "Log10(OR)" (see
# static_cols); the previous `Log(OR)` lookup matched no column, returned
# NULL, and therefore always counted zero missing values.
complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig[["Log10(OR)"]]))

# Position of the grouping column and of the last column before it.
color_coln = which(colnames(corr_df_lig) == "dst_mode")
corr_end = color_coln - 1

# Plot #3
plot_corr_df_lig = my_gg_pairs(corr_df_lig)
#===========================================================================
# Plot #4: PPI2 affinity, restricted to rows with PPI-Dist < DistCutOff
#===========================================================================
corr_ppi2_colnames = c(static_cols
                       , "mCSM-PPI2"
                       , "dst_mode")

# Fail loudly if the distance column is absent instead of silently
# producing an empty data frame.
stopifnot("PPI-Dist" %in% colnames(corr_plotdf))

# which() drops NA distances; a logical index containing NA would insert
# all-NA rows into the result.
ppi2_rows = which(corr_plotdf[["PPI-Dist"]] < DistCutOff)

# Select rows and columns in one step. Previously the distance filter was
# computed and then overwritten by re-subsetting the unfiltered corr_plotdf,
# so the cut-off never took effect.
corr_df_ppi2 = corr_plotdf[ppi2_rows, corr_ppi2_colnames]

# Rows with a non-missing odds ratio. The column is "Log10(OR)" (see
# static_cols); the previous `Log(OR)` lookup matched no column, returned
# NULL, and therefore always counted zero missing values.
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2[["Log10(OR)"]]))

# Position of the grouping column and of the last column before it.
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
corr_end = color_coln - 1

# TODO: remove Log10(OR) from the correlation plots.
# TODO: consider removing the distance columns as well.
# NOTE: significance markers printed next to each correlation, as documented at
# http://ggobi.github.io/ggally/reference/ggally_cor.html
#   "***" if the p-value is < 0.001
#   "**"  if the p-value is < 0.01
#   "*"   if the p-value is < 0.05
#   "."   if the p-value is < 0.10
#   ""    otherwise

# Plot #4
plot_corr_df_ppi2 = my_gg_pairs(corr_df_ppi2)
# corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,]
# corr_na_colnames = c(static_cols
# , "mCSM-NA"
# , "dst_mode"
# )
#
# corr_df_na = corr_plotdf[, corr_na_colnames]
# complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na$`Log(OR)`))
# color_coln = which(colnames(corr_df_na) == "dst_mode")
# corr_end = color_coln-1
#
# # Plot #5
# #my_gg_pairs(corr_df_na)
# plot_corr_df_na = my_gg_pairs(corr_df_na)
# Combine the four pair plots into a 2 x 2 grid: stability and conservation
# on the top row, ligand and PPI2 on the bottom row.
# rel_heights takes ONE value per grid row, so it must be a single vector.
# The previous `rel_heights = 7,7,3,3` bound only the first 7 to rel_heights
# and passed the remaining 7, 3, 3 through `...` as if they were plots.
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
                   ggmatrix_gtable(plot_corr_df_cons),
                   ggmatrix_gtable(plot_corr_df_lig),
                   ggmatrix_gtable(plot_corr_df_ppi2),
                   nrow = 2, ncol = 2,
                   rel_heights = c(7, 3))