rearranged corr plot cols and also added example for ggpairs

This commit is contained in:
Tanushree Tunstall 2022-08-11 20:56:34 +01:00
parent fdb3f00503
commit b302daaa60
7 changed files with 227 additions and 681 deletions

View file

@ -8,6 +8,7 @@ source("~/git/LSHTM_analysis/config/embb.R")
# get plotting dfs
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
####################################################
#=======
# output
@ -23,55 +24,126 @@ corr_plotdf = corr_data_extract(merged_df3
, drug = drug
, extract_scaled_cols = F)
colnames(corr_plotdf)
colnames(corr_df_m3_f)
corr_plotdf = corr_df_m3_f #for downstream code
# Sanity check: the dashboard data frame (corr_df_m3_f) and the freshly
# extracted corr_plotdf must expose exactly the same column names, in the
# same order. identical() is used instead of all(a == b) because `==`
# recycles when the two name vectors differ in length, which can raise a
# warning or even pass by accident; identical() simply returns FALSE.
if (identical(colnames(corr_df_m3_f), colnames(corr_plotdf))) {
  cat("PASS: corr plot colnames match for dashboard")
} else {
  stop("Abort: corr plot colnames DO NOT match for dashboard")
}
#corr_plotdf = corr_df_m3_f #for downstream code
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
aff_dist_cols
static_cols = c("Log(MAF)"
, "Log(OR)"
#, "-Log(P)"
)
#================
# stability
#================
corr_ps_colnames = c("DUET"
# affinity_dist_colnames: Lig-Dist and PPI-Dist
corr_ps_colnames = c(static_cols
, "DUET"
, "FoldX"
, "DeepDDG"
, "Dynamut2"
, "MAF"
, "Log(OR)"
, "-Log(P)"
#, "ligand_distance"
, "dst_mode"
, drug)
, aff_dist_cols
, "dst_mode")
corr_ps_colnames%in%colnames(corr_plotdf)
if (all(corr_ps_colnames%in%colnames(corr_plotdf))){
cat("PASS: all colnames exist for correlation")
}else{
stop("Abort: all colnames DO NOT exist for correlation")
}
# Keep only the columns selected for the stability correlation plot and
# report how many mutations have a non-missing Log(OR) value.
corr_df_ps = corr_plotdf[, corr_ps_colnames]
complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps$`Log(OR)`))
# Message label corrected: this count belongs to the stability section
# (it previously read "Conservation", copied from the conservation block).
cat("\nComplete muts for stability for", gene, ":", complete_obs_ps)
color_coln = which(colnames(corr_df_ps) == "dst_mode")
end = which(colnames(corr_df_ps) == drug)
ncol_omit = 2
corr_end = end-ncol_omit
#end = which(colnames(corr_df_ps) == drug)
#ncol_omit = 2
#corr_end = end-ncol_omit
corr_end = color_coln-1
#------------------------
# Output: stability corrP
#------------------------
corr_psP = paste0(outdir_images
,tolower(gene)
,"_corr_stability.svg" )
,tolower(gene)
,"_corr_stability.svg" )
cat("Corr plot stability with coloured dots:", corr_psP)
svg(corr_psP, width = 15, height = 15)
my_corr_pairs(corr_data_all = corr_df_ps
, corr_cols = colnames(corr_df_ps[1:corr_end])
, corr_method = "spearman"
, colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue")
, density_show = F
, hist_col = "coral4"
, dot_size = 1.6
, ats = 1.5
, corr_lab_size = 3
, corr_value_size = 1)
, corr_cols = colnames(corr_df_ps[1:corr_end])
, corr_method = "spearman"
, colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue")
, density_show = F
, hist_col = "coral4"
, dot_size = 1.6
, ats = 1.5
, corr_lab_size = 3
, corr_value_size = 1)
dev.off()
#===============
# CONSERVATION
#==============
# Columns for the conservation correlation plot, in plotting order: the
# static columns, the conservation predictors, the distance columns, and
# finally the colouring column ("dst_mode") followed by the drug column.
corr_conservation_cols <- c(
  static_cols,
  "ConSurf",
  "SNAP2",
  "PROVEAN",
  aff_dist_cols,
  "dst_mode",
  drug
)

# Abort early if any requested column is absent from corr_plotdf.
if (!all(corr_conservation_cols %in% colnames(corr_plotdf))) {
  stop("Abort: all colnames DO NOT exist for ConSurf-correlation")
} else {
  cat("PASS: all colnames exist for ConSurf-correlation")
}

# Subset to the conservation columns and count the rows with a
# non-missing Log(OR).
corr_df_cons <- corr_plotdf[, corr_conservation_cols]
complete_obs_cons <- nrow(corr_df_cons) - sum(is.na(corr_df_cons[["Log(OR)"]]))
cat("\nComplete muts for Conservation for", gene, ":", complete_obs_cons)

# Everything to the left of "dst_mode" is correlated; "dst_mode" itself is
# only used to colour the points.
color_coln <- which(colnames(corr_df_cons) == "dst_mode")
corr_end <- color_coln - 1

#---------------------------
# Output: Conservation corrP
#----------------------------
corr_consP <- paste0(outdir_images, tolower(gene), "_corr_conservation.svg")
cat("Corr plot conservation coloured dots:", corr_consP)

# Draw the pairs plot into the svg device, then close the device.
svg(corr_consP, width = 10, height = 10)
my_corr_pairs(
  corr_data_all = corr_df_cons,
  corr_cols = colnames(corr_df_cons)[1:corr_end],
  corr_method = "spearman",
  colour_categ_col = colnames(corr_df_cons)[color_coln], # "dst_mode"
  categ_colour = c("red", "blue"),
  density_show = FALSE,
  hist_col = "coral4",
  dot_size = 1.1,
  ats = 1.5,
  corr_lab_size = 2.15,
  corr_value_size = 1
)
dev.off()
#####################################################
#DistCutOff = 10
#LigDist_colname # = "ligand_distance" # from globals
@ -82,31 +154,36 @@ dev.off()
#================
# ligand affinity
#================
corr_lig_colnames = c("mCSM-lig"
, "MAF"
, "Log(OR)"
, "-Log(P)"
, "Lig-Dist"
, "dst_mode"
, drug)
corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
corr_lig_colnames = c(static_cols
, "mCSM-lig"
, "mmCSM-lig"
, "dst_mode")
#, drug)
if (all(corr_lig_colnames%in%colnames(corr_plotdf))){
cat("PASS: all colnames exist for Lig-correlation")
}else{
stop("Abort: all colnames DO NOT exist for Lig-correlation")
}
corr_lig_colnames%in%colnames(corr_plotdf)
# Keep only the ligand-correlation columns, then keep the rows whose
# "Lig-Dist" value is below DistCutOff.
# NOTE(review): the static_cols-based corr_lig_colnames has no "Lig-Dist"
# entry, so the row filter below would reference a column that is missing
# from the subset frame -- confirm which column list is live, and filter on
# corr_plotdf before subsetting columns if "Lig-Dist" is meant to be dropped.
corr_df_lig = corr_plotdf[, corr_lig_colnames]
corr_df_lig = corr_df_lig[corr_df_lig["Lig-Dist"]<DistCutOff,]
complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig$`Log(OR)`))
cat("\nComplete muts for lig affinity for", gene, ":", complete_obs_lig)
color_coln = which(colnames(corr_df_lig) == "dst_mode")
end = which(colnames(corr_df_lig) == drug)
ncol_omit = 3 #omit dist col
corr_end = end-ncol_omit
# end = which(colnames(corr_df_lig) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
corr_end = color_coln-1
#------------------------
# Output: ligand corrP
#------------------------
corr_ligP = paste0(outdir_images
,tolower(gene)
,"_corr_lig.svg" )
,tolower(gene)
,"_corr_lig.svg" )
cat("Corr plot affinity with coloured dots:", corr_ligP)
svg(corr_ligP, width = 10, height = 10)
@ -127,32 +204,38 @@ dev.off()
#================
# ppi2 affinity
#================
if (tolower(gene)%in%geneL_ppi2){
corr_ppi2_colnames = c("mCSM-PPI2"
, "MAF"
, "Log(OR)"
, "-Log(P)"
, "PPI-Dist" # "interface_dist"
corr_df_ppi2 = corr_plotdf[corr_plotdf["PPI-Dist"]<DistCutOff,]
corr_ppi2_colnames = c(static_cols
, "mCSM-PPI2"
, "dst_mode"
, drug)
corr_ppi2_colnames%in%colnames(corr_plotdf)
if (all(corr_ppi2_colnames%in%colnames(corr_plotdf))){
cat("PASS: all colnames exist for mcsm-ppi2 correlation")
}else{
stop("Abort: all colnames DO NOT exist for mcsm-ppi2 correlation")
}
# Keep only the ppi2-correlation columns, then keep the rows whose
# "PPI-Dist" value is below DistCutOff.
# NOTE(review): the static_cols-based corr_ppi2_colnames has no "PPI-Dist"
# entry, so the row filter below would reference a column that is missing
# from the subset frame -- confirm which column list is live, and filter on
# corr_plotdf before subsetting columns if "PPI-Dist" is meant to be dropped.
corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames]
corr_df_ppi2 = corr_df_ppi2[corr_df_ppi2["PPI-Dist"]<DistCutOff,]
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2$`Log(OR)`))
cat("\nComplete muts for ppi2 affinity for", gene, ":", complete_obs_ppi2)
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
end = which(colnames(corr_df_ppi2) == drug)
ncol_omit = 3 #omit dist col
corr_end = end-ncol_omit
# end = which(colnames(corr_df_ppi2) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
corr_end = color_coln-1
#------------------------
# Output: ppi2 corrP
#------------------------
corr_ppi2P = paste0(outdir_images
,tolower(gene)
,"_corr_ppi2.svg" )
,tolower(gene)
,"_corr_ppi2.svg" )
cat("Corr plot ppi2 with coloured dots:", corr_ppi2P)
svg(corr_ppi2P, width = 10, height = 10)
@ -180,25 +263,29 @@ if (tolower(gene)%in%geneL_ppi2){
# NA affinity
#================
if (tolower(gene)%in%geneL_na){
corr_na_colnames = c("mCSM-NA"
, "MAF"
, "Log(OR)"
, "-Log(P)"
, "NA-Dist" # "NA_dist"
corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,]
corr_na_colnames = c(static_cols
, "mCSM-NA"
, "dst_mode"
, drug)
if (all(corr_na_colnames%in%colnames(corr_plotdf))){
cat("PASS: all colnames exist for mcsm-NA-correlation")
}else{
stop("Abort: all colnames DO NOT exist for mcsm-NA-correlation")
}
corr_na_colnames%in%colnames(corr_plotdf)
# Keep only the NA-affinity correlation columns, then keep the rows whose
# "NA-Dist" value is below DistCutOff.
# NOTE(review): the static_cols-based corr_na_colnames has no "NA-Dist"
# entry, so the row filter below would reference a column that is missing
# from the subset frame -- confirm which column list is live, and filter on
# corr_plotdf before subsetting columns if "NA-Dist" is meant to be dropped.
corr_df_na = corr_plotdf[, corr_na_colnames]
corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,]
complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na$`Log(OR)`))
cat("\nComplete muts for NA affinity for", gene, ":", complete_obs_na)
color_coln = which(colnames(corr_df_na) == "dst_mode")
end = which(colnames(corr_df_na) == drug)
ncol_omit = 3 #omit dist col
corr_end = end-ncol_omit
# end = which(colnames(corr_df_na) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
corr_end = color_coln-1
#------------------------
# Output: mCSM-NA corrP
@ -224,51 +311,21 @@ if (tolower(gene)%in%geneL_na){
dev.off()
}
####################################################
# CONSERVATION
corr_conservation_cols = c("ConSurf"
, "SNAP2"
, "PROVEAN"
, "MAF"
, "Log(OR)"
, "-Log(P)"
, "dst_mode"
, drug)
#===============
#ggpairs:
#================
#corr_df_ps$dst_mode = ifelse(corr_df_cons$dst_mode=="1", "R", "S")
colnames(corr_plotdf)
corr_conservation_cols%in%colnames(corr_plotdf)
corr_df_cons = corr_plotdf[, corr_conservation_cols]
complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons$`Log(OR)`))
cat("\nComplete muts for Conservation for", gene, ":", complete_obs_cons)
svg('/tmp/foo.svg', width=10, height=10, )
color_coln = which(colnames(corr_df_cons) == "dst_mode")
end = which(colnames(corr_df_cons) == drug)
ncol_omit = 2
corr_end = end-ncol_omit
ggpairs(corr_df_ps, columns = 1:(ncol(corr_df_ps)-corr_end)
, upper = list(continuous = wrap('cor', method = "spearman"))
, aes(colour = factor(dst_mode), alpha = 0.5)
, title="correlogram with ggpairs()") +
scale_colour_manual(values = c("red", "blue")) +
scale_fill_manual(values = c("red", "blue"))
#---------------------------
# Output: Conservation corrP
#----------------------------
corr_consP = paste0(outdir_images
,tolower(gene)
,"_corr_conservation.svg" )
cat("Corr plot conservation coloured dots:", corr_consP)
svg(corr_consP, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_cons
, corr_cols = colnames(corr_df_cons[1:corr_end])
, corr_method = "spearman"
, colour_categ_col = colnames(corr_df_cons[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue")
, density_show = F
, hist_col = "coral4"
, dot_size =1.1
, ats = 1.5
, corr_lab_size = 2.5
, corr_value_size = 1)
dev.off()

View file

@ -9,6 +9,7 @@ source("~/git/LSHTM_analysis/config/embb.R")
# get plotting dfs
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
#=======
# output
#=======