generated ggpairs plots finally
This commit is contained in:
parent
b68841b337
commit
a3e5283a9b
11 changed files with 657 additions and 939 deletions
|
@ -143,6 +143,9 @@ site_snp_count_bp <- function (plotdf
|
|||
#, legend.position = c(0.73,0.8)
|
||||
#, legend.text = element_text(size = leg_text_size)
|
||||
#, legend.title = element_text(size = axis_label_size)
|
||||
#, panel.grid.major = element_blank(),
|
||||
#, panel.grid.minor = element_blank(),
|
||||
, panel.grid = element_blank()
|
||||
, plot.title = element_text(size = leg_text_size
|
||||
, colour = title_colour
|
||||
, hjust = 0.5)
|
||||
|
|
|
@ -56,6 +56,9 @@ stability_count_bp <- function(plotdf
|
|||
, legend.position = leg_position
|
||||
, legend.text = element_text(size = lts)
|
||||
, legend.title = element_text(size = ltis)
|
||||
#, panel.grid.major = element_blank(),
|
||||
#, panel.grid.minor = element_blank(),
|
||||
, panel.grid = element_blank()
|
||||
, legend.key.size = unit(lts,"pt")
|
||||
, plot.title = element_text(size = als
|
||||
, colour = title_colour
|
||||
|
|
|
@ -4,22 +4,41 @@ library("ggforce")
|
|||
#install.packages("gginference")
|
||||
library(gginference)
|
||||
library(ggpubr)
|
||||
library(svglite)
|
||||
##################################################
|
||||
#%% read data
|
||||
# DOME: read data using gene and drug combination
|
||||
# gene must be lowercase
|
||||
# tolower(gene)
|
||||
#################################################
|
||||
############################################################
|
||||
#gene="pncA"
|
||||
#drug="pyrazinamide"
|
||||
|
||||
#lineage_filename=paste0(tolower(gene),"_merged_df2.csv")
|
||||
#lineage_data_path="~/git/Data/pyrazinamide/output"
|
||||
|
||||
df2 = read.csv(paste0(lineage_data_path,"/",lineage_filename))
|
||||
#=============
|
||||
# Data: Input
|
||||
#==============
|
||||
#source("~/git/LSHTM_analysis/config/alr.R")
|
||||
#source("~/git/LSHTM_analysis/config/embb.R")
|
||||
# source("~/git/LSHTM_analysis/config/gid.R")
|
||||
source("~/git/LSHTM_analysis/config/katg.R")
|
||||
#source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
#source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
|
||||
foo = as.data.frame(colnames(df2))
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
|
||||
|
||||
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
|
||||
cat("plots will output to:", outdir_images)
|
||||
|
||||
###########################################################
|
||||
class(merged_df2)
|
||||
foo = as.data.frame(colnames(merged_df2))
|
||||
|
||||
cols_to_subset = c('mutationinformation'
|
||||
, 'snp_frequency'
|
||||
|
@ -36,7 +55,7 @@ cols_to_subset = c('mutationinformation'
|
|||
|
||||
#cols_to_subset%in%foo
|
||||
|
||||
my_df = df2[ ,cols_to_subset]
|
||||
my_df = merged_df2[ ,cols_to_subset]
|
||||
|
||||
# r24p_embb = df_embb[df_embb$mutationinformation == "R24P",]
|
||||
# #tm = c("A102P", "M1T")
|
||||
|
@ -73,10 +92,9 @@ table(my_df2$lineage)
|
|||
|
||||
sel_lineages2 = c("L1", "L2", "L3", "L4")
|
||||
my_df2 = my_df2[my_df2$lineage%in%sel_lineages2,]
|
||||
table(my_df2$lineage)
|
||||
|
||||
sum(table(my_df2$lineage)) == nrow(my_df2)
|
||||
table(my_df2$lineage)
|
||||
table(my_df2$lineage, my_df2$sensitivity)
|
||||
|
||||
# %%
|
||||
# str(my_df2)
|
||||
|
@ -85,6 +103,7 @@ table(my_df2$lineage)
|
|||
|
||||
#%% get only muts which belong to > 1 lineage and have different sensitivity classifications
|
||||
muts = unique(my_df2$mutationinformation)
|
||||
cat ("Total unique muts in L1-L4", tolower(gene), ":", length(muts))
|
||||
#-----------------------------------------------
|
||||
# step 0 : get muts with more than one lineage
|
||||
#-----------------------------------------------
|
||||
|
@ -100,7 +119,6 @@ for (i in muts) {
|
|||
}
|
||||
cat("\nGot:", length(lin_muts), "mutations belonging to >1 lineage with differing drug sensitivities")
|
||||
|
||||
|
||||
#-----------------------------------------------
|
||||
# step 1 : get other muts that do not have this
|
||||
#-----------------------------------------------
|
||||
|
@ -111,7 +129,6 @@ cat("\nGot:", length(consist_muts), "mutations that are consistent")
|
|||
# step 2: subset these muts for plotting
|
||||
#-----------------------------------------------
|
||||
plot_df = my_df2[my_df2$mutationinformation%in%lin_muts,]
|
||||
|
||||
cat("\nnrow of plot_df:", nrow(plot_df))
|
||||
|
||||
#-----------------------------------------------
|
||||
|
@ -125,7 +142,9 @@ for (i in lin_muts) {
|
|||
s_tab = table(s_mut$lineage, s_mut$sens2)
|
||||
#print(s_tab)
|
||||
#ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3)
|
||||
ft_pvalue_i = fisher.test(s_tab)$p.value
|
||||
ft_pvalue_i = fisher.test(s_tab
|
||||
#, workspace=2e9
|
||||
, simulate.p.value=TRUE,B=1e7)$p.value
|
||||
#print(ft_pvalue_i)
|
||||
plot_df$pval[plot_df$mutationinformation == i] <- ft_pvalue_i
|
||||
#print(s_tab)
|
||||
|
@ -155,8 +174,6 @@ plot_df
|
|||
head(plot_df)
|
||||
table(plot_df$pvalR<0.05)
|
||||
|
||||
|
||||
|
||||
# format p value
|
||||
# TODO: add case statement for correct pvalue formatting
|
||||
#plot_df$pvalF = ifelse(plot_df$pval <= 0.0001, paste0(round(plot_df$pval, 3), "**** "), plot_df$pval )
|
||||
|
@ -233,6 +250,7 @@ cat("\nGot:", sig_muts, "mutations that are significant")
|
|||
plot_df_ns = plot_df2[plot_df2$pvalR>0.05,]
|
||||
ns_muts = length(unique(plot_df_ns$mutationinformation))
|
||||
cat("\nGot:", ns_muts, "mutations that are NOT significant")
|
||||
|
||||
p_title = gene
|
||||
ts = 8
|
||||
gls = 3
|
||||
|
@ -244,7 +262,7 @@ gls = 3
|
|||
#3) Add *: Extend yaxis for each plot to allow geom_label to have space (or see
|
||||
# if this is self-resolving with facet_wrap_paginate())
|
||||
#================================================
|
||||
#svg(paste0(outdir_images, "embb_linDS.svg"), width = 6, height = 10 ) # old-school square 4:3 CRT shape 1.3:1
|
||||
#svg(paste0(outdir_images, tolower(gene), "_linDS.svg"), width = 6, height = 10 ) # old-school square 4:3 CRT shape 1.3:1
|
||||
ds_s = ggplot(plot_df_sig, aes(x = lineage
|
||||
, fill = sens2)) +
|
||||
geom_bar(stat = 'count') +
|
||||
|
@ -280,7 +298,7 @@ ds_s = ggplot(plot_df_sig, aes(x = lineage
|
|||
###################################
|
||||
#ns muts
|
||||
|
||||
#svg(paste0(outdir_images, "embb_linDS_ns.svg"), width =10 , height = 8) # old-school square 4:3 CRT shape 1.3:1
|
||||
#svg(paste0(outdir_images, tolower(gene), "_linDS_ns.svg"), width =10 , height = 8) # old-school square 4:3 CRT shape 1.3:1
|
||||
ds_ns = ggplot(plot_df_ns, aes(x = lineage
|
||||
, fill = sens2)) +
|
||||
geom_bar(stat = 'count') +
|
||||
|
@ -309,31 +327,57 @@ ds_ns = ggplot(plot_df_ns, aes(x = lineage
|
|||
labs(title = paste0(p_title, ": sensitivity by lineage")
|
||||
, y = 'Sample Count')
|
||||
#dev.off()
|
||||
#####################################################################
|
||||
#===================
|
||||
# Combine output
|
||||
#====================
|
||||
|
||||
|
||||
# svg(paste0(outdir_images, "embb_linDS_CL.svg")
|
||||
# svg(paste0(outdir_images, tolower(gene), "_linDS_CL.svg")
|
||||
# , width = 11
|
||||
# , height = 8 )
|
||||
png(paste0(outdir_images, "embb_linDS_CL.png")
|
||||
, width = 11.75
|
||||
png(paste0(outdir_images, tolower(gene), "_linDS_CL2.png")
|
||||
, width = 11.75*1.15
|
||||
, height = 8, units = "in", res = 300 )
|
||||
|
||||
cowplot::plot_grid(ds_s, ds_ns
|
||||
, ncol = 2
|
||||
,rel_widths = c(1,2)
|
||||
#, align = "hv"
|
||||
, rel_widths = c(1,2.5)
|
||||
, labels = "AUTO")
|
||||
|
||||
dev.off()
|
||||
|
||||
########################################################################
|
||||
#==================
|
||||
# Summary output
|
||||
#==================
|
||||
cat ("Total unique muts in ALL samples for", tolower(gene), ":", length(unique(merged_df2$mutationinformation)))
|
||||
other_lin_muts = unique(merged_df2$mutationinformation)[!unique(merged_df2$mutationinformation)%in%unique(my_df2$mutationinformation)]
|
||||
|
||||
cat ("Total unique muts NOT in L1-L4:", length(other_lin_muts))
|
||||
cat("These are:\n", other_lin_muts)
|
||||
other_lin_muts_df = merged_df2[merged_df2$mutationinformation%in%other_lin_muts,]
|
||||
|
||||
if ( length(unique(other_lin_muts_df$mutationinformation)) == length(other_lin_muts)) {
|
||||
cat("\nPASS: other lin muts extracted")
|
||||
}else{
|
||||
stop("\nAbort: other lin muts numbers mismatch")
|
||||
}
|
||||
|
||||
table(other_lin_muts_df$mutationinformation, other_lin_muts_df$lineage)
|
||||
|
||||
cat("\n==============================================\n")
|
||||
cat ("Total samples L1-L4:", nrow(my_df2))
|
||||
table(my_df2$lineage)
|
||||
table(my_df2$lineage, my_df2$sensitivity)
|
||||
|
||||
cat ("Total unique muts in L1-L4", tolower(gene), ":", length(muts))
|
||||
cat("\nGot:", length(lin_muts), "mutations belonging to >1 lineage with differing drug sensitivities")
|
||||
|
||||
cat("\nGot:", sig_muts, "mutations that are significant"
|
||||
, "\nThese are:", unique(plot_df_sig$mutationinformation))
|
||||
|
||||
#geom_text(aes(label = paste0("p=",pvalF), x = 2.5, ypos_label+1))# +
|
||||
cat("\nGot:", ns_muts, "mutations that are NOT significant"
|
||||
, "\nThese are:", unique(plot_df_ns$mutationinformation))
|
||||
|
||||
#geom_segment(aes(x = 1, y = ypos_label+0.5, xend = 4, yend = ypos_label+0.5))
|
||||
#geom_hline(data = lin_muts_dfM, aes(yintercept=ypos_label+0.5))
|
||||
#geom_bracket(data=lin_muts_dfM, aes(xmin = 1, xmax = 4, y.position = ypos_label+0.5, label=''))
|
||||
cat("\n==============================================\n")
|
||||
|
|
|
@ -109,137 +109,137 @@ merged_df3 = all_plot_dfs[[2]]
|
|||
####################################################################
|
||||
# Data for logoplots
|
||||
####################################################################
|
||||
|
||||
source(paste0(plot_script_path, "logo_data_msa.R"))
|
||||
s1 = c("\nSuccessfully sourced logo_data_msa.R")
|
||||
cat(s1)
|
||||
|
||||
####################################################################
|
||||
# Data for DM OM Plots: WF and LF dfs
|
||||
# My function: dm_om_wf_lf_data()
|
||||
# location: scripts/functions/dm_om_data.R
|
||||
#source("other_plots_data.R")
|
||||
####################################################################
|
||||
|
||||
#source(paste0(plot_script_path, "dm_om_data.R")) # calling the function directly instead
|
||||
geneL_normal = c("pnca")
|
||||
geneL_na = c("gid", "rpob")
|
||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||
|
||||
all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)
|
||||
|
||||
wf_duet = all_dm_om_df[['wf_duet']]
|
||||
lf_duet = all_dm_om_df[['lf_duet']]
|
||||
|
||||
wf_mcsm_lig = all_dm_om_df[['wf_mcsm_lig']]
|
||||
lf_mcsm_lig = all_dm_om_df[['lf_mcsm_lig']]
|
||||
|
||||
wf_foldx = all_dm_om_df[['wf_foldx']]
|
||||
lf_foldx = all_dm_om_df[['lf_foldx']]
|
||||
|
||||
wf_deepddg = all_dm_om_df[['wf_deepddg']]
|
||||
lf_deepddg = all_dm_om_df[['lf_deepddg']]
|
||||
|
||||
wf_dynamut2 = all_dm_om_df[['wf_dynamut2']]
|
||||
lf_dynamut2 = all_dm_om_df[['lf_dynamut2']]
|
||||
|
||||
wf_consurf = all_dm_om_df[['wf_consurf']]
|
||||
lf_consurf = all_dm_om_df[['lf_consurf']]
|
||||
|
||||
wf_snap2 = all_dm_om_df[['wf_snap2']]
|
||||
lf_snap2 = all_dm_om_df[['lf_snap2']]
|
||||
|
||||
wf_provean = all_dm_om_df[['wf_provean']]
|
||||
lf_provean = all_dm_om_df[['lf_provean']]
|
||||
|
||||
# NEW
|
||||
wf_dist_gen = all_dm_om_df[['wf_dist_gen']]
|
||||
lf_dist_gen = all_dm_om_df[['lf_dist_gen']]
|
||||
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']]
|
||||
lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']]
|
||||
lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']]
|
||||
}
|
||||
|
||||
s2 = c("\nSuccessfully sourced other_plots_data.R")
|
||||
cat(s2)
|
||||
|
||||
####################################################################
|
||||
# Data for Lineage barplots: WF and LF dfs
|
||||
# My function: lineage_plot_data()
|
||||
# location: scripts/functions/lineage_plot_data.R
|
||||
####################################################################
|
||||
|
||||
#source(paste0(plot_script_path, "lineage_data.R"))
|
||||
# converted to a function. Moved lineage_data.R to redundant/
|
||||
lineage_dfL = lineage_plot_data(merged_df2
|
||||
, lineage_column_name = "lineage"
|
||||
, remove_empty_lineage = F
|
||||
, lineage_label_col_name = "lineage_labels"
|
||||
, id_colname = "id"
|
||||
, snp_colname = "mutationinformation"
|
||||
)
|
||||
|
||||
lin_wf = lineage_dfL[['lin_wf']]
|
||||
lin_lf = lineage_dfL[['lin_lf']]
|
||||
|
||||
s3 = c("\nSuccessfully sourced lineage_data.R")
|
||||
cat(s3)
|
||||
|
||||
####################################################################
|
||||
# Data for corr plots:
|
||||
# My function: corr_data_extract()
|
||||
# location: scripts/functions/corr_plot_data.R
|
||||
####################################################################
|
||||
# make sure the above script works because merged_df2_combined is needed
|
||||
merged_df3 = as.data.frame(merged_df3)
|
||||
|
||||
corr_df_m3_f = corr_data_extract(merged_df3
|
||||
, gene = gene
|
||||
, drug = drug
|
||||
, extract_scaled_cols = F)
|
||||
head(corr_df_m3_f)
|
||||
|
||||
# corr_df_m2_f = corr_data_extract(merged_df2
|
||||
#
|
||||
# source(paste0(plot_script_path, "logo_data_msa.R"))
|
||||
# s1 = c("\nSuccessfully sourced logo_data_msa.R")
|
||||
# cat(s1)
|
||||
#
|
||||
# ####################################################################
|
||||
# # Data for DM OM Plots: WF and LF dfs
|
||||
# # My function: dm_om_wf_lf_data()
|
||||
# # location: scripts/functions/dm_om_data.R
|
||||
# #source("other_plots_data.R")
|
||||
# ####################################################################
|
||||
#
|
||||
# #source(paste0(plot_script_path, "dm_om_data.R")) # calling the function directly instead
|
||||
# geneL_normal = c("pnca")
|
||||
# geneL_na = c("gid", "rpob")
|
||||
# geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||
#
|
||||
# all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)
|
||||
#
|
||||
# wf_duet = all_dm_om_df[['wf_duet']]
|
||||
# lf_duet = all_dm_om_df[['lf_duet']]
|
||||
#
|
||||
# wf_mcsm_lig = all_dm_om_df[['wf_mcsm_lig']]
|
||||
# lf_mcsm_lig = all_dm_om_df[['lf_mcsm_lig']]
|
||||
#
|
||||
# wf_foldx = all_dm_om_df[['wf_foldx']]
|
||||
# lf_foldx = all_dm_om_df[['lf_foldx']]
|
||||
#
|
||||
# wf_deepddg = all_dm_om_df[['wf_deepddg']]
|
||||
# lf_deepddg = all_dm_om_df[['lf_deepddg']]
|
||||
#
|
||||
# wf_dynamut2 = all_dm_om_df[['wf_dynamut2']]
|
||||
# lf_dynamut2 = all_dm_om_df[['lf_dynamut2']]
|
||||
#
|
||||
# wf_consurf = all_dm_om_df[['wf_consurf']]
|
||||
# lf_consurf = all_dm_om_df[['lf_consurf']]
|
||||
#
|
||||
# wf_snap2 = all_dm_om_df[['wf_snap2']]
|
||||
# lf_snap2 = all_dm_om_df[['lf_snap2']]
|
||||
#
|
||||
# wf_provean = all_dm_om_df[['wf_provean']]
|
||||
# lf_provean = all_dm_om_df[['lf_provean']]
|
||||
#
|
||||
# # NEW
|
||||
# wf_dist_gen = all_dm_om_df[['wf_dist_gen']]
|
||||
# lf_dist_gen = all_dm_om_df[['lf_dist_gen']]
|
||||
#
|
||||
# if (tolower(gene)%in%geneL_na){
|
||||
# wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']]
|
||||
# lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]
|
||||
# }
|
||||
#
|
||||
# if (tolower(gene)%in%geneL_ppi2){
|
||||
# wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']]
|
||||
# lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']]
|
||||
# }
|
||||
#
|
||||
# s2 = c("\nSuccessfully sourced other_plots_data.R")
|
||||
# cat(s2)
|
||||
#
|
||||
# ####################################################################
|
||||
# # Data for Lineage barplots: WF and LF dfs
|
||||
# # My function: lineage_plot_data()
|
||||
# # location: scripts/functions/lineage_plot_data.R
|
||||
# ####################################################################
|
||||
#
|
||||
# #source(paste0(plot_script_path, "lineage_data.R"))
|
||||
# # converted to a function. Moved lineage_data.R to redundant/
|
||||
# lineage_dfL = lineage_plot_data(merged_df2
|
||||
# , lineage_column_name = "lineage"
|
||||
# , remove_empty_lineage = F
|
||||
# , lineage_label_col_name = "lineage_labels"
|
||||
# , id_colname = "id"
|
||||
# , snp_colname = "mutationinformation"
|
||||
# )
|
||||
#
|
||||
# lin_wf = lineage_dfL[['lin_wf']]
|
||||
# lin_lf = lineage_dfL[['lin_lf']]
|
||||
#
|
||||
# s3 = c("\nSuccessfully sourced lineage_data.R")
|
||||
# cat(s3)
|
||||
#
|
||||
# ####################################################################
|
||||
# # Data for corr plots:
|
||||
# # My function: corr_data_extract()
|
||||
# # location: scripts/functions/corr_plot_data.R
|
||||
# ####################################################################
|
||||
# # make sure the above script works because merged_df2_combined is needed
|
||||
# merged_df3 = as.data.frame(merged_df3)
|
||||
#
|
||||
# corr_df_m3_f = corr_data_extract(merged_df3
|
||||
# , gene = gene
|
||||
# , drug = drug
|
||||
# , extract_scaled_cols = F)
|
||||
# head(corr_df_m2_f)
|
||||
|
||||
s4 = c("\nSuccessfully sourced Corr_data.R")
|
||||
cat(s4)
|
||||
|
||||
########################################################################
|
||||
# End of script
|
||||
########################################################################
|
||||
if ( all( length(s1), length(s2), length(s3), length(s4) ) > 0 ){
|
||||
cat(
|
||||
"\n##################################################"
|
||||
, "\nSuccessful: get_plotting_dfs.R worked!"
|
||||
, "\n###################################################\n")
|
||||
} else {
|
||||
cat(
|
||||
"\n#################################################"
|
||||
, "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
|
||||
, "\n###################################################\n" )
|
||||
}
|
||||
|
||||
########################################################################
|
||||
# clear excess variables from the global environment
|
||||
|
||||
vars0 = ls(envir = .GlobalEnv)[grepl("curr_*", ls(envir = .GlobalEnv))]
|
||||
vars1 = ls(envir = .GlobalEnv)[grepl("^cols_to*", ls(envir = .GlobalEnv))]
|
||||
vars2 = ls(envir = .GlobalEnv)[grepl("pivot_cols_*", ls(envir = .GlobalEnv))]
|
||||
vars3 = ls(envir = .GlobalEnv)[grepl("expected_*", ls(envir = .GlobalEnv))]
|
||||
|
||||
rm( infile_metadata
|
||||
, infile_params
|
||||
, vars0
|
||||
, vars1
|
||||
, vars2
|
||||
, vars3)
|
||||
# head(corr_df_m3_f)
|
||||
#
|
||||
# # corr_df_m2_f = corr_data_extract(merged_df2
|
||||
# # , gene = gene
|
||||
# # , drug = drug
|
||||
# # , extract_scaled_cols = F)
|
||||
# # head(corr_df_m2_f)
|
||||
#
|
||||
# s4 = c("\nSuccessfully sourced Corr_data.R")
|
||||
# cat(s4)
|
||||
#
|
||||
# ########################################################################
|
||||
# # End of script
|
||||
# ########################################################################
|
||||
# if ( all( length(s1), length(s2), length(s3), length(s4) ) > 0 ){
|
||||
# cat(
|
||||
# "\n##################################################"
|
||||
# , "\nSuccessful: get_plotting_dfs.R worked!"
|
||||
# , "\n###################################################\n")
|
||||
# } else {
|
||||
# cat(
|
||||
# "\n#################################################"
|
||||
# , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
|
||||
# , "\n###################################################\n" )
|
||||
# }
|
||||
#
|
||||
# ########################################################################
|
||||
# # clear excess variables: from the global enviornment
|
||||
#
|
||||
# vars0 = ls(envir = .GlobalEnv)[grepl("curr_*", ls(envir = .GlobalEnv))]
|
||||
# vars1 = ls(envir = .GlobalEnv)[grepl("^cols_to*", ls(envir = .GlobalEnv))]
|
||||
# vars2 = ls(envir = .GlobalEnv)[grepl("pivot_cols_*", ls(envir = .GlobalEnv))]
|
||||
# vars3 = ls(envir = .GlobalEnv)[grepl("expected_*", ls(envir = .GlobalEnv))]
|
||||
#
|
||||
# rm( infile_metadata
|
||||
# , infile_params
|
||||
# , vars0
|
||||
# , vars1
|
||||
# , vars2
|
||||
# , vars3)
|
||||
|
|
|
@ -38,7 +38,7 @@ source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
|
|||
class(merged_df3)
|
||||
merged_df3 = as.data.frame(merged_df3)
|
||||
|
||||
class(df3)
|
||||
class(merged_df3)
|
||||
head(merged_df3$pos_count)
|
||||
|
||||
nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count"); nc_pc_CHANGE
|
||||
|
@ -198,10 +198,10 @@ rects <- data.frame(x = 1:6,
|
|||
)
|
||||
rects
|
||||
|
||||
rects$text = c("-ve Lig affinty"
|
||||
, "+ve Lig affinity"
|
||||
, "+ve PPI2 affinity"
|
||||
, "-ve PPI2 affinity"
|
||||
rects$text = c("-ve Lig"
|
||||
, "+ve Lig"
|
||||
, "+ve PPI2"
|
||||
, "-ve PPI2"
|
||||
, "+ve stability"
|
||||
, "-ve stability")
|
||||
|
||||
|
@ -221,7 +221,7 @@ peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_l
|
|||
coord_flip()+ scale_x_reverse() +
|
||||
# theme_void() # remove any axis markings
|
||||
theme_nothing() # remove any axis markings
|
||||
|
||||
peP
|
||||
|
||||
peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
|
||||
geom_tile() + # make square tiles
|
||||
|
@ -229,7 +229,7 @@ peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_
|
|||
scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
|
||||
coord_fixed() + # make sure tiles are square
|
||||
theme_nothing() # remove any axis markings
|
||||
|
||||
peP2
|
||||
|
||||
# ------------------------------
|
||||
# bp site site count: ALL
|
||||
|
@ -252,24 +252,24 @@ posC_all = site_snp_count_bp(plotdf = df3
|
|||
#------------------------------
|
||||
# barplot for sensitivity:
|
||||
#------------------------------
|
||||
sensP = stability_count_bp(plotdf = df3
|
||||
, df_colname = "sensitivity"
|
||||
#, leg_title = "mCSM-ppi2"
|
||||
#, label_categories = labels_ppi2
|
||||
#, bp_plot_title = paste(common_bp_title, "PP-interface")
|
||||
|
||||
, yaxis_title = "Number of nsSNPs"
|
||||
, leg_position = "none"
|
||||
, subtitle_text = "Sensitivity"
|
||||
, bar_fill_values = c("red", "blue")
|
||||
, subtitle_colour= "black"
|
||||
, sts = 10
|
||||
, lts = 8
|
||||
, ats = 8
|
||||
, als =8
|
||||
, ltis = 11
|
||||
, geom_ls =2
|
||||
)
|
||||
# sensP = stability_count_bp(plotdf = df3
|
||||
# , df_colname = "sensitivity"
|
||||
# #, leg_title = "mCSM-ppi2"
|
||||
# #, label_categories = labels_ppi2
|
||||
# #, bp_plot_title = paste(common_bp_title, "PP-interface")
|
||||
#
|
||||
# , yaxis_title = "Number of nsSNPs"
|
||||
# , leg_position = "none"
|
||||
# , subtitle_text = "Sensitivity"
|
||||
# , bar_fill_values = c("red", "blue")
|
||||
# , subtitle_colour= "black"
|
||||
# , sts = 10
|
||||
# , lts = 8
|
||||
# , ats = 8
|
||||
# , als =8
|
||||
# , ltis = 11
|
||||
# , geom_ls =2
|
||||
# )
|
||||
|
||||
|
||||
consurfP = stability_count_bp(plotdf = df3
|
||||
|
@ -290,3 +290,95 @@ consurfP = stability_count_bp(plotdf = df3
|
|||
|
||||
consurfP
|
||||
|
||||
|
||||
|
||||
####################
|
||||
# Sensitivity count
|
||||
####################
|
||||
table(df3$sensitivity)
|
||||
|
||||
rect_sens=data.frame(mutation_class=c("Resistant","Sensitive")
|
||||
, tile_colour =c("red","blue")
|
||||
, numbers = c(table(df3$sensitivity)[[1]], table(df3$sensitivity)[[2]]))
|
||||
|
||||
|
||||
|
||||
sensP = ggplot(rect_sens, aes(mutation_class, y = 0
|
||||
, fill = tile_colour
|
||||
, label = paste0("n=", numbers)
|
||||
)) +
|
||||
geom_tile(width = 1, height = 1) + # make square tiles
|
||||
geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + # add white text in the middle
|
||||
scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
|
||||
coord_fixed() + # make sure tiles are square
|
||||
#coord_flip()+ scale_x_reverse() +
|
||||
# theme_void() # remove any axis markings
|
||||
theme_nothing() # remove any axis markings
|
||||
sensP
|
||||
|
||||
|
||||
# sensP2 = sensP +
|
||||
# coord_flip() + scale_x_reverse()
|
||||
# sensP2
|
||||
|
||||
##############################
|
||||
# FIXME for other genes: ATTEMPTED to derive numbers
|
||||
##############################
|
||||
#
|
||||
# table(str_df_short$pe_effect_outcome)
|
||||
# # extract the numbers
|
||||
# DD_lig_n = table(str_df_short$pe_effect_outcome)[[1]]
|
||||
# SS_lig_n = 0
|
||||
# DD_ppi2_n = table(str_df_short$pe_effect_outcome)[[2]]
|
||||
# SS_ppi2_n = table(str_df_short$pe_effect_outcome)[[4]]
|
||||
# DD_stability_n = table(str_df_short$pe_effect_outcome)[[3]]
|
||||
# SS_stability_n = table(str_df_short$pe_effect_outcome)[[5]]
|
||||
#
|
||||
# nums = c(DD_lig_n, SS_lig_n,DD_ppi2_n,SS_ppi2_n, DD_stability_n, SS_stability_n )
|
||||
#
|
||||
# rect_pe = data.frame(x = 1:6
|
||||
# , pe_effect_type=c("-ve Lig aff"
|
||||
# , "+ve Lig aff"
|
||||
# , "-ve PPI2 aff"
|
||||
# , " +ve PPI2 aff"
|
||||
# , "-ve stability"
|
||||
# , "+ve stability")
|
||||
#
|
||||
# , tile_colour =c("#ffd700" #gold
|
||||
# ,"#f0e68c" # khaki
|
||||
# , "#ff1493" #deeppink
|
||||
# , "#da70d6" #orchid
|
||||
# , "#F8766D" # Sred
|
||||
# , "#00BFC4") #Sblue
|
||||
# # , numbers = c(DD_lig_n
|
||||
# # , SS_lig_n
|
||||
# # , DD_ppi2_n
|
||||
# # , SS_ppi2_n
|
||||
# # , DD_stability_n
|
||||
# # , SS_stability_n )
|
||||
# , numbers = nums
|
||||
# )
|
||||
#
|
||||
# rect_pe$num_labels = paste0("n=", rect_pe$numbers)
|
||||
# rect_pe
|
||||
#
|
||||
# # create plot
|
||||
# peP = ggplot(rect_pe, aes(x=pe_effect_type , y = 0, fill = tile_colour
|
||||
# , label = paste0(pe_effect_type,"\n", num_labels))) +
|
||||
# geom_tile(width = 1, height = 1) + # make square tiles
|
||||
# geom_text(color = "black", size = 1.7) + # add white text in the middle
|
||||
# scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
|
||||
# coord_fixed() + # make sure tiles are square
|
||||
# coord_flip()+ scale_x_reverse() +
|
||||
# # theme_void() # remove any axis markings
|
||||
# theme_nothing() # remove any axis markings
|
||||
# peP
|
||||
#
|
||||
# peP2 = ggplot(rect_pe, aes(x=pe_effect_type, y = 0, fill = tile_colour
|
||||
# , label = paste0(pe_effect_type,"\n", num_labels))) +
|
||||
# geom_tile() +
|
||||
# geom_text(color = "black", size = 1.6) +
|
||||
# scale_fill_identity(guide = "none") +
|
||||
# coord_fixed() +
|
||||
# theme_nothing()
|
||||
# peP2
|
||||
|
|
|
@ -4,7 +4,6 @@ posC_lig
|
|||
ppi2P
|
||||
posC_ppi2
|
||||
peP
|
||||
pe_allCL
|
||||
|
||||
|
||||
theme_georgia <- function(...) {
|
||||
|
@ -22,10 +21,125 @@ common_legend_outcome = get_legend(mLigP +
|
|||
guides(color = guide_legend(nrow = 1)) +
|
||||
theme(legend.position = "top"))
|
||||
|
||||
###############################################################
|
||||
# ###############################################################
|
||||
# #================================
|
||||
# # Lig Affinity: outcome + site
|
||||
# #================================
|
||||
# ligT = paste0(common_bp_title, " ligand")
|
||||
# lig_affT = ggdraw() +
|
||||
# draw_label(
|
||||
# ligT,
|
||||
# fontfamily = title_theme$family,
|
||||
# fontface = title_theme$face,
|
||||
# #size = title_theme$size
|
||||
# size = 8
|
||||
# )
|
||||
|
||||
# #-------------
|
||||
# # Outplot
|
||||
# #-------------
|
||||
# ligaffP = paste0(outdir_images
|
||||
# ,tolower(gene)
|
||||
# ,"_lig_oc.png")
|
||||
#
|
||||
# #svg(affP, width = 20, height = 5.5)
|
||||
# print(paste0("plot filename:", ligaffP))
|
||||
# png(ligaffP, units = "in", width = 6, height = 4, res = 300 )
|
||||
# cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome,
|
||||
# nrow = 2,
|
||||
# rel_heights = c(1,1)
|
||||
# ),
|
||||
# cowplot::plot_grid(mLigP, mmLigP, posC_lig
|
||||
# , nrow = 1
|
||||
# #, labels = c("A", "B", "C","D")
|
||||
# , rel_widths = c(1,1,1.8)
|
||||
# , align = "h"),
|
||||
# nrow = 2,
|
||||
# labels = c("A", ""),
|
||||
# label_size = 12,
|
||||
# rel_heights = c(1,8))
|
||||
# dev.off()
|
||||
# #############################################################
|
||||
# #================================
|
||||
# # PPI2 Affinity: outcome + site
|
||||
# #================================
|
||||
# ppi2T = paste0(common_bp_title, " PP-interface")
|
||||
# ppi2_affT = ggdraw() +
|
||||
# draw_label(
|
||||
# ppi2T,
|
||||
# fontfamily = title_theme$family,
|
||||
# fontface = title_theme$face,
|
||||
# #size = title_theme$size
|
||||
# size = 8
|
||||
# )
|
||||
#
|
||||
#
|
||||
# #-------------
|
||||
# # Outplot: PPI2
|
||||
# #-------------
|
||||
# ppiaffP = paste0(outdir_images
|
||||
# ,tolower(gene)
|
||||
# ,"_ppi2_oc.png")
|
||||
#
|
||||
# #svg(affP, width = 20, height = 5.5)
|
||||
# print(paste0("plot filename:", ppiaffP))
|
||||
# png(ppiaffP, units = "in", width = 6, height = 4, res = 300 )
|
||||
#
|
||||
#
|
||||
# cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome,
|
||||
# nrow = 2,
|
||||
# rel_heights = c(1,1)),
|
||||
# cowplot::plot_grid(ppi2P, posC_ppi2
|
||||
# , nrow = 1
|
||||
# , rel_widths = c(1.2,1.8)
|
||||
# , align = "h"
|
||||
# , label_size = my_label_size),
|
||||
# nrow = 2,
|
||||
# labels = c("B", ""),
|
||||
# label_size = 12,
|
||||
# rel_heights = c(1,8)
|
||||
# )
|
||||
#
|
||||
# dev.off()
|
||||
# #############################################################
|
||||
#peP # pe counts
|
||||
#================================
|
||||
# Lig Affinity: outcome + site
|
||||
# PE + All position count
|
||||
#================================
|
||||
# peT_allT = ggdraw() +
|
||||
# draw_label(
|
||||
# paste0("All mutation sites"),
|
||||
# fontfamily = title_theme$family,
|
||||
# fontface = title_theme$face,
|
||||
# #size = title_theme$size
|
||||
# size = 8
|
||||
# )
|
||||
# #------------------------
|
||||
# # Outplot: lig+ppi2+pe
|
||||
# #------------------------
|
||||
# pe_allCL = paste0(outdir_images
|
||||
# ,tolower(gene)
|
||||
# ,"_pe_oc.png")
|
||||
#
|
||||
# #svg(affP, width = 20, height = 5.5)
|
||||
# print(paste0("plot filename:", pe_allCL))
|
||||
# png(pe_allCL, units = "in", width = 6, height = 4, res = 300 )
|
||||
#
|
||||
#
|
||||
# cowplot::plot_grid(peT_allT,
|
||||
# cowplot::plot_grid(peP, posC_all
|
||||
# , nrow = 1
|
||||
# , rel_widths = c(1, 2)
|
||||
# , align = "h"),
|
||||
# nrow = 2,
|
||||
# labels = c("C", "", ""),
|
||||
# label_size = 12,
|
||||
# rel_heights = c(1,8))
|
||||
#
|
||||
# dev.off()
|
||||
#===========================================
|
||||
# COMBINE ALL three
|
||||
#==========================================
|
||||
ligT = paste0(common_bp_title, " ligand")
|
||||
lig_affT = ggdraw() +
|
||||
draw_label(
|
||||
|
@ -36,113 +150,6 @@ lig_affT = ggdraw() +
|
|||
size = 8
|
||||
)
|
||||
|
||||
#-------------
|
||||
# Outplot
|
||||
#-------------
|
||||
ligaffP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_lig_oc.png")
|
||||
|
||||
#svg(affP, width = 20, height = 5.5)
|
||||
print(paste0("plot filename:", ligaffP))
|
||||
png(ligaffP, units = "in", width = 6, height = 4, res = 300 )
|
||||
cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome,
|
||||
nrow = 2,
|
||||
rel_heights = c(1,1)
|
||||
),
|
||||
cowplot::plot_grid(mLigP, mmLigP, posC_lig
|
||||
, nrow = 1
|
||||
#, labels = c("A", "B", "C","D")
|
||||
, rel_widths = c(1,1,1.8)
|
||||
, align = "h"),
|
||||
nrow = 2,
|
||||
labels = c("A", ""),
|
||||
label_size = 12,
|
||||
rel_heights = c(1,8))
|
||||
dev.off()
|
||||
#############################################################
|
||||
#================================
|
||||
# PPI2 Affinity: outcome + site
|
||||
#================================
|
||||
ppi2T = paste0(common_bp_title, " PP-interface")
|
||||
ppi2_affT = ggdraw() +
|
||||
draw_label(
|
||||
ppi2T,
|
||||
fontfamily = title_theme$family,
|
||||
fontface = title_theme$face,
|
||||
#size = title_theme$size
|
||||
size = 8
|
||||
)
|
||||
|
||||
|
||||
#-------------
|
||||
# Outplot: PPI2
|
||||
#-------------
|
||||
ppiaffP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_ppi2_oc.png")
|
||||
|
||||
#svg(affP, width = 20, height = 5.5)
|
||||
print(paste0("plot filename:", ppiaffP))
|
||||
png(ppiaffP, units = "in", width = 6, height = 4, res = 300 )
|
||||
|
||||
|
||||
cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome,
|
||||
nrow = 2,
|
||||
rel_heights = c(1,1)),
|
||||
cowplot::plot_grid(ppi2P, posC_ppi2
|
||||
, nrow = 1
|
||||
, rel_widths = c(1.2,1.8)
|
||||
, align = "h"
|
||||
, label_size = my_label_size),
|
||||
nrow = 2,
|
||||
labels = c("B", ""),
|
||||
label_size = 12,
|
||||
rel_heights = c(1,8)
|
||||
)
|
||||
|
||||
dev.off()
|
||||
#############################################################
|
||||
peP # pe counts
|
||||
#================================
|
||||
# PE + All position count
|
||||
#================================
|
||||
peT_allT = ggdraw() +
|
||||
draw_label(
|
||||
paste0("All mutation sites"),
|
||||
fontfamily = title_theme$family,
|
||||
fontface = title_theme$face,
|
||||
#size = title_theme$size
|
||||
size = 8
|
||||
)
|
||||
|
||||
|
||||
#-------------
|
||||
# Outplot: PE + all position counts
|
||||
#-------------
|
||||
pe_allCL = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_pe_oc.png")
|
||||
|
||||
#svg(affP, width = 20, height = 5.5)
|
||||
print(paste0("plot filename:", pe_allCL))
|
||||
png(pe_allCL, units = "in", width = 6, height = 4, res = 300 )
|
||||
|
||||
|
||||
cowplot::plot_grid(peT_allT,
|
||||
cowplot::plot_grid(peP, posC_all
|
||||
, nrow = 1
|
||||
, rel_widths = c(1, 2)
|
||||
, align = "h"),
|
||||
nrow = 2,
|
||||
labels = c("C", "", ""),
|
||||
label_size = 12,
|
||||
rel_heights = c(1,8))
|
||||
|
||||
dev.off()
|
||||
#===========================================
|
||||
# COMBINE ALL three
|
||||
#==========================================
|
||||
p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome, nrow=2),
|
||||
cowplot::plot_grid(mLigP, mmLigP, posC_lig
|
||||
, nrow = 1
|
||||
|
@ -152,8 +159,18 @@ p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome, nrow=
|
|||
rel_heights = c(1,8)
|
||||
|
||||
)
|
||||
|
||||
|
||||
#p1
|
||||
###########################################################
|
||||
ppi2T = paste0(common_bp_title, " PP-interface")
|
||||
ppi2_affT = ggdraw() +
|
||||
draw_label(
|
||||
ppi2T,
|
||||
fontfamily = title_theme$family,
|
||||
fontface = title_theme$face,
|
||||
#size = title_theme$size
|
||||
size = 8
|
||||
)
|
||||
###########################################################
|
||||
p2 = cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome, nrow=2),
|
||||
cowplot::plot_grid(ppi2P, posC_ppi2
|
||||
, nrow = 1
|
||||
|
@ -162,7 +179,17 @@ p2 = cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome, nro
|
|||
nrow = 2,
|
||||
rel_heights = c(1,8)
|
||||
)
|
||||
|
||||
#p2
|
||||
###########################################################
|
||||
# PE + All position count
|
||||
peT_allT = ggdraw() +
|
||||
draw_label(
|
||||
paste0("All mutation sites"),
|
||||
fontfamily = title_theme$family,
|
||||
fontface = title_theme$face,
|
||||
#size = title_theme$size
|
||||
size = 8
|
||||
)
|
||||
|
||||
p3 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
|
||||
, rel_widths = c(1,3),axis = "lr"),
|
||||
|
@ -174,16 +201,14 @@ p3 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
|
|||
axis = "lr",
|
||||
rel_heights = c(1,8)
|
||||
),
|
||||
rel_heights = c(1,10),
|
||||
rel_heights = c(1,18),
|
||||
nrow = 2,axis = "lr")
|
||||
|
||||
|
||||
|
||||
p3
|
||||
#===============
|
||||
# Final combine
|
||||
#===============
|
||||
w = 11.75
|
||||
h = 3.7
|
||||
w = 11.79
|
||||
h = 3.5
|
||||
mut_impact_CLP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_mut_impactCLP.png")
|
||||
|
@ -229,9 +254,21 @@ conCLP = paste0(outdir_images
|
|||
,tolower(gene)
|
||||
,"_consurf_BP.png")
|
||||
|
||||
print(paste0("plot filename:", sens_conP))
|
||||
png(sens_conP, units = "in", width = w, height = h, res = 300 )
|
||||
|
||||
print(paste0("plot filename:", conCLP))
|
||||
png(conCLP, units = "in", width = w, height = h, res = 300 )
|
||||
consurfP
|
||||
|
||||
dev.off()
|
||||
#================================
|
||||
# Sensitivity numbers: geom_tile
|
||||
#================================
|
||||
sensCLP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_sensN_tile.png")
|
||||
|
||||
print(paste0("plot filename:", sensCLP))
|
||||
png(sensCLP, units = "in", width = 1, height = 1, res = 300 )
|
||||
sensP
|
||||
dev.off()
|
||||
|
||||
|
||||
|
|
|
@ -1,182 +0,0 @@
|
|||
colnames(str_df_short)
|
||||
table(str_df_short$effect_type)
|
||||
table(str_df_short$effect_sign)
|
||||
|
||||
str(str_df_short)
|
||||
|
||||
str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS")
|
||||
table(str_df_short$pe_outcome )
|
||||
table(str_df_short$effect_sign)
|
||||
|
||||
affcols = c("affinity_scaled", "mmcsm_lig_scaled")
|
||||
ppi2_cols = c("mcsm_ppi2_scaled")
|
||||
|
||||
#lig
|
||||
table(str_df_short$effect_type)
|
||||
|
||||
str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols
|
||||
, "affinity"
|
||||
, str_df_short$effect_type)
|
||||
table(str_df_short$effect_grouped)
|
||||
|
||||
#ppi2
|
||||
str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols
|
||||
, "ppi2"
|
||||
, str_df_short$effect_grouped)
|
||||
table(str_df_short$effect_grouped)
|
||||
|
||||
#stability
|
||||
str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("affinity", "ppi2")
|
||||
, "stability"
|
||||
, str_df_short$effect_grouped)
|
||||
|
||||
table(str_df_short$effect_grouped)
|
||||
|
||||
|
||||
# create a sign as well
|
||||
str_df_short$effect_outcome = paste0(str_df_short$pe_outcome
|
||||
, str_df_short$effect_grouped)
|
||||
|
||||
table(str_df_short$effect_outcome)
|
||||
|
||||
# Named colour lookup keyed by effect_outcome, i.e. paste0(pe_outcome, effect_grouped):
# "DD"/"SS" prefix followed by the grouped effect type.
# Keys must match those labels exactly (no stray whitespace), otherwise a
# lookup by name silently returns NA.
pe_colour_map2 = c("DDaffinity"    = "#ffd700" # gold
                   , "SSaffinity"  = "#f0e68c" # khaki
                   , "DDppi2"      = "#ff1493" # deeppink
                   , "SSppi2"      = "#da70d6" # orchid
                   , "DDstability" = "#ae301e"
                   , "SSstability" = "#007d85"
)
|
||||
|
||||
|
||||
str_df_short$effect_colours = str_df_short$effect_outcome
|
||||
|
||||
str_df_short = dplyr::mutate(str_df_short
|
||||
, effect_colours = case_when(effect_colours == "DDaffinity" ~ "#ffd700"
|
||||
, effect_colours == "DDppi2" ~ '#ff1493'
|
||||
, effect_colours == "SSppi2" ~ '#da70d6'
|
||||
, effect_colours == "DDstability" ~ '#ae301e'
|
||||
, effect_colours =="SSstability" ~ '#007d85'
|
||||
, TRUE ~ 'ns'))
|
||||
|
||||
"#F8766D" #red
|
||||
"#00BFC4" #blue
|
||||
table(str_df_short$effect_colours)
|
||||
|
||||
|
||||
###########################################
|
||||
|
||||
ggplot(str_df_short
|
||||
, aes( x=effect_grouped
|
||||
, fill = effect_colours)) +
|
||||
geom_bar() +
|
||||
scale_fill_manual(values = str_df_short$effect_colours)
|
||||
|
||||
|
||||
|
||||
# Small 2 x 3 table of counts: one row per outcome direction
# (Destabilising, Stabilising), one column per count vector.
# NOTE(review): the values look like the ligand / PPI2 / stability counts
# also used in rects$numbers -- confirm before relying on the column meaning.
first_col  = c(38, 0)
second_col = c(9, 22)
third_col  = c(681, 108)

# Build the frame from the vectors defined above (the original referenced
# first_row/second_row/third_row, which are never defined).
thing_df = data.frame(first_col, second_col, third_col)
rownames(thing_df) = c("Destabilising", "Stabilising")
thing_df
|
||||
|
||||
|
||||
###############################################
|
||||
rect_colour_map = c("EMB" = "green"
|
||||
,"DSL" = "slategrey"
|
||||
, "CDL" = "navyblue"
|
||||
, "Ca" = "purple")
|
||||
|
||||
|
||||
rects <- data.frame(x = 1:6,
|
||||
colors = c("#ffd700" #gold
|
||||
, "#f0e68c" #khaki
|
||||
, "#da70d6"# orchid
|
||||
, "#ff1493"# deeppink
|
||||
, "#00BFC4" #, "#007d85" #blue
|
||||
, "#F8766D" )# red,
|
||||
)
|
||||
rects
|
||||
|
||||
rects$text = c("-ve Lig affinty"
|
||||
, "+ve Lig affinity"
|
||||
, "+ve PPI2 affinity"
|
||||
, "-ve PPI2 affinity"
|
||||
, "+ve stability"
|
||||
, "-ve stability")
|
||||
|
||||
|
||||
rects$numbers = c(38, 0, 22, 9, 108, 681)
|
||||
rects$num_labels = paste0("n=", rects$numbers)
|
||||
|
||||
rects
|
||||
|
||||
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
|
||||
|
||||
#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
|
||||
png(paste0(outdir_images, "test.png")
|
||||
, width = 0.5
|
||||
, height = 2.5
|
||||
, units = "in", res = 300)
|
||||
|
||||
ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
|
||||
geom_tile(width = 1, height = 1) + # make square tiles
|
||||
geom_text(color = "black", size = 1.5) + # add white text in the middle
|
||||
scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
|
||||
coord_fixed() + # make sure tiles are square
|
||||
coord_flip()+ scale_x_reverse() +
|
||||
# theme_void() # remove any axis markings
|
||||
theme_nothing() # remove any axis markings
|
||||
|
||||
|
||||
dev.off()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
##########################################################
|
||||
tile_map=data.frame(tile=c("EMB","DSL","CDL","Ca")
|
||||
,tile_colour =c("green","darkslategrey","navyblue","purple"))
|
||||
|
||||
|
||||
# great
|
||||
tile_colour_map = c("EMB" = "green"
|
||||
,"DSL" = "darkslategrey"
|
||||
, "CDL" = "navyblue"
|
||||
, "Ca" = "purple")
|
||||
|
||||
tile_legend=get_legend(
|
||||
|
||||
ggplot(tile_map, aes(factor(tile),y=0
|
||||
, colour=tile_colour
|
||||
, fill=tile_colour))+
|
||||
geom_tile() +
|
||||
theme(legend.direction="horizontal") +
|
||||
scale_colour_manual(name=NULL
|
||||
#, values = tile_map$tile_colour
|
||||
, values=tile_colour_map) +
|
||||
scale_fill_manual(name=NULL
|
||||
#,values=tile_map$tile_colour
|
||||
, values = tile_colour_map)
|
||||
)
|
||||
#############################################################
|
||||
|
||||
|
||||
###############################################
|
||||
library(ggplot2)
|
||||
library(viridis)
|
||||
library(hrbrthemes)
|
||||
|
||||
ggplot(str_df_short, aes(fill=effect_colours,x=effect_type)) +
|
||||
geom_bar() +
|
||||
|
||||
scale_fill_viridis(discrete = T) +
|
||||
ggtitle("Studying 4 species..")
|
||||
####################################################
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,366 +0,0 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#source("~/git/LSHTM_analysis/config/alr.R")
|
||||
source("~/git/LSHTM_analysis/config/embb.R")
|
||||
#source("~/git/LSHTM_analysis/config/katg.R")
|
||||
#source("~/git/LSHTM_analysis/config/gid.R")
|
||||
#source("~/git/LSHTM_analysis/config/pnca.R")
|
||||
#source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
|
||||
# get plotting dfs
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
|
||||
####################################################
|
||||
|
||||
# ggpairs wrapper
|
||||
|
||||
# ggpairs wrapper: pairwise scatter/correlation matrix coloured by drug
# sensitivity.
#
# plot_df : data frame containing a `dst_mode` column. Every column except
#           the last one is plotted (assumes `dst_mode` is the last column,
#           as in the callers in this script -- confirm for new callers).
#
# Returns the ggmatrix object produced by GGally::ggpairs() with manual
# colour/fill scales and a bold text theme applied.
my_gg_pairs=function(plot_df){
  ggpairs(plot_df,
          # seq_len() avoids the c(1, 0) sequence that 1:(n-1) yields for n == 1
          columns = seq_len(ncol(plot_df) - 1),
          upper = list(continuous = wrap('cor',
                                         method = "spearman",
                                         title="ρ",
                                         digits=2,
                                         title_args=c(colour="black")
                                         )
                       ),
          lower = list(
            continuous = wrap("points", alpha = 0.7, size=0.5),
            combo = wrap("dot", alpha = 0.7, size=0.5)
          ),
          # Refer to the column by bare name so the mapping is evaluated
          # against the data each panel receives; `plot_df$dst_mode` inside
          # aes() bypasses ggplot's data masking.
          # dst_mode == 0 is labelled "S", anything else "R".
          aes(colour = factor(ifelse(dst_mode==0, "S", "R")), alpha = 0.5),
          title="Stability") +

    # factor levels sort as "R", "S", so "R" -> red and "S" -> blue
    scale_colour_manual(values = c("red", "blue")) +
    scale_fill_manual(values = c("red", "blue")) +
    theme(
      text = element_text(size=12, face="bold")
    )
}
|
||||
|
||||
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
|
||||
|
||||
#=======
|
||||
# Input
|
||||
#=======
|
||||
merged_df3 = as.data.frame(merged_df3)
|
||||
corr_plotdf = corr_data_extract(merged_df3
|
||||
, gene = gene
|
||||
, drug = drug
|
||||
, extract_scaled_cols = F)
|
||||
colnames(corr_plotdf)
|
||||
|
||||
if (all(colnames(corr_df_m3_f) == colnames(corr_plotdf))){
|
||||
cat("PASS: corr plot colnames match for dashboard")
|
||||
}else{
|
||||
stop("Abort: corr plot colnames DO NOT match for dashboard")
|
||||
}
|
||||
|
||||
#corr_plotdf = corr_df_m3_f #for downstream code
|
||||
|
||||
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
|
||||
aff_dist_cols
|
||||
|
||||
|
||||
static_cols = c("Log10(MAF)"
|
||||
, "Log10(OR)"
|
||||
#, "-Log10(P)"
|
||||
)
|
||||
|
||||
#================
|
||||
# stability
|
||||
#================
|
||||
# aff_dist_cols: the distance columns (names containing "Dist", e.g. Lig-Dist and PPI-Dist)
|
||||
corr_ps_colnames = c(static_cols
|
||||
, "DUET"
|
||||
, "FoldX"
|
||||
, "DeepDDG"
|
||||
, "Dynamut2"
|
||||
, aff_dist_cols
|
||||
, "dst_mode")
|
||||
|
||||
if (all(corr_ps_colnames%in%colnames(corr_plotdf))){
|
||||
cat("PASS: all colnames exist for correlation")
|
||||
}else{
|
||||
stop("Abort: all colnames DO NOT exist for correlation")
|
||||
}
|
||||
# Subset to the stability correlation columns.
corr_df_ps = corr_plotdf[, corr_ps_colnames]

# Number of rows with a non-missing odds ratio.
# The column is named "Log10(OR)" (see static_cols); the original `Log(OR)`
# does not exist, so `$` returned NULL and the NA count was always 0.
complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps[["Log10(OR)"]]))
cat("\nComplete muts for Stability for", gene, ":", complete_obs_ps)
|
||||
|
||||
color_coln = which(colnames(corr_df_ps) == "dst_mode")
|
||||
#end = which(colnames(corr_df_ps) == drug)
|
||||
#ncol_omit = 2
|
||||
#corr_end = end-ncol_omit
|
||||
corr_end = color_coln-1
|
||||
|
||||
#------------------------
|
||||
# Output: stability corrP
|
||||
#------------------------
|
||||
corr_psP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_corr_stability.svg" )
|
||||
|
||||
cat("Corr plot stability with coloured dots:", corr_psP)
|
||||
svg(corr_psP, width = 15, height = 15)
|
||||
|
||||
my_corr_pairs(corr_data_all = corr_df_ps
|
||||
, corr_cols = colnames(corr_df_ps[1:corr_end])
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode"
|
||||
, categ_colour = c("red", "blue")
|
||||
, density_show = F
|
||||
, hist_col = "coral4"
|
||||
, dot_size = 1.6
|
||||
, ats = 1.5
|
||||
, corr_lab_size =2.5
|
||||
, corr_value_size = 1)
|
||||
|
||||
dev.off()
|
||||
#===============
|
||||
# CONSERVATION
|
||||
#==============
|
||||
corr_conservation_cols = c( static_cols
|
||||
, "ConSurf"
|
||||
, "SNAP2"
|
||||
, "PROVEAN"
|
||||
, aff_dist_cols
|
||||
, "dst_mode"
|
||||
, drug)
|
||||
|
||||
if (all(corr_conservation_cols%in%colnames(corr_plotdf))){
|
||||
cat("PASS: all colnames exist for ConSurf-correlation")
|
||||
}else{
|
||||
stop("Abort: all colnames DO NOT exist for ConSurf-correlation")
|
||||
}
|
||||
|
||||
# Subset to the conservation correlation columns.
corr_df_cons = corr_plotdf[, corr_conservation_cols]

# Number of rows with a non-missing odds ratio.
# The column is named "Log10(OR)" (see static_cols); the original `Log(OR)`
# does not exist, so `$` returned NULL and the NA count was always 0.
complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons[["Log10(OR)"]]))
cat("\nComplete muts for Conservation for", gene, ":", complete_obs_cons)
|
||||
|
||||
color_coln = which(colnames(corr_df_cons) == "dst_mode")
|
||||
# end = which(colnames(corr_df_cons) == drug)
|
||||
# ncol_omit = 2
|
||||
# corr_end = end-ncol_omit
|
||||
corr_end = color_coln-1
|
||||
|
||||
|
||||
#---------------------------
|
||||
# Output: Conservation corrP
|
||||
#----------------------------
|
||||
corr_consP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_corr_conservation.svg" )
|
||||
|
||||
cat("Corr plot conservation coloured dots:", corr_consP)
|
||||
svg(corr_consP, width = 10, height = 10)
|
||||
|
||||
my_corr_pairs(corr_data_all = corr_df_cons
|
||||
, corr_cols = colnames(corr_df_cons[1:corr_end])
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_cons[color_coln]) #"dst_mode"
|
||||
, categ_colour = c("red", "blue")
|
||||
, density_show = F
|
||||
, hist_col = "coral4"
|
||||
, dot_size =1.1
|
||||
, ats = 1.5
|
||||
, corr_lab_size = 1.8
|
||||
, corr_value_size = 1)
|
||||
|
||||
dev.off()
|
||||
|
||||
#####################################################
|
||||
#DistCutOff = 10
|
||||
#LigDist_colname # = "ligand_distance" # from globals
|
||||
#ppi2Dist_colname = "interface_dist"
|
||||
#naDist_colname = "TBC"
|
||||
#####################################################
|
||||
|
||||
#================
|
||||
# ligand affinity
|
||||
#================
|
||||
corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
|
||||
|
||||
corr_lig_colnames = c(static_cols
|
||||
, "mCSM-lig"
|
||||
, "mmCSM-lig"
|
||||
, "dst_mode")
|
||||
#, drug)
|
||||
|
||||
if (all(corr_lig_colnames%in%colnames(corr_plotdf))){
|
||||
cat("PASS: all colnames exist for Lig-correlation")
|
||||
}else{
|
||||
stop("Abort: all colnames DO NOT exist for Lig-correlation")
|
||||
}
|
||||
|
||||
# Subset to the ligand-affinity correlation columns.
# NOTE(review): this is taken from corr_plotdf, so the Lig-Dist < DistCutOff
# filter applied to corr_df_lig just above is discarded -- confirm intent.
corr_df_lig = corr_plotdf[, corr_lig_colnames]

# Number of rows with a non-missing odds ratio.
# The column is named "Log10(OR)" (see static_cols); the original `Log(OR)`
# does not exist, so `$` returned NULL and the NA count was always 0.
complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig[["Log10(OR)"]]))
cat("\nComplete muts for lig affinity for", gene, ":", complete_obs_lig)
|
||||
|
||||
color_coln = which(colnames(corr_df_lig) == "dst_mode")
|
||||
# end = which(colnames(corr_df_lig) == drug)
|
||||
# ncol_omit = 2
|
||||
# corr_end = end-ncol_omit
|
||||
corr_end = color_coln-1
|
||||
|
||||
#------------------------
|
||||
# Output: ligand corrP
|
||||
#------------------------
|
||||
corr_ligP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_corr_lig.svg" )
|
||||
|
||||
cat("Corr plot affinity with coloured dots:", corr_ligP)
|
||||
svg(corr_ligP, width = 10, height = 10)
|
||||
|
||||
my_corr_pairs(corr_data_all = corr_df_lig
|
||||
, corr_cols = colnames(corr_df_lig[1:corr_end])
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_lig[color_coln]) #"dst_mode"
|
||||
, categ_colour = c("red", "blue")
|
||||
, density_show = F
|
||||
, hist_col = "coral4"
|
||||
, dot_size = 2
|
||||
, ats = 1.5
|
||||
, corr_lab_size =3
|
||||
, corr_value_size = 1)
|
||||
dev.off()
|
||||
####################################################
|
||||
#================
|
||||
# ppi2 affinity
|
||||
#================
|
||||
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
|
||||
corr_df_ppi2 = corr_plotdf[corr_plotdf["PPI-Dist"]<DistCutOff,]
|
||||
|
||||
corr_ppi2_colnames = c(static_cols
|
||||
, "mCSM-PPI2"
|
||||
, "dst_mode"
|
||||
, drug)
|
||||
|
||||
if (all(corr_ppi2_colnames%in%colnames(corr_plotdf))){
|
||||
cat("PASS: all colnames exist for mcsm-ppi2 correlation")
|
||||
}else{
|
||||
stop("Abort: all colnames DO NOT exist for mcsm-ppi2 correlation")
|
||||
}
|
||||
|
||||
corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames]
|
||||
# Rows with a non-missing odds ratio; the column is "Log10(OR)" (see static_cols), not `Log(OR)`.
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2[["Log10(OR)"]]))
|
||||
cat("\nComplete muts for ppi2 affinity for", gene, ":", complete_obs_ppi2)
|
||||
|
||||
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
|
||||
# end = which(colnames(corr_df_ppi2) == drug)
|
||||
# ncol_omit = 2
|
||||
# corr_end = end-ncol_omit
|
||||
corr_end = color_coln-1
|
||||
|
||||
#------------------------
|
||||
# Output: ppi2 corrP
|
||||
#------------------------
|
||||
corr_ppi2P = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_corr_ppi2.svg" )
|
||||
|
||||
cat("Corr plot ppi2 with coloured dots:", corr_ppi2P)
|
||||
svg(corr_ppi2P, width = 10, height = 10)
|
||||
|
||||
my_corr_pairs(corr_data_all = corr_df_ppi2
|
||||
, corr_cols = colnames(corr_df_ppi2[1:corr_end])
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_ppi2[color_coln]) #"dst_mode"
|
||||
, categ_colour = c("red", "blue")
|
||||
, density_show = F
|
||||
, hist_col = "coral4"
|
||||
, dot_size = 2
|
||||
, ats = 1.5
|
||||
, corr_lab_size = 3
|
||||
, corr_value_size = 1)
|
||||
|
||||
dev.off()
|
||||
}
|
||||
|
||||
# FIXME: ADD distance
|
||||
#==================
|
||||
# mCSM-NA affinity
|
||||
#==================
|
||||
#================
|
||||
# NA affinity
|
||||
#================
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
# Start from the full correlation frame, as the ligand and PPI2 sections do;
# the original subset corr_df_na from itself before it was ever assigned.
corr_df_na = corr_plotdf[corr_plotdf["NA-Dist"]<DistCutOff,]
|
||||
|
||||
corr_na_colnames = c(static_cols
|
||||
, "mCSM-NA"
|
||||
, "dst_mode"
|
||||
, drug)
|
||||
|
||||
if (all(corr_na_colnames%in%colnames(corr_plotdf))){
|
||||
cat("PASS: all colnames exist for mcsm-NA-correlation")
|
||||
}else{
|
||||
stop("Abort: all colnames DO NOT exist for mcsm-NA-correlation")
|
||||
}
|
||||
|
||||
corr_na_colnames%in%colnames(corr_plotdf)
|
||||
corr_df_na = corr_plotdf[, corr_na_colnames]
|
||||
# Rows with a non-missing odds ratio; the column is "Log10(OR)" (see static_cols), not `Log(OR)`.
complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na[["Log10(OR)"]]))
|
||||
cat("\nComplete muts for NA affinity for", gene, ":", complete_obs_na)
|
||||
|
||||
color_coln = which(colnames(corr_df_na) == "dst_mode")
|
||||
# end = which(colnames(corr_df_na) == drug)
|
||||
# ncol_omit = 2
|
||||
# corr_end = end-ncol_omit
|
||||
corr_end = color_coln-1
|
||||
|
||||
#------------------------
|
||||
# Output: mCSM-NA corrP
|
||||
#------------------------
|
||||
corr_naP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_corr_na.svg" )
|
||||
|
||||
cat("Corr plot mCSM-NA with coloured dots:", corr_naP)
|
||||
|
||||
svg(corr_naP, width = 10, height = 10)
|
||||
my_corr_pairs(corr_data_all = corr_df_na
|
||||
, corr_cols = colnames(corr_df_na[1:corr_end])
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_na[color_coln]) #"dst_mode"
|
||||
, categ_colour = c("red", "blue")
|
||||
, density_show = F
|
||||
, hist_col = "coral4"
|
||||
, dot_size = 2
|
||||
, ats = 1.5
|
||||
, corr_lab_size = 3
|
||||
, corr_value_size = 1)
|
||||
|
||||
dev.off()
|
||||
}
|
||||
####################################################
|
||||
#===============
|
||||
#ggpairs:
|
||||
#================
|
||||
#corr_df_ps$dst_mode = ifelse(corr_df_cons$dst_mode=="1", "R", "S")
|
||||
corr_plotting_df = corr_df_ps
|
||||
|
||||
|
||||
# Write the ggpairs matrix to an SVG file.
# grDevices::svg() takes width/height in inches and has no `units` or `res`
# arguments (those belong to png()); passing them aborts with
# "unused arguments".
svg('~/tmp/foo.svg',
    width=10,
    height=10)
# Explicit print() so the plot is drawn even when the script is source()d
# rather than run at top level.
print(my_gg_pairs(corr_plotting_df))
dev.off()
|
||||
|
||||
png('~/tmp/foo.png',
|
||||
width=10,
|
||||
height=10,
|
||||
units="in",
|
||||
res=300)
|
||||
my_gg_pairs(corr_plotting_df)
|
||||
dev.off()
|
||||
|
||||
|
||||
#
|
|
@ -21,12 +21,19 @@ png('~/tmp/foo.png',
|
|||
units="in",
|
||||
res=300)
|
||||
#
|
||||
corr_plotting_df = corr_df_ps
|
||||
#corr_plotting_df = corr_df_ps
|
||||
colnames(corr_plotdf)
|
||||
corr_plotting_df = subset(corr_plotdf, select = -c(ethambutol,`Log10(OR)`,`-Log10(P)`, ASA, RSA, KD, RD
|
||||
, FoldX
|
||||
, DeepDDG
|
||||
, Dynamut2 ))
|
||||
colnames(corr_plotting_df)
|
||||
#ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
|
||||
ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)),
|
||||
|
||||
|
||||
ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
|
||||
upper = list(continuous = wrap('cor',
|
||||
method = "spearman",
|
||||
use = "pairwise.complete.obs",
|
||||
title="ρ",
|
||||
digits=2,
|
||||
title_args=c(colour="black")
|
||||
|
@ -36,7 +43,7 @@ ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
|
|||
continuous = wrap("points", alpha = 0.7, size=0.5),
|
||||
combo = wrap("dot", alpha = 0.7, size=0.5)
|
||||
),
|
||||
aes(colour = factor(ifelse(corr_plotting_df$dst_mode==0, "S", "R")), alpha = 0.5),
|
||||
aes(colour = factor(ifelse(dst_mode==0, "S", "R")), alpha = 0.5),
|
||||
title="Stability") +
|
||||
|
||||
scale_colour_manual(values = c("red", "blue")) +
|
||||
|
|
|
@ -1,51 +1,88 @@
|
|||
source("~/git/LSHTM_analysis/config/embb.R")
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
|
||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
#source("~/git/LSHTM_analysis/config/embb.R")
|
||||
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
|
||||
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||
|
||||
my_gg_pairs=function(plot_df){
|
||||
my_gg_pairs=function(plot_df, plot_title
|
||||
, tt_args_size = 2.5
|
||||
, gp_args_size = 2.5){
|
||||
ggpairs(plot_df,
|
||||
columns = 1:(ncol(plot_df)-1),
|
||||
upper = list(
|
||||
continuous = wrap('cor',
|
||||
continuous = wrap('cor', # ggally_cor()
|
||||
method = "spearman",
|
||||
use = "pairwise.complete.obs",
|
||||
title="ρ",
|
||||
digits=2,
|
||||
justify_labels = "left",
|
||||
title_args=c(colour="black")
|
||||
justify_labels = "centre",
|
||||
#title_args=c(colour="black"),
|
||||
title_args=c(size=tt_args_size),#2.5
|
||||
group_args=c(size=gp_args_size)#2.5
|
||||
)
|
||||
),
|
||||
lower = list(
|
||||
continuous = wrap("points",
|
||||
alpha = 0.7,
|
||||
size=0.5),
|
||||
size=0.125),
|
||||
combo = wrap("dot",
|
||||
alpha = 0.7,
|
||||
size=0.5)
|
||||
size=0.125)
|
||||
),
|
||||
aes(colour = factor(ifelse(plot_df$dst_mode==0,
|
||||
aes(colour = factor(ifelse(dst_mode==0,
|
||||
"S",
|
||||
"R") ),
|
||||
alpha = 0.5),
|
||||
title="Stability") +
|
||||
title=plot_title) +
|
||||
|
||||
scale_colour_manual(values = c("red", "blue")) +
|
||||
scale_fill_manual(values = c("red", "blue")) +
|
||||
theme(text = element_text(size=12,
|
||||
face="bold") )
|
||||
scale_fill_manual(values = c("red", "blue")) #+
|
||||
# theme(text = element_text(size=7,
|
||||
# face="bold"))
|
||||
}
|
||||
|
||||
DistCutOff = 10
|
||||
###########################################################################
|
||||
geneL_normal = c("pnca")
|
||||
geneL_na = c("gid", "rpob")
|
||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||
|
||||
merged_df3 = as.data.frame(merged_df3)
|
||||
|
||||
corr_plotdf = corr_data_extract(merged_df3
|
||||
, gene = gene
|
||||
, drug = drug
|
||||
, extract_scaled_cols = F)
|
||||
|
||||
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
|
||||
static_cols = c("Log10(MAF)"
|
||||
, "Log10(OR)")
|
||||
static_cols = c("Log10(MAF)")
|
||||
#, "Log10(OR)")
|
||||
############################################################
|
||||
#=============================================
|
||||
# Creating masked df for affinity data
|
||||
#=============================================
|
||||
corr_affinity_df = corr_plotdf
|
||||
#----------------------
|
||||
# Mask affinity columns
|
||||
#-----------------------
|
||||
corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mCSM-lig"]=0
|
||||
corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mmCSM-lig"]=0
|
||||
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
corr_affinity_df[corr_affinity_df["PPI-Dist"]>DistCutOff,"mCSM-PPI2"]=0
|
||||
}
|
||||
|
||||
# if (tolower(gene)%in%geneL_na){
|
||||
# corr_affinity_df[corr_affinity_df["NA-Dist"]>DistCutOff,"mCSM-NA"]=0
|
||||
# }
|
||||
|
||||
# count 0
|
||||
#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100
|
||||
unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0)
|
||||
unmasked_vals
|
||||
|
||||
##########################################################
|
||||
#================
|
||||
# Stability
|
||||
#================
|
||||
corr_ps_colnames = c(static_cols
|
||||
, "DUET"
|
||||
, "FoldX"
|
||||
|
@ -54,14 +91,13 @@ corr_ps_colnames = c(static_cols
|
|||
, aff_dist_cols
|
||||
, "dst_mode")
|
||||
corr_df_ps = corr_plotdf[, corr_ps_colnames]
|
||||
complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps$`Log(OR)`))
|
||||
color_coln = which(colnames(corr_df_ps) == "dst_mode")
|
||||
corr_end = color_coln-1
|
||||
|
||||
# Plot #1
|
||||
plot_corr_df_ps = my_gg_pairs(corr_df_ps)
|
||||
|
||||
|
||||
plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability features")
|
||||
##########################################################
|
||||
#================
|
||||
# Conservation
|
||||
#================
|
||||
corr_conservation_cols = c( static_cols
|
||||
, "ConSurf"
|
||||
, "SNAP2"
|
||||
|
@ -71,74 +107,66 @@ corr_conservation_cols = c( static_cols
|
|||
)
|
||||
|
||||
corr_df_cons = corr_plotdf[, corr_conservation_cols]
|
||||
complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons$`Log(OR)`))
|
||||
color_coln = which(colnames(corr_df_cons) == "dst_mode")
|
||||
corr_end = color_coln-1
|
||||
|
||||
# Plot #2
|
||||
plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation features")
|
||||
|
||||
#my_gg_pairs(corr_df_cons)
|
||||
plot_corr_df_cons = my_gg_pairs(corr_df_cons)
|
||||
##########################################################
|
||||
#================
|
||||
# Affinity: lig, ppi and na as applicable
|
||||
#================
|
||||
#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
|
||||
common_aff_colnames = c("mCSM-lig"
|
||||
, "mmCSM-lig")
|
||||
|
||||
if (tolower(gene)%in%geneL_normal){
|
||||
aff_colnames = common_aff_colnames
|
||||
}
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
aff_colnames = c(common_aff_colnames, "mCSM-PPI2")
|
||||
}
|
||||
|
||||
corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
|
||||
corr_lig_colnames = c(static_cols
|
||||
, "mCSM-lig"
|
||||
, "mmCSM-lig"
|
||||
, "dst_mode")
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
aff_colnames = c(common_aff_colnames, "mCSM-NA")
|
||||
}
|
||||
|
||||
corr_df_lig = corr_plotdf[, corr_lig_colnames]
|
||||
# building final affinity colnames for correlation
|
||||
corr_aff_colnames = c(static_cols
|
||||
, aff_colnames
|
||||
, "dst_mode") # imp
|
||||
|
||||
corr_df_aff = corr_affinity_df[, corr_aff_colnames]
|
||||
colnames(corr_df_aff)
|
||||
|
||||
complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig$`Log(OR)`))
|
||||
color_coln = which(colnames(corr_df_lig) == "dst_mode")
|
||||
corr_end = color_coln-1
|
||||
# Plot #3
|
||||
plot_corr_df_aff = my_gg_pairs(corr_df_aff, plot_title="Affinity features", tt_args_size = 4, gp_args_size =4)
|
||||
|
||||
#my_gg_pairs(corr_df_lig)
|
||||
plot_corr_df_lig = my_gg_pairs(corr_df_lig)
|
||||
#=============
|
||||
# combine
|
||||
#=============
|
||||
|
||||
corr_df_ppi2 = corr_plotdf[corr_plotdf["PPI-Dist"]<DistCutOff,]
|
||||
corr_ppi2_colnames = c(static_cols
|
||||
, "mCSM-PPI2"
|
||||
, "dst_mode"
|
||||
)
|
||||
corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames]
|
||||
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2$`Log(OR)`))
|
||||
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
|
||||
corr_end = color_coln-1
|
||||
#png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
|
||||
png(paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_CorrAB.png"), height = 6, width=11.75, unit="in",res=300)
|
||||
|
||||
# NOTE: DELETE LOG OR FROM CORRELATION PLOTS!!!!!
|
||||
# NOTE: ALSO MAYBE DELETE DISTANCES AS WELL
|
||||
# NOTE: http://ggobi.github.io/ggally/reference/ggally_cor.html
|
||||
|
||||
# "***" if the p-value is < 0.001
|
||||
# "**" if the p-value is < 0.01
|
||||
# "*" if the p-value is < 0.05
|
||||
# "." if the p-value is < 0.10
|
||||
# "" otherwise
|
||||
#
|
||||
|
||||
# Plot #4
|
||||
#my_gg_pairs(corr_df_ppi2)
|
||||
plot_corr_df_ppi2 = my_gg_pairs(corr_df_ppi2)
|
||||
|
||||
|
||||
# corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,]
|
||||
# corr_na_colnames = c(static_cols
|
||||
# , "mCSM-NA"
|
||||
# , "dst_mode"
|
||||
# )
|
||||
#
|
||||
# corr_df_na = corr_plotdf[, corr_na_colnames]
|
||||
# complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na$`Log(OR)`))
|
||||
# color_coln = which(colnames(corr_df_na) == "dst_mode")
|
||||
# corr_end = color_coln-1
|
||||
#
|
||||
# # Plot #5
|
||||
# #my_gg_pairs(corr_df_na)
|
||||
# plot_corr_df_na = my_gg_pairs(corr_df_na)
|
||||
# Draw the combined correlation panels to /tmp/gg_pairs_all.png (11.75 x 8 in, 300 dpi).
# NOTE(review): if the `_CorrAB.png` device opened earlier is still open, this png()
# call opens a second device and the single dev.off() below closes only this one.
# NOTE(review): both grids below go to the same single-file device, so the later page
# presumably replaces the earlier one -- confirm which layout is intended.
png("/tmp/gg_pairs_all.png", height = 8, width=11.75, units="in",res=300)
|
||||
# 2 x 2 layout: stability and conservation on the top row, ligand and PPI2 below.
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),ggmatrix_gtable(plot_corr_df_cons),
|
||||
ggmatrix_gtable(plot_corr_df_lig),ggmatrix_gtable(plot_corr_df_ppi2),
|
||||
# rel_heights must be a single numeric vector with one weight per row; bare
# comma-separated values would be passed on as additional plots.
nrow=2, ncol=2, rel_heights = c(7,3))
|
||||
# Single-row layout of the stability and conservation panels, auto-labelled "A", "B".
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
|
||||
ggmatrix_gtable(plot_corr_df_cons),
|
||||
# ggmatrix_gtable(plot_corr_df_aff),
|
||||
# nrow=1, ncol=3, rel_heights = 7,7,3
|
||||
nrow=1,
|
||||
#rel_heights = 1,1
|
||||
labels = "AUTO",
|
||||
label_size = 12)
|
||||
# Close the device so the file is written to disk.
dev.off()
|
||||
|
||||
# Affinity correlation panel, written to its own file:
# <outdir_images><lower-case gene>_CorrC.png, 7 x 7 in at 300 dpi.
# `units` is spelled out in full rather than relying on partial matching of `unit`.
|
||||
#png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300)
|
||||
png(paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_CorrC.png"), height =7, width=7, units="in",res=300)
|
||||
|
||||
# Single panel labelled "C".
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff),
|
||||
labels = "C",
|
||||
label_size = 12)
|
||||
# Close the device so the file is written to disk.
dev.off()
|
||||
|
|
|
@ -154,6 +154,11 @@ for (i in unique(str_df$position) ){
|
|||
str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # strip the first ".<digits>" suffix from effect_type; such suffixed duplicates appear when there are exact duplicate values
|
||||
|
||||
# Show the column names (sanity check).
colnames(str_df)
|
||||
|
||||
# Check: rows for a handful of hand-picked positions, and the effect_type counts after the cleanup.
|
||||
str_df_check = str_df[str_df$position%in%c(24, 32,160, 303, 334),]
|
||||
table(str_df$effect_type)
|
||||
|
||||
#================
|
||||
# for Plots
|
||||
#================
|
||||
|
@ -161,9 +166,56 @@ str_df_short = str_df[, c("mutationinformation","position","sensitivity"
|
|||
, "effect_type"
|
||||
, "effect_sign")]
|
||||
|
||||
# Check: rows for a handful of hand-picked positions, then compare effect_type counts
# between the full and the short data frame and inspect the short frame's structure.
|
||||
str_df_check = str_df[str_df$position%in%c(24, 32,160, 303, 334),]
|
||||
table(str_df$effect_type)
|
||||
table(str_df_short$effect_type)
|
||||
table(str_df_short$effect_sign)
|
||||
str(str_df_short)
|
||||
|
||||
# Assign pe_outcome from the sign of the effect: values below 0 become "DD",
# all other non-missing values (including 0) become "SS"; NA stays NA.
|
||||
str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS")
|
||||
# Cross-check the outcome counts against the sign counts.
table(str_df_short$pe_outcome )
|
||||
table(str_df_short$effect_sign)
|
||||
|
||||
#==============
|
||||
# group effect type:
|
||||
# lig, ppi2, nuc. acid, stability
|
||||
#==============
|
||||
|
||||
# effect_type values that are collapsed into the "lig" group.
affcols = c("affinity_scaled", "mmcsm_lig_scaled")
|
||||
# effect_type values that are collapsed into the "ppi2" group.
ppi2_cols = c("mcsm_ppi2_scaled")
|
||||
# Nucleic-acid group is currently disabled, so no separate group is created for it below.
#nuc_na_cols = c("mcsm_a_scaled")
|
||||
|
||||
|
||||
# Step 1 (lig): label ligand-affinity effect types as "lig"; keep every other effect_type unchanged for now.
|
||||
table(str_df_short$effect_type)
|
||||
str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols
|
||||
, "lig"
|
||||
, str_df_short$effect_type)
|
||||
table(str_df_short$effect_grouped)
|
||||
|
||||
# Step 2 (ppi2): relabel the PPI2 effect types as "ppi2"; leave the rest as is.
|
||||
str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols
|
||||
, "ppi2"
|
||||
, str_df_short$effect_grouped)
|
||||
table(str_df_short$effect_grouped)
|
||||
|
||||
# Step 3 (stability): everything that is neither "lig" nor "ppi2" is labelled "stability".
# `!` applies to the whole %in% test because %in% binds tighter than `!`.
|
||||
str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig", "ppi2")
|
||||
, "stability"
|
||||
, str_df_short$effect_grouped)
|
||||
|
||||
table(str_df_short$effect_grouped)
|
||||
|
||||
# Combine outcome and group into one label of the form "<pe_outcome>_<effect_grouped>", e.g. "DD_lig".
|
||||
str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_"
|
||||
, str_df_short$effect_grouped)
|
||||
|
||||
table(str_df_short$pe_effect_outcome)
|
||||
|
||||
|
||||
|
||||
#####################################################################
|
||||
# Chimera: for colouring
|
||||
####################################################################
|
||||
|
||||
#-------------------------------------
|
||||
# get df with unique position
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue