generated ggpairs plots finally

This commit is contained in:
Tanushree Tunstall 2022-08-15 19:05:22 +01:00
parent b68841b337
commit a3e5283a9b
11 changed files with 657 additions and 939 deletions

View file

@ -143,6 +143,9 @@ site_snp_count_bp <- function (plotdf
#, legend.position = c(0.73,0.8) #, legend.position = c(0.73,0.8)
#, legend.text = element_text(size = leg_text_size) #, legend.text = element_text(size = leg_text_size)
#, legend.title = element_text(size = axis_label_size) #, legend.title = element_text(size = axis_label_size)
#, panel.grid.major = element_blank(),
#, panel.grid.minor = element_blank(),
, panel.grid = element_blank()
, plot.title = element_text(size = leg_text_size , plot.title = element_text(size = leg_text_size
, colour = title_colour , colour = title_colour
, hjust = 0.5) , hjust = 0.5)

View file

@ -56,6 +56,9 @@ stability_count_bp <- function(plotdf
, legend.position = leg_position , legend.position = leg_position
, legend.text = element_text(size = lts) , legend.text = element_text(size = lts)
, legend.title = element_text(size = ltis) , legend.title = element_text(size = ltis)
#, panel.grid.major = element_blank(),
#, panel.grid.minor = element_blank(),
, panel.grid = element_blank()
, legend.key.size = unit(lts,"pt") , legend.key.size = unit(lts,"pt")
, plot.title = element_text(size = als , plot.title = element_text(size = als
, colour = title_colour , colour = title_colour

View file

@ -4,22 +4,41 @@ library("ggforce")
#install.packages("gginference") #install.packages("gginference")
library(gginference) library(gginference)
library(ggpubr) library(ggpubr)
library(svglite)
################################################## ##################################################
#%% read data #%% read data
# DOME: read data using gene and drug combination # DOME: read data using gene and drug combination
# gene must be lowercase # gene must be lowercase
# tolower(gene) # tolower(gene)
################################################# ############################################################
#gene="pncA" #gene="pncA"
#drug="pyrazinamide" #drug="pyrazinamide"
#lineage_filename=paste0(tolower(gene),"_merged_df2.csv") #lineage_filename=paste0(tolower(gene),"_merged_df2.csv")
#lineage_data_path="~/git/Data/pyrazinamide/output" #lineage_data_path="~/git/Data/pyrazinamide/output"
df2 = read.csv(paste0(lineage_data_path,"/",lineage_filename)) #=============
# Data: Input
#==============
#source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/embb.R")
# source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
foo = as.data.frame(colnames(df2)) source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
cat("plots will output to:", outdir_images)
###########################################################
class(merged_df2)
foo = as.data.frame(colnames(merged_df2))
cols_to_subset = c('mutationinformation' cols_to_subset = c('mutationinformation'
, 'snp_frequency' , 'snp_frequency'
@ -36,7 +55,7 @@ cols_to_subset = c('mutationinformation'
#cols_to_subset%in%foo #cols_to_subset%in%foo
my_df = df2[ ,cols_to_subset] my_df = merged_df2[ ,cols_to_subset]
# r24p_embb = df_embb[df_embb$mutationinformation == "R24P",] # r24p_embb = df_embb[df_embb$mutationinformation == "R24P",]
# #tm = c("A102P", "M1T") # #tm = c("A102P", "M1T")
@ -73,10 +92,9 @@ table(my_df2$lineage)
sel_lineages2 = c("L1", "L2", "L3", "L4") sel_lineages2 = c("L1", "L2", "L3", "L4")
my_df2 = my_df2[my_df2$lineage%in%sel_lineages2,] my_df2 = my_df2[my_df2$lineage%in%sel_lineages2,]
table(my_df2$lineage)
sum(table(my_df2$lineage)) == nrow(my_df2) sum(table(my_df2$lineage)) == nrow(my_df2)
table(my_df2$lineage) table(my_df2$lineage)
table(my_df2$lineage, my_df2$sensitivity)
# %% # %%
# str(my_df2) # str(my_df2)
@ -85,6 +103,7 @@ table(my_df2$lineage)
#%% get only muts which belong to > 1 lineage and have different sensitivity classifications #%% get only muts which belong to > 1 lineage and have different sensitivity classifications
muts = unique(my_df2$mutationinformation) muts = unique(my_df2$mutationinformation)
cat ("Total unique muts in L1-L4", tolower(gene), ":", length(muts))
#----------------------------------------------- #-----------------------------------------------
# step 0 : get muts with more than one lineage # step 0 : get muts with more than one lineage
#----------------------------------------------- #-----------------------------------------------
@ -100,7 +119,6 @@ for (i in muts) {
} }
cat("\nGot:", length(lin_muts), "mutations belonging to >1 lineage with differing drug sensitivities") cat("\nGot:", length(lin_muts), "mutations belonging to >1 lineage with differing drug sensitivities")
#----------------------------------------------- #-----------------------------------------------
# step 1 : get other muts that do not have this # step 1 : get other muts that do not have this
#----------------------------------------------- #-----------------------------------------------
@ -111,7 +129,6 @@ cat("\nGot:", length(consist_muts), "mutations that are consistent")
# step 2: subset these muts for plotting # step 2: subset these muts for plotting
#----------------------------------------------- #-----------------------------------------------
plot_df = my_df2[my_df2$mutationinformation%in%lin_muts,] plot_df = my_df2[my_df2$mutationinformation%in%lin_muts,]
cat("\nnrow of plot_df:", nrow(plot_df)) cat("\nnrow of plot_df:", nrow(plot_df))
#----------------------------------------------- #-----------------------------------------------
@ -125,7 +142,9 @@ for (i in lin_muts) {
s_tab = table(s_mut$lineage, s_mut$sens2) s_tab = table(s_mut$lineage, s_mut$sens2)
#print(s_tab) #print(s_tab)
#ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3) #ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3)
ft_pvalue_i = fisher.test(s_tab)$p.value ft_pvalue_i = fisher.test(s_tab
#, workspace=2e9
, simulate.p.value=TRUE,B=1e7)$p.value
#print(ft_pvalue_i) #print(ft_pvalue_i)
plot_df$pval[plot_df$mutationinformation == i] <- ft_pvalue_i plot_df$pval[plot_df$mutationinformation == i] <- ft_pvalue_i
#print(s_tab) #print(s_tab)
@ -155,8 +174,6 @@ plot_df
head(plot_df) head(plot_df)
table(plot_df$pvalR<0.05) table(plot_df$pvalR<0.05)
# format p value # format p value
# TODO: add case statement for correct pvalue formatting # TODO: add case statement for correct pvalue formatting
#plot_df$pvalF = ifelse(plot_df$pval <= 0.0001, paste0(round(plot_df$pval, 3), "**** "), plot_df$pval ) #plot_df$pvalF = ifelse(plot_df$pval <= 0.0001, paste0(round(plot_df$pval, 3), "**** "), plot_df$pval )
@ -233,6 +250,7 @@ cat("\nGot:", sig_muts, "mutations that are significant")
plot_df_ns = plot_df2[plot_df2$pvalR>0.05,] plot_df_ns = plot_df2[plot_df2$pvalR>0.05,]
ns_muts = length(unique(plot_df_ns$mutationinformation)) ns_muts = length(unique(plot_df_ns$mutationinformation))
cat("\nGot:", ns_muts, "mutations that are NOT significant") cat("\nGot:", ns_muts, "mutations that are NOT significant")
p_title = gene p_title = gene
ts = 8 ts = 8
gls = 3 gls = 3
@ -244,7 +262,7 @@ gls = 3
#3) Add *: Extend yaxis for each plot to allow geom_label to have space (or see #3) Add *: Extend yaxis for each plot to allow geom_label to have space (or see
# if this is self-resolving with facet_wrap_paginate()) # if this is self-resolving with facet_wrap_paginate())
#================================================ #================================================
#svg(paste0(outdir_images, "embb_linDS.svg"), width = 6, height = 10 ) # old-school square 4:3 CRT shape 1.3:1 #svg(paste0(outdir_images, tolower(gene), "_linDS.svg"), width = 6, height = 10 ) # old-school square 4:3 CRT shape 1.3:1
ds_s = ggplot(plot_df_sig, aes(x = lineage ds_s = ggplot(plot_df_sig, aes(x = lineage
, fill = sens2)) + , fill = sens2)) +
geom_bar(stat = 'count') + geom_bar(stat = 'count') +
@ -280,7 +298,7 @@ ds_s = ggplot(plot_df_sig, aes(x = lineage
################################### ###################################
#ns muts #ns muts
#svg(paste0(outdir_images, "embb_linDS_ns.svg"), width =10 , height = 8) # old-school square 4:3 CRT shape 1.3:1 #svg(paste0(outdir_images, tolower(gene), "_linDS_ns.svg"), width =10 , height = 8) # old-school square 4:3 CRT shape 1.3:1
ds_ns = ggplot(plot_df_ns, aes(x = lineage ds_ns = ggplot(plot_df_ns, aes(x = lineage
, fill = sens2)) + , fill = sens2)) +
geom_bar(stat = 'count') + geom_bar(stat = 'count') +
@ -309,31 +327,57 @@ ds_ns = ggplot(plot_df_ns, aes(x = lineage
labs(title = paste0(p_title, ": sensitivity by lineage") labs(title = paste0(p_title, ": sensitivity by lineage")
, y = 'Sample Count') , y = 'Sample Count')
#dev.off() #dev.off()
#####################################################################
#===================
# Combine output
#====================
# svg(paste0(outdir_images, tolower(gene), "_linDS_CL.svg")
# svg(paste0(outdir_images, "embb_linDS_CL.svg")
# , width = 11 # , width = 11
# , height = 8 ) # , height = 8 )
png(paste0(outdir_images, "embb_linDS_CL.png") png(paste0(outdir_images, tolower(gene), "_linDS_CL2.png")
, width = 11.75 , width = 11.75*1.15
, height = 8, units = "in", res = 300 ) , height = 8, units = "in", res = 300 )
cowplot::plot_grid(ds_s, ds_ns cowplot::plot_grid(ds_s, ds_ns
, ncol = 2 , ncol = 2
,rel_widths = c(1,2) #, align = "hv"
, rel_widths = c(1,2.5)
, labels = "AUTO") , labels = "AUTO")
dev.off() dev.off()
########################################################################
#==================
# Summary output
#==================
cat ("Total unique muts in ALL samples for", tolower(gene), ":", length(unique(merged_df2$mutationinformation)))
other_lin_muts = unique(merged_df2$mutationinformation)[!unique(merged_df2$mutationinformation)%in%unique(my_df2$mutationinformation)]
cat ("Total unique muts NOT in L1-L4:", length(other_lin_muts))
cat("These are:\n", other_lin_muts)
other_lin_muts_df = merged_df2[merged_df2$mutationinformation%in%other_lin_muts,]
if ( length(unique(other_lin_muts_df$mutationinformation)) == length(other_lin_muts)) {
cat("\nPASS: other lin muts extracted")
}else{
stop("\nAbort: other lin muts numbers mismatch")
}
table(other_lin_muts_df$mutationinformation, other_lin_muts_df$lineage)
cat("\n==============================================\n")
cat ("Total samples L1-L4:", nrow(my_df2))
table(my_df2$lineage)
table(my_df2$lineage, my_df2$sensitivity)
cat ("Total unique muts in L1-L4", tolower(gene), ":", length(muts))
cat("\nGot:", length(lin_muts), "mutations belonging to >1 lineage with differing drug sensitivities")
cat("\nGot:", sig_muts, "mutations that are significant"
, "\nThese are:", unique(plot_df_sig$mutationinformation))
#geom_text(aes(label = paste0("p=",pvalF), x = 2.5, ypos_label+1))# + cat("\nGot:", ns_muts, "mutations that are NOT significant"
, "\nThese are:", unique(plot_df_ns$mutationinformation))
#geom_segment(aes(x = 1, y = ypos_label+0.5, xend = 4, yend = ypos_label+0.5)) cat("\n==============================================\n")
#geom_hline(data = lin_muts_dfM, aes(yintercept=ypos_label+0.5))
#geom_bracket(data=lin_muts_dfM, aes(xmin = 1, xmax = 4, y.position = ypos_label+0.5, label=''))

View file

@ -109,137 +109,137 @@ merged_df3 = all_plot_dfs[[2]]
#################################################################### ####################################################################
# Data for logoplots # Data for logoplots
#################################################################### ####################################################################
#
source(paste0(plot_script_path, "logo_data_msa.R")) # source(paste0(plot_script_path, "logo_data_msa.R"))
s1 = c("\nSuccessfully sourced logo_data_msa.R") # s1 = c("\nSuccessfully sourced logo_data_msa.R")
cat(s1) # cat(s1)
#
#################################################################### # ####################################################################
# Data for DM OM Plots: WF and LF dfs # # Data for DM OM Plots: WF and LF dfs
# My function: dm_om_wf_lf_data() # # My function: dm_om_wf_lf_data()
# location: scripts/functions/dm_om_data.R # # location: scripts/functions/dm_om_data.R
#source("other_plots_data.R") # #source("other_plots_data.R")
#################################################################### # ####################################################################
#
#source(paste0(plot_script_path, "dm_om_data.R")) # calling the function directly instead # #source(paste0(plot_script_path, "dm_om_data.R")) # calling the function directly instead
geneL_normal = c("pnca") # geneL_normal = c("pnca")
geneL_na = c("gid", "rpob") # geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob") # geneL_ppi2 = c("alr", "embb", "katg", "rpob")
#
all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene) # all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)
#
wf_duet = all_dm_om_df[['wf_duet']] # wf_duet = all_dm_om_df[['wf_duet']]
lf_duet = all_dm_om_df[['lf_duet']] # lf_duet = all_dm_om_df[['lf_duet']]
#
wf_mcsm_lig = all_dm_om_df[['wf_mcsm_lig']] # wf_mcsm_lig = all_dm_om_df[['wf_mcsm_lig']]
lf_mcsm_lig = all_dm_om_df[['lf_mcsm_lig']] # lf_mcsm_lig = all_dm_om_df[['lf_mcsm_lig']]
#
wf_foldx = all_dm_om_df[['wf_foldx']] # wf_foldx = all_dm_om_df[['wf_foldx']]
lf_foldx = all_dm_om_df[['lf_foldx']] # lf_foldx = all_dm_om_df[['lf_foldx']]
#
wf_deepddg = all_dm_om_df[['wf_deepddg']] # wf_deepddg = all_dm_om_df[['wf_deepddg']]
lf_deepddg = all_dm_om_df[['lf_deepddg']] # lf_deepddg = all_dm_om_df[['lf_deepddg']]
#
wf_dynamut2 = all_dm_om_df[['wf_dynamut2']] # wf_dynamut2 = all_dm_om_df[['wf_dynamut2']]
lf_dynamut2 = all_dm_om_df[['lf_dynamut2']] # lf_dynamut2 = all_dm_om_df[['lf_dynamut2']]
#
wf_consurf = all_dm_om_df[['wf_consurf']] # wf_consurf = all_dm_om_df[['wf_consurf']]
lf_consurf = all_dm_om_df[['lf_consurf']] # lf_consurf = all_dm_om_df[['lf_consurf']]
#
wf_snap2 = all_dm_om_df[['wf_snap2']] # wf_snap2 = all_dm_om_df[['wf_snap2']]
lf_snap2 = all_dm_om_df[['lf_snap2']] # lf_snap2 = all_dm_om_df[['lf_snap2']]
#
wf_provean = all_dm_om_df[['wf_provean']] # wf_provean = all_dm_om_df[['wf_provean']]
lf_provean = all_dm_om_df[['lf_provean']] # lf_provean = all_dm_om_df[['lf_provean']]
#
# NEW # # NEW
wf_dist_gen = all_dm_om_df[['wf_dist_gen']] # wf_dist_gen = all_dm_om_df[['wf_dist_gen']]
lf_dist_gen = all_dm_om_df[['lf_dist_gen']] # lf_dist_gen = all_dm_om_df[['lf_dist_gen']]
#
if (tolower(gene)%in%geneL_na){ # if (tolower(gene)%in%geneL_na){
wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']] # wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']]
lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']] # lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]
} # }
#
if (tolower(gene)%in%geneL_ppi2){ # if (tolower(gene)%in%geneL_ppi2){
wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']] # wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']]
lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']] # lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']]
} # }
#
s2 = c("\nSuccessfully sourced other_plots_data.R") # s2 = c("\nSuccessfully sourced other_plots_data.R")
cat(s2) # cat(s2)
#
#################################################################### # ####################################################################
# Data for Lineage barplots: WF and LF dfs # # Data for Lineage barplots: WF and LF dfs
# My function: lineage_plot_data() # # My function: lineage_plot_data()
# location: scripts/functions/lineage_plot_data.R # # location: scripts/functions/lineage_plot_data.R
#################################################################### # ####################################################################
#
#source(paste0(plot_script_path, "lineage_data.R")) # #source(paste0(plot_script_path, "lineage_data.R"))
# converted to a function. Moved lineage_data.R to redundant/ # # converted to a function. Moved lineage_data.R to redundant/
lineage_dfL = lineage_plot_data(merged_df2 # lineage_dfL = lineage_plot_data(merged_df2
, lineage_column_name = "lineage" # , lineage_column_name = "lineage"
, remove_empty_lineage = F # , remove_empty_lineage = F
, lineage_label_col_name = "lineage_labels" # , lineage_label_col_name = "lineage_labels"
, id_colname = "id" # , id_colname = "id"
, snp_colname = "mutationinformation" # , snp_colname = "mutationinformation"
) # )
#
lin_wf = lineage_dfL[['lin_wf']] # lin_wf = lineage_dfL[['lin_wf']]
lin_lf = lineage_dfL[['lin_lf']] # lin_lf = lineage_dfL[['lin_lf']]
#
s3 = c("\nSuccessfully sourced lineage_data.R") # s3 = c("\nSuccessfully sourced lineage_data.R")
cat(s3) # cat(s3)
#
#################################################################### # ####################################################################
# Data for corr plots: # # Data for corr plots:
# My function: corr_data_extract() # # My function: corr_data_extract()
# location: scripts/functions/corr_plot_data.R # # location: scripts/functions/corr_plot_data.R
#################################################################### # ####################################################################
# make sure the above script works because merged_df2_combined is needed # # make sure the above script works because merged_df2_combined is needed
merged_df3 = as.data.frame(merged_df3) # merged_df3 = as.data.frame(merged_df3)
#
corr_df_m3_f = corr_data_extract(merged_df3 # corr_df_m3_f = corr_data_extract(merged_df3
, gene = gene
, drug = drug
, extract_scaled_cols = F)
head(corr_df_m3_f)
# corr_df_m2_f = corr_data_extract(merged_df2
# , gene = gene # , gene = gene
# , drug = drug # , drug = drug
# , extract_scaled_cols = F) # , extract_scaled_cols = F)
# head(corr_df_m2_f) # head(corr_df_m3_f)
#
s4 = c("\nSuccessfully sourced Corr_data.R") # # corr_df_m2_f = corr_data_extract(merged_df2
cat(s4) # # , gene = gene
# # , drug = drug
######################################################################## # # , extract_scaled_cols = F)
# End of script # # head(corr_df_m2_f)
######################################################################## #
if ( all( length(s1), length(s2), length(s3), length(s4) ) > 0 ){ # s4 = c("\nSuccessfully sourced Corr_data.R")
cat( # cat(s4)
"\n##################################################" #
, "\nSuccessful: get_plotting_dfs.R worked!" # ########################################################################
, "\n###################################################\n") # # End of script
} else { # ########################################################################
cat( # if ( all( length(s1), length(s2), length(s3), length(s4) ) > 0 ){
"\n#################################################" # cat(
, "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check" # "\n##################################################"
, "\n###################################################\n" ) # , "\nSuccessful: get_plotting_dfs.R worked!"
} # , "\n###################################################\n")
# } else {
######################################################################## # cat(
# clear excess variables: from the global environment #
# , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
vars0 = ls(envir = .GlobalEnv)[grepl("curr_*", ls(envir = .GlobalEnv))] # , "\n###################################################\n" )
vars1 = ls(envir = .GlobalEnv)[grepl("^cols_to*", ls(envir = .GlobalEnv))] # }
vars2 = ls(envir = .GlobalEnv)[grepl("pivot_cols_*", ls(envir = .GlobalEnv))] #
vars3 = ls(envir = .GlobalEnv)[grepl("expected_*", ls(envir = .GlobalEnv))] # ########################################################################
# # clear excess variables: from the global environment
rm( infile_metadata #
, infile_params # vars0 = ls(envir = .GlobalEnv)[grepl("curr_*", ls(envir = .GlobalEnv))]
, vars0 # vars1 = ls(envir = .GlobalEnv)[grepl("^cols_to*", ls(envir = .GlobalEnv))]
, vars1 # vars2 = ls(envir = .GlobalEnv)[grepl("pivot_cols_*", ls(envir = .GlobalEnv))]
, vars2 # vars3 = ls(envir = .GlobalEnv)[grepl("expected_*", ls(envir = .GlobalEnv))]
, vars3) #
# rm( infile_metadata
# , infile_params
# , vars0
# , vars1
# , vars2
# , vars3)

View file

@ -38,7 +38,7 @@ source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
class(merged_df3) class(merged_df3)
merged_df3 = as.data.frame(merged_df3) merged_df3 = as.data.frame(merged_df3)
class(df3) class(merged_df3)
head(merged_df3$pos_count) head(merged_df3$pos_count)
nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count"); nc_pc_CHANGE nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count"); nc_pc_CHANGE
@ -198,10 +198,10 @@ rects <- data.frame(x = 1:6,
) )
rects rects
rects$text = c("-ve Lig affinty" rects$text = c("-ve Lig"
, "+ve Lig affinity" , "+ve Lig"
, "+ve PPI2 affinity" , "+ve PPI2"
, "-ve PPI2 affinity" , "-ve PPI2"
, "+ve stability" , "+ve stability"
, "-ve stability") , "-ve stability")
@ -221,7 +221,7 @@ peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_l
coord_flip()+ scale_x_reverse() + coord_flip()+ scale_x_reverse() +
# theme_void() # remove any axis markings # theme_void() # remove any axis markings
theme_nothing() # remove any axis markings theme_nothing() # remove any axis markings
peP
peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) + peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
geom_tile() + # make square tiles geom_tile() + # make square tiles
@ -229,7 +229,7 @@ peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_
scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
coord_fixed() + # make sure tiles are square coord_fixed() + # make sure tiles are square
theme_nothing() # remove any axis markings theme_nothing() # remove any axis markings
peP2
# ------------------------------ # ------------------------------
# bp site site count: ALL # bp site site count: ALL
@ -252,24 +252,24 @@ posC_all = site_snp_count_bp(plotdf = df3
#------------------------------ #------------------------------
# barplot for sensitivity: # barplot for sensitivity:
#------------------------------ #------------------------------
sensP = stability_count_bp(plotdf = df3 # sensP = stability_count_bp(plotdf = df3
, df_colname = "sensitivity" # , df_colname = "sensitivity"
#, leg_title = "mCSM-ppi2" # #, leg_title = "mCSM-ppi2"
#, label_categories = labels_ppi2 # #, label_categories = labels_ppi2
#, bp_plot_title = paste(common_bp_title, "PP-interface") # #, bp_plot_title = paste(common_bp_title, "PP-interface")
#
, yaxis_title = "Number of nsSNPs" # , yaxis_title = "Number of nsSNPs"
, leg_position = "none" # , leg_position = "none"
, subtitle_text = "Sensitivity" # , subtitle_text = "Sensitivity"
, bar_fill_values = c("red", "blue") # , bar_fill_values = c("red", "blue")
, subtitle_colour= "black" # , subtitle_colour= "black"
, sts = 10 # , sts = 10
, lts = 8 # , lts = 8
, ats = 8 # , ats = 8
, als =8 # , als =8
, ltis = 11 # , ltis = 11
, geom_ls =2 # , geom_ls =2
) # )
consurfP = stability_count_bp(plotdf = df3 consurfP = stability_count_bp(plotdf = df3
@ -290,3 +290,95 @@ consurfP = stability_count_bp(plotdf = df3
consurfP consurfP
####################
# Sensitivity count
####################
table(df3$sensitivity)
rect_sens=data.frame(mutation_class=c("Resistant","Sensitive")
, tile_colour =c("red","blue")
, numbers = c(table(df3$sensitivity)[[1]], table(df3$sensitivity)[[2]]))
sensP = ggplot(rect_sens, aes(mutation_class, y = 0
, fill = tile_colour
, label = paste0("n=", numbers)
)) +
geom_tile(width = 1, height = 1) + # make square tiles
geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + # add white text in the middle
scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
coord_fixed() + # make sure tiles are square
#coord_flip()+ scale_x_reverse() +
# theme_void() # remove any axis markings
theme_nothing() # remove any axis markings
sensP
# sensP2 = sensP +
# coord_flip() + scale_x_reverse()
# sensP2
##############################
# FIXME for other genes: ATTEMPTED to derive numbers
##############################
#
# table(str_df_short$pe_effect_outcome)
# # extract the numbers
# DD_lig_n = table(str_df_short$pe_effect_outcome)[[1]]
# SS_lig_n = 0
# DD_ppi2_n = table(str_df_short$pe_effect_outcome)[[2]]
# SS_ppi2_n = table(str_df_short$pe_effect_outcome)[[4]]
# DD_stability_n = table(str_df_short$pe_effect_outcome)[[3]]
# SS_stability_n = table(str_df_short$pe_effect_outcome)[[5]]
#
# nums = c(DD_lig_n, SS_lig_n,DD_ppi2_n,SS_ppi2_n, DD_stability_n, SS_stability_n )
#
# rect_pe = data.frame(x = 1:6
# , pe_effect_type=c("-ve Lig aff"
# , "+ve Lig aff"
# , "-ve PPI2 aff"
# , " +ve PPI2 aff"
# , "-ve stability"
# , "+ve stability")
#
# , tile_colour =c("#ffd700" #gold
# ,"#f0e68c" # khaki
# , "#ff1493" #deeppink
# , "#da70d6" #orchid
# , "#F8766D" # Sred
# , "#00BFC4") #Sblue
# # , numbers = c(DD_lig_n
# # , SS_lig_n
# # , DD_ppi2_n
# # , SS_ppi2_n
# # , DD_stability_n
# # , SS_stability_n )
# , numbers = nums
# )
#
# rect_pe$num_labels = paste0("n=", rect_pe$numbers)
# rect_pe
#
# # create plot
# peP = ggplot(rect_pe, aes(x=pe_effect_type , y = 0, fill = tile_colour
# , label = paste0(pe_effect_type,"\n", num_labels))) +
# geom_tile(width = 1, height = 1) + # make square tiles
# geom_text(color = "black", size = 1.7) + # add white text in the middle
# scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
# coord_fixed() + # make sure tiles are square
# coord_flip()+ scale_x_reverse() +
# # theme_void() # remove any axis markings
# theme_nothing() # remove any axis markings
# peP
#
# peP2 = ggplot(rect_pe, aes(x=pe_effect_type, y = 0, fill = tile_colour
# , label = paste0(pe_effect_type,"\n", num_labels))) +
# geom_tile() +
# geom_text(color = "black", size = 1.6) +
# scale_fill_identity(guide = "none") +
# coord_fixed() +
# theme_nothing()
# peP2

View file

@ -4,7 +4,6 @@ posC_lig
ppi2P ppi2P
posC_ppi2 posC_ppi2
peP peP
pe_allCL
theme_georgia <- function(...) { theme_georgia <- function(...) {
@ -22,12 +21,127 @@ common_legend_outcome = get_legend(mLigP +
guides(color = guide_legend(nrow = 1)) + guides(color = guide_legend(nrow = 1)) +
theme(legend.position = "top")) theme(legend.position = "top"))
############################################################### # ###############################################################
# #================================
# # Lig Affinity: outcome + site
# #================================
# ligT = paste0(common_bp_title, " ligand")
# lig_affT = ggdraw() +
# draw_label(
# ligT,
# fontfamily = title_theme$family,
# fontface = title_theme$face,
# #size = title_theme$size
# size = 8
# )
# #-------------
# # Outplot
# #-------------
# ligaffP = paste0(outdir_images
# ,tolower(gene)
# ,"_lig_oc.png")
#
# #svg(affP, width = 20, height = 5.5)
# print(paste0("plot filename:", ligaffP))
# png(ligaffP, units = "in", width = 6, height = 4, res = 300 )
# cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome,
# nrow = 2,
# rel_heights = c(1,1)
# ),
# cowplot::plot_grid(mLigP, mmLigP, posC_lig
# , nrow = 1
# #, labels = c("A", "B", "C","D")
# , rel_widths = c(1,1,1.8)
# , align = "h"),
# nrow = 2,
# labels = c("A", ""),
# label_size = 12,
# rel_heights = c(1,8))
# dev.off()
# #############################################################
# #================================
# # PPI2 Affinity: outcome + site
# #================================
# ppi2T = paste0(common_bp_title, " PP-interface")
# ppi2_affT = ggdraw() +
# draw_label(
# ppi2T,
# fontfamily = title_theme$family,
# fontface = title_theme$face,
# #size = title_theme$size
# size = 8
# )
#
#
# #-------------
# # Outplot: PPI2
# #-------------
# ppiaffP = paste0(outdir_images
# ,tolower(gene)
# ,"_ppi2_oc.png")
#
# #svg(affP, width = 20, height = 5.5)
# print(paste0("plot filename:", ppiaffP))
# png(ppiaffP, units = "in", width = 6, height = 4, res = 300 )
#
#
# cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome,
# nrow = 2,
# rel_heights = c(1,1)),
# cowplot::plot_grid(ppi2P, posC_ppi2
# , nrow = 1
# , rel_widths = c(1.2,1.8)
# , align = "h"
# , label_size = my_label_size),
# nrow = 2,
# labels = c("B", ""),
# label_size = 12,
# rel_heights = c(1,8)
# )
#
# dev.off()
# #############################################################
#peP # pe counts
#================================ #================================
# Lig Affinity: outcome + site # PE + All position count
#================================ #================================
# peT_allT = ggdraw() +
# draw_label(
# paste0("All mutation sites"),
# fontfamily = title_theme$family,
# fontface = title_theme$face,
# #size = title_theme$size
# size = 8
# )
# #------------------------
# # Outplot: lig+ppi2+pe
# #------------------------
# pe_allCL = paste0(outdir_images
# ,tolower(gene)
# ,"_pe_oc.png")
#
# #svg(affP, width = 20, height = 5.5)
# print(paste0("plot filename:", pe_allCL))
# png(pe_allCL, units = "in", width = 6, height = 4, res = 300 )
#
#
# cowplot::plot_grid(peT_allT,
# cowplot::plot_grid(peP, posC_all
# , nrow = 1
# , rel_widths = c(1, 2)
# , align = "h"),
# nrow = 2,
# labels = c("C", "", ""),
# label_size = 12,
# rel_heights = c(1,8))
#
# dev.off()
#===========================================
# COMBINE ALL three
#==========================================
ligT = paste0(common_bp_title, " ligand") ligT = paste0(common_bp_title, " ligand")
lig_affT = ggdraw() + lig_affT = ggdraw() +
draw_label( draw_label(
ligT, ligT,
fontfamily = title_theme$family, fontfamily = title_theme$family,
@ -36,113 +150,6 @@ lig_affT = ggdraw() +
size = 8 size = 8
) )
#-------------
# Outplot
#-------------
ligaffP = paste0(outdir_images
,tolower(gene)
,"_lig_oc.png")
#svg(affP, width = 20, height = 5.5)
print(paste0("plot filename:", ligaffP))
png(ligaffP, units = "in", width = 6, height = 4, res = 300 )
cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome,
nrow = 2,
rel_heights = c(1,1)
),
cowplot::plot_grid(mLigP, mmLigP, posC_lig
, nrow = 1
#, labels = c("A", "B", "C","D")
, rel_widths = c(1,1,1.8)
, align = "h"),
nrow = 2,
labels = c("A", ""),
label_size = 12,
rel_heights = c(1,8))
dev.off()
#############################################################
#================================
# PPI2 Affinity: outcome + site
#================================
ppi2T = paste0(common_bp_title, " PP-interface")
ppi2_affT = ggdraw() +
draw_label(
ppi2T,
fontfamily = title_theme$family,
fontface = title_theme$face,
#size = title_theme$size
size = 8
)
#-------------
# Outplot: PPI2
#-------------
ppiaffP = paste0(outdir_images
,tolower(gene)
,"_ppi2_oc.png")
#svg(affP, width = 20, height = 5.5)
print(paste0("plot filename:", ppiaffP))
png(ppiaffP, units = "in", width = 6, height = 4, res = 300 )
cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome,
nrow = 2,
rel_heights = c(1,1)),
cowplot::plot_grid(ppi2P, posC_ppi2
, nrow = 1
, rel_widths = c(1.2,1.8)
, align = "h"
, label_size = my_label_size),
nrow = 2,
labels = c("B", ""),
label_size = 12,
rel_heights = c(1,8)
)
dev.off()
#############################################################
peP # pe counts
#================================
# PE + All position count
#================================
# Title strip for the "all mutation sites" panel.
peT_allT = ggdraw() +
  draw_label(
    paste0("All mutation sites"),
    fontfamily = title_theme$family,
    fontface = title_theme$face,
    #size = title_theme$size
    size = 8
  )
#-------------
# Outplot: PE + all positions
#-------------
# Target file: <outdir_images><gene in lower case>_pe_oc.png
pe_allCL = paste0(outdir_images
                  , tolower(gene)
                  , "_pe_oc.png")
#svg(affP, width = 20, height = 5.5)
print(paste0("plot filename:", pe_allCL))
png(pe_allCL, units = "in", width = 6, height = 4, res = 300 )
# Title row above peP and posC_all. Only two panels are stacked, so two labels
# are supplied. print() is explicit because top-level auto-printing is skipped
# when the script is run through source(), which would leave the PNG empty.
print(
  cowplot::plot_grid(
    peT_allT,
    cowplot::plot_grid(peP, posC_all
                       , nrow = 1
                       , rel_widths = c(1, 2)
                       , align = "h"),
    nrow = 2,
    labels = c("C", ""),
    label_size = 12,
    rel_heights = c(1,8))
)
dev.off()
#===========================================
# COMBINE ALL three
#==========================================
p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome, nrow=2), p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome, nrow=2),
cowplot::plot_grid(mLigP, mmLigP, posC_lig cowplot::plot_grid(mLigP, mmLigP, posC_lig
, nrow = 1 , nrow = 1
@ -152,8 +159,18 @@ p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT,common_legend_outcome, nrow=
rel_heights = c(1,8) rel_heights = c(1,8)
) )
#p1
###########################################################
ppi2T = paste0(common_bp_title, " PP-interface")
ppi2_affT = ggdraw() +
draw_label(
ppi2T,
fontfamily = title_theme$family,
fontface = title_theme$face,
#size = title_theme$size
size = 8
)
###########################################################
p2 = cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome, nrow=2), p2 = cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome, nrow=2),
cowplot::plot_grid(ppi2P, posC_ppi2 cowplot::plot_grid(ppi2P, posC_ppi2
, nrow = 1 , nrow = 1
@ -162,7 +179,17 @@ p2 = cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome, nro
nrow = 2, nrow = 2,
rel_heights = c(1,8) rel_heights = c(1,8)
) )
#p2
###########################################################
# PE + All position count
peT_allT = ggdraw() +
draw_label(
paste0("All mutation sites"),
fontfamily = title_theme$family,
fontface = title_theme$face,
#size = title_theme$size
size = 8
)
p3 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2 p3 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
, rel_widths = c(1,3),axis = "lr"), , rel_widths = c(1,3),axis = "lr"),
@ -174,16 +201,14 @@ p3 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
axis = "lr", axis = "lr",
rel_heights = c(1,8) rel_heights = c(1,8)
), ),
rel_heights = c(1,10), rel_heights = c(1,18),
nrow = 2,axis = "lr") nrow = 2,axis = "lr")
p3
#=============== #===============
# Final combine # Final combine
#=============== #===============
w = 11.75 w = 11.79
h = 3.7 h = 3.5
mut_impact_CLP = paste0(outdir_images mut_impact_CLP = paste0(outdir_images
,tolower(gene) ,tolower(gene)
,"_mut_impactCLP.png") ,"_mut_impactCLP.png")
@ -229,9 +254,21 @@ conCLP = paste0(outdir_images
,tolower(gene) ,tolower(gene)
,"_consurf_BP.png") ,"_consurf_BP.png")
print(paste0("plot filename:", sens_conP)) print(paste0("plot filename:", conCLP))
png(sens_conP, units = "in", width = w, height = h, res = 300 ) png(conCLP, units = "in", width = w, height = h, res = 300 )
consurfP consurfP
dev.off() dev.off()
#================================
# Sensitivity numbers: geom_tile
#================================
# Target file: <outdir_images><gene in lower case>_sensN_tile.png (1 x 1 inch)
sensCLP = paste0(outdir_images
                 , tolower(gene)
                 , "_sensN_tile.png")
print(paste0("plot filename:", sensCLP))
png(sensCLP, units = "in", width = 1, height = 1, res = 300 )
# print() is explicit: a bare `sensP` is only drawn by top-level auto-printing,
# which does not happen when the script is run through source().
print(sensP)
dev.off()

View file

@ -1,182 +0,0 @@
# Exploratory look at str_df_short before recoding it.
colnames(str_df_short)
table(str_df_short$effect_type)
table(str_df_short$effect_sign)
str(str_df_short)
# Two-letter outcome code: "DD" when effect_sign is negative, "SS" otherwise.
str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS")
table(str_df_short$pe_outcome )
table(str_df_short$effect_sign)
# effect_type values that are collapsed into the "affinity" and "ppi2" groups below.
affcols = c("affinity_scaled", "mmcsm_lig_scaled")
ppi2_cols = c("mcsm_ppi2_scaled")
#lig
# Step 1: ligand effect types become "affinity"; everything else keeps its effect_type.
table(str_df_short$effect_type)
str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols
, "affinity"
, str_df_short$effect_type)
table(str_df_short$effect_grouped)
#ppi2
# Step 2: operates on the result of step 1, so "affinity" rows are left alone.
str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols
, "ppi2"
, str_df_short$effect_grouped)
table(str_df_short$effect_grouped)
#stability
# Step 3: whatever is neither "affinity" nor "ppi2" by now is labelled "stability".
str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("affinity", "ppi2")
, "stability"
, str_df_short$effect_grouped)
table(str_df_short$effect_grouped)
# create a sign as well
# Concatenated without a separator, e.g. "DDaffinity", "SSstability".
str_df_short$effect_outcome = paste0(str_df_short$pe_outcome
, str_df_short$effect_grouped)
table(str_df_short$effect_outcome)
# Hex colour for each effect_outcome value (pe_outcome pasted onto
# effect_grouped, e.g. "DDaffinity"). The "DDstability" key used to carry a
# trailing space and therefore never matched any data value.
pe_colour_map2 = c( "DDaffinity" = "#ffd700" # gold
                    , "SSaffinity" = "#f0e68c" # khaki
                    , "DDppi2" = "#ff1493" # deeppink
                    , "SSppi2" = "#da70d6" # orchid
                    , "DDstability" = "#ae301e"
                    , "SSstability" = "#007d85"
)
# Per-row colour column. "SSaffinity" was missing from this recode and fell
# through to 'ns'; it now gets the khaki listed in pe_colour_map2.
str_df_short$effect_colours = str_df_short$effect_outcome
str_df_short = dplyr::mutate(str_df_short
                             , effect_colours = case_when(effect_colours == "DDaffinity" ~ "#ffd700"
                                                          , effect_colours == "SSaffinity" ~ "#f0e68c"
                                                          , effect_colours == "DDppi2" ~ '#ff1493'
                                                          , effect_colours == "SSppi2" ~ '#da70d6'
                                                          , effect_colours == "DDstability" ~ '#ae301e'
                                                          , effect_colours =="SSstability" ~ '#007d85'
                                                          , TRUE ~ 'ns'))
"#F8766D" #red
"#00BFC4" #blue
table(str_df_short$effect_colours)
###########################################
# Bars are filled by effect_outcome and coloured through the NAMED map, so
# every category gets its own colour. Passing the unnamed per-row colour column
# as `values` assigned colours by position in that vector instead.
ggplot(str_df_short
       , aes( x=effect_grouped
              , fill = effect_outcome)) +
  geom_bar() +
  scale_fill_manual(values = pe_colour_map2)
# Counts per effect group, one column each; the two entries of every vector
# line up with the row names assigned below (Destabilising, Stabilising).
first_col = c(38, 0)
second_col = c(9, 22)
third_col = c(681, 108)
# The data frame used to be built from first_row/second_row/third_row, which
# are never defined; the vectors created above are the *_col ones.
thing_df = data.frame(first_col, second_col, third_col)
rownames(thing_df) = c("Destabilising","Stabilising")
thing_df
###############################################
rect_colour_map = c("EMB" = "green"
                    ,"DSL" = "slategrey"
                    , "CDL" = "navyblue"
                    , "Ca" = "purple")
# One row per tile: position x, fill colour, label text and count.
rects <- data.frame(x = 1:6,
                    colors = c("#ffd700" #gold
                               , "#f0e68c" #khaki
                               , "#da70d6"# orchid
                               , "#ff1493"# deeppink
                               , "#00BFC4" #, "#007d85" #blue
                               , "#F8766D" )# red,
)
rects
# Labels are positional: entry i describes the colour in row i of `rects`.
# (First label previously read "affinty".)
rects$text = c("-ve Lig affinity"
               , "+ve Lig affinity"
               , "+ve PPI2 affinity"
               , "-ve PPI2 affinity"
               , "+ve stability"
               , "-ve stability")
rects$numbers = c(38, 0, 22, 9, 108, 681)
# Count label shown under the text, e.g. "n=38".
rects$num_labels = paste0("n=", rects$numbers)
rects
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
# One labelled tile per row of `rects`, written to <outdir_images>test.png.
png(paste0(outdir_images, "test.png")
    , width = 0.5
    , height = 2.5
    , units = "in", res = 300)
# print() is explicit: top-level auto-printing is skipped when the script is
# run through source(), which would leave the PNG empty.
print(
  ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
    geom_tile(width = 1, height = 1) + # one unit-sized tile per row
    geom_text(color = "black", size = 1.5) + # black label in the middle of each tile
    scale_fill_identity(guide = "none") + # use the colours stored in the data frame as-is
    # coord_fixed() used to be added here as well; a plot has one coordinate
    # system, so coord_flip() replaced it and it only produced a message.
    coord_flip() + scale_x_reverse() +
    # theme_void() # remove any axis markings
    theme_nothing() # remove any axis markings
)
dev.off()
##########################################################
# Lookup of tile label -> colour, as a data frame (plot input) ...
tile_map=data.frame(tile=c("EMB","DSL","CDL","Ca")
                    ,tile_colour =c("green","darkslategrey","navyblue","purple"))
# great
# ... and as a named vector (scale values), keyed by the same tile labels.
tile_colour_map = c("EMB" = "green"
                    ,"DSL" = "darkslategrey"
                    , "CDL" = "navyblue"
                    , "Ca" = "purple")
# Stand-alone legend extracted from a throw-away plot. colour/fill are mapped
# to `tile` because the manual scales look values up by NAME and
# tile_colour_map is named by tile label; mapping `tile_colour` (the colour
# strings) matched none of those names.
tile_legend=get_legend(
  ggplot(tile_map, aes(factor(tile),y=0
                       , colour=tile
                       , fill=tile))+
    geom_tile() +
    theme(legend.direction="horizontal") +
    scale_colour_manual(name=NULL
                        #, values = tile_map$tile_colour
                        , values=tile_colour_map) +
    scale_fill_manual(name=NULL
                      #,values=tile_map$tile_colour
                      , values = tile_colour_map)
)
#############################################################
###############################################
library(ggplot2)
library(viridis)
library(hrbrthemes)
# Bar chart of effect_type counts, stacked by effect_colours, on the discrete
# viridis palette. TRUE is spelled out because `T` is an ordinary variable
# that can be reassigned.
ggplot(str_df_short, aes(fill=effect_colours,x=effect_type)) +
  geom_bar() +
  scale_fill_viridis(discrete = TRUE) +
  ggtitle("Studying 4 species..")
####################################################

View file

@ -1,366 +0,0 @@
#!/usr/bin/env Rscript
#source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
# get plotting dfs
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
####################################################
# ggpairs wrapper
#
# Draws a ggpairs() matrix of `plot_df`: Spearman correlations in the upper
# triangle, small semi-transparent points/dots in the lower triangle, coloured
# by a two-level class derived from `dst_mode`.
#
# Args:
#   plot_df:    data frame containing a `dst_mode` column. Every column except
#               the LAST one is plotted, so callers are expected to place the
#               non-plotted column (dst_mode) last.
#   plot_title: title of the matrix; defaults to the previously hard-coded
#               "Stability" so existing calls are unaffected.
# Returns: the object built by ggpairs() with the colour/fill scales and theme added.
my_gg_pairs=function(plot_df, plot_title = "Stability"){
  # Named values pin "R" to red and "S" to blue. Unnamed values are handed out
  # by position, so a data set containing only "S" would be drawn in red.
  class_colours = c("R" = "red", "S" = "blue")
  ggpairs(plot_df, columns = seq_len(ncol(plot_df)-1),
          upper = list(continuous = wrap('cor',
                                         method = "spearman",
                                         title="ρ",
                                         digits=2,
                                         title_args=c(colour="black")
          )
          ),
          lower = list(
            continuous = wrap("points", alpha = 0.7, size=0.5),
            combo = wrap("dot", alpha = 0.7, size=0.5)
          ),
          # dst_mode is referenced as a bare column so it is resolved in the
          # data of each panel; dst_mode == 0 is labelled "S", anything else "R".
          aes(colour = factor(ifelse(dst_mode==0, "S", "R")), alpha = 0.5),
          title=plot_title) +
    scale_colour_manual(values = class_colours) +
    scale_fill_manual(values = class_colours) +
    theme(
      text = element_text(size=12, face="bold")
    )
}
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
#=======
# Input
#=======
merged_df3 = as.data.frame(merged_df3)
corr_plotdf = corr_data_extract(merged_df3
                                , gene = gene
                                , drug = drug
                                , extract_scaled_cols = FALSE)
colnames(corr_plotdf)
# identical() compares the two name vectors as a whole. An element-wise `==`
# recycles the shorter vector when the column counts differ, so the check could
# pass (or only warn) on mismatched frames.
if (identical(colnames(corr_df_m3_f), colnames(corr_plotdf))){
  cat("PASS: corr plot colnames match for dashboard")
}else{
  stop("Abort: corr plot colnames DO NOT match for dashboard")
}
#corr_plotdf = corr_df_m3_f #for downstream code
# Every column whose name contains "Dist".
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
aff_dist_cols
# Columns included in every correlation set below.
static_cols = c("Log10(MAF)"
                , "Log10(OR)"
                #, "-Log10(P)"
)
#================
# stability
#================
# aff_dist_cols contributes the "Dist" columns (see its definition above).
# dst_mode is listed last: the columns to its left are correlated, and its
# name is handed to my_corr_pairs() as the colouring category.
corr_ps_colnames = c(static_cols
                     , "DUET"
                     , "FoldX"
                     , "DeepDDG"
                     , "Dynamut2"
                     , aff_dist_cols
                     , "dst_mode")
if (all(corr_ps_colnames%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for correlation")
}else{
  stop("Abort: all colnames DO NOT exist for correlation")
}
corr_df_ps = corr_plotdf[, corr_ps_colnames]
# Rows with a non-missing odds ratio. The column is "Log10(OR)" (see
# static_cols); the former `$`Log(OR)`` lookup returned NULL, so the NA count
# was always 0 and this always equalled nrow().
complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps[["Log10(OR)"]]))
cat("\nComplete muts for stability for", gene, ":", complete_obs_ps)
color_coln = which(colnames(corr_df_ps) == "dst_mode")
#end = which(colnames(corr_df_ps) == drug)
#ncol_omit = 2
#corr_end = end-ncol_omit
# Last column to correlate = the one just before dst_mode.
corr_end = color_coln-1
#------------------------
# Output: stability corrP
#------------------------
corr_psP = paste0(outdir_images
                  ,tolower(gene)
                  ,"_corr_stability.svg" )
cat("Corr plot stability with coloured dots:", corr_psP)
svg(corr_psP, width = 15, height = 15)
my_corr_pairs(corr_data_all = corr_df_ps
              , corr_cols = colnames(corr_df_ps[1:corr_end])
              , corr_method = "spearman"
              , colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode"
              , categ_colour = c("red", "blue")
              , density_show = F
              , hist_col = "coral4"
              , dot_size = 1.6
              , ats = 1.5
              , corr_lab_size =2.5
              , corr_value_size = 1)
dev.off()
#===============
# CONSERVATION
#==============
# dst_mode and the drug column come last; only the columns to the left of
# dst_mode are correlated (see corr_end below).
corr_conservation_cols = c( static_cols
                            , "ConSurf"
                            , "SNAP2"
                            , "PROVEAN"
                            , aff_dist_cols
                            , "dst_mode"
                            , drug)
if (all(corr_conservation_cols%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for ConSurf-correlation")
}else{
  stop("Abort: all colnames DO NOT exist for ConSurf-correlation")
}
corr_df_cons = corr_plotdf[, corr_conservation_cols]
# Rows with a non-missing odds ratio. The column is "Log10(OR)" (see
# static_cols); the former `$`Log(OR)`` lookup returned NULL, so the NA count
# was always 0 and this always equalled nrow().
complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons[["Log10(OR)"]]))
cat("\nComplete muts for Conservation for", gene, ":", complete_obs_cons)
color_coln = which(colnames(corr_df_cons) == "dst_mode")
# end = which(colnames(corr_df_cons) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
# Last column to correlate = the one just before dst_mode.
corr_end = color_coln-1
#---------------------------
# Output: Conservation corrP
#----------------------------
corr_consP = paste0(outdir_images
                    ,tolower(gene)
                    ,"_corr_conservation.svg" )
cat("Corr plot conservation coloured dots:", corr_consP)
svg(corr_consP, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_cons
              , corr_cols = colnames(corr_df_cons[1:corr_end])
              , corr_method = "spearman"
              , colour_categ_col = colnames(corr_df_cons[color_coln]) #"dst_mode"
              , categ_colour = c("red", "blue")
              , density_show = F
              , hist_col = "coral4"
              , dot_size =1.1
              , ats = 1.5
              , corr_lab_size = 1.8
              , corr_value_size = 1)
dev.off()
#####################################################
#DistCutOff = 10
#LigDist_colname # = "ligand_distance" # from globals
#ppi2Dist_colname = "interface_dist"
#naDist_colname = "TBC"
#####################################################
#================
# ligand affinity
#================
corr_lig_colnames = c(static_cols
                      , "mCSM-lig"
                      , "mmCSM-lig"
                      , "dst_mode")
#, drug)
if (all(corr_lig_colnames%in%colnames(corr_plotdf))){
  cat("PASS: all colnames exist for Lig-correlation")
}else{
  stop("Abort: all colnames DO NOT exist for Lig-correlation")
}
# Rows with Lig-Dist below DistCutOff, restricted to the ligand columns.
# Previously the distance filter was applied and then thrown away, because the
# column subset was taken from the unfiltered corr_plotdf again. which() also
# drops rows whose distance is missing instead of turning them into all-NA rows.
lig_rows = which(corr_plotdf[["Lig-Dist"]] < DistCutOff)
corr_df_lig = corr_plotdf[lig_rows, corr_lig_colnames]
# Rows with a non-missing odds ratio. The column is "Log10(OR)" (see
# static_cols); the former `$`Log(OR)`` lookup returned NULL, so the NA count
# was always 0.
complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig[["Log10(OR)"]]))
cat("\nComplete muts for lig affinity for", gene, ":", complete_obs_lig)
color_coln = which(colnames(corr_df_lig) == "dst_mode")
# end = which(colnames(corr_df_lig) == drug)
# ncol_omit = 2
# corr_end = end-ncol_omit
# Last column to correlate = the one just before dst_mode.
corr_end = color_coln-1
#------------------------
# Output: ligand corrP
#------------------------
corr_ligP = paste0(outdir_images
                   ,tolower(gene)
                   ,"_corr_lig.svg" )
cat("Corr plot affinity with coloured dots:", corr_ligP)
svg(corr_ligP, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_lig
              , corr_cols = colnames(corr_df_lig[1:corr_end])
              , corr_method = "spearman"
              , colour_categ_col = colnames(corr_df_lig[color_coln]) #"dst_mode"
              , categ_colour = c("red", "blue")
              , density_show = F
              , hist_col = "coral4"
              , dot_size = 2
              , ats = 1.5
              , corr_lab_size =3
              , corr_value_size = 1)
dev.off()
####################################################
#================
# ppi2 affinity
#================
# Only run for genes listed in geneL_ppi2.
if (tolower(gene)%in%geneL_ppi2){
  corr_ppi2_colnames = c(static_cols
                         , "mCSM-PPI2"
                         , "dst_mode"
                         , drug)
  if (all(corr_ppi2_colnames%in%colnames(corr_plotdf))){
    cat("PASS: all colnames exist for mcsm-ppi2 correlation")
  }else{
    stop("Abort: all colnames DO NOT exist for mcsm-ppi2 correlation")
  }
  # Rows with PPI-Dist below DistCutOff, restricted to the ppi2 columns.
  # Previously the distance filter was applied and then thrown away, because
  # the column subset was taken from the unfiltered corr_plotdf again. which()
  # also drops rows whose distance is missing.
  ppi2_rows = which(corr_plotdf[["PPI-Dist"]] < DistCutOff)
  corr_df_ppi2 = corr_plotdf[ppi2_rows, corr_ppi2_colnames]
  # Rows with a non-missing odds ratio. The column is "Log10(OR)" (see
  # static_cols); the former `$`Log(OR)`` lookup returned NULL, so the NA
  # count was always 0.
  complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2[["Log10(OR)"]]))
  cat("\nComplete muts for ppi2 affinity for", gene, ":", complete_obs_ppi2)
  color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
  # end = which(colnames(corr_df_ppi2) == drug)
  # ncol_omit = 2
  # corr_end = end-ncol_omit
  # Last column to correlate = the one just before dst_mode.
  corr_end = color_coln-1
  #------------------------
  # Output: ppi2 corrP
  #------------------------
  corr_ppi2P = paste0(outdir_images
                      ,tolower(gene)
                      ,"_corr_ppi2.svg" )
  cat("Corr plot ppi2 with coloured dots:", corr_ppi2P)
  svg(corr_ppi2P, width = 10, height = 10)
  my_corr_pairs(corr_data_all = corr_df_ppi2
                , corr_cols = colnames(corr_df_ppi2[1:corr_end])
                , corr_method = "spearman"
                , colour_categ_col = colnames(corr_df_ppi2[color_coln]) #"dst_mode"
                , categ_colour = c("red", "blue")
                , density_show = F
                , hist_col = "coral4"
                , dot_size = 2
                , ats = 1.5
                , corr_lab_size = 3
                , corr_value_size = 1)
  dev.off()
}
# FIXME: ADD distance
#==================
# mCSM-NA affinity
#==================
# Only run for genes listed in geneL_na.
if (tolower(gene)%in%geneL_na){
  corr_na_colnames = c(static_cols
                       , "mCSM-NA"
                       , "dst_mode"
                       , drug)
  if (all(corr_na_colnames%in%colnames(corr_plotdf))){
    cat("PASS: all colnames exist for mcsm-NA-correlation")
  }else{
    stop("Abort: all colnames DO NOT exist for mcsm-NA-correlation")
  }
  corr_na_colnames%in%colnames(corr_plotdf)
  # The distance filter needs an NA-Dist column (see the FIXME above); fail
  # with a clear message if it has not been added to corr_plotdf yet.
  if (!"NA-Dist" %in% colnames(corr_plotdf)){
    stop("Abort: NA-Dist column missing, cannot filter for mcsm-NA-correlation")
  }
  # Rows with NA-Dist below DistCutOff, restricted to the NA-affinity columns.
  # The filter used to read corr_df_na before it existed, and its result was
  # then replaced by an unfiltered subset of corr_plotdf. which() also drops
  # rows whose distance is missing.
  na_rows = which(corr_plotdf[["NA-Dist"]] < DistCutOff)
  corr_df_na = corr_plotdf[na_rows, corr_na_colnames]
  # Rows with a non-missing odds ratio. The column is "Log10(OR)" (see
  # static_cols); the former `$`Log(OR)`` lookup returned NULL, so the NA
  # count was always 0.
  complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na[["Log10(OR)"]]))
  cat("\nComplete muts for NA affinity for", gene, ":", complete_obs_na)
  color_coln = which(colnames(corr_df_na) == "dst_mode")
  # end = which(colnames(corr_df_na) == drug)
  # ncol_omit = 2
  # corr_end = end-ncol_omit
  # Last column to correlate = the one just before dst_mode.
  corr_end = color_coln-1
  #------------------------
  # Output: mCSM-NA corrP
  #------------------------
  corr_naP = paste0(outdir_images
                    ,tolower(gene)
                    ,"_corr_na.svg" )
  cat("Corr plot mCSM-NA with coloured dots:", corr_naP)
  svg(corr_naP, width = 10, height = 10)
  my_corr_pairs(corr_data_all = corr_df_na
                , corr_cols = colnames(corr_df_na[1:corr_end])
                , corr_method = "spearman"
                , colour_categ_col = colnames(corr_df_na[color_coln]) #"dst_mode"
                , categ_colour = c("red", "blue")
                , density_show = F
                , hist_col = "coral4"
                , dot_size = 2
                , ats = 1.5
                , corr_lab_size = 3
                , corr_value_size = 1)
  dev.off()
}
####################################################
#===============
#ggpairs:
#================
#corr_df_ps$dst_mode = ifelse(corr_df_cons$dst_mode=="1", "R", "S")
# The stability frame is the one drawn with the ggpairs wrapper.
corr_plotting_df = corr_df_ps
# svg() sizes are always in inches and the device has no `units`/`res`
# arguments; passing them stops the script with an "unused argument" error.
svg('~/tmp/foo.svg',
    width=10,
    height=10)
# print() is explicit: top-level auto-printing is skipped when the script is
# run through source(), which would leave the file empty.
print(my_gg_pairs(corr_plotting_df))
dev.off()
png('~/tmp/foo.png',
    width=10,
    height=10,
    units="in",
    res=300)
print(my_gg_pairs(corr_plotting_df))
dev.off()
#

View file

@ -21,12 +21,19 @@ png('~/tmp/foo.png',
units="in", units="in",
res=300) res=300)
# #
corr_plotting_df = corr_df_ps #corr_plotting_df = corr_df_ps
colnames(corr_plotdf)
corr_plotting_df = subset(corr_plotdf, select = -c(ethambutol,`Log10(OR)`,`-Log10(P)`, ASA, RSA, KD, RD
ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1), , FoldX
, DeepDDG
, Dynamut2 ))
colnames(corr_plotting_df)
#ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)),
upper = list(continuous = wrap('cor', upper = list(continuous = wrap('cor',
method = "spearman", method = "spearman",
use = "pairwise.complete.obs",
title="ρ", title="ρ",
digits=2, digits=2,
title_args=c(colour="black") title_args=c(colour="black")
@ -36,7 +43,7 @@ ggpairs(corr_plotting_df, columns = 1:(ncol(corr_plotting_df)-1),
continuous = wrap("points", alpha = 0.7, size=0.5), continuous = wrap("points", alpha = 0.7, size=0.5),
combo = wrap("dot", alpha = 0.7, size=0.5) combo = wrap("dot", alpha = 0.7, size=0.5)
), ),
aes(colour = factor(ifelse(corr_plotting_df$dst_mode==0, "S", "R")), alpha = 0.5), aes(colour = factor(ifelse(dst_mode==0, "S", "R")), alpha = 0.5),
title="Stability") + title="Stability") +
scale_colour_manual(values = c("red", "blue")) + scale_colour_manual(values = c("red", "blue")) +

View file

@ -1,51 +1,88 @@
source("~/git/LSHTM_analysis/config/embb.R") #source("~/git/LSHTM_analysis/config/embb.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") #source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") #source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
my_gg_pairs=function(plot_df){ my_gg_pairs=function(plot_df, plot_title
, tt_args_size = 2.5
, gp_args_size = 2.5){
ggpairs(plot_df, ggpairs(plot_df,
columns = 1:(ncol(plot_df)-1), columns = 1:(ncol(plot_df)-1),
upper = list( upper = list(
continuous = wrap('cor', continuous = wrap('cor', # ggally_cor()
method = "spearman", method = "spearman",
use = "pairwise.complete.obs",
title="ρ", title="ρ",
digits=2, digits=2,
justify_labels = "left", justify_labels = "centre",
title_args=c(colour="black") #title_args=c(colour="black"),
title_args=c(size=tt_args_size),#2.5
group_args=c(size=gp_args_size)#2.5
) )
), ),
lower = list( lower = list(
continuous = wrap("points", continuous = wrap("points",
alpha = 0.7, alpha = 0.7,
size=0.5), size=0.125),
combo = wrap("dot", combo = wrap("dot",
alpha = 0.7, alpha = 0.7,
size=0.5) size=0.125)
), ),
aes(colour = factor(ifelse(plot_df$dst_mode==0, aes(colour = factor(ifelse(dst_mode==0,
"S", "S",
"R") ), "R") ),
alpha = 0.5), alpha = 0.5),
title="Stability") + title=plot_title) +
scale_colour_manual(values = c("red", "blue")) + scale_colour_manual(values = c("red", "blue")) +
scale_fill_manual(values = c("red", "blue")) + scale_fill_manual(values = c("red", "blue")) #+
theme(text = element_text(size=12, # theme(text = element_text(size=7,
face="bold") ) # face="bold"))
} }
DistCutOff = 10 DistCutOff = 10
###########################################################################
geneL_normal = c("pnca")
geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
merged_df3 = as.data.frame(merged_df3) merged_df3 = as.data.frame(merged_df3)
corr_plotdf = corr_data_extract(merged_df3 corr_plotdf = corr_data_extract(merged_df3
, gene = gene , gene = gene
, drug = drug , drug = drug
, extract_scaled_cols = F) , extract_scaled_cols = F)
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))] aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
static_cols = c("Log10(MAF)" static_cols = c("Log10(MAF)")
, "Log10(OR)") #, "Log10(OR)")
############################################################
#=============================================
# Creating masked df for affinity data
#=============================================
corr_affinity_df = corr_plotdf
#----------------------
# Mask affinity columns
#-----------------------
corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mCSM-lig"]=0
corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mmCSM-lig"]=0
if (tolower(gene)%in%geneL_ppi2){
corr_affinity_df[corr_affinity_df["PPI-Dist"]>DistCutOff,"mCSM-PPI2"]=0
}
# if (tolower(gene)%in%geneL_na){
# corr_affinity_df[corr_affinity_df["NA-Dist"]>DistCutOff,"mCSM-NA"]=0
# }
# count 0
#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100
unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0)
unmasked_vals
##########################################################
#================
# Stability
#================
corr_ps_colnames = c(static_cols corr_ps_colnames = c(static_cols
, "DUET" , "DUET"
, "FoldX" , "FoldX"
@ -54,14 +91,13 @@ corr_ps_colnames = c(static_cols
, aff_dist_cols , aff_dist_cols
, "dst_mode") , "dst_mode")
corr_df_ps = corr_plotdf[, corr_ps_colnames] corr_df_ps = corr_plotdf[, corr_ps_colnames]
complete_obs_ps = nrow(corr_df_ps) - sum(is.na(corr_df_ps$`Log(OR)`))
color_coln = which(colnames(corr_df_ps) == "dst_mode")
corr_end = color_coln-1
# Plot #1 # Plot #1
plot_corr_df_ps = my_gg_pairs(corr_df_ps) plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability features")
##########################################################
#================
# Conservation
#================
corr_conservation_cols = c( static_cols corr_conservation_cols = c( static_cols
, "ConSurf" , "ConSurf"
, "SNAP2" , "SNAP2"
@ -71,74 +107,66 @@ corr_conservation_cols = c( static_cols
) )
corr_df_cons = corr_plotdf[, corr_conservation_cols] corr_df_cons = corr_plotdf[, corr_conservation_cols]
complete_obs_cons = nrow(corr_df_cons) - sum(is.na(corr_df_cons$`Log(OR)`))
color_coln = which(colnames(corr_df_cons) == "dst_mode")
corr_end = color_coln-1
# Plot #2 # Plot #2
plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation features")
#my_gg_pairs(corr_df_cons) ##########################################################
plot_corr_df_cons = my_gg_pairs(corr_df_cons) #================
# Affinity: lig, ppi and na as applicable
#================
#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
common_aff_colnames = c("mCSM-lig"
, "mmCSM-lig")
if (tolower(gene)%in%geneL_normal){
aff_colnames = common_aff_colnames
}
if (tolower(gene)%in%geneL_ppi2){
aff_colnames = c(common_aff_colnames, "mCSM-PPI2")
}
corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,] if (tolower(gene)%in%geneL_na){
corr_lig_colnames = c(static_cols aff_colnames = c(common_aff_colnames, "mCSM-NA")
, "mCSM-lig" }
, "mmCSM-lig"
, "dst_mode")
corr_df_lig = corr_plotdf[, corr_lig_colnames] # building ffinal affinity colnames for correlation
corr_aff_colnames = c(static_cols
, aff_colnames
, "dst_mode") # imp
corr_df_aff = corr_affinity_df[, corr_aff_colnames]
colnames(corr_df_aff)
complete_obs_lig = nrow(corr_df_lig) - sum(is.na(corr_df_lig$`Log(OR)`))
color_coln = which(colnames(corr_df_lig) == "dst_mode")
corr_end = color_coln-1
# Plot #3 # Plot #3
plot_corr_df_aff = my_gg_pairs(corr_df_aff, plot_title="Affinity features", tt_args_size = 4, gp_args_size =4)
#my_gg_pairs(corr_df_lig) #=============
plot_corr_df_lig = my_gg_pairs(corr_df_lig) # combine
#=============
corr_df_ppi2 = corr_plotdf[corr_plotdf["PPI-Dist"]<DistCutOff,] #png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
corr_ppi2_colnames = c(static_cols png(paste0(outdir_images
, "mCSM-PPI2" ,tolower(gene)
, "dst_mode" ,"_CorrAB.png"), height = 6, width=11.75, unit="in",res=300)
)
corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames]
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2$`Log(OR)`))
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
corr_end = color_coln-1
# NOTE: DELETE LOG OR FROM CORRELATION PLOTS!!!!! cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
# NOTE: ALSO MAYBE DELETE DISTANCES AS WELL ggmatrix_gtable(plot_corr_df_cons),
# NOTE: http://ggobi.github.io/ggally/reference/ggally_cor.html # ggmatrix_gtable(plot_corr_df_aff),
# nrow=1, ncol=3, rel_heights = 7,7,3
# "***" if the p-value is < 0.001 nrow=1,
# "**" if the p-value is < 0.01 #rel_heights = 1,1
# "*" if the p-value is < 0.05 labels = "AUTO",
# "." if the p-value is < 0.10 label_size = 12)
# "" otherwise dev.off()
#
# affinity corr
# Plot #4 #png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300)
#my_gg_pairs(corr_df_ppi2) png(paste0(outdir_images
plot_corr_df_ppi2 = my_gg_pairs(corr_df_ppi2) ,tolower(gene)
,"_CorrC.png"), height =7, width=7, unit="in",res=300)
# corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,] cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff),
# corr_na_colnames = c(static_cols labels = "C",
# , "mCSM-NA" label_size = 12)
# , "dst_mode"
# )
#
# corr_df_na = corr_plotdf[, corr_na_colnames]
# complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na$`Log(OR)`))
# color_coln = which(colnames(corr_df_na) == "dst_mode")
# corr_end = color_coln-1
#
# # Plot #5
# #my_gg_pairs(corr_df_na)
# plot_corr_df_na = my_gg_pairs(corr_df_na)
png("/tmp/gg_pairs_all.png", height = 8, width=11.75, unit="in",res=300)
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),ggmatrix_gtable(plot_corr_df_cons),
ggmatrix_gtable(plot_corr_df_lig),ggmatrix_gtable(plot_corr_df_ppi2),
nrow=2, ncol=2, rel_heights = 7,7,3,3)
dev.off() dev.off()

View file

@ -154,6 +154,11 @@ for (i in unique(str_df$position) ){
str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values
colnames(str_df) colnames(str_df)
# check
str_df_check = str_df[str_df$position%in%c(24, 32,160, 303, 334),]
table(str_df$effect_type)
#================ #================
# for Plots # for Plots
#================ #================
@ -161,9 +166,56 @@ str_df_short = str_df[, c("mutationinformation","position","sensitivity"
, "effect_type" , "effect_type"
, "effect_sign")] , "effect_sign")]
# check table(str_df_short$effect_type)
str_df_check = str_df[str_df$position%in%c(24, 32,160, 303, 334),] table(str_df_short$effect_sign)
table(str_df$effect_type) str(str_df_short)
# assign pe outcome
str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS")
table(str_df_short$pe_outcome )
table(str_df_short$effect_sign)
#==============
# group effect type:
# lig, ppi2, nuc. acid, stability
#==============
affcols = c("affinity_scaled", "mmcsm_lig_scaled")
ppi2_cols = c("mcsm_ppi2_scaled")
#nuc_na_cols = c("mcsm_a_scaled")
#lig
table(str_df_short$effect_type)
str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols
, "lig"
, str_df_short$effect_type)
table(str_df_short$effect_grouped)
#ppi2
str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols
, "ppi2"
, str_df_short$effect_grouped)
table(str_df_short$effect_grouped)
#stability
str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig", "ppi2")
, "stability"
, str_df_short$effect_grouped)
table(str_df_short$effect_grouped)
# create a sign as well
str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_"
, str_df_short$effect_grouped)
table(str_df_short$pe_effect_outcome)
#####################################################################
# Chimera: for colouring
####################################################################
#------------------------------------- #-------------------------------------
# get df with unique position # get df with unique position