more plot files

This commit is contained in:
Tanushree Tunstall 2022-08-22 22:57:56 +01:00
parent 04253b961f
commit 3d817fde0c
29 changed files with 3252 additions and 760 deletions

View file

@ -0,0 +1,120 @@
# find the most "impactful" effect value
biggest=max(abs((a[c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')])))
abs((a[c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')]))==biggest
abs((a[c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')]))==c(,biggest)
max(abs((a[c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')])))
a2 = (a[c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')])
a2
biggest = max(abs(a2[1,]))
#hmm
#which(abs(a2) == biggest)
#names(a2)[apply(a2, 1:4, function(i) which(i == max()))]
# get row max
a2$row_maximum = apply(abs(a2[,-1]), 1, max)
# get colname for abs(max_value)
#https://stackoverflow.com/questions/36960010/get-column-name-that-matches-specific-row-value-in-dataframe
#names(df)[which(df == 1, arr.ind=T)[, "col"]]
# yayy
names(a2)[which(abs(a2) == biggest, arr.ind=T)[, "col"]]
#another:https://statisticsglobe.com/return-column-name-of-largest-value-for-each-row-in-r
colnames(a2)[max.col(abs(a2), ties.method = "first")] # Apply colnames & max.col functions
#################################################
# use whole df
#gene_aff_cols = c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')
biggest = max(abs(a[gene_aff_cols]))
a$max_es = biggest
a$effect = names(a[gene_aff_cols])[which(abs(a[gene_aff_cols]) == biggest, arr.ind=T)[, "col"]]
effect_name = unique(a$effect)
#get index of value of max effect
ind = (which(abs(a[effect_name]) == biggest, arr.ind=T))
a[effect_name][ind]
# extract sign
a$effect_sign = sign(a[effect_name][ind])
########################################################
# maxn <- function(n) function(x) order(x, decreasing = TRUE)[n]
# second_big = abs(a[gene_aff_cols])[maxn(2)(abs(a[gene_aff_cols])]
# apply(df, 1, function(x)x[maxn(1)(x)])
# apply(a[gene_aff_cols], 1, function(x) abs(a[gene_aff_cols])[maxn(2)(abs(a[gene_aff_cols]))])
#########################################################
# loop
a2 = df2[df2$position%in%c(167, 423, 427),]
test <- a2 %>%
dplyr::group_by(position) %>%
biggest = max(abs(a2[gene_aff_cols]))
a2$max_es = max(abs(a2[gene_aff_cols]))
a2$effect = names(a2[gene_aff_cols])[which(abs(a2[gene_aff_cols]) == biggest, arr.ind=T)[, "col"]]
effect_name = unique(a2$effect)
#get index of value of max effect
ind = (which(abs(a2[effect_name]) == biggest, arr.ind=T))
a2[effect_name][ind]
# extract sign
a2$effect_dir = sign(a2[effect_name][ind])
#################################
df2_short = df2[df2$position%in%c(167, 423, 427),]
for (i in unique(df2_short$position) ){
#print(i)
#print(paste0("\nNo. of unique positions:", length(unique(df2$position))) )
#cat(length(unique(df2$position)))
a2 = df2_short[df2_short$position==i,]
biggest = max(abs(a2[gene_aff_cols]))
a2$max_es = max(abs(a2[gene_aff_cols]))
a2$effect = names(a2[gene_aff_cols])[which(abs(a2[gene_aff_cols]) == biggest, arr.ind=T)[, "col"]]
effect_name = unique(a2$effect)
#get index of value of max effect
ind = (which(abs(a2[effect_name]) == biggest, arr.ind=T))
a2[effect_name][ind]
# extract sign
a2$effect_sign = sign(a2[effect_name][ind])
}
#========================
df2_short = df2[df2$position%in%c(167, 423, 427),]
df2_short = df2[df2$position%in%c(170, 167, 493, 453, 435, 433, 480, 456, 445),]
df2_short = df2[df2$position%in%c(435, 480),]
give_col=function(x,y,df=df2_short){
df[df$position==x,y]
}
for (i in unique(df2_short$position) ){
#print(i)
#print(paste0("\nNo. of unique positions:", length(unique(df2$position))) )
#cat(length(unique(df2$position)))
#df2_short[df2_short$position==i,gene_aff_cols]
biggest = max(abs(give_col(i,gene_aff_cols)))
df2_short[df2_short$position==i,'abs_max_effect'] = biggest
df2_short[df2_short$position==i,'effect_type']= names(
give_col(i,gene_aff_cols)[which(
abs(
give_col(i,gene_aff_cols)
) == biggest, arr.ind=T
)[, "col"]])
effect_name = unique(df2_short[df2_short$position==i,'effect_type'])
# get index/rowname for value of max effect, and then use it to get the original sign
# here
#df2_short[df2_short$position==i,c(effect_name)]
#which(abs(df2_short[df2_short$position==i,c('position',effect_name)][effect_name])==biggest, arr.ind=T)
ind = rownames(which(abs(df2_short[df2_short$position==i
,c('position',effect_name)][effect_name])== biggest
, arr.ind=T))
df2_short[df2_short$position==i,'effect_sign'] = sign(df2_short[effect_name][ind,])
}

View file

@ -0,0 +1,223 @@
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/katg.R")
source("~/git/LSHTM_analysis/config/rpob.R")
source("/home/tanu/git/LSHTM_analysis/my_header.R")
#########################################################
# TASK: Generate averaged affinity values
# across all affinity tools for a given structure
# as applicable...
#########################################################
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene))
#OutFile1
outfile_mean_aff = paste0(outdir_images, "/", tolower(gene)
, "_mean_affinity_all.csv")
print(paste0("Output file:", outfile_mean_aff))
#OutFile2
outfile_mean_aff_priorty = paste0(outdir_images, "/", tolower(gene)
, "_mean_affinity_priority.csv")
print(paste0("Output file:", outfile_mean_aff_priorty))
#%%===============================================================
#=============
# Input
#=============
df3_filename = paste0("/home/tanu/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv")
df3 = read.csv(df3_filename)
length(df3$mutationinformation)
# mut_info checks
table(df3$mutation_info)
table(df3$mutation_info_orig)
table(df3$mutation_info_labels_orig)
# used in plots and analyses
table(df3$mutation_info_labels) # different, and matches dst_mode
table(df3$dst_mode)
# create column based on dst mode with different colname
table(is.na(df3$dst))
table(is.na(df3$dst_mode))
#===============
# Create column: sensitivity mapped to dst_mode
#===============
df3$sensitivity = ifelse(df3$dst_mode == 1, "R", "S")
table(df3$sensitivity)
length(unique((df3$mutationinformation)))
all_colnames = as.data.frame(colnames(df3))
common_cols = c("mutationinformation"
, "X5uhc_position"
, "X5uhc_offset"
, "position"
, "dst_mode"
, "mutation_info_labels"
, "sensitivity"
, "ligand_distance"
, "interface_dist")
all_colnames$`colnames(df3)`[grep("scaled", all_colnames$`colnames(df3)`)]
all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)]
#===================
# stability cols
#===================
raw_cols_stability = c("duet_stability_change"
, "deepddg"
, "ddg_dynamut2"
, "ddg_foldx")
scaled_cols_stability = c("duet_scaled"
, "deepddg_scaled"
, "ddg_dynamut2_scaled"
, "foldx_scaled")
outcome_cols_stability = c("duet_outcome"
, "deepddg_outcome"
, "ddg_dynamut2_outcome"
, "foldx_outcome")
#===================
# affinity cols
#===================
raw_cols_affinity = c("ligand_affinity_change"
, "mmcsm_lig"
, "mcsm_ppi2_affinity"
, "mcsm_na_affinity")
scaled_cols_affinity = c("affinity_scaled"
, "mmcsm_lig_scaled"
, "mcsm_ppi2_scaled"
, "mcsm_na_scaled" )
outcome_cols_affinity = c( "ligand_outcome"
, "mmcsm_lig_outcome"
, "mcsm_ppi2_outcome"
, "mcsm_na_outcome")
#===================
# conservation cols
#===================
# raw_cols_conservation = c("consurf_score"
# , "snap2_score"
# , "provean_score")
#
# scaled_cols_conservation = c("consurf_scaled"
# , "snap2_scaled"
# , "provean_scaled")
#
# # CANNOT strictly be used, as categories are not identical with conssurf missing altogether
# outcome_cols_conservation = c("provean_outcome"
# , "snap2_outcome"
# #consurf outcome doesn't exist
# )
######################################################################
cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols
, raw_cols_affinity
, scaled_cols_affinity
, outcome_cols_affinity
, raw_cols_stability
, scaled_cols_stability
, outcome_cols_stability
)]
cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols
#, raw_cols_affinity
, scaled_cols_stability
, scaled_cols_affinity)]
df3_plot = df3[, cols_to_extract]
# FIXME: ADD distance to NA when SP replies
DistCutOff_colnames = c("ligand_distance", "interface_dist")
DistCutOff = 10
#df3_affinity_filtered = df3_plot[ (df3_plot$ligand_distance<10 | df3_plot$interface_dist <10),]
df3_affinity_filtered = df3_plot[ (df3_plot$ligand_distance<10 & df3_plot$interface_dist <10),]
c0u = unique(df3_affinity_filtered$position)
length(c0u)
foo = df3_affinity_filtered[df3_affinity_filtered$ligand_distance<10,]
bar = df3_affinity_filtered[df3_affinity_filtered$interface_dist<10,]
wilcox.test(foo$mmcsm_lig_scaled~foo$sensitivity)
wilcox.test(foo$mmcsm_lig~foo$sensitivity)
wilcox.test(foo$affinity_scaled~foo$sensitivity)
wilcox.test(foo$ligand_affinity_change~foo$sensitivity)
wilcox.test(bar$mcsm_na_scaled~bar$sensitivity)
wilcox.test(bar$mcsm_na_affinity~bar$sensitivity)
wilcox.test(bar$mcsm_ppi2_scaled~bar$sensitivity)
wilcox.test(bar$mcsm_ppi2_affinity~bar$sensitivity)
##############################################################
df = df3_affinity_filtered
sum(is.na(df))
df2 = na.omit(df) # Apply na.omit function
gene_dist_cols = colnames(df2)[colnames(df2)%in%DistCutOff_colnames]
gene_aff_cols = colnames(df2)[colnames(df2)%in%scaled_cols_affinity]
gene_stab_cols = colnames(df2)[colnames(df2)%in%scaled_cols_stability]
sel_cols = c("mutationinformation", "position", gene_stab_cols, gene_dist_cols, gene_aff_cols)
df3 = df2[, sel_cols]
give_col=function(x,y,df=df3){
df[df$position==x,y]
}
for (i in unique(df3$position) ){
#print(i)
biggest = max(abs(give_col(i,gene_aff_cols)))
df3[df3$position==i,'abs_max_effect'] = biggest
df3[df3$position==i,'effect_type']= names(
give_col(i,gene_aff_cols)[which(
abs(
give_col(i,gene_aff_cols)
) == biggest, arr.ind=T
)[, "col"]])
effect_name = unique(df3[df3$position==i,'effect_type'])
# get index/rowname for value of max effect, and then use it to get the original sign
# here
#df3[df3$position==i,c(effect_name)]
#which(abs(df3[df3$position==i,c('position',effect_name)][effect_name])==biggest, arr.ind=T)
ind = rownames(which(abs(df3[df3$position==i
,c('position', effect_name)][effect_name])== biggest
, arr.ind=T))
df3[df3$position==i,'effect_sign'] = sign(df3[effect_name][ind,])
}
#%%============================================================
# output
write.csv(combined_df, outfile_mean_ens_st_aff
, row.names = F)
cat("Finished writing file:\n"
, outfile_mean_ens_st_aff
, "\nNo. of rows:", nrow(combined_df)
, "\nNo. of cols:", ncol(combined_df))
# end of script
#===============================================================

View file

@ -0,0 +1,364 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Barplots
# basic barplots with outcome
# basic barplots with frequency of count of mutations
#########################################################
#=============
# Data: Input
#==============
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#cat("\nSourced plotting cols as well:", length(plotting_cols))
####################################################
class(merged_df3)
df3 = subset(merged_df3, select = -c(pos_count))
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
cat("plots will output to:", outdir_images)
##########################################################
# blue, red bp
sts = 8
lts = 8
ats = 8
als = 8
ltis = 8
geom_ls = 2.2
#pos_count
subtitle_size = 8
geom_ls_pc = 2.2
leg_text_size = 8
axis_text_size = 8
axis_label_size = 8
###########################################################
#------------------------------
# plot default sizes
#------------------------------
#=========================
# Affinity outcome
# check this var: outcome_cols_affinity
# get from preformatting or put in globals
#==========================
DistCutOff
LigDist_colname # = "ligand_distance" # from globals
ppi2Dist_colname
naDist_colname
###########################################################
# get plotting data within the distance
df3_lig = df3[df3[[LigDist_colname]]<DistCutOff,]
df3_ppi2 = df3[df3[[ppi2Dist_colname]]<DistCutOff,]
df3_na = df3[df3[[naDist_colname]]<DistCutOff,]
common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
#------------------------------
# barplot for ligand affinity:
# <10 Ang of ligand
#------------------------------
mLigP = stability_count_bp(plotdf = df3_lig
, df_colname = "ligand_outcome"
#, leg_title = "mCSM-lig"
#, bp_plot_title = paste(common_bp_title, "ligand")
, yaxis_title = "Number of nsSNPs"
, leg_position = "none"
, subtitle_text = "mCSM\nLig"
, bar_fill_values = c("#F8766D", "#00BFC4")
, subtitle_colour= "black"
, sts = sts
, lts = lts
, ats = ats
, als = als
, ltis = ltis
, geom_ls = geom_ls
)
mLigP
#------------------------------
# barplot for ligand affinity:
# <10 Ang of ligand
# mmCSM-lig: will be the same no. of sites but the effect will be different
#------------------------------
mmLigP = stability_count_bp(plotdf = df3_lig
, df_colname = "mmcsm_lig_outcome"
#, leg_title = "mmCSM-lig"
#, label_categories = labels_mmlig
#, bp_plot_title = paste(common_bp_title, "ligand")
, yaxis_title = ""
, leg_position = "none"
, subtitle_text = "mmCSM\nLig"
, bar_fill_values = c("#F8766D", "#00BFC4")
, subtitle_colour= "black"
, sts = sts
, lts = lts
, ats = ats
, als = als
, ltis = ltis
, geom_ls = geom_ls
)
mmLigP
#------------------------------
# barplot for ppi2 affinity
# <10 Ang of interface
#------------------------------
if (tolower(gene)%in%geneL_ppi2){
ppi2P = stability_count_bp(plotdf = df3_ppi2
, df_colname = "mcsm_ppi2_outcome"
#, leg_title = "mCSM-ppi2"
#, label_categories = labels_ppi2
#, bp_plot_title = paste(common_bp_title, "PP-interface")
, yaxis_title = "Number of nsSNPs"
, leg_position = "none"
, subtitle_text = "mCSM\nPPI2"
, bar_fill_values = c("#F8766D", "#00BFC4")
, subtitle_colour= "black"
, sts = sts
, lts = lts
, ats = ats
, als = als
, ltis = ltis
, geom_ls = geom_ls
)
ppi2P
}
#----------------------------
# barplot for ppi2 affinity
# <10 Ang of interface
#------------------------------
if (tolower(gene)%in%geneL_na){
nca_distP = stability_count_bp(plotdf = df3_na
, df_colname = "mcsm_na_outcome"
#, leg_title = "mCSM-NA"
#, label_categories =
#, bp_plot_title = paste(common_bp_title, "Dist to NA")
, yaxis_title = "Number of nsSNPs"
, leg_position = "none"
, subtitle_text = "mCSM\nNA"
, bar_fill_values = c("#F8766D", "#00BFC4")
, subtitle_colour= "black"
, sts = sts
, lts = lts
, ats = ats
, als = als
, ltis = ltis
, geom_ls = geom_ls
)
nca_distP
}
#####################################################################
# ------------------------------
# bp site site count: mCSM-lig
# < 10 Ang ligand
# ------------------------------
common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
posC_lig = site_snp_count_bp(plotdf = df3_lig
, df_colname = "position"
, xaxis_title = "Number of nsSNPs"
, yaxis_title = "Number of Sites"
, subtitle_colour = "chocolate4"
, subtitle_text = ""
, subtitle_size = subtitle_size
, geom_ls = geom_ls_pc
, leg_text_size = leg_text_size
, axis_text_size = axis_text_size
, axis_label_size = axis_label_size)
posC_lig
#------------------------------
# bp site site count: ppi2
# < 10 Ang interface
#------------------------------
if (tolower(gene)%in%geneL_ppi2){
posC_ppi2 = site_snp_count_bp(plotdf = df3_ppi2
, df_colname = "position"
, xaxis_title = "Number of nsSNPs"
, yaxis_title = "Number of Sites"
, subtitle_colour = "chocolate4"
, subtitle_text = ""
, subtitle_size = subtitle_size
, geom_ls = geom_ls_pc
, leg_text_size = leg_text_size
, axis_text_size = axis_text_size
, axis_label_size = axis_label_size)
posC_ppi2
}
#------------------------------
# bp site site count: NCA dist
# < 10 Ang nca
#------------------------------
if (tolower(gene)%in%geneL_na){
posC_nca = site_snp_count_bp(plotdf = df3_na
, df_colname = "position"
, xaxis_title = "Number of nsSNPs"
, yaxis_title = "Number of Sites"
, subtitle_colour = "chocolate4"
, subtitle_text = ""
, subtitle_size = subtitle_size
, geom_ls = geom_ls_pc
, leg_text_size = leg_text_size
, axis_text_size = axis_text_size
, axis_label_size = axis_label_size)
posC_nca
}
#===============================================================
#------------------------------
# bp site site count: ALL
# <10 Ang ligand
#------------------------------
posC_all = site_snp_count_bp(plotdf = df3
, df_colname = "position"
, xaxis_title = "Number of nsSNPs"
, yaxis_title = "Number of Sites"
, subtitle_colour = "chocolate4"
, subtitle_text = "All mutations sites"
, subtitle_size = subtitle_size
, geom_ls = geom_ls_pc
, leg_text_size = leg_text_size
, axis_text_size = axis_text_size
, axis_label_size = axis_label_size)
posC_all
##################################################################
consurfP = stability_count_bp(plotdf = df3
, df_colname = "consurf_outcome"
#, leg_title = "ConSurf"
#, label_categories = labels_consurf
, yaxis_title = "Number of nsSNPs"
, leg_position = "top"
, subtitle_text = "ConSurf"
, bar_fill_values = consurf_colours # from globals
, subtitle_colour= "black"
, sts = sts
, lts = lts
, ats = ats
, als = als
, ltis = ltis
, geom_ls = geom_ls)
consurfP
##############################################################
sts_so = 10
lts_so = 10
ats_so = 10
als_so = 10
ltis_so = 10
geom_ls_so = 2.5
#===================
# Stability
#===================
# duetP
duetP = stability_count_bp(plotdf = df3
, df_colname = "duet_outcome"
, leg_title = "mCSM-DUET"
#, label_categories = labels_duet
, yaxis_title = "Number of nsSNPs"
, leg_position = "none"
, subtitle_text = "mCSM-DUET"
, bar_fill_values = c("#F8766D", "#00BFC4")
, subtitle_colour= "black"
, sts = sts_so
, lts = lts_so
, ats = ats_so
, als = als_so
, ltis = ltis_so
, geom_ls = geom_ls_so)
duetP
# foldx
foldxP = stability_count_bp(plotdf = df3
, df_colname = "foldx_outcome"
#, leg_title = "FoldX"
#, label_categories = labels_foldx
, yaxis_title = ""
, leg_position = "none"
, subtitle_text = "FoldX"
, bar_fill_values = c("#F8766D", "#00BFC4")
, sts = sts_so
, lts = lts_so
, ats = ats_so
, als = als_so
, ltis = ltis_so
, geom_ls = geom_ls_so)
foldxP
# deepddg
deepddgP = stability_count_bp(plotdf = df3
, df_colname = "deepddg_outcome"
#, leg_title = "DeepDDG"
#, label_categories = labels_deepddg
, yaxis_title = ""
, leg_position = "none"
, subtitle_text = "DeepDDG"
, bar_fill_values = c("#F8766D", "#00BFC4")
, sts = sts_so
, lts = lts_so
, ats = ats_so
, als = als_so
, ltis = ltis_so
, geom_ls = geom_ls_so)
deepddgP
# deepddg
dynamut2P = stability_count_bp(plotdf = df3
, df_colname = "ddg_dynamut2_outcome"
#, leg_title = "Dynamut2"
#, label_categories = labels_ddg_dynamut2_outcome
, yaxis_title = ""
, leg_position = "none"
, subtitle_text = "Dynamut2"
, bar_fill_values = c("#F8766D", "#00BFC4")
, sts = sts_so
, lts = lts_so
, ats = ats_so
, als = als_so
, ltis = ltis_so
, geom_ls = geom_ls_so)
dynamut2P
# provean
proveanP = stability_count_bp(plotdf = df3
, df_colname = "provean_outcome"
#, leg_title = "PROVEAN"
#, label_categories = labels_provean
, yaxis_title = "Number of nsSNPs"
, leg_position = "none" # top
, subtitle_text = "PROVEAN"
, bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
, sts = sts_so
, lts = lts_so
, ats = ats_so
, als = als_so
, ltis = ltis_so
, geom_ls = geom_ls_so)
proveanP
# snap2
snap2P = stability_count_bp(plotdf = df3
, df_colname = "snap2_outcome"
#, leg_title = "SNAP2"
#, label_categories = labels_snap2
, yaxis_title = ""
, leg_position = "none" # top
, subtitle_text = "SNAP2"
, bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
, sts = sts_so
, lts = lts_so
, ats = ats_so
, als = als_so
, ltis = ltis_so
, geom_ls = geom_ls_so)
snap2P
#####################################################################################

View file

@ -0,0 +1,310 @@
#=============
# Data: Input
#==============
source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/alr/basic_barplots_alr.R")
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/alr/pe_sens_site_count_alr.R")
if ( tolower(gene)%in%c("alr") ){
cat("\nPlots available for layout are:")
duetP
foldxP
deepddgP
dynamut2P
proveanP
snap2P
mLigP
mmLigP
posC_lig
ppi2P
posC_ppi2
peP2
sens_siteP
peP # not used
sensP # not used
}
#========================
# Common title settings
#=========================
theme_georgia <- function(...) {
theme_gray(base_family = "sans", ...) +
theme(plot.title = element_text(face = "bold"))
}
title_theme <- calc_element("plot.title", theme_georgia())
###############################################################
common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
# extract common legends
# lig affinity
common_legend_outcome = get_legend(mLigP +
guides(color = guide_legend(nrow = 1)) +
theme(legend.position = "top"))
# stability
common_legend_outcome = get_legend(duetP +
guides(color = guide_legend(nrow = 1)) +
theme(legend.position = "top"))
# conservation
cons_common_legend_outcome = get_legend(snap2P +
guides(color = guide_legend(nrow = 1)) +
theme(legend.position = "top"))
###################################################################
#==================================
# Stability+Conservation: COMBINE
#==================================
tt_size = 10
#----------------------------
# stability and consv title
#----------------------------
tt_stab = ggdraw() +
draw_label(
paste0("Stability outcome"),
fontfamily = title_theme$family,
fontface = title_theme$face,
#size = title_theme$size
size = tt_size
)
tt_cons = ggdraw() +
draw_label(
paste0("Conservation outcome"),
fontfamily = title_theme$family,
fontface = title_theme$face,
size = tt_size
)
#----------------------
# Output plot
#-----------------------
stab_cons_CLP = paste0(outdir_images
,tolower(gene)
,"_stab_cons_BP_CLP.png")
print(paste0("plot filename:", stab_cons_CLP))
png(stab_cons_CLP, units = "in", width = 10, height = 5, res = 300 )
cowplot::plot_grid(
cowplot::plot_grid(
cowplot::plot_grid(
tt_stab,
common_legend_outcome,
nrow = 2
),
cowplot::plot_grid(
duetP,
foldxP,
deepddgP,
dynamut2P,
nrow = 1,
labels = c("A", "B", "C", "D"),
label_size = 12),
nrow = 2,
rel_heights=c(1,10)
),
NULL,
cowplot::plot_grid(
cowplot::plot_grid(
cowplot::plot_grid(
tt_cons,
cons_common_legend_outcome,
nrow = 2
),
cowplot::plot_grid(
proveanP,
snap2P,
nrow=1,
labels = c("E", "F"),
align = "hv"),
nrow = 2,
rel_heights = c(1, 10),
label_size = 12),
nrow=1
),
rel_widths = c(2,0.15,1),
nrow=1
)
dev.off()
#################################################################
#=======================================
# Affinity barplots: COMBINE ALL four
#========================================
ligT = paste0(common_bp_title, " ligand")
lig_affT = ggdraw() +
draw_label(
ligT,
fontfamily = title_theme$family,
fontface = title_theme$face,
#size = title_theme$size
size = 8
)
p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT
, common_legend_outcome
, nrow=2),
cowplot::plot_grid(mLigP, mmLigP, posC_lig
, nrow = 1
, rel_widths = c(1,0.65,1.8)
, align = "h"),
nrow = 2,
rel_heights = c(1,8)
)
p1
###########################################################
ppi2T = paste0(common_bp_title, " PP-interface")
ppi2_affT = ggdraw() +
draw_label(
ppi2T,
fontfamily = title_theme$family,
fontface = title_theme$face,
size = 8
)
p3 = cowplot::plot_grid(cowplot::plot_grid(ppi2_affT, common_legend_outcome, nrow=2),
cowplot::plot_grid(ppi2P, posC_ppi2
, nrow = 1
, rel_widths = c(1,1.9)
, align = "h"),
nrow = 2,
rel_heights = c(1,8)
)
p3
# PE + All position count
peT_allT = ggdraw() +
draw_label(
paste0("All mutation sites"),
fontfamily = title_theme$family,
fontface = title_theme$face,
#size = title_theme$size
size = 8
)
p4 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
, rel_widths = c(1,3),axis = "lr"),
cowplot::plot_grid(
peP2, posC_all,
nrow = 2,
rel_widths = c(1,1),
align = "v",
axis = "lr",
rel_heights = c(1,8)
),
rel_heights = c(1,18),
nrow = 2,axis = "lr")
p4
#### Combine p1+p3+p4 ####
w = 11.79
h = 3.5
mut_impact_CLP = paste0(outdir_images
,tolower(gene)
,"_mut_impactCLP.png")
#svg(affP, width = 20, height = 5.5)
print(paste0("plot filename:", mut_impact_CLP))
png(mut_impact_CLP, units = "in", width = w, height = h, res = 300 )
cowplot::plot_grid(p1,
p3,
p4,
nrow = 1,
labels = "AUTO",
label_size = 12,
rel_widths = c(2.5,2,2)
#, rel_heights = c(1)
)
dev.off()
w = 11.79
h = 3.5
mut_impact_CLP = paste0(outdir_images
,tolower(gene)
,"_mut_impactCLP.png")
#svg(affP, width = 20, height = 5.5)
print(paste0("plot filename:", mut_impact_CLP))
png(mut_impact_CLP, units = "in", width = w, height = h, res = 300 )
cowplot::plot_grid(p1,
p3,
p4,
nrow = 1,
labels = "AUTO",
label_size = 12,
rel_widths = c(2.5,2,2)
#, rel_heights = c(1)
)
dev.off()
##################################################
sensP
consurfP
#=================
#### Combine sensitivity + ConSurf ####
# or ConSurf
#=================
w = 3
h = 3
# sens_conP = paste0(outdir_images
# ,tolower(gene)
# ,"_sens_cons_CLP.png")
#
# print(paste0("plot filename:", sens_conP))
# png(sens_conP, units = "in", width = w, height = h, res = 300 )
#
# cowplot::plot_grid(sensP, consurfP,
# nrow = 2,
# rel_heights = c(1, 1.5)
# )
#
# dev.off()
conCLP = paste0(outdir_images
,tolower(gene)
,"_consurf_BP.png")
print(paste0("plot filename:", conCLP))
png(conCLP, units = "in", width = w, height = h, res = 300 )
consurfP
dev.off()
#================================
# Sensitivity mutation numbers: geom_tile
#================================
sensCLP = paste0(outdir_images
,tolower(gene)
,"_sensN_tile.png")
print(paste0("plot filename:", sensCLP))
png(sensCLP, units = "in", width = 1, height = 1, res = 300 )
sensP
dev.off()
#================================
# Sensitivity SITE numbers: geom_tile
#================================
sens_siteCLP = paste0(outdir_images
,tolower(gene)
,"_sens_siteC_tile.png")
print(paste0("plot filename:", sens_siteCLP))
png(sens_siteCLP, units = "in", width = 1.2, height = 1, res = 300 )
sens_siteP
dev.off()
###########################################################

View file

@ -1,164 +1,177 @@
# source dm_om_plots.R
#============
# Plot labels
#============
tit1 = "Stability changes"
tit2 = "Genomic measure"
tit3 = "Distance to partners"
tit4 = "Evolutionary Conservation"
tit5 = "Affinity changes"
pt_size = 30
theme_georgia <- function(...) {
theme_gray(base_family = "sans", ...) +
theme(plot.title = element_text(face = "bold"))
}
title_theme <- calc_element("plot.title", theme_georgia())
pt1 = ggdraw() +
draw_label(
tit1,
fontfamily = title_theme$family,
fontface = title_theme$face,
#size = title_theme$size
size = pt_size
)
pt2 = ggdraw() +
draw_label(
tit2,
fontfamily = title_theme$family,
fontface = title_theme$face,
size = pt_size
)
pt3 = ggdraw() +
draw_label(
tit3,
fontfamily = title_theme$family,
fontface = title_theme$face,
size = pt_size
)
pt4 = ggdraw() +
draw_label(
tit4,
fontfamily = title_theme$family,
fontface = title_theme$face,
size = pt_size
)
pt5 = ggdraw() +
draw_label(
tit5,
fontfamily = title_theme$family,
fontface = title_theme$face,
size = pt_size
)
#======================
# Output plot function
#======================
OutPlot_dm_om = function(x){
# dist b/w plot title and plot
relH_tp = c(0.08, 0.92)
my_label_size = 25
#----------------
# Top panel
#----------------
top_panel = cowplot::plot_grid(
cowplot::plot_grid(pt1,
cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P
, nrow = 1
, labels = c("A", "B", "C", "D")
, label_size = my_label_size)
, ncol = 1
, rel_heights = relH_tp
),
NULL,
cowplot::plot_grid(pt2,
cowplot::plot_grid(genomicsP
, nrow = 1
, labels = c("E")
, label_size = my_label_size)
, ncol = 1
, rel_heights = relH_tp
),
NULL,
cowplot::plot_grid(pt3,
cowplot::plot_grid( #distanceP
distanceP_lig
, distanceP_ppi2
#, distanceP_na
, nrow = 1
, labels = c("F", "G")
, label_size = my_label_size)
, ncol = 1
, rel_heights = relH_tp
),
nrow = 1,
rel_widths = c(2/7, 0.1/7, 0.5/7, 0.1/7, 1/7)
)
#----------------
# Bottom panel
#----------------
bottom_panel = cowplot::plot_grid(
cowplot::plot_grid(pt4,
cowplot::plot_grid(consurfP, proveanP, snap2P
, nrow = 1
, labels = c("H", "I", "J")
, label_size = my_label_size)
, ncol = 1
, rel_heights =relH_tp
),NULL,
cowplot::plot_grid(pt5,
cowplot::plot_grid(mcsmligP, mcsmlig2P
, mcsmppi2P
#, mcsmnaP
, nrow = 1
, labels = c("K", "L", "M")
, label_size = my_label_size)
, ncol = 1
, rel_heights = relH_tp
),NULL,
nrow = 1,
rel_widths = c(3/6,0.1/6,3/6, 0.1/6 )
)
#-------------------------------
# combine: Top and Bottom panel
#-------------------------------
cowplot::plot_grid (top_panel, bottom_panel
, nrow =2
, rel_widths = c(1, 1)
, align = "hv")
}
#=====================
# OutPlot: svg and png
#======================
dm_om_combinedP = paste0(outdir_images
,tolower(gene)
,"_dm_om_all.svg")
cat("DM OM plots with stats:", dm_om_combinedP)
svg(dm_om_combinedP, width = 32, height = 18)
OutPlot_dm_om()
dev.off()
dm_om_combinedP_png = paste0(outdir_images
,tolower(gene)
,"_dm_om_all.png")
cat("DM OM plots with stats:", dm_om_combinedP_png)
png(dm_om_combinedP_png, width = 32, height = 18, units = "in", res = 300)
OutPlot_dm_om()
dev.off()
# # source dm_om_plots.R
# source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/dm_om_plots.R")
#
# ##### plots to combine ####
# duetP
# foldxP
# deepddgP
# dynamut2P
# genomicsP
# consurfP
# proveanP
# snap2P
# mcsmligP
# mcsmlig2P
# mcsmppi2P
#
#
# # Plot labels
# tit1 = "Stability changes"
# tit2 = "Genomic measure"
# tit3 = "Distance to partners"
# tit4 = "Evolutionary Conservation"
# tit5 = "Affinity changes"
# pt_size = 30
#
# theme_georgia <- function(...) {
# theme_gray(base_family = "sans", ...) +
# theme(plot.title = element_text(face = "bold"))
# }
#
#
# title_theme <- calc_element("plot.title", theme_georgia())
#
# pt1 = ggdraw() +
# draw_label(
# tit1,
# fontfamily = title_theme$family,
# fontface = title_theme$face,
# #size = title_theme$size
# size = pt_size
# )
#
# pt2 = ggdraw() +
# draw_label(
# tit2,
# fontfamily = title_theme$family,
# fontface = title_theme$face,
# size = pt_size
# )
#
# pt3 = ggdraw() +
# draw_label(
# tit3,
# fontfamily = title_theme$family,
# fontface = title_theme$face,
# size = pt_size
# )
#
# pt4 = ggdraw() +
# draw_label(
# tit4,
# fontfamily = title_theme$family,
# fontface = title_theme$face,
# size = pt_size
# )
#
#
# pt5 = ggdraw() +
# draw_label(
# tit5,
# fontfamily = title_theme$family,
# fontface = title_theme$face,
# size = pt_size
# )
#
# #======================
# # Output plot function
# #======================
# OutPlot_dm_om = function(x){
#
# # dist b/w plot title and plot
# relH_tp = c(0.08, 0.92)
#
# my_label_size = 25
# #----------------
# # Top panel
# #----------------
# top_panel = cowplot::plot_grid(
# cowplot::plot_grid(pt1,
# cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P
# , nrow = 1
# , labels = c("A", "B", "C", "D")
# , label_size = my_label_size)
# , ncol = 1
# , rel_heights = relH_tp
# ),
# NULL,
# cowplot::plot_grid(pt2,
# cowplot::plot_grid(genomicsP
# , nrow = 1
# , labels = c("E")
# , label_size = my_label_size)
# , ncol = 1
# , rel_heights = relH_tp
# ),
# NULL,
# cowplot::plot_grid(pt3,
# cowplot::plot_grid( #distanceP
# distanceP_lig
# , distanceP_ppi2
# , nrow = 1
# , labels = c("F", "G")
# , label_size = my_label_size)
# , ncol = 1
# , rel_heights = relH_tp
# ),
# nrow = 1,
# rel_widths = c(2/7, 0.1/7, 0.5/7, 0.1/7, 1/7)
# )
#
# #----------------
# # Bottom panel
# #----------------
# bottom_panel = cowplot::plot_grid(
# cowplot::plot_grid(pt4,
# cowplot::plot_grid(consurfP, proveanP, snap2P
# , nrow = 1
# , labels = c("H", "I", "J")
# , label_size = my_label_size)
# , ncol = 1
# , rel_heights =relH_tp
# ),NULL,
# cowplot::plot_grid(pt5,
# cowplot::plot_grid(mcsmligP
# , mcsmlig2P
# , mcsmppi2P
# , nrow = 1
# , labels = c("K", "L", "M")
# , label_size = my_label_size)
# , ncol = 1
# , rel_heights = relH_tp
# ),NULL,
# nrow = 1,
# rel_widths = c(3/6,0.1/6,3/6, 0.1/6 )
# )
#
# #-------------------------------
# # combine: Top and Bottom panel
# #-------------------------------
# cowplot::plot_grid (top_panel, bottom_panel
# , nrow =2
# , rel_widths = c(1, 1)
# , align = "hv")
# }
#
# #=====================
# # OutPlot: svg and png
# #======================
# dm_om_combinedP = paste0(outdir_images
# ,tolower(gene)
# ,"_dm_om_all.svg")
#
# cat("DM OM plots with stats:", dm_om_combinedP)
# svg(dm_om_combinedP, width = 32, height = 18)
#
# OutPlot_dm_om()
# dev.off()
#
#
# dm_om_combinedP_png = paste0(outdir_images
# ,tolower(gene)
# ,"_dm_om_all.png")
# cat("DM OM plots with stats:", dm_om_combinedP_png)
# png(dm_om_combinedP_png, width = 32, height = 18, units = "in", res = 300)
#
# OutPlot_dm_om()
# dev.off()

View file

@ -0,0 +1,209 @@
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#=======
# output
#=======
#outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
#cat("plots will output to:", outdir_images)
my_gg_pairs=function(plot_df, plot_title
, tt_args_size = 2.5
, gp_args_size = 2.5){
ggpairs(plot_df,
columns = 1:(ncol(plot_df)-1),
upper = list(
continuous = wrap('cor', # ggally_cor()
method = "spearman",
use = "pairwise.complete.obs",
title="ρ",
digits=2,
justify_labels = "centre",
title_args=list(size=tt_args_size, colour="black"),#2.5
group_args=list(size=gp_args_size)#2.5
)
),
lower = list(
continuous = wrap("points",
alpha = 0.7,
size=0.125),
combo = wrap("dot",
alpha = 0.7,
size=0.125)
),
aes(colour = factor(ifelse(dst_mode==0,
"S",
"R") ),
alpha = 0.5),
title=plot_title) +
scale_colour_manual(values = c("red", "blue")) +
scale_fill_manual(values = c("red", "blue")) #+
# theme(text = element_text(size=7,
# face="bold"))
}
DistCutOff = 10
###########################################################################
geneL_normal = c("pnca")
geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
merged_df3 = as.data.frame(merged_df3)
corr_plotdf = corr_data_extract(merged_df3
, gene = gene
, drug = drug
, extract_scaled_cols = F)
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
static_cols = c("Log10(MAF)"
, "Log10(OR)"
)
############################################################
#=============================================
# Creating masked df for affinity data
#=============================================
corr_affinity_df = corr_plotdf
#----------------------
# Mask affinity columns
#-----------------------
corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mCSM-lig"]=0
corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mmCSM-lig"]=0
if (tolower(gene)%in%geneL_ppi2){
corr_affinity_df[corr_affinity_df["PPI-Dist"]>DistCutOff,"mCSM-PPI2"]=0
}
if (tolower(gene)%in%geneL_na){
corr_affinity_df[corr_affinity_df["NA-Dist"]>DistCutOff,"mCSM-NA"]=0
}
# count 0
#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100
unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0)
unmasked_vals
##########################################################
#================
# Stability
#================
corr_ps_colnames = c(static_cols
, "DUET"
, "FoldX"
, "DeepDDG"
, "Dynamut2"
, aff_dist_cols
, "dst_mode")
corr_df_ps = corr_plotdf[, corr_ps_colnames]
# Plot #1
plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability estimates")
##########################################################
#================
# Conservation
#================
corr_conservation_cols = c( static_cols
, "ConSurf"
, "SNAP2"
, "PROVEAN"
#, aff_dist_cols
, "dst_mode"
)
corr_df_cons = corr_plotdf[, corr_conservation_cols]
# Plot #2
plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation estimates")
##########################################################
#================
# Affinity: lig, ppi and na as applicable
#================
#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]<DistCutOff,]
common_aff_colnames = c("mCSM-lig"
, "mmCSM-lig")
if (tolower(gene)%in%geneL_normal){
aff_colnames = common_aff_colnames
}
if (tolower(gene)%in%geneL_ppi2){
aff_colnames = c(common_aff_colnames, "mCSM-PPI2")
}
if (tolower(gene)%in%geneL_na){
aff_colnames = c(common_aff_colnames, "mCSM-NA")
}
# building ffinal affinity colnames for correlation
corr_aff_colnames = c(static_cols
, aff_colnames
, "dst_mode") # imp
corr_df_aff = corr_affinity_df[, corr_aff_colnames]
colnames(corr_df_aff)
# Plot #3
plot_corr_df_aff = my_gg_pairs(corr_df_aff
, plot_title="Affinity estimates"
#, tt_args_size = 4
#, gp_args_size = 4
)
#### Combine plots #####
# #png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
# png(paste0(outdir_images
# ,tolower(gene)
# ,"_CorrAB.png"), height = 6, width=11.75, unit="in",res=300)
#
# cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
# ggmatrix_gtable(plot_corr_df_cons),
# # ggmatrix_gtable(plot_corr_df_aff),
# # nrow=1, ncol=3, rel_heights = 7,7,3
# nrow=1,
# #rel_heights = 1,1
# labels = "AUTO",
# label_size = 12)
# dev.off()
#
# # affinity corr
# #png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300)
# png(paste0(outdir_images
# ,tolower(gene)
# ,"_CorrC.png"), height =7, width=7, unit="in",res=300)
#
# cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff),
# labels = "C",
# label_size = 12)
# dev.off()
#### Combine A ####
png(paste0(outdir_images
,tolower(gene)
,"_CorrA.png"), height =8, width=8, unit="in",res=300)
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
labels = "A",
label_size = 12)
dev.off()
#### Combine B+C ####
# B + C
png(paste0(outdir_images
,tolower(gene)
,"_CorrBC.png"), height = 6, width=11.75, unit="in",res=300)
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_cons),
ggmatrix_gtable(plot_corr_df_aff),
# ggmatrix_gtable(plot_corr_df_aff),
# nrow=1, ncol=3, rel_heights = 7,7,3
nrow=1,
#rel_heights = 1,1
labels = c("B", "C"),
label_size = 12)
dev.off()

View file

@ -0,0 +1,173 @@
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/alr/prominent_effects_alr.R")
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/alr/sensitivity_count_alr.R")
##############################################################
# PE count
#lig-- na--ppi2--stab
# pe_colour_map = c("DD_lig" = "#ffd700" # gold
# , "SS_lig" = "#f0e68c" # khaki
#
# , "DD_nucleic_acid"= "#a0522d" # sienna
# , "SS_nucleic_acid"= "#d2b48c" # tan
#
# , "DD_ppi2" = "#da70d6" # orchid
# , "SS_ppi2" = "#ff1493" # deeppink
#
# , "DD_stability" = "#f8766d" # red
# , "SS_stability" = "#00BFC4") # blue
table(str_df_plot_cols$pe_effect_outcome)
##############################################################
#===========
#PE count
#===========
rects <- data.frame(x=1:6,
colors = c("#ffd700" ,
"#f0e68c" ,
"#da70d6" ,
"#ff1493" ,
"#f8766d" ,
"#00BFC4")
)
rects$text = c("-ve Lig"
, "+ve Lig"
, "-ve PPI2"
, "+ve PPI2"
, "-ve stability"
, "+ve stability"
)
cell1 = table(str_df_plot_cols$pe_effect_outcome)[["DD_lig"]]
cell2 = 0
#cell3 = table(str_df_plot_cols$pe_effect_outcome)[["DD_nucleic_acid"]]
#cell4 = table(str_df_plot_cols$pe_effect_outcome)[["SS_nucleic_acid"]]
cell5 = table(str_df_plot_cols$pe_effect_outcome)[["DD_ppi2"]]
cell6 = table(str_df_plot_cols$pe_effect_outcome)[["SS_ppi2"]]
cell7 = table(str_df_plot_cols$pe_effect_outcome)[["DD_stability"]]
cell8 = table(str_df_plot_cols$pe_effect_outcome)[["SS_stability"]]
#rects$numbers = c(38, 0, 22, 9, 108, 681) #for embb
rects$numbers = c(cell1, cell2,
#cell3, cell4,
cell5, cell6,
cell7, cell8)
rects$num_labels = paste0("n=", rects$numbers)
rects
#------
# Plot
#------
#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
geom_tile(width = 1, height = 1) + # make square tiles
geom_text(color = "black", size = 1.7) + # add white text in the middle
scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
coord_fixed() + # make sure tiles are square
coord_flip()+ scale_x_reverse() +
# theme_void() # remove any axis markings
theme_nothing() # remove any axis markings
peP
#------
# Plot: this one is better
#------
peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
geom_tile() + # make square tiles
geom_text(color = "black", size = 1.6) + # add white text in the middle
scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
coord_fixed() + # make sure tiles are square
theme_nothing() # remove any axis markings
peP2
########################################################
# From: script sensitivity_count per gene
#===============================
# Sensitivity count: SITE
#===============================
#--------
# embb
#--------
#rsc = 54
#ccc = 46
#ssc = 470
rsc = site_Rc; rsc
ccc = site_Cc; ccc
ssc = site_Sc; ssc
rect_rs_siteC <- data.frame(x=1:3,
colors = c("red",
"purple",
"blue")
)
rect_rs_siteC
rect_rs_siteC$text = c("Resistant",
"Common",
"Sensitive")
rect_rs_siteC$numbers = c(rsc,ccc,ssc)
rect_rs_siteC$num_labels = paste0("n=", rect_rs_siteC$numbers)
rect_rs_siteC
#------
# Plot
#------
sens_siteP = ggplot(rect_rs_siteC, aes(x, y = 0,
fill = colors,
label = num_labels
#,label = paste0(text,"\n", num_labels)
)) +
geom_tile(width = 1, height = 1) +
#geom_text(color = "black", size = 1.7) +
geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) +
scale_fill_identity(guide = "none") +
coord_fixed()+
theme_nothing() # remove any axis markings
sens_siteP
################################################################
#===============================
# Sensitivity count: Mutations
#===============================
table(sensP_df$sensitivity)
muts_Rc = table(sensP_df$sensitivity)[["R"]]
muts_Sc = table(sensP_df$sensitivity)[["S"]]
rect_sens <- data.frame(x=1:2,
colors = c("red",
"blue")
)
rect_sens$text = c("Resistant",
"Sensitive")
rect_sens$numbers = c(muts_Rc,muts_Sc)
rect_sens$num_labels = paste0("n=", rect_sens$numbers)
rect_sens
#------
# Plot
#------
sensP = ggplot(rect_sens, aes(x, y = 0,
fill = colors,
label = paste0(text,"\n", num_labels))) +
geom_tile(width = 1, height = 1) +
#geom_text(color = "black", size = 1.7) +
geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) +
scale_fill_identity(guide = "none") +
coord_fixed()+
theme_nothing() # remove any axis markings
sensP
sensP2 = sensP +
coord_flip() + scale_x_reverse()
sensP2

View file

@ -0,0 +1,331 @@
########################################################
pos_colname = "position"
#-------------
# from ~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R
#-------------
length(all_stability_cols); length(raw_stability_cols)
length(scaled_stability_cols); length(outcome_stability_cols)
length(affinity_dist_colnames)
static_cols = c("mutationinformation",
#"position",
pos_colname,
"sensitivity")
other_cols_all = c(scaled_stability_cols, scaled_affinity_cols, affinity_dist_colnames)
#omit avg cols and foldx_scaled_signC cols
other_cols = other_cols_all[grep("avg", other_cols_all, invert = T)]
other_cols = other_cols[grep("foldx_scaled_signC",other_cols, invert = T )]
other_cols
cols_to_extract = c(static_cols, other_cols)
cat("\nExtracting cols:", cols_to_extract)
expected_ncols = length(static_cols) + length(other_cols)
expected_ncols
str_df = merged_df3[, cols_to_extract]
if (ncol(str_df) == expected_ncols){
cat("\nPASS: successfully extracted cols for calculating prominent effects")
}else{
stop("\nAbort: Could not extract cols for calculating prominent effects")
}
#=========================
# Masking affinity columns
#=========================
# First make values for affinity cols 0 when their corresponding dist >10
head(str_df)
# replace in place affinity values >10
str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0
str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0
#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10
if (tolower(gene)%in%geneL_ppi2){
str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0
}
# na gene: replace in place na affinity values where na dist >10
if (tolower(gene)%in%geneL_na){
str_df[str_df["nca_distance"]>10,"mcsm_na_scaled"]=0
}
colnames(str_df)
head(str_df)
scaled_cols_tc = other_cols[grep("scaled", other_cols)]
################################################
#===============
# whole df
#===============
give_col=function(x,y,df=str_df){
df[df[[pos_colname]]==x,y]
}
for (i in unique(str_df[[pos_colname]]) ){
print(i)
#cat(length(unique(str_df[[pos_colname]])))
biggest = max(abs(give_col(i,scaled_cols_tc)))
str_df[str_df[[pos_colname]]==i,'abs_max_effect'] = biggest
str_df[str_df[[pos_colname]]==i,'effect_type']= names(
give_col(i,scaled_cols_tc)[which(
abs(
give_col(i,scaled_cols_tc)
) == biggest, arr.ind=T
)[, "col"]])[1]
effect_name = unique(str_df[str_df[[pos_colname]]==i,'effect_type'])#[1] # pick first one in case we have multiple exact values
# get index/rowname for value of max effect, and then use it to get the original sign
# here
#ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T))
ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c(pos_colname,effect_name)][effect_name])== biggest, arr.ind=T))
str_df[str_df[[pos_colname]]==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1]
}
# ends with suffix 2 if dups
str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values
colnames(str_df)
table(str_df$effect_type)
# check
str_df_check = str_df[str_df[[pos_colname]]%in%c(24, 32, 160, 303, 334),]
#================
# for Plots
#================
str_df_short = str_df[, c("mutationinformation",
#"position",
pos_colname,
"sensitivity"
, "effect_type"
, "effect_sign")]
table(str_df_short$effect_type)
table(str_df_short$effect_sign)
str(str_df_short)
# assign pe outcome
str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS")
table(str_df_short$pe_outcome )
table(str_df_short$effect_sign)
#==============
# group effect type:
# lig, ppi2, nuc. acid, stability
#==============
affcols = c("affinity_scaled", "mmcsm_lig_scaled")
ppi2_cols = c("mcsm_ppi2_scaled")
#lig
table(str_df_short$effect_type)
str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols
, "lig"
, str_df_short$effect_type)
table(str_df_short$effect_grouped)
#ppi2
str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols
, "ppi2"
, str_df_short$effect_grouped)
table(str_df_short$effect_grouped)
#stability
str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig",
"ppi2"
)
, "stability"
, str_df_short$effect_grouped)
table(str_df_short$effect_grouped)
# create a sign as well
str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_"
, str_df_short$effect_grouped)
table(str_df_short$pe_effect_outcome)
#####################################################################
# Chimera: for colouring
####################################################################
#-------------------------------------
# get df with unique position
#--------------------------------------
#data[!duplicated(data$x), ]
str_df_plot = str_df_short[!duplicated(str_df[[pos_colname]]),]
if (nrow(str_df_plot) == length(unique(str_df[[pos_colname]]))){
cat("\nPASS: successfully extracted df with unique positions")
}else{
stop("\nAbort: Could not extract df with unique positions")
}
#-------------------------------------
# generate colours for effect types
#--------------------------------------
str_df_plot_cols = str_df_plot[, c(pos_colname,
"sensitivity",
"pe_outcome",
"effect_grouped",
"pe_effect_outcome")]
head(str_df_plot_cols)
# colour intensity based on sign
#str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light")
str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$pe_outcome=="DD", "bright", "light")
table(str_df_plot_cols$colour_hue); table(str_df_plot$pe_outcome)
head(str_df_plot_cols)
# colour based on effect
table(str_df_plot_cols$pe_effect_outcome)
# colors = c("#ffd700" #gold
# , "#f0e68c" #khaki
# , "#da70d6"# orchid
# , "#ff1493"# deeppink
# , "#a0522d" #sienna
# , "#d2b48c" # tan
# , "#00BFC4" #, "#007d85" #blue
# , "#F8766D" )# red
pe_colour_map = c("DD_lig" = "#ffd700" # gold
, "SS_lig" = "#f0e68c" # khaki
, "DD_nucleic_acid"= "#a0522d" # sienna
, "SS_nucleic_acid"= "#d2b48c" # tan
, "DD_ppi2" = "#da70d6" # orchid
, "SS_ppi2" = "#ff1493" # deeppink
, "DD_stability" = "#f8766d" # red
, "SS_stability" = "#00BFC4") # blue
#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE)
#map the colours
str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$pe_effect_outcome
,function(x){pe_colour_map[[x]]}
))
head(str_df_plot_cols$colour_map)
table(str_df_plot_cols$colour_map)
table(str_df_plot_cols$pe_effect_outcome)
# str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue
# , "_"
# , str_df_plot_cols$colour_map)
# head(str_df_plot_cols$colours)
# table(str_df_plot_cols$colours)
#
#
# class(str_df_plot_cols$colour_map)
# str(str_df_plot_cols)
# sort by colour
head(str_df_plot_cols)
str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ]
head(str_df_plot_cols)
#======================================
# write file with prominent effects
#======================================
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv"))
################################
# printing for chimera
###############################
chain_suffix = ".A"
str_df_plot_cols$pos_chain = paste0(str_df_plot_cols[[pos_colname]], chain_suffix)
table(str_df_plot_cols$colour_map)
table(str_df_plot_cols$pe_effect_outcome)
#===================================================
#-------------------
# Ligand Affinity
#-------------------
# -ve Lig Aff
dd_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_lig",]
if (nrow(dd_lig) == table(str_df_plot_cols$pe_effect_outcome)[['DD_lig']]){
dd_lig_pos = dd_lig[[pos_colname]]
}else{
stop("Abort: DD affinity colour numbers mismtatch")
}
# +ve Lig Aff
ss_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_lig",]
if (!empty(ss_lig)){
if (nrow(ss_lig) == table(str_df_plot_cols$pe_effect_outcome)[['SS_lig']]){
ss_lig_pos = ss_lig[[pos_colname]]
}else{
stop("Abort: SS affinity colour numbers mismtatch")
}
#put in chimera cmd
paste0(dd_lig_pos, chain_suffix)
paste0(ss_lig_pos, chain_suffix)
}
#===================================================
#-------------------
# PPI2 Affinity
#-------------------
# -ve PPI2
dd_ppi2 = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_ppi2",]
if (nrow(dd_ppi2) == table(str_df_plot_cols$pe_effect_outcome)[['DD_ppi2']]){
dd_ppi2_pos = dd_ppi2[[pos_colname]]
}else{
stop("Abort: DD PPI2 colour numbers mismtatch")
}
# +ve PPI2
ss_ppi2 = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_ppi2",]
if (nrow(ss_ppi2) == table(str_df_plot_cols$pe_effect_outcome)[['SS_ppi2']]){
ss_ppi2_pos = ss_ppi2[[pos_colname]]
}else{
stop("Abort: SS PPI2 colour numbers mismtatch")
}
#put in chimera cmd
paste0(dd_ppi2_pos,chain_suffix)
paste0(ss_ppi2_pos,chain_suffix)
#=========================================================
#------------------------
# Stability
#------------------------
# -ve Stability
dd_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_stability",]
if (nrow(dd_stability) == table(str_df_plot_cols$pe_effect_outcome)[['DD_stability']]){
dd_stability_pos = dd_stability[[pos_colname]]
}else{
stop("Abort: DD Stability colour numbers mismtatch")
}
# +ve Stability
ss_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_stability",]
if (nrow(ss_stability) == table(str_df_plot_cols$pe_effect_outcome)[['SS_stability']]){
ss_stability_pos = ss_stability[[pos_colname]]
}else{
stop("Abort: SS Stability colour numbers mismtatch")
}
#put in chimera cmd
# stabiliting first as it has less numbers
paste0(ss_stability_pos, chain_suffix)
paste0(dd_stability_pos, chain_suffix)
####################################################################

View file

@ -0,0 +1,65 @@
#=========================
# Count Sensitivity
# Mutations and positions
#=========================
pos_colname_c ="position"
sensP_df = merged_df3[,c("mutationinformation",
#"position",
pos_colname_c,
"sensitivity")]
head(sensP_df)
table(sensP_df$sensitivity)
#---------------
# Total unique positions
#----------------
tot_mut_pos = length(unique(sensP_df[[pos_colname_c]]))
cat("\nNo of Tot muts sites:", tot_mut_pos)
# resistant mut pos
sens_site_allR = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="R"]
sens_site_UR = unique(sens_site_allR)
length(sens_site_UR)
# Sensitive mut pos
sens_site_allS = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="S"]
sens_site_US = unique(sens_site_allS)
length(sens_site_UR)
#---------------
# Common Sites
#----------------
common_pos = intersect(sens_site_UR,sens_site_US)
site_Cc = length(common_pos)
cat("\nNo of Common sites:", site_Cc
, "\nThese are:", common_pos)
#---------------
# Resistant muts
#----------------
site_R = sens_site_UR[!sens_site_UR%in%common_pos]
site_Rc = length(site_R)
if ( length(sens_site_allR) == table(sensP_df$sensitivity)[['R']] ){
cat("\nNo of R muts:", length(sens_site_allR)
, "\nNo. of R sites:",site_Rc
, "\nThese are:", site_R
)
}
#---------------
# Sensitive muts
#----------------
site_S = sens_site_US[!sens_site_US%in%common_pos]
site_Sc = length(site_S)
if ( length(sens_site_allS) == table(sensP_df$sensitivity)[['S']] ){
cat("\nNo of S muts:", length(sens_site_allS)
, "\nNo. of S sites:", site_Sc
, "\nThese are:", site_S)
}
#########################

View file

@ -0,0 +1,363 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Barplots
# basic barplots with outcome
# basic barplots with frequency of count of mutations
#########################################################
#=============
# Data: Input
#==============
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#cat("\nSourced plotting cols as well:", length(plotting_cols))
####################################################
class(merged_df3)
df3 = subset(merged_df3, select = -c(pos_count))
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
cat("plots will output to:", outdir_images)
##########################################################
# blue, red bp
sts = 8
lts = 8
ats = 8
als = 8
ltis = 8
geom_ls = 2.2
#pos_count
subtitle_size = 8
geom_ls_pc = 2.2
leg_text_size = 8
axis_text_size = 8
axis_label_size = 8
###########################################################
#------------------------------
# plot default sizes
#------------------------------
#=========================
# Affinity outcome
# check this var: outcome_cols_affinity
# get from preformatting or put in globals
#==========================
DistCutOff
LigDist_colname # = "ligand_distance" # from globals
ppi2Dist_colname
naDist_colname
###########################################################
# get plotting data within the distance
df3_lig = df3[df3[[LigDist_colname]]<DistCutOff,]
df3_ppi2 = df3[df3[[ppi2Dist_colname]]<DistCutOff,]
df3_na = df3[df3[[naDist_colname]]<DistCutOff,]
common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
#------------------------------
# barplot for ligand affinity:
# <10 Ang of ligand
#------------------------------
mLigP = stability_count_bp(plotdf = df3_lig
, df_colname = "ligand_outcome"
#, leg_title = "mCSM-lig"
#, bp_plot_title = paste(common_bp_title, "ligand")
, yaxis_title = "Number of nsSNPs"
, leg_position = "none"
, subtitle_text = "mCSM\nLig"
, bar_fill_values = c("#F8766D", "#00BFC4")
, subtitle_colour= "black"
, sts = sts
, lts = lts
, ats = ats
, als = als
, ltis = ltis
, geom_ls = geom_ls
)
mLigP
#------------------------------
# barplot for ligand affinity:
# <10 Ang of ligand
# mmCSM-lig: will be the same no. of sites but the effect will be different
#------------------------------
mmLigP = stability_count_bp(plotdf = df3_lig
, df_colname = "mmcsm_lig_outcome"
#, leg_title = "mmCSM-lig"
#, label_categories = labels_mmlig
#, bp_plot_title = paste(common_bp_title, "ligand")
, yaxis_title = ""
, leg_position = "none"
, subtitle_text = "mmCSM\nLig"
, bar_fill_values = c("#F8766D", "#00BFC4")
, subtitle_colour= "black"
, sts = sts
, lts = lts
, ats = ats
, als = als
, ltis = ltis
, geom_ls = geom_ls
)
mmLigP
#------------------------------
# barplot for ppi2 affinity
# <10 Ang of interface
#------------------------------
if (tolower(gene)%in%geneL_ppi2){
ppi2P = stability_count_bp(plotdf = df3_ppi2
, df_colname = "mcsm_ppi2_outcome"
#, leg_title = "mCSM-ppi2"
#, label_categories = labels_ppi2
#, bp_plot_title = paste(common_bp_title, "PP-interface")
, yaxis_title = "Number of nsSNPs"
, leg_position = "none"
, subtitle_text = "mCSM\nPPI2"
, bar_fill_values = c("#F8766D", "#00BFC4")
, subtitle_colour= "black"
, sts = sts
, lts = lts
, ats = ats
, als = als
, ltis = ltis
, geom_ls = geom_ls
)
ppi2P
}
#----------------------------
# barplot for ppi2 affinity
# <10 Ang of interface
#------------------------------
if (tolower(gene)%in%geneL_na){
nca_distP = stability_count_bp(plotdf = df3_na
, df_colname = "mcsm_na_outcome"
#, leg_title = "mCSM-NA"
#, label_categories =
#, bp_plot_title = paste(common_bp_title, "Dist to NA")
, yaxis_title = "Number of nsSNPs"
, leg_position = "none"
, subtitle_text = "mCSM\nNA"
, bar_fill_values = c("#F8766D", "#00BFC4")
, subtitle_colour= "black"
, sts = sts
, lts = lts
, ats = ats
, als = als
, ltis = ltis
, geom_ls = geom_ls
)
nca_distP
}
#####################################################################
# ------------------------------
# bp site site count: mCSM-lig
# < 10 Ang ligand
# ------------------------------
common_bp_title = paste0("Sites <", DistCutOff, angstroms_symbol)
posC_lig = site_snp_count_bp(plotdf = df3_lig
, df_colname = "position"
, xaxis_title = "Number of nsSNPs"
, yaxis_title = "Number of Sites"
, subtitle_colour = "chocolate4"
, subtitle_text = ""
, subtitle_size = subtitle_size
, geom_ls = geom_ls_pc
, leg_text_size = leg_text_size
, axis_text_size = axis_text_size
, axis_label_size = axis_label_size)
posC_lig
#------------------------------
# bp site site count: ppi2
# < 10 Ang interface
#------------------------------
if (tolower(gene)%in%geneL_ppi2){
posC_ppi2 = site_snp_count_bp(plotdf = df3_ppi2
, df_colname = "position"
, xaxis_title = "Number of nsSNPs"
, yaxis_title = "Number of Sites"
, subtitle_colour = "chocolate4"
, subtitle_text = ""
, subtitle_size = subtitle_size
, geom_ls = geom_ls_pc
, leg_text_size = leg_text_size
, axis_text_size = axis_text_size
, axis_label_size = axis_label_size)
posC_ppi2
}
#------------------------------
# bp site site count: NCA dist
# < 10 Ang nca
#------------------------------
if (tolower(gene)%in%geneL_na){
posC_nca = site_snp_count_bp(plotdf = df3_na
, df_colname = "position"
, xaxis_title = "Number of nsSNPs"
, yaxis_title = "Number of Sites"
, subtitle_colour = "chocolate4"
, subtitle_text = ""
, subtitle_size = subtitle_size
, geom_ls = geom_ls_pc
, leg_text_size = leg_text_size
, axis_text_size = axis_text_size
, axis_label_size = axis_label_size)
posC_nca
}
#===============================================================
#------------------------------
# bp site site count: ALL
# <10 Ang ligand
#------------------------------
posC_all = site_snp_count_bp(plotdf = df3
, df_colname = "position"
, xaxis_title = "Number of nsSNPs"
, yaxis_title = "Number of Sites"
, subtitle_colour = "chocolate4"
, subtitle_text = "All mutations sites"
, subtitle_size = subtitle_size
, geom_ls = geom_ls_pc
, leg_text_size = leg_text_size
, axis_text_size = axis_text_size
, axis_label_size = axis_label_size)
posC_all
##################################################################
consurfP = stability_count_bp(plotdf = df3
, df_colname = "consurf_outcome"
#, leg_title = "ConSurf"
#, label_categories = labels_consurf
, yaxis_title = "Number of nsSNPs"
, leg_position = "top"
, subtitle_text = "ConSurf"
, bar_fill_values = consurf_colours # from globals
, subtitle_colour= "black"
, sts = sts
, lts = lts
, ats = ats
, als = als
, ltis = ltis
, geom_ls = geom_ls)
consurfP
##############################################################
sts_so = 10
lts_so = 10
ats_so = 10
als_so = 10
ltis_so = 10
geom_ls_so = 2.5
#===================
# Stability
#===================
# duetP
duetP = stability_count_bp(plotdf = df3
, df_colname = "duet_outcome"
, leg_title = "mCSM-DUET"
#, label_categories = labels_duet
, yaxis_title = "Number of nsSNPs"
, leg_position = "none"
, subtitle_text = "mCSM-DUET"
, bar_fill_values = c("#F8766D", "#00BFC4")
, subtitle_colour= "black"
, sts = sts_so
, lts = lts_so
, ats = ats_so
, als = als_so
, ltis = ltis_so
, geom_ls = geom_ls_so)
duetP
# foldx
foldxP = stability_count_bp(plotdf = df3
, df_colname = "foldx_outcome"
#, leg_title = "FoldX"
#, label_categories = labels_foldx
, yaxis_title = ""
, leg_position = "none"
, subtitle_text = "FoldX"
, bar_fill_values = c("#F8766D", "#00BFC4")
, sts = sts_so
, lts = lts_so
, ats = ats_so
, als = als_so
, ltis = ltis_so
, geom_ls = geom_ls_so)
foldxP
# deepddg
deepddgP = stability_count_bp(plotdf = df3
, df_colname = "deepddg_outcome"
#, leg_title = "DeepDDG"
#, label_categories = labels_deepddg
, yaxis_title = ""
, leg_position = "none"
, subtitle_text = "DeepDDG"
, bar_fill_values = c("#F8766D", "#00BFC4")
, sts = sts_so
, lts = lts_so
, ats = ats_so
, als = als_so
, ltis = ltis_so
, geom_ls = geom_ls_so)
deepddgP
# deepddg
dynamut2P = stability_count_bp(plotdf = df3
, df_colname = "ddg_dynamut2_outcome"
#, leg_title = "Dynamut2"
#, label_categories = labels_ddg_dynamut2_outcome
, yaxis_title = ""
, leg_position = "none"
, subtitle_text = "Dynamut2"
, bar_fill_values = c("#F8766D", "#00BFC4")
, sts = sts_so
, lts = lts_so
, ats = ats_so
, als = als_so
, ltis = ltis_so
, geom_ls = geom_ls_so)
dynamut2P
# provean
proveanP = stability_count_bp(plotdf = df3
, df_colname = "provean_outcome"
#, leg_title = "PROVEAN"
#, label_categories = labels_provean
, yaxis_title = "Number of nsSNPs"
, leg_position = "none" # top
, subtitle_text = "PROVEAN"
, bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
, sts = sts_so
, lts = lts_so
, ats = ats_so
, als = als_so
, ltis = ltis_so
, geom_ls = geom_ls_so)
proveanP
# snap2
snap2P = stability_count_bp(plotdf = df3
, df_colname = "snap2_outcome"
#, leg_title = "SNAP2"
#, label_categories = labels_snap2
, yaxis_title = ""
, leg_position = "none" # top
, subtitle_text = "SNAP2"
, bar_fill_values = c("#D01C8B", "#F1B6DA") # light pink and deep
, sts = sts_so
, lts = lts_so
, ats = ats_so
, als = als_so
, ltis = ltis_so
, geom_ls = geom_ls_so)
snap2P
#####################################################################################

View file

@ -1,17 +1,38 @@
duetP
foldxP
deepddgP
dynamut2P
proveanP
snap2P
#=============
# Data: Input
#==============
source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#cat("\nSourced plotting cols as well:", length(plotting_cols))
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/gid/basic_barplots_gid.R")
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/gid/pe_sens_site_count_gid.R")
if ( tolower(gene)%in%c("gid") ){
cat("\nPlots available for layout are:")
duetP
foldxP
deepddgP
dynamut2P
proveanP
snap2P
mLigP
mmLigP
posC_lig
nca_distP
posC_nca
peP2
sens_siteP
peP # not used
sensP # not used
}
mLigP
mmLigP
posC_lig
ppi2P
posC_nca
peP
sensP
#========================
# Common title settings
#=========================
@ -118,9 +139,8 @@ dev.off()
#################################################################
#=======================================
# Affinity barplots: COMBINE ALL three
# Affinity barplots: COMBINE ALL four
#========================================
ligT = paste0(common_bp_title, " ligand")
lig_affT = ggdraw() +
draw_label(
@ -136,7 +156,7 @@ p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT
, nrow=2),
cowplot::plot_grid(mLigP, mmLigP, posC_lig
, nrow = 1
, rel_widths = c(1,1,1.8)
, rel_widths = c(1,0.65,1.8)
, align = "h"),
nrow = 2,
rel_heights = c(1,8)
@ -144,7 +164,7 @@ p1 = cowplot::plot_grid(cowplot::plot_grid(lig_affT
)
p1
###########################################################
ncaT = paste0(common_bp_title, " Dist-NA")
ncaT = paste0(common_bp_title, " Nucleic Acid")
nca_affT = ggdraw() +
draw_label(
ncaT,
@ -159,7 +179,7 @@ p2 = cowplot::plot_grid(cowplot::plot_grid(nca_affT
, nrow=2),
cowplot::plot_grid(nca_distP, posC_nca
, nrow = 1
, rel_widths = c(1.2,1.8)
, rel_widths = c(1,1.8)
, align = "h"),
nrow = 2,
rel_heights = c(1,8)
@ -176,7 +196,7 @@ peT_allT = ggdraw() +
size = 8
)
p3 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
p4 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
, rel_widths = c(1,3),axis = "lr"),
cowplot::plot_grid(
peP2, posC_all,
@ -185,13 +205,12 @@ p3 = cowplot::plot_grid(cowplot::plot_grid(peT_allT, nrow = 2
align = "v",
axis = "lr",
rel_heights = c(1,8)
),
),
rel_heights = c(1,18),
nrow = 2,axis = "lr")
p3
#===============
# Final combine
#===============
p4
#### Combine p1+p2+p4 ####
w = 11.79
h = 3.5
mut_impact_CLP = paste0(outdir_images
@ -202,21 +221,46 @@ mut_impact_CLP = paste0(outdir_images
print(paste0("plot filename:", mut_impact_CLP))
png(mut_impact_CLP, units = "in", width = w, height = h, res = 300 )
cowplot::plot_grid(p1, p2
#, p3
, nrow = 1
, labels = "AUTO"
, label_size = 12
, rel_widths = c(3,2,2)
cowplot::plot_grid(p1,
p2,
p4,
nrow = 1,
labels = "AUTO",
label_size = 12,
rel_widths = c(2.5,2,2)
#, rel_heights = c(1)
)
)
dev.off()
w = 11.79
h = 3.5
mut_impact_CLP = paste0(outdir_images
,tolower(gene)
,"_mut_impactCLP.png")
#svg(affP, width = 20, height = 5.5)
print(paste0("plot filename:", mut_impact_CLP))
png(mut_impact_CLP, units = "in", width = w, height = h, res = 300 )
cowplot::plot_grid(p1,
p2,
p4,
nrow = 1,
labels = "AUTO",
label_size = 12,
rel_widths = c(2.5,2,2)
#, rel_heights = c(1)
)
dev.off()
##################################################
sensP
consurfP
#=================
# Combine sensitivity + ConSurf
#### Combine sensitivity + ConSurf ####
# or ConSurf
#=================
w = 3
@ -263,7 +307,7 @@ sens_siteCLP = paste0(outdir_images
,"_sens_siteC_tile.png")
print(paste0("plot filename:", sens_siteCLP))
png(sens_siteCLP, units = "in", width = 1, height = 1, res = 300 )
png(sens_siteCLP, units = "in", width = 1.2, height = 1, res = 300 )
sens_siteP
dev.off()

View file

@ -1,320 +0,0 @@
#################
# Numbers
##################
#all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)
all_dm_om_df = dm_om_wf_lf_data(df = df3, gene = gene)
# lf_duet = all_dm_om_df[['lf_duet']]
# table(lf_duet$param_type)
################################################################
#======================
# Data: Dist+Genomics
#======================
lf_dist_genP = all_dm_om_df[['lf_dist_gen']]
wf_dist_genP = all_dm_om_df[['wf_dist_gen']]
levels(lf_dist_genP$param_type)
#lf_dist_genP$param_type <- factor(lf_dist_genP$param_type, levels=c("Log10(MAF)", "Lig Dist(Å)", "PPI Dist(Å)"))
table(lf_dist_genP$param_type)
genomics_param = c("Log10(MAF)")
dist_genP = lf_bp2(lf_dist_genP
#, p_title
, violin_quantiles = c(0.5), monochrome = F)
dist_genP
#-------------------
# Genomics data plot
#-------------------
genomics_dataP = lf_dist_genP[lf_dist_genP$param_type%in%genomics_param,]
genomics_dataP$param_type = factor(genomics_dataP$param_type)
table(genomics_dataP$param_type)
genomicsP = lf_bp2(genomics_dataP
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F)
genomicsP
#check
wilcox.test(wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="R"]
, wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
tapply(wf_dist_genP$`Log10(MAF)`, wf_dist_genP$mutation_info_labels, summary)
#-------------------
# Distance data plot:
#--------------------
# not genomics
dist_dataP = lf_dist_genP[!lf_dist_genP$param_type%in%genomics_param,]
dist_dataP$param_type = factor(dist_dataP$param_type)
table(dist_dataP$param_type)
levels(dist_dataP$param_type)
# relevel factor to control ordering of appearance of plot
dist_dataP$param_type <-relevel(dist_dataP$param_type, "Lig Dist(Å)" )
table(dist_dataP$param_type)
levels(dist_dataP$param_type)
distanceP = lf_bp2(dist_dataP
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F)
distanceP
# check
wilcox.test(wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
, wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
wilcox.test(wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
, wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
wilcox.test(wf_dist_genP$`NA Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
, wf_dist_genP$`NA Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
tapply(wf_dist_genP$`PPI Dist(Å)`, wf_dist_genP$mutation_info_labels, summary)
tapply(wf_dist_genP$`Lig Dist(Å)`, wf_dist_genP$mutation_info_labels, summary)
#-------------------
# Distance data plot: LigDist
#--------------------
levels(dist_dataP$param_type)[[1]]
#Lig Dist(Å), PPI Dist(Å)
dist_data_lig = dist_dataP[dist_dataP$param_type%in%c(levels(dist_dataP$param_type)[[1]]),]
dist_data_lig$param_type = factor(dist_data_lig$param_type)
table(dist_data_lig$param_type)
levels(dist_data_lig$param_type)
distanceP_lig = lf_bp2(dist_data_lig
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F)
distanceP_lig
if (tolower(gene)%in%geneL_ppi2){
#-------------------
# Distance data plot: LigDist
#--------------------
levels(dist_dataP$param_type)[[2]]
#Lig Dist(Å), PPI Dist(Å)
dist_data_ppi2 = dist_dataP[dist_dataP$param_type%in%c(levels(dist_dataP$param_type)[[2]]),]
dist_data_ppi2$param_type = factor(dist_data_ppi2$param_type)
table(dist_data_ppi2$param_type)
levels(dist_data_ppi2$param_type)
distanceP_ppi2 = lf_bp2(dist_data_ppi2
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F)
distanceP_ppi2
}
if (tolower(gene)%in%geneL_na){
#-------------------
# Distance data plot: NADist
#--------------------
levels(dist_dataP$param_type)[[2]]
#Lig Dist(Å), PPI Dist(Å)
dist_data_na = dist_dataP[dist_dataP$param_type%in%c(levels(dist_dataP$param_type)[[2]]),]
dist_data_na$param_type = factor(dist_data_na$param_type)
table(dist_data_na$param_type)
levels(dist_data_na$param_type)
distanceP_na = lf_bp2(dist_data_na
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F)
distanceP_na
}
#==============
# Plot:DUET
#==============
lf_duetP = all_dm_om_df[['lf_duet']]
#lf_duetP = lf_duet[!lf_duet$param_type%in%c(static_colsP),]
table(lf_duetP$param_type)
lf_duetP$param_type = factor(lf_duetP$param_type)
table(lf_duetP$param_type)
duetP = lf_bp2(lf_duetP
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 0.2)
#==============
# Plot:FoldX
#==============
lf_foldxP = all_dm_om_df[['lf_foldx']]
#lf_foldxP = lf_foldx[!lf_foldx$param_type%in%c(static_colsP),]
table(lf_foldxP$param_type)
lf_foldxP$param_type = factor(lf_foldxP$param_type)
table(lf_foldxP$param_type)
foldxP = lf_bp2(lf_foldxP
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 0.1)
#==============
# Plot:DeepDDG
#==============
lf_deepddgP = all_dm_om_df[['lf_deepddg']]
#lf_deepddgP = lf_deepddg[!lf_deepddg$param_type%in%c(static_colsP),]
table(lf_deepddgP$param_type)
lf_deepddgP$param_type = factor(lf_deepddgP$param_type)
table(lf_deepddgP$param_type)
deepddgP = lf_bp2(lf_deepddgP
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 0.2)
deepddgP
#==============
# Plot: Dynamut2
#==============
lf_dynamut2P = all_dm_om_df[['lf_dynamut2']]
#lf_dynamut2P = lf_dynamut2[!lf_dynamut2$param_type%in%c(static_colsP),]
table(lf_dynamut2P$param_type)
lf_dynamut2P$param_type = factor(lf_dynamut2P$param_type)
table(lf_dynamut2P$param_type)
dynamut2P = lf_bp2(lf_dynamut2P
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 0.2)
#==============
# Plot:ConSurf
#==============
lf_consurfP = all_dm_om_df[['lf_consurf']]
#lf_consurfP = lf_consurf[!lf_consurf$param_type%in%c(static_colsP),]
table(lf_consurfP$param_type)
lf_consurfP$param_type = factor(lf_consurfP$param_type)
table(lf_consurfP$param_type)
consurfP = lf_bp2(lf_consurfP
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F)
#==============
# Plot:PROVEAN
#==============
lf_proveanP = all_dm_om_df[['lf_provean']]
#lf_proveanP = lf_provean[!lf_provean$param_type%in%c(static_colsP),]
table(lf_proveanP$param_type)
lf_proveanP$param_type = factor(lf_proveanP$param_type)
table(lf_proveanP$param_type)
proveanP = lf_bp2(lf_proveanP
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F)
#==============
# Plot:SNAP2
#==============
lf_snap2P = all_dm_om_df[['lf_snap2']]
#lf_snap2P = lf_snap2[!lf_snap2$param_type%in%c(static_colsP),]
table(lf_snap2P$param_type)
lf_snap2P$param_type = factor(lf_snap2P$param_type)
table(lf_snap2P$param_type)
snap2P = lf_bp2(lf_snap2P
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F)
############################################################################
#================
# Plot: mCSM-lig
#================
lf_mcsm_ligP = all_dm_om_df[['lf_mcsm_lig']]
#lf_mcsm_ligP = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_colsP),]
table(lf_mcsm_ligP$param_type)
lf_mcsm_ligP$param_type = factor(lf_mcsm_ligP$param_type)
table(lf_mcsm_ligP$param_type)
mcsmligP = lf_bp2(lf_mcsm_ligP
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 1)
#=================
# Plot: mmCSM-lig2
#=================
lf_mmcsm_lig2P = all_dm_om_df[['lf_mmcsm_lig2']]
#lf_mmcsm_lig2P = lf_mmcsm_lig2P[!lf_mmcsm_lig2P$param_type%in%c(static_colsP),]
table(lf_mmcsm_lig2P$param_type)
lf_mmcsm_lig2P$param_type = factor(lf_mmcsm_lig2P$param_type)
table(lf_mmcsm_lig2P$param_type)
mcsmlig2P = lf_bp2(lf_mmcsm_lig2P
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 1)
mcsmlig2P
#================
# Plot: mCSM-ppi2
#================
if (tolower(gene)%in%geneL_ppi2){
lf_mcsm_ppi2P = all_dm_om_df[['lf_mcsm_ppi2']]
#lf_mcsm_ppi2P = lf_mcsm_ppi2[!lf_mcsm_ppi2$param_type%in%c(static_colsP),]
table(lf_mcsm_ppi2P$param_type)
lf_mcsm_ppi2P$param_type = factor(lf_mcsm_ppi2P$param_type)
table(lf_mcsm_ppi2P$param_type)
mcsmppi2P = lf_bp2(lf_mcsm_ppi2P
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 1)
}
#==============
# Plot: mCSM-NA
#==============
if (tolower(gene)%in%geneL_na){
lf_mcsm_naP = all_dm_om_df[['lf_mcsm_na']]
#lf_mcsm_naP = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_colsP),]
table(lf_mcsm_naP$param_type)
lf_mcsm_naP$param_type = factor(lf_mcsm_naP$param_type)
table(lf_mcsm_naP$param_type)
mcsmnaP = lf_bp2(lf_mcsm_naP
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 1)
}
######################################
# Outplot with stats
######################################
# outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
#
# dm_om_combinedP = paste0(outdir_images
# ,tolower(gene)
# ,"_dm_om_all.svg" )
#
# cat("DM OM plots with stats:", dm_om_combinedP)
# svg(dm_om_combinedP, width = 32, height = 18)
# cowplot::plot_grid(
# cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P, genomicsP, distanceP
# , nrow=1
# , rel_widths = c(1/7, 1/7,1/7,1/7, 1/7, 1.75/7)),
# #, rel_widths = c(1/8, 1/8,1/8,1/8, 1/8, 2.75/8)), # for 3 distances
# cowplot::plot_grid(consurfP, proveanP, snap2P
# , mcsmligP
# , mcsmlig2P
# , mcsmppi2P
# #, mcsmnaP
# , nrow=1),
# nrow=2)
#
# dev.off()

View file

@ -0,0 +1,176 @@
# source dm_om_plots.R
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/dm_om_plots.R")
##### plots to combine ####
duetP
foldxP
deepddgP
dynamut2P
genomicsP
consurfP
proveanP
snap2P
mcsmligP
mcsmlig2P
mcsmnaP
# Plot labels
tit1 = "Stability changes"
tit2 = "Genomic measure"
tit3 = "Distance to partners"
tit4 = "Evolutionary Conservation"
tit5 = "Affinity changes"
pt_size = 30
theme_georgia <- function(...) {
theme_gray(base_family = "sans", ...) +
theme(plot.title = element_text(face = "bold"))
}
title_theme <- calc_element("plot.title", theme_georgia())
pt1 = ggdraw() +
draw_label(
tit1,
fontfamily = title_theme$family,
fontface = title_theme$face,
#size = title_theme$size
size = pt_size
)
pt2 = ggdraw() +
draw_label(
tit2,
fontfamily = title_theme$family,
fontface = title_theme$face,
size = pt_size
)
pt3 = ggdraw() +
draw_label(
tit3,
fontfamily = title_theme$family,
fontface = title_theme$face,
size = pt_size
)
pt4 = ggdraw() +
draw_label(
tit4,
fontfamily = title_theme$family,
fontface = title_theme$face,
size = pt_size
)
pt5 = ggdraw() +
draw_label(
tit5,
fontfamily = title_theme$family,
fontface = title_theme$face,
size = pt_size
)
#======================
# Output plot function
#======================
OutPlot_dm_om = function(x){
# dist b/w plot title and plot
relH_tp = c(0.08, 0.92)
my_label_size = 25
#----------------
# Top panel
#----------------
top_panel = cowplot::plot_grid(
cowplot::plot_grid(pt1,
cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P
, nrow = 1
, labels = c("A", "B", "C", "D")
, label_size = my_label_size)
, ncol = 1
, rel_heights = relH_tp
),
NULL,
cowplot::plot_grid(pt2,
cowplot::plot_grid(genomicsP
, nrow = 1
, labels = c("E")
, label_size = my_label_size)
, ncol = 1
, rel_heights = relH_tp
),
NULL,
cowplot::plot_grid(pt3,
cowplot::plot_grid( #distanceP
distanceP_lig
, distanceP_na
, nrow = 1
, labels = c("F", "G")
, label_size = my_label_size)
, ncol = 1
, rel_heights = relH_tp
),
nrow = 1,
rel_widths = c(2/7, 0.1/7, 0.5/7, 0.1/7, 1/7)
)
#----------------
# Bottom panel
#----------------
bottom_panel = cowplot::plot_grid(
cowplot::plot_grid(pt4,
cowplot::plot_grid(consurfP, proveanP, snap2P
, nrow = 1
, labels = c("H", "I", "J")
, label_size = my_label_size)
, ncol = 1
, rel_heights =relH_tp
),NULL,
cowplot::plot_grid(pt5,
cowplot::plot_grid(mcsmligP
, mcsmlig2P
, mcsmnaP
, nrow = 1
, labels = c("K", "L", "M")
, label_size = my_label_size)
, ncol = 1
, rel_heights = relH_tp
),NULL,
nrow = 1,
rel_widths = c(3/6,0.1/6,3/6, 0.1/6 )
)
#-------------------------------
# combine: Top and Bottom panel
#-------------------------------
cowplot::plot_grid (top_panel, bottom_panel
, nrow =2
, rel_widths = c(1, 1)
, align = "hv")
}
#=====================
# OutPlot: svg and png
#======================
dm_om_combinedP = paste0(outdir_images
,tolower(gene)
,"_dm_om_all.svg")
cat("DM OM plots with stats:", dm_om_combinedP)
svg(dm_om_combinedP, width = 32, height = 18)
OutPlot_dm_om()
dev.off()
dm_om_combinedP_png = paste0(outdir_images
,tolower(gene)
,"_dm_om_all.png")
cat("DM OM plots with stats:", dm_om_combinedP_png)
png(dm_om_combinedP_png, width = 32, height = 18, units = "in", res = 300)
OutPlot_dm_om()
dev.off()

View file

@ -14,9 +14,8 @@ my_gg_pairs=function(plot_df, plot_title
title="ρ",
digits=2,
justify_labels = "centre",
#title_args=c(colour="black"),
title_args=c(size=tt_args_size),#2.5
group_args=c(size=gp_args_size)#2.5
title_args=list(size=tt_args_size, colour="black"),#2.5
group_args=list(size=gp_args_size)#2.5
)
),
lower = list(
@ -73,7 +72,7 @@ if (tolower(gene)%in%geneL_ppi2){
}
if (tolower(gene)%in%geneL_na){
corr_affinity_df[corr_affinity_df["NCA-Dist"]>DistCutOff,"mCSM-NA"]=0
corr_affinity_df[corr_affinity_df["NA-Dist"]>DistCutOff,"mCSM-NA"]=0
}
# count 0
@ -97,6 +96,7 @@ corr_df_ps = corr_plotdf[, corr_ps_colnames]
# Plot #1
plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability estimates")
##########################################################
#================
# Conservation
@ -142,37 +142,62 @@ corr_df_aff = corr_affinity_df[, corr_aff_colnames]
colnames(corr_df_aff)
# Plot #3
plot_corr_df_aff = my_gg_pairs(corr_df_aff,
plot_title="Affinity estimates",
tt_args_size = 4,
gp_args_size = 4)
plot_corr_df_aff = my_gg_pairs(corr_df_aff
, plot_title="Affinity estimates"
#, tt_args_size = 4
#, gp_args_size = 4
)
#=============
# combine
#=============
#### Combine plots #####
# #png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
# png(paste0(outdir_images
# ,tolower(gene)
# ,"_CorrAB.png"), height = 6, width=11.75, unit="in",res=300)
#
# cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
# ggmatrix_gtable(plot_corr_df_cons),
# # ggmatrix_gtable(plot_corr_df_aff),
# # nrow=1, ncol=3, rel_heights = 7,7,3
# nrow=1,
# #rel_heights = 1,1
# labels = "AUTO",
# label_size = 12)
# dev.off()
#
# # affinity corr
# #png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300)
# png(paste0(outdir_images
# ,tolower(gene)
# ,"_CorrC.png"), height =7, width=7, unit="in",res=300)
#
# cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff),
# labels = "C",
# label_size = 12)
# dev.off()
#png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
#### Combine A ####
png(paste0(outdir_images
,tolower(gene)
,"_CorrAB.png"), height = 7, width=11.75, unit="in",res=300)
,"_CorrA.png"), height =8, width=8, unit="in",res=300)
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps),
ggmatrix_gtable(plot_corr_df_cons),
labels = "A",
label_size = 12)
dev.off()
#### Combine B+C ####
# B + C
png(paste0(outdir_images
,tolower(gene)
,"_CorrBC.png"), height = 6, width=11.75, unit="in",res=300)
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_cons),
ggmatrix_gtable(plot_corr_df_aff),
# ggmatrix_gtable(plot_corr_df_aff),
# nrow=1, ncol=3, rel_heights = 7,7,3
nrow=1,
rel_widths = c(1.5,1),
labels = "AUTO",
#rel_heights = 1,1
labels = c("B", "C"),
label_size = 12)
dev.off()
# affinity corr
#png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300)
png(paste0(outdir_images
,tolower(gene)
,"_CorrC.png"), height =7, width=7, unit="in",res=300)
cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff),
labels = "C",
label_size = 12)
dev.off()

View file

@ -0,0 +1,2 @@
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R")

View file

@ -0,0 +1,173 @@
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/gid/prominent_effects_gid.R")
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/gid/sensitivity_count_gid.R")
##############################################################
# PE count
#lig-- na--ppi2--stab
# pe_colour_map = c("DD_lig" = "#ffd700" # gold
# , "SS_lig" = "#f0e68c" # khaki
#
# , "DD_nucleic_acid"= "#a0522d" # sienna
# , "SS_nucleic_acid"= "#d2b48c" # tan
#
# , "DD_ppi2" = "#da70d6" # orchid
# , "SS_ppi2" = "#ff1493" # deeppink
#
# , "DD_stability" = "#f8766d" # red
# , "SS_stability" = "#00BFC4") # blue
table(str_df_plot_cols$pe_effect_outcome)
##############################################################
#===========
#PE count
#===========
rects <- data.frame(x=1:6,
colors = c("#ffd700" ,
"#f0e68c" ,
"#a0522d" ,
"#d2b48c" ,
"#f8766d" ,
"#00BFC4")
)
rects$text = c("-ve Lig"
, "+ve Lig"
, "-ve\nNuc.Acid"
, "+ve\nNuc.Acid"
, "-ve stability"
, "+ve stability"
)
cell1 = table(str_df_plot_cols$pe_effect_outcome)[["DD_lig"]]
cell2 = 0
cell3 = table(str_df_plot_cols$pe_effect_outcome)[["DD_nucleic_acid"]]
cell4 = table(str_df_plot_cols$pe_effect_outcome)[["SS_nucleic_acid"]]
#cell5 = table(str_df_plot_cols$pe_effect_outcome)[["DD_ppi2"]]
#cell6 = table(str_df_plot_cols$pe_effect_outcome)[["SS_ppi2"]]
cell7 = table(str_df_plot_cols$pe_effect_outcome)[["DD_stability"]]
cell8 = table(str_df_plot_cols$pe_effect_outcome)[["SS_stability"]]
#rects$numbers = c(38, 0, 22, 9, 108, 681) #for embb
rects$numbers = c(cell1, cell2,
cell3, cell4,
# cell5, cell6,
cell7, cell8)
rects$num_labels = paste0("n=", rects$numbers)
rects
#------
# Plot
#------
#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text
peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
geom_tile(width = 1, height = 1) + # make square tiles
geom_text(color = "black", size = 1.7) + # add white text in the middle
scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
coord_fixed() + # make sure tiles are square
coord_flip()+ scale_x_reverse() +
# theme_void() # remove any axis markings
theme_nothing() # remove any axis markings
peP
#------
# Plot: this one is better
#------
peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) +
geom_tile() + # make square tiles
geom_text(color = "black", size = 1.6) + # add white text in the middle
scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame
coord_fixed() + # make sure tiles are square
theme_nothing() # remove any axis markings
peP2
########################################################
# From: script sensitivity_count per gene
#===============================
# Sensitivity count: SITE
#===============================
#--------
# embb
#--------
#rsc = 54
#ccc = 46
#ssc = 470
rsc = site_Rc; rsc
ccc = site_Cc; ccc
ssc = site_Sc; ssc
rect_rs_siteC <- data.frame(x=1:3,
colors = c("red",
"purple",
"blue")
)
rect_rs_siteC
rect_rs_siteC$text = c("Resistant",
"Common",
"Sensitive")
rect_rs_siteC$numbers = c(rsc,ccc,ssc)
rect_rs_siteC$num_labels = paste0("n=", rect_rs_siteC$numbers)
rect_rs_siteC
#------
# Plot
#------
sens_siteP = ggplot(rect_rs_siteC, aes(x, y = 0,
fill = colors,
label = num_labels
#,label = paste0(text,"\n", num_labels)
)) +
geom_tile(width = 1, height = 1) +
#geom_text(color = "black", size = 1.7) +
geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) +
scale_fill_identity(guide = "none") +
coord_fixed()+
theme_nothing() # remove any axis markings
sens_siteP
################################################################
#===============================
# Sensitivity count: Mutations
#===============================
table(sensP_df$sensitivity)
muts_Rc = table(sensP_df$sensitivity)[["R"]]
muts_Sc = table(sensP_df$sensitivity)[["S"]]
rect_sens <- data.frame(x=1:2,
colors = c("red",
"blue")
)
rect_sens$text = c("Resistant",
"Sensitive")
rect_sens$numbers = c(muts_Rc,muts_Sc)
rect_sens$num_labels = paste0("n=", rect_sens$numbers)
rect_sens
#------
# Plot
#------
sensP = ggplot(rect_sens, aes(x, y = 0,
fill = colors,
label = paste0(text,"\n", num_labels))) +
geom_tile(width = 1, height = 1) +
#geom_text(color = "black", size = 1.7) +
geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) +
scale_fill_identity(guide = "none") +
coord_fixed()+
theme_nothing() # remove any axis markings
sensP
sensP2 = sensP +
coord_flip() + scale_x_reverse()
sensP2

View file

@ -0,0 +1,341 @@
#!/usr/bin/env Rscript
#source("~/git/LSHTM_analysis/config/gid.R")
# get plotting dfs
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
########################################################
pos_colname = "position"
#-------------
# from ~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R
#-------------
length(all_stability_cols); length(raw_stability_cols)
length(scaled_stability_cols); length(outcome_stability_cols)
length(affinity_dist_colnames)
static_cols = c("mutationinformation",
#"position",
pos_colname,
"sensitivity")
other_cols_all = c(scaled_stability_cols, scaled_affinity_cols, affinity_dist_colnames)
#omit avg cols and foldx_scaled_signC cols
other_cols = other_cols_all[grep("avg", other_cols_all, invert = T)]
other_cols = other_cols[grep("foldx_scaled_signC",other_cols, invert = T )]
other_cols
cols_to_extract = c(static_cols, other_cols)
cat("\nExtracting cols:", cols_to_extract)
expected_ncols = length(static_cols) + length(other_cols)
expected_ncols
str_df = merged_df3[, cols_to_extract]
if (ncol(str_df) == expected_ncols){
cat("\nPASS: successfully extracted cols for calculating prominent effects")
}else{
stop("\nAbort: Could not extract cols for calculating prominent effects")
}
#=========================
# Masking affinity columns
#=========================
# First make values for affinity cols 0 when their corresponding dist >10
head(str_df)
# replace in place affinity values >10
str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0
str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0
#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10
if (tolower(gene)%in%geneL_ppi2){
str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0
}
# na gene: replace in place na affinity values where na dist >10
if (tolower(gene)%in%geneL_na){
str_df[str_df["nca_distance"]>10,"mcsm_na_scaled"]=0
}
colnames(str_df)
head(str_df)
# get names of cols to calculate the prominent effects from
# scaled_cols_tc = c("duet_scaled",
# "deepddg_scaled",
# "ddg_dynamut2_scaled",
# "foldx_scaled",
# "affinity_scaled",
# "mmcsm_lig_scaled",
# "mcsm_ppi2_scaled", "mcsm_na_scaled")
scaled_cols_tc = other_cols[grep("scaled", other_cols)]
###############################################
#===============
# whole df
#===============
give_col=function(x,y,df=str_df){
df[df[[pos_colname]]==x,y]
}
for (i in unique(str_df[[pos_colname]]) ){
print(i)
#cat(length(unique(str_df[[pos_colname]])))
biggest = max(abs(give_col(i,scaled_cols_tc)))
str_df[str_df[[pos_colname]]==i,'abs_max_effect'] = biggest
str_df[str_df[[pos_colname]]==i,'effect_type']= names(
give_col(i,scaled_cols_tc)[which(
abs(
give_col(i,scaled_cols_tc)
) == biggest, arr.ind=T
)[, "col"]])[1]
effect_name = unique(str_df[str_df[[pos_colname]]==i,'effect_type'])#[1] # pick first one in case we have multiple exact values
# get index/rowname for value of max effect, and then use it to get the original sign
# here
#ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T))
ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c(pos_colname,effect_name)][effect_name])== biggest, arr.ind=T))
str_df[str_df[[pos_colname]]==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1]
}
# ends with suffix 2 if dups
str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values
colnames(str_df)
table(str_df$effect_type)
# check
str_df_check = str_df[str_df[[pos_colname]]%in%c(24, 32, 160, 303, 334),]
#================
# for Plots
#================
str_df_short = str_df[, c("mutationinformation",
#"position",
pos_colname,
"sensitivity"
, "effect_type"
, "effect_sign")]
table(str_df_short$effect_type)
table(str_df_short$effect_sign)
str(str_df_short)
# assign pe outcome
str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS")
table(str_df_short$pe_outcome )
table(str_df_short$effect_sign)
#==============
# group effect type:
# lig, ppi2, nuc. acid, stability
#==============
affcols = c("affinity_scaled", "mmcsm_lig_scaled")
nuc_na_cols = c("mcsm_na_scaled")
#lig
table(str_df_short$effect_type)
str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols
, "lig"
, str_df_short$effect_type)
table(str_df_short$effect_grouped)
#na
str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%nuc_na_cols
, "nucleic_acid"
, str_df_short$effect_grouped)
table(str_df_short$effect_grouped)
#stability
str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig",
"nucleic_acid")
, "stability"
, str_df_short$effect_grouped)
table(str_df_short$effect_grouped)
# create a sign as well
str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_"
, str_df_short$effect_grouped)
table(str_df_short$pe_effect_outcome)
#####################################################################
# Chimera: for colouring
####################################################################
#-------------------------------------
# get df with unique position
#--------------------------------------
#data[!duplicated(data$x), ]
str_df_plot = str_df_short[!duplicated(str_df[[pos_colname]]),]
if (nrow(str_df_plot) == length(unique(str_df[[pos_colname]]))){
cat("\nPASS: successfully extracted df with unique positions")
}else{
stop("\nAbort: Could not extract df with unique positions")
}
#-------------------------------------
# generate colours for effect types
#--------------------------------------
str_df_plot_cols = str_df_plot[, c(pos_colname,
"sensitivity",
"pe_outcome",
"effect_grouped",
"pe_effect_outcome")]
head(str_df_plot_cols)
# colour intensity based on sign
#str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light")
str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$pe_outcome=="DD", "bright", "light")
table(str_df_plot_cols$colour_hue); table(str_df_plot$pe_outcome)
head(str_df_plot_cols)
# colour based on effect
table(str_df_plot_cols$pe_effect_outcome)
# colors = c("#ffd700" #gold
# , "#f0e68c" #khaki
# , "#da70d6"# orchid
# , "#ff1493"# deeppink
# , "#a0522d" #sienna
# , "#d2b48c" # tan
# , "#00BFC4" #, "#007d85" #blue
# , "#F8766D" )# red
pe_colour_map = c("DD_lig" = "#ffd700" # gold
, "SS_lig" = "#f0e68c" # khaki
, "DD_nucleic_acid"= "#a0522d" # sienna
, "SS_nucleic_acid"= "#d2b48c" # tan
, "DD_ppi2" = "#da70d6" # orchid
, "SS_ppi2" = "#ff1493" # deeppink
, "DD_stability" = "#f8766d" # red
, "SS_stability" = "#00BFC4") # blue
#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE)
#map the colours
str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$pe_effect_outcome
,function(x){pe_colour_map[[x]]}
))
head(str_df_plot_cols$colour_map)
table(str_df_plot_cols$colour_map)
table(str_df_plot_cols$pe_effect_outcome)
# str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue
# , "_"
# , str_df_plot_cols$colour_map)
# head(str_df_plot_cols$colours)
# table(str_df_plot_cols$colours)
#
#
# class(str_df_plot_cols$colour_map)
# str(str_df_plot_cols)
# sort by colour
head(str_df_plot_cols)
str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ]
head(str_df_plot_cols)
#======================================
# write file with prominent effects
#======================================
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv"))
################################
# printing for chimera
###############################
chain_suffix = ".A"
str_df_plot_cols$pos_chain = paste0(str_df_plot_cols[[pos_colname]], chain_suffix)
table(str_df_plot_cols$colour_map)
table(str_df_plot_cols$pe_effect_outcome)
#===================================================
#-------------------
# Ligand Affinity
#-------------------
# -ve Lig Aff
dd_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_lig",]
if (nrow(dd_lig) == table(str_df_plot_cols$pe_effect_outcome)[['DD_lig']]){
dd_lig_pos = dd_lig[[pos_colname]]
}else{
stop("Abort: DD affinity colour numbers mismtatch")
}
# +ve Lig Aff
ss_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_lig",]
if (!empty(ss_lig)){
if (nrow(ss_lig) == table(str_df_plot_cols$pe_effect_outcome)[['SS_lig']]){
ss_lig_pos = ss_lig[[pos_colname]]
}else{
stop("Abort: SS affinity colour numbers mismtatch")
}
#put in chimera cmd
paste0(dd_lig_pos,chain_suffix)
paste0(ss_lig_pos,chain_suffix)
}
#===================================================
#------------------------
# Nucleic Acid Affinity
#------------------------
# -ve NA aff
dd_nca = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_nucleic_acid",]
if (nrow(dd_nca) == table(str_df_plot_cols$pe_effect_outcome)[['DD_nucleic_acid']]){
dd_nca_pos = dd_nca[[pos_colname]]
}else{
stop("Abort: DD nucleic_acid colour numbers mismtatch")
}
# +ve NA aff
ss_nca = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_nucleic_acid",]
if (nrow(ss_nca) == table(str_df_plot_cols$pe_effect_outcome)[['SS_nucleic_acid']]){
ss_nca_pos = ss_nca[[pos_colname]]
}else{
stop("Abort: SS nucleic_acid colour numbers mismtatch")
}
#put in chimera cmd
paste0(dd_nca_pos, chain_suffix)
paste0(ss_nca_pos, chain_suffix)
#=========================================================
#------------------------
# Stability
#------------------------
# -ve Stability
dd_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_stability",]
if (nrow(dd_stability) == table(str_df_plot_cols$pe_effect_outcome)[['DD_stability']]){
dd_stability_pos = dd_stability[[pos_colname]]
}else{
stop("Abort: DD Stability colour numbers mismtatch")
}
# +ve Stability
ss_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_stability",]
if (nrow(ss_stability) == table(str_df_plot_cols$pe_effect_outcome)[['SS_stability']]){
ss_stability_pos = ss_stability[[pos_colname]]
}else{
stop("Abort: SS Stability colour numbers mismtatch")
}
#put in chimera cmd
# stabiliting first as it has less numbers
paste0(ss_stability_pos, chain_suffix)
paste0(dd_stability_pos, chain_suffix)
####################################################################

View file

@ -0,0 +1,65 @@
#=========================
# Count Sensitivity
# Mutations and positions
#=========================
pos_colname_c ="position"
sensP_df = merged_df3[,c("mutationinformation",
#"position",
pos_colname_c,
"sensitivity")]
head(sensP_df)
table(sensP_df$sensitivity)
#---------------
# Total unique positions
#----------------
tot_mut_pos = length(unique(sensP_df[[pos_colname_c]]))
cat("\nNo of Tot muts sites:", tot_mut_pos)
# resistant mut pos
sens_site_allR = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="R"]
sens_site_UR = unique(sens_site_allR)
length(sens_site_UR)
# Sensitive mut pos
sens_site_allS = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="S"]
sens_site_US = unique(sens_site_allS)
length(sens_site_UR)
#---------------
# Common Sites
#----------------
common_pos = intersect(sens_site_UR,sens_site_US)
site_Cc = length(common_pos)
cat("\nNo of Common sites:", site_Cc
, "\nThese are:", common_pos)
#---------------
# Resistant muts
#----------------
site_R = sens_site_UR[!sens_site_UR%in%common_pos]
site_Rc = length(site_R)
if ( length(sens_site_allR) == table(sensP_df$sensitivity)[['R']] ){
cat("\nNo of R muts:", length(sens_site_allR)
, "\nNo. of R sites:",site_Rc
, "\nThese are:", site_R
)
}
#---------------
# Sensitive muts
#----------------
site_S = sens_site_US[!sens_site_US%in%common_pos]
site_Sc = length(site_S)
if ( length(sens_site_allS) == table(sensP_df$sensitivity)[['S']] ){
cat("\nNo of S muts:", length(sens_site_allS)
, "\nNo. of S sites:", site_Sc
, "\nThese are:", site_S)
}
#########################

View file

@ -0,0 +1,2 @@
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R")

View file

@ -7,7 +7,6 @@ source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/linage_bp
#linage_dist_ens_stability.R
###########################################
# svg
# my_label_size = 12
# linPlots_combined = paste0(outdir_images
# , tolower(gene)
# ,"_linP_combined.svg")
@ -31,6 +30,8 @@ source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/linage_bp
# dev.off()
# png
my_label_size = 12
linPlots_combined = paste0(outdir_images
, tolower(gene)
,"_linP_combined.png")

View file

@ -7,16 +7,9 @@
#=============
# Data: Input
#==============
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above
#cat("\nSourced plotting cols as well:", length(plotting_cols))

View file

@ -1,4 +1,18 @@
#source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pe_sens_site_count_rpob.R")
#=============
# Data: Input
#==============
source("~/git/LSHTM_analysis/config/rpob.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#cat("\nSourced plotting cols as well:", length(plotting_cols))
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
cat("plots will output to:", outdir_images)
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R")
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/rpob/pe_sens_site_count_rpob.R")
if ( tolower(gene)%in%c("rpob") ){
cat("\nPlots available for layout are:")

View file

@ -1,6 +1,11 @@
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
source("~/git/LSHTM_analysis/config/rpob.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
cat("plots will output to:", outdir_images)
my_gg_pairs=function(plot_df, plot_title
, tt_args_size = 2.5
@ -72,7 +77,7 @@ if (tolower(gene)%in%geneL_ppi2){
}
if (tolower(gene)%in%geneL_na){
corr_affinity_df[corr_affinity_df["NCA-Dist"]>DistCutOff,"mCSM-NA"]=0
corr_affinity_df[corr_affinity_df["NA-Dist"]>DistCutOff,"mCSM-NA"]=0
}
# count 0

View file

@ -1,172 +0,0 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Lineage plots [merged_df2]
# Count
# Diversity
# Average stability dist
# Avergae affinity dist: optional
#########################################################
#=======
# output
#=======
# outdir_images = paste0("~/git/Writing/thesis/images/results/"
# , tolower(gene), "/")
# cat("plots will output to:", outdir_images)
#########################################################
#===============
#Quick numbers checks
#===============
nsample_lin = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),]
if ( all(table(nsample_lin$sensitivity)== table(nsample_lin$mutation_info_labels)) ){
cat("\nTotal no. of samples belonging to L1-l4 for", gene,":", nrow(nsample_lin)
, "\nCounting R and S samples")
if( sum(table(nsample_lin$sensitivity)) == nrow(nsample_lin) ){
cat("\nPASSNumbers cross checked:")
print(table(nsample_lin$sensitivity))
}
}else{
stop("Abort: Numbers mismatch. Please check")
}
########################################################################
###################################################
# Lineage barplots #
###################################################
#===============================
# lineage sample and SNP count
#===============================
lin_countP = lin_count_bp(lf_data = lineage_dfL[['lin_lf']]
, all_lineages = F
, x_categ = "sel_lineages"
, y_count = "p_count"
, use_lineages = c("L1", "L2", "L3", "L4")
, bar_fill_categ = "count_categ"
, display_label_col = "p_count"
, bar_stat_stype = "identity"
, d_lab_size = 8
, d_lab_col = "black"
, my_xats = 25 # x axis text size
, my_yats = 25 # y axis text sized_lab_size
, my_xals = 25 # x axis label size
, my_yals = 25 # y axis label size
, my_lls = 25 # legend label size
, bar_col_labels = c("SNPs", "Total Samples")
, bar_col_values = c("grey50", "gray75")
, bar_leg_name = ""
, leg_location = "top"
, y_log10 = F
, y_scale_percent = FALSE
, y_label = c("Count")
)
lin_countP
#===============================
# lineage SNP diversity count
#===============================
lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']]
, x_categ = "sel_lineages"
, y_count = "snp_diversity"
#, all_lineages = F
, use_lineages = c("L1", "L2", "L3", "L4")
, display_label_col = "snp_diversity_f"
, bar_stat_stype = "identity"
, x_lab_angle = 90
, d_lab_size =9
, my_xats = 25 # x axis text size
, my_yats = 25 # y axis text size
, my_xals = 25 # x axis label size
, my_yals = 25 # y axis label size
, my_lls = 25 # legend label size
, y_log10 = F
, y_scale_percent = F
, leg_location = "top"
, y_label = "Percent" #"SNP diversity"
, bp_plot_title = "nsSNP diversity"
, title_colour = "black" #"chocolate4"
, subtitle_text = NULL
, sts = 20
, subtitle_colour = "#350E20FF")
lin_diversityP
#=============================================
# Output plots: Lineage count and Diversity
#=============================================
# lineage_bp_CL = paste0(outdir_images
# ,tolower(gene)
# ,"_lineage_bp_CL_Tall.svg")
#
# cat("Lineage barplots:", lineage_bp_CL)
# svg(lineage_bp_CL, width = 8, height = 15)
#
# cowplot::plot_grid(lin_countP, lin_diversityP
# #, labels = c("(a)", "(b)", "(c)", "(d)")
# , nrow = 2
# # , ncols = 2
# , labels = "AUTO"
# , label_size = 25)
#
# dev.off()
########################################################################
###################################################
# Stability dist #
###################################################
# scaled_cols_stability = c("duet_scaled"
# , "deepddg_scaled"
# , "ddg_dynamut2_scaled"
# , "foldx_scaled"
# , "avg_stability_scaled")
my_xlabel = paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel
#plotdf = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),]
linP_dm_om = lineage_distP(merged_df2
, with_facet = F
, x_axis = "avg_stability_scaled"
, y_axis = "lineage_labels"
, x_lab = my_xlabel
, use_lineages = c("L1", "L2", "L3", "L4")
#, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
, fill_categ = "sensitivity"
, fill_categ_cols = c("red", "blue")
, label_categories = c("Resistant", "Sensitive")
, leg_label = "Mutation group"
, my_ats = 22 # axis text size
, my_als = 22 # axis label size
, my_leg_ts = 22
, my_leg_title = 22
, my_strip_ts = 22
, alpha = 0.56
)
linP_dm_om
###################################################
# Affinity dist [OPTIONAL] #
###################################################
# scaled_cols_affinity = c("affinity_scaled"
# , "mmcsm_lig_scaled"
# , "mcsm_ppi2_scaled"
# , "mcsm_na_scaled"
# , "avg_lig_affinity_scaled")
# lineage_distP(merged_df2
# , with_facet = F
# , x_axis = "avg_lig_affinity_scaled"
# , y_axis = "lineage_labels"
# , x_lab = my_xlabel
# , use_lineages = c("L1", "L2", "L3", "L4")
# #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999")
# , fill_categ = "sensitivity"
# , fill_categ_cols = c("red", "blue")
# , label_categories = c("Resistant", "Sensitive")
# , leg_label = "Mutation group"
# , my_ats = 22 # axis text size
# , my_als = 22 # axis label size
# , my_leg_ts = 22
# , my_leg_title = 22
# , my_strip_ts = 22
# , alpha = 0.56
# )

View file

@ -1,30 +0,0 @@
#!/usr/bin/env Rscript
###########################################
# TASK: generate plots for lineage
# Individual plots in
#lineage_bp_both.R
#linage_dist_ens_stability.R
###########################################
my_label_size = 25
linPlots_combined = paste0(outdir_images
, tolower(gene)
,"_linP_combined.svg")
cat("\nOutput plot:", linPlots_combined)
svg(linPlots_combined, width = 18, height = 12)
cowplot::plot_grid(
cowplot::plot_grid(lin_countP, lin_diversityP
, nrow = 2
, labels = "AUTO"
, label_size = my_label_size),
NULL,
linP_dm_om,
nrow = 1,
labels = c("", "", "C"),
label_size = my_label_size,
rel_widths = c(35, 3, 52)
)
dev.off()

View file

@ -1,5 +1,5 @@
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/prominent_effects_rpob.R")
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/sensitivity_count_rpob.R")
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/rpob/prominent_effects_rpob.R")
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/rpob/sensitivity_count_rpob.R")
##############################################################
# PE count

View file

@ -0,0 +1,2 @@
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R")