added rpob plot scripts

This commit is contained in:
Tanushree Tunstall 2022-08-29 23:26:43 +01:00
parent 8f97ab7cc8
commit 7c2e4b898e
8 changed files with 109 additions and 48 deletions

View file

@ -7,8 +7,8 @@
#=============
# Data: Input
#==============
source("~/git/LSHTM_analysis/config/rpob.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#cat("\nSourced plotting cols as well:", length(plotting_cols))

View file

@ -4,7 +4,22 @@
source("~/git/LSHTM_analysis/config/rpob.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#cat("\nSourced plotting cols as well:", length(plotting_cols))
##############################################################
# Quick sanity counts: number of rows and the sensitivity breakdown for the
# full merged_df3, then for the rows whose ligand_distance, nca_distance and
# interface_dist fall below a cut-off of 10.
#
# which() is used so that rows with an NA distance are dropped. Indexing a
# data frame with a bare logical vector containing NA yields one all-NA row
# per NA, which would inflate nrow() and pollute the subsets.
nrow(merged_df3)
table(merged_df3$sensitivity)

# rows with ligand_distance < 10
df3_lig = merged_df3[which(merged_df3$ligand_distance < 10), ]
nrow(df3_lig)
table(df3_lig$sensitivity)

# rows with nca_distance < 10
df3_na = merged_df3[which(merged_df3$nca_distance < 10), ]
nrow(df3_na)
table(df3_na$sensitivity)

# rows with interface_dist < 10
df3_ppi2 = merged_df3[which(merged_df3$interface_dist < 10), ]
nrow(df3_ppi2)
table(df3_ppi2$sensitivity)
#############################################################
#=======
# output
#=======

View file

@ -1,5 +1,6 @@
# source dm_om_plots.R
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/dm_om_plots.R")
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/rpob/dm_om_plots_rpob.R")
# source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/dm_om_plots.R")
##### plots to combine ####
duetP
@ -14,6 +15,8 @@ mcsmligP
mcsmlig2P
mcsmppi2P
mcsmnaP
distanceP_ppi2
distanceP_na
# Plot labels
tit1 = "Stability changes"
@ -105,18 +108,17 @@ OutPlot_dm_om = function(x){
),
NULL,
cowplot::plot_grid(pt3,
cowplot::plot_grid( #distanceP
distanceP_lig
#, distanceP_ppi2
cowplot::plot_grid(distanceP_lig
, distanceP_na
, distanceP_ppi2
, nrow = 1
, labels = c("F", "G")
, labels = c("F", "G","H")
, label_size = my_label_size)
, ncol = 1
, rel_heights = relH_tp
),
nrow = 1,
rel_widths = c(2/7, 0.1/7, 0.5/7, 0.1/7, 1/7)
rel_widths = c(4/8, 0.1/8, 1/8, 0.1/8, 3/8)
)
#----------------
@ -126,7 +128,7 @@ OutPlot_dm_om = function(x){
cowplot::plot_grid(pt4,
cowplot::plot_grid(consurfP, proveanP, snap2P
, nrow = 1
, labels = c("H", "I", "J")
, labels = c("I", "J", "K")
, label_size = my_label_size)
, ncol = 1
, rel_heights =relH_tp
@ -134,16 +136,16 @@ OutPlot_dm_om = function(x){
cowplot::plot_grid(pt5,
cowplot::plot_grid(mcsmligP
, mcsmlig2P
, mcsmppi2P
, mcsmnaP
, mcsmppi2P
, nrow = 1
, labels = c("K", "L", "M")
, labels = c("L", "M", "N", "O")
, label_size = my_label_size)
, ncol = 1
, rel_heights = relH_tp
),NULL,
nrow = 1,
rel_widths = c(3/6,0.1/6,3/6, 0.1/6 )
rel_widths = c(3/7,0.1/7,4/7, 0.1/7 )
)
#-------------------------------

View file

@ -1,5 +1,5 @@
source("~/git/LSHTM_analysis/config/rpob.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#=======
# output
@ -48,6 +48,7 @@ DistCutOff = 10
geneL_normal = c("pnca")
geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
geneL_both = c("rpob")
merged_df3 = as.data.frame(merged_df3)
@ -58,7 +59,7 @@ corr_plotdf = corr_data_extract(merged_df3
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
static_cols = c("Log10(MAF)"
, "Log10(OR)"
#, "Log10(OR)"
)
############################################################
#=============================================
@ -90,7 +91,7 @@ unmasked_vals
# Stability
#================
corr_ps_colnames = c(static_cols
, "DUET"
, "mCSM-DUET"
, "FoldX"
, "DeepDDG"
, "Dynamut2"
@ -138,6 +139,11 @@ if (tolower(gene)%in%geneL_na){
aff_colnames = c(common_aff_colnames, "mCSM-NA")
}
if (tolower(gene)%in%geneL_both){
aff_colnames = c(common_aff_colnames, "mCSM-NA", "mCSM-PPI2")
}
# building final affinity colnames for correlation
corr_aff_colnames = c(static_cols
, aff_colnames

View file

@ -1,7 +1,6 @@
#!/usr/bin/env Rscript
source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#=======
# output
@ -13,7 +12,7 @@ cat("\nOutput dir for stats:", outdir_stats)
geneL_normal = c("pnca")
#geneL_na = c("gid", "rpob")
geneL_na_v2 = c("gid")
geneL_nca = c("alr", "embb", "katg", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
geneL_both = c("rpob")
@ -21,10 +20,15 @@ if (tolower(gene)%in%geneL_na_v2) {
gene_colnames = c("mcsm_na_affinity", "mcsm_na_outcome")
}
if (tolower(gene)%in%geneL_nca) {
if (tolower(gene)%in%geneL_na) {
gene_colnames = c("mcsm_nca_affinity", "mcsm_nca_outcome")
}
if (tolower(gene)%in%geneL_both) {
gene_colnames = c("mcsm_na_affinity", "mcsm_na_outcome"
, "mcsm_ppi2_affinity", "mcsm_ppi2_outcome")
}
#from plotting_globals()
LigDist_colname
@ -36,6 +40,9 @@ angstroms_symbol
cat("\nAffinity Distance colnames:", length(affinity_dist_colnames)
, "\nThese are:", affinity_dist_colnames)
affinity_dist_colnames = c("ligand_distance","nca_distance","interface_dist") # for consistency
#===========
# Data used
#===========
@ -50,7 +57,7 @@ cols_to_output = c("position"
, "mmcsm_lig"
, "mmcsm_lig_outcome"
, affinity_dist_colnames[2]
# #, affinity_dist_colnames[3]
, affinity_dist_colnames[3]
# , "mcsm_na_affinity"
# , "mcsm_na_outcome"
# #, "mcsm_nca_affinity"
@ -148,8 +155,13 @@ colsNames_to_output_lig = c("position"
colnames(Out_df_ligS) = colsNames_to_output_lig
head(Out_df_ligS)
# ADD: active site annot
nrow(Out_df_ligS)
Out_df_ligS$drug_site = ifelse(Out_df_ligS$position%in%aa_pos_drug, "drug", "no")
table(Out_df_ligS$drug_site)
#--------------------
# write output file: KS test within grpup
# write output file: lig
#----------------------
Out_ligT = paste0(outdir_stats
, tolower(gene)
@ -172,7 +184,9 @@ cols_to_output_nca = c("position"
, "sensitivity"
, "mutationinformation"
, naDist_colname
, gene_colnames
, "mcsm_na_affinity"
, "mcsm_na_outcome"
# , gene_colnames
, "maf_percent"
, "or_mychisq"
, "pval_fisher"
@ -201,8 +215,14 @@ colsNames_to_output_nca = c("position"
colnames(Out_df_ncaS) = colsNames_to_output_nca
Out_df_ncaS
# ADD: active site annot
nrow(Out_df_ncaS)
Out_df_ncaS$drug_site = ifelse(Out_df_ncaS$position%in%aa_pos_drug, "drug", "no")
table(Out_df_ncaS$drug_site)
#--------------------
# write output file: KS test within grpup
# write output file: na
#----------------------
Out_ncaT = paste0(outdir_stats
, tolower(gene)
@ -223,9 +243,9 @@ cols_to_output_ppi2 = c("position"
, "sensitivity"
, "mutationinformation"
, ppi2Dist_colname
#, "mcsm_ppi2_affinity"
#, "mcsm_ppi2_outcome"
, gene_colnames
, "mcsm_ppi2_affinity"
, "mcsm_ppi2_outcome"
#, gene_colnames
, "maf_percent"
, "or_mychisq"
, "pval_fisher"
@ -254,8 +274,14 @@ colsNames_to_output_ppi2 = c("position"
colnames(Out_df_ppi2S) = colsNames_to_output_ppi2
Out_df_ppi2S
# ADD: active site annot
nrow(Out_df_ppi2S)
Out_df_ppi2S$drug_site = ifelse(Out_df_ppi2S$position%in%aa_pos_drug, "drug", "no")
table(Out_df_ppi2S$drug_site)
#--------------------
# write output file: KS test within grpup
# write output file: ppi2
#----------------------
Out_ppi2T = paste0(outdir_stats
, tolower(gene)
@ -270,7 +296,7 @@ write.csv(Out_df_ppi2S, Out_ppi2T, row.names = FALSE)
##########################################################
# highest or/maf and stability effects
###########################################################
# convert to percet
# convert to percent
df3$maf_percent = df3$maf*100
cols_to_output_effects = c("position"
@ -282,6 +308,7 @@ cols_to_output_effects = c("position"
, "avg_lig_affinity"
, "avg_lig_affinity_outcome"
, affinity_dist_colnames[2]
, affinity_dist_colnames[3]
, gene_colnames
, "maf_percent"
, "or_mychisq"
@ -402,7 +429,7 @@ if (tolower(gene)%in%geneL_both ){
# add cols: PPI2
mut_h_ppi2_dd$mutational_effect = "Most Destabilising for PPI affinity"
mut_h_ppi2_dd$mutational_effect = "Most Stabilising for PPI affinity"
mut_h_ppi2_ss$mutational_effect = "Most Stabilising for PPI affinity"
if (identical(colnames(mut_h_ppi2_dd), colnames(mut_h_ppi2_ss)) ){
cat("\nPass 1: ppi2")
@ -417,7 +444,7 @@ if (tolower(gene)%in%geneL_both ){
}
# combine BOTH: NA and PPI2
gene_aff_combined = rbind(mut_h_na_dd, mut_h_na_ss)
gene_aff_combined = rbind(mut_h_na_dd, mut_h_na_ss, mut_h_ppi2_dd, mut_h_ppi2_ss)
}
#===============
@ -447,10 +474,11 @@ colsNames_combined_table = c("position"
, "Ligand affinity outcome"
, paste0("NA-Dist (", angstroms_symbol, ")")
, paste0("PPI-Dist (", angstroms_symbol, ")")
, paste0("mCSM-NA (", delta_symbol,delta_symbol,"G)")
, "mCSM-NA outcome"
, paste0("PPI-Dist (", angstroms_symbol, ")")
, paste0("mCSM-PPI (", delta_symbol,delta_symbol,"G)")
, "mCSM-PPI outcome"
@ -469,8 +497,13 @@ if ( length(colnames(combined_table)) == length(colsNames_combined_table) ) {
stop("\nAbort: No. of cols mismatch. Cannot assign pretty colnames for output")
}
# ADD: active site annot
nrow(combined_table)
combined_table$drug_site = ifelse(combined_table$position%in%aa_pos_drug, "drug", "no")
table(combined_table$drug_site)
#--------------------
# write output file: KS test within grpup
# write output file: extreme effects
#----------------------
Out_combined_effectsT = paste0(outdir_stats
, tolower(gene)