#!/usr/bin/env Rscript ######################################################### # TASK: producing boxplots for dr and other muts ######################################################### #======================================================================= # working dir and loading libraries getwd() setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() #source("Header_TT.R") library(tidyverse) library(ggplot2) library(data.table) library(dplyr) #========= # Input #========= #source("combining_dfs_plotting.R") source("output_tables.R") rm(df, merged_df3_short, df_output) #=============================================================== df_comp = df_ordered[!is.na(df_ordered$af),] #%%%%%%%%%%%%%%%%%%%%% # REASSIGNMENT df = df_comp #%%%%%%%%%%%%%%%%%%%%% cols_all_muts_table = c("mutationinformation" , "mutation_info" , "af" , "af_percent" , "or_mychisq" , "pval_fisher" , "or_kin" , "pwald_kin" , "duet_stability_change" , "duet_outcome" , "ligand_distance" , "ligand_affinity_change" , "ligand_outcome" , "ddg" , "foldx_outcome" , "asa" , "rsa" , "kd_values" , "rd_values") df = df[,cols_all_muts_table] #=============================================================== #Most Frequent mutation mf = df[df$af_percent == max(df$af_percent), ] mf # highest OR hor = df[df$or_mychisq == max(df$or_mychisq), ] hor # Most Destabilising for protein stability (DUET) df_d = df[df$duet_outcome == "Destabilising",] hd_duet = df_d[df_d$duet_stability_change == min(df_d$duet_stability_change), ] hd_duet # Most Stabilising for protein stability (DUET) df_s = df[df$duet_outcome == "Stabilising",] hs_duet = df_s[df_s$duet_stability_change == max(df_s$duet_stability_change), ] hs_duet # Closest Destabilising for protein stability close_d = df_d[order(df_d$ligand_distance, df_d$duet_stability_change),] # Closest Stabilising for protein stability close_s = df_s[order(df_s$ligand_distance, df_s$duet_stability_change),] #=============== # ligand affinity: filtered #================ df_lig = df[df$ligand_distance<10,] df_d_lig = df_lig[df_lig$ligand_outcome == "Destabilising",] hd_lig= df_d_lig[df_d_lig$ligand_affinity_change == min(df_d_lig$ligand_affinity_change), ] hd_lig df_s_lig = df[df$ligand_outcome == "Stabilising",] hs_lig= df_s_lig[df_s_lig$ligand_affinity_change == max(df_s_lig$ligand_affinity_change), ] hs_lig # Closest Destabilising for ligand affintiy close_d_lig = df_d_lig[order(df_d_lig$ligand_distance, df_d_lig$ligand_affinity_change),] # Closest Stabilising for ligand affinity close_s_lig = df_s_lig[order(df_s_lig$ligand_distance, df_s_lig$ligand_affinity_change),] #=============== # foldx #=============== df_d_foldx = df[df$foldx_outcome == "Destabilising",] hd_foldx = df_d_foldx[df_d_foldx$ddg == max(df_d_foldx$ddg), ] hd_foldx # Most Stabilising for protein stability (DUET) df_s_foldx = df[df$foldx_outcome == "Stabilising",] hs_foldx = df_s_foldx[df_s_foldx$ddg == min(df_s_foldx$ddg), ] hs_foldx #=============== # active site muts #=============== aa_muts = merged_df3[merged_df3$ligand_distance<5,] aa_dist = paste0(5, angstroms_symbol) cat("No. of active site residues within", aa_dist, ":", nrow(aa_muts)) #==================== # budding hotspots #==================== # this is what you want foo = merged_df3 %>% group_by(position) %>% tally() bar = merged_df3 %>% group_by(position) %>% count() # sanity check all(table(foo$n) == table(bar$n)) table(foo$n) n_budding_sites = table(foo$n)[[2]] n_mult_muts_sites = sum(table(foo$n)) - (table(foo$n)[[1]] - table(foo$n)[[2]]) cat("No of budding hotspots (sites with 2 mutations):", n_budding_sites , "\nNo. of sites with mutiple (>2) mutations:", n_mult_muts_sites) #========================================================================== #============================== # agreement of foldx and DUET #============================== mcsm_foldx = merged_df3[which(merged_df3$duet_outcome != merged_df3$foldx_outcome),] mcsm_foldx$sign_comp = ifelse(sign(mcsm_foldx$duet_scaled)==sign(mcsm_foldx$ddg), "PASS", "FAIL") table(mcsm_foldx$sign_comp) # another way of checking merged_df3$sign_comp = ifelse(sign(merged_df3$duet_scaled)==sign(merged_df3$ddg), "PASS", "FAIL") table(merged_df3$sign_comp) disagreement = table(merged_df3$sign_comp)[2]/nrow(merged_df3)*100 agreement = 100 - disagreement cat("There is", agreement, "% between mcsm and foldx predictions") ##############################################################################