From 0284122ef233f10051d9f5380f7eefa075852666 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Tue, 23 Aug 2022 16:31:17 +0100 Subject: [PATCH] aadded replace bfactor for na --- .../replaceBfactor_pdb_naaff.R | 217 ++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 scripts/plotting/structure_figures/replaceBfactor_pdb_naaff.R diff --git a/scripts/plotting/structure_figures/replaceBfactor_pdb_naaff.R b/scripts/plotting/structure_figures/replaceBfactor_pdb_naaff.R new file mode 100644 index 0000000..8ee9b7f --- /dev/null +++ b/scripts/plotting/structure_figures/replaceBfactor_pdb_naaff.R @@ -0,0 +1,217 @@ +#!/usr/bin/env Rscript +source("~/git/LSHTM_analysis/config/gid.R") +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +######################################################### +# TASK: Replace B-factors in the pdb file with the mean +# normalised NA affinity values. +######################################################### + +#source("~/git/LSHTM_analysis/scripts/Header_TT.R") +library(bio3d) +require("getopt", quietly = TRUE) # cmd parse arguments + +#======================================================== +cat(gene) +gene_match = paste0(gene,"_p."); cat(gene_match) +cat(gene_match) + +#============= +# directories +#============= +datadir = paste0("~/git/Data") +indir = paste0(datadir, "/", drug, "/input") +outdir = paste0("~/git/Data", "/", drug, "/output") +#outdir_plots = paste0("~/git/Writing/thesis/images/results/", tolower(gene)) + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +cat("plots will output to:", outdir_images) +#====== +# input +#====== +in_filename_pdb = paste0(tolower(gene), "_complex.pdb") +infile_pdb = paste0(indir, "/", in_filename_pdb) +cat(paste0("Input file:", infile_pdb) ) + +#======= +# output +#======= +out_filename_nca_mspdb = paste0(tolower(gene), "_complex_b_nca_ms.pdb") +outfile_nca_mspdb = paste0(outdir_images,out_filename_nca_mspdb) +print(paste0("Output file:", outfile_nca_mspdb)) + +#%%=============================================================== +#NOTE: duet here refers to the ensemble NA affinity values + +########################### +# Read file: average NA affinity values +# or mcsm_normalised file +########################### +my_df_raw = merged_df3[, c("position", "mcsm_na_scaled", "nca_distance")] +head(my_df_raw) +my_df_raw = my_df_raw[my_df_raw$nca_distance<10,] +my_df_raw$position + +# avg by position on the SCALED values +my_df <- my_df_raw %>% + group_by(position) %>% + summarize(avg_na_sc_pos = mean(mcsm_na_scaled)) + +max(my_df$avg_na_sc_pos) +min(my_df$avg_na_sc_pos) +#============================================================ +############# +# Read pdb +############# +# list of 8 +my_pdb = read.pdb(infile_pdb + , maxlines = -1 + , multi = FALSE + , rm.insert = FALSE + , rm.alt = TRUE + , ATOM.only = FALSE + , hex = FALSE + , verbose = TRUE) + +# assign separately for duet and ligand +my_pdb_duet = my_pdb + +#========================================================= +# Replacing B factor with mean NA affinity scores +# within the respective dfs +#========================================================== +df_duet= my_pdb_duet[['atom']] + +# make a copy: required for downstream sanity checks +d2_duet = df_duet + +# sanity checks: B factor +max(df_duet$b); min(df_duet$b) + +#================================================== +# histograms and density plots for inspection +# 1: original B-factors +# 2: original mean NA affinity values +# 3: replaced B-factors with mean NA affinity values +#================================================== +# Set the margin on all sides +par(oma = c(3,2,3,0) + , mar = c(1,3,5,2) + #, mfrow = c(3,2) + #, mfrow = c(3,4)) + , mfrow = c(3,2)) + +#============= +# Row 1 plots: original B-factors +# duet and NA affinity +#============= +hist(df_duet$b + , xlab = "" + , main = "Bfactor NA affinity") + +plot(density(df_duet$b) + , xlab = "" + , main = "Bfactor NA affinity") + +#============= +# Row 2 plots: original mean NA affinity values +# NA affinity +#============= + +#hist(my_df$averaged_duet +hist(my_df$avg_na_sc_pos + , xlab = "" + , main = "mean NA affinity values") + +#plot(density(my_df$averaged_duet) +plot(density(my_df$avg_na_sc_pos) + , xlab = "" + , main = "mean NA affinity values") + +#============== +# Row 3 plots: replaced B-factors with mean NA affinity values +# After actual replacement in the b factor column +#=============== +################################################################ +#========= +# step 1_P1 +#========= +#df_duet$b = my_df$averaged_duet_scaled[match(df_duet$resno, my_df$position)] +df_duet$b = my_df$avg_na_sc_pos[match(df_duet$resno, my_df$position)] + +#========= +# step 2_P1 +#========= +# count NA in Bfactor +b_na_duet = sum(is.na(df_duet$b)) ; b_na_duet + +# count number of 0"s in Bactor +sum(df_duet$b == 0) + +# replace all NA in b factor with 0 +na_rep = 2 +df_duet$b[is.na(df_duet$b)] = na_rep + +#========= +# step 3_P1 +#========= +# sanity check: dim should be same before reassignment +if ( (dim(df_duet)[1] == dim(d2_duet)[1]) & + (dim(df_duet)[2] == dim(d2_duet)[2]) + ){ + print("PASS: Dims of both dfs as expected") +} else { + print ("FAIL: Dims mismatch") + quit()} + +#========= +# step 4_P1: +# VERY important +#========= +# assign it back to the pdb file +my_pdb_duet[['atom']] = df_duet +max(df_duet$b); min(df_duet$b) +table(df_duet$b) +sum(is.na(df_duet$b)) + +#========= +# step 5_P1: OUTPUT +#========= +cat(paste0("output file duet mean NA affinity pdb:", outfile_nca_mspdb)) +write.pdb(my_pdb_duet, outfile_nca_mspdb) + +# OUTPUT: position file +poscsvF = paste0(outdir_images, tolower(gene), "_nca_positions.csv") +cat(paste0("output file duet mean NA affinity POSITIONS:", poscsvF)) + +filtered_pos = toString(my_df$position) +write.table(filtered_pos, poscsvF, row.names = F, col.names = F ) + + +#============================ +# Add the 3rd histogram and density plots for comparisons +#============================ +# Plots continued... +# Row 3 plots: hist and density of replaced B-factors with NA affinity values +hist(df_duet$b + , xlab = "" + , main = "repalcedB duet") + +plot(density(df_duet$b) + , xlab = "" + , main = "replacedB duet") + +# graph titles +mtext(text = "Frequency" + , side = 2 + , line = 0 + , outer = TRUE) + +mtext(text = paste0(tolower(gene), ": NA affinity distribution") + , side = 3 + , line = 0 + , outer = TRUE) +#============================================ \ No newline at end of file