generated replaced Bfactor pdbs

This commit is contained in:
Tanushree Tunstall 2020-08-24 14:37:28 +01:00
parent 35a89f7761
commit e696064d42

70
scripts/plotting/replaceBfactor_pdb.R Normal file → Executable file
View file

@ -25,6 +25,9 @@ cat(c(getwd(),"\n"))
library(bio3d)
require("getopt", quietly = TRUE) # cmd parse arguments
#========================================================
#drug = "pyrazinamide"
#gene = "pncA"
# command line args
spec = matrix(c(
"drug" , "d", 1, "character",
@ -40,9 +43,6 @@ if(is.null(drug)|is.null(gene)) {
stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
}
#========================================================
#%% variable assignment: input and output paths & filenames
drug = "pyrazinamide"
gene = "pncA"
gene_match = paste0(gene,"_p.")
cat(gene_match)
@ -52,6 +52,7 @@ cat(gene_match)
datadir = paste0("~/git/Data")
indir = paste0(datadir, "/", drug, "/input")
outdir = paste0("~/git/Data", "/", drug, "/output")
outdir_plots = paste0("~/git/Data", "/", drug, "/output/plots")
#======
# input
@ -67,12 +68,12 @@ cat(paste0("Input file:", infile_mean_stability) )
#=======
# output
#=======
out_filename_duet_mspdb = paste0(tolower(gene), "_complex_b_duetms.pdb")
outfile_duet_mspdb = paste0(outdir, "/", out_filename_duet_mspdb)
out_filename_duet_mspdb = paste0(tolower(gene), "_complex_bduet_ms.pdb")
outfile_duet_mspdb = paste0(outdir_plots, "/", out_filename_duet_mspdb)
print(paste0("Output file:", outfile_duet_mspdb))
out_filename_lig_mspdb = paste0(tolower(gene), "_complex_b_ligms.pdb")
outfile_lig_mspdb = paste0(outdir, "/", out_filename_lig_mspdb)
out_filename_lig_mspdb = paste0(tolower(gene), "_complex_blig_ms.pdb")
outfile_lig_mspdb = paste0(outdir_plots, "/", out_filename_lig_mspdb)
print(paste0("Output file:", outfile_lig_mspdb))
#%%===============================================================
@ -108,8 +109,9 @@ my_pdb_lig = my_pdb
#==========================================================
# extract atom list into a variable
# since in the list this corresponds to data frame, variable will be a df
df_duet = my_pdb_duet[[1]]
df_lig = my_pdb_lig[[1]]
#df_duet = my_pdb_duet[[1]]
df_duet= my_pdb_duet[['atom']]
df_lig = my_pdb_lig[['atom']]
# make a copy: required for downstream sanity checks
d2_duet = df_duet
@ -156,22 +158,22 @@ plot(density(df_lig$b)
# Row 2 plots: original mean stability values
# duet and affinity
#************
hist(my_df$average_duet_scaled
hist(my_df$averaged_duet
, xlab = ""
, main = "mean duet scaled")
, main = "mean duet values")
plot(density(my_df$average_duet_scaled)
plot(density(my_df$averaged_duet)
, xlab = ""
, main = "mean duet scaled")
, main = "mean duet values")
hist(my_df$average_affinity_scaled
hist(my_df$averaged_affinity
, xlab = ""
, main = "mean affinity scaled")
, main = "mean affinity values")
plot(density(my_df$average_affinity_scaled)
plot(density(my_df$averaged_affinity)
, xlab = ""
, main = "mean affinity scaled")
, main = "mean affinity values")
#************
# Row 3 plots: replaced B-factors with mean stability values
@ -190,8 +192,8 @@ plot(density(my_df$average_affinity_scaled)
#=========
# Be brave and replace in place now (don"t run sanity check)
# this makes all the B-factor values in the non-matched positions as NA
df_duet$b = my_df$average_duet_scaled[match(df_duet$resno, my_df$position)]
df_lig$b = my_df$average_affinity_scaled[match(df_lig$resno, my_df$position)]
df_duet$b = my_df$averaged_duet_scaled[match(df_duet$resno, my_df$position)]
df_lig$b = my_df$averaged_affinity_scaled[match(df_lig$resno, my_df$position)]
#=========
# step 2_P1
@ -209,36 +211,28 @@ df_duet$b[is.na(df_duet$b)] = 0
df_lig$b[is.na(df_lig$b)] = 0
# sanity check: should be 0 and True
# duet
if (sum(df_duet$b == 0) == b_na_duet){
print ("PASS: NA"s replaced with 0"s successfully in df_duet")
# duet and lig
if ( (sum(df_duet$b == 0) == b_na_duet) && (sum(df_lig$b == 0) == b_na_lig) ) {
print ("PASS: NA's replaced with 0s successfully in df_duet and df_lig")
} else {
print("FAIL: NA replacement in df_duet NOT successful")
quit()
}
max(df_duet$b); min(df_duet$b)
# lig
if (sum(df_lig$b == 0) == b_na_lig){
print ("PASS: NA"s replaced with 0"s successfully df_lig")
} else {
print("FAIL: NA replacement in df_lig NOT successful")
quit()
}
max(df_lig$b); min(df_lig$b)
# sanity checks: should be True
if( (max(df_duet$b) == max(my_df$average_duet_scaled)) & (min(df_duet$b) == min(my_df$average_duet_scaled)) ){
if( (max(df_duet$b) == max(my_df$averaged_duet_scaled)) & (min(df_duet$b) == min(my_df$averaged_duet_scaled)) ){
print("PASS: B-factors replaced correctly in df_duet")
} else {
print ("FAIL: To replace B-factors in df_duet")
quit()
}
if( (max(df_lig$b) == max(my_df$average_affinity_scaled)) & (min(df_lig$b) == min(my_df$average_affinity_scaled)) ){
print("PASS: B-factors replaced correctly in lig_duet")
if( (max(df_lig$b) == max(my_df$averaged_affinity_scaled)) & (min(df_lig$b) == min(my_df$averaged_affinity_scaled)) ){
print("PASS: B-factors replaced correctly in df_lig")
} else {
print ("FAIL: To replace B-factors in lig_duet")
print ("FAIL: To replace B-factors in df_lig")
quit()
}
@ -259,10 +253,10 @@ if ( (dim(df_duet)[1] == dim(d2_duet)[1]) & (dim(df_lig)[1] == dim(d2_lig)[1]) &
# VERY important
#=========
# assign it back to the pdb file
my_pdb_duet[[1]] = df_duet
my_pdb_duet[['atom']] = df_duet
max(df_duet$b); min(df_duet$b)
my_pdb_lig[[1]] = df_lig
my_pdb_lig[['atom']] = df_lig
max(df_lig$b); min(df_lig$b)
#=========
@ -302,7 +296,7 @@ mtext(text = "Frequency"
, line = 0
, outer = TRUE)
mtext(text = "stability distribution"
mtext(text = "Stability Distribution"
, side = 3
, line = 0
, outer = TRUE)