generated replaced Bfactor pdbs

This commit is contained in:
Tanushree Tunstall 2020-08-24 14:37:28 +01:00
parent 35a89f7761
commit e696064d42

70
scripts/plotting/replaceBfactor_pdb.R Normal file → Executable file
View file

@ -25,6 +25,9 @@ cat(c(getwd(),"\n"))
library(bio3d) library(bio3d)
require("getopt", quietly = TRUE) # cmd parse arguments require("getopt", quietly = TRUE) # cmd parse arguments
#======================================================== #========================================================
#drug = "pyrazinamide"
#gene = "pncA"
# command line args # command line args
spec = matrix(c( spec = matrix(c(
"drug" , "d", 1, "character", "drug" , "d", 1, "character",
@ -40,9 +43,6 @@ if(is.null(drug)|is.null(gene)) {
stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)") stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
} }
#======================================================== #========================================================
#%% variable assignment: input and output paths & filenames
drug = "pyrazinamide"
gene = "pncA"
gene_match = paste0(gene,"_p.") gene_match = paste0(gene,"_p.")
cat(gene_match) cat(gene_match)
@ -52,6 +52,7 @@ cat(gene_match)
datadir = paste0("~/git/Data") datadir = paste0("~/git/Data")
indir = paste0(datadir, "/", drug, "/input") indir = paste0(datadir, "/", drug, "/input")
outdir = paste0("~/git/Data", "/", drug, "/output") outdir = paste0("~/git/Data", "/", drug, "/output")
outdir_plots = paste0("~/git/Data", "/", drug, "/output/plots")
#====== #======
# input # input
@ -67,12 +68,12 @@ cat(paste0("Input file:", infile_mean_stability) )
#======= #=======
# output # output
#======= #=======
out_filename_duet_mspdb = paste0(tolower(gene), "_complex_b_duetms.pdb") out_filename_duet_mspdb = paste0(tolower(gene), "_complex_bduet_ms.pdb")
outfile_duet_mspdb = paste0(outdir, "/", out_filename_duet_mspdb) outfile_duet_mspdb = paste0(outdir_plots, "/", out_filename_duet_mspdb)
print(paste0("Output file:", outfile_duet_mspdb)) print(paste0("Output file:", outfile_duet_mspdb))
out_filename_lig_mspdb = paste0(tolower(gene), "_complex_b_ligms.pdb") out_filename_lig_mspdb = paste0(tolower(gene), "_complex_blig_ms.pdb")
outfile_lig_mspdb = paste0(outdir, "/", out_filename_lig_mspdb) outfile_lig_mspdb = paste0(outdir_plots, "/", out_filename_lig_mspdb)
print(paste0("Output file:", outfile_lig_mspdb)) print(paste0("Output file:", outfile_lig_mspdb))
#%%=============================================================== #%%===============================================================
@ -108,8 +109,9 @@ my_pdb_lig = my_pdb
#========================================================== #==========================================================
# extract atom list into a variable # extract atom list into a variable
# since in the list this corresponds to data frame, variable will be a df # since in the list this corresponds to data frame, variable will be a df
df_duet = my_pdb_duet[[1]] #df_duet = my_pdb_duet[[1]]
df_lig = my_pdb_lig[[1]] df_duet= my_pdb_duet[['atom']]
df_lig = my_pdb_lig[['atom']]
# make a copy: required for downstream sanity checks # make a copy: required for downstream sanity checks
d2_duet = df_duet d2_duet = df_duet
@ -156,22 +158,22 @@ plot(density(df_lig$b)
# Row 2 plots: original mean stability values # Row 2 plots: original mean stability values
# duet and affinity # duet and affinity
#************ #************
hist(my_df$average_duet_scaled hist(my_df$averaged_duet
, xlab = "" , xlab = ""
, main = "mean duet scaled") , main = "mean duet values")
plot(density(my_df$average_duet_scaled) plot(density(my_df$averaged_duet)
, xlab = "" , xlab = ""
, main = "mean duet scaled") , main = "mean duet values")
hist(my_df$average_affinity_scaled hist(my_df$averaged_affinity
, xlab = "" , xlab = ""
, main = "mean affinity scaled") , main = "mean affinity values")
plot(density(my_df$average_affinity_scaled) plot(density(my_df$averaged_affinity)
, xlab = "" , xlab = ""
, main = "mean affinity scaled") , main = "mean affinity values")
#************ #************
# Row 3 plots: replaced B-factors with mean stability values # Row 3 plots: replaced B-factors with mean stability values
@ -190,8 +192,8 @@ plot(density(my_df$average_affinity_scaled)
#========= #=========
# Be brave and replace in place now (don"t run sanity check) # Be brave and replace in place now (don"t run sanity check)
# this makes all the B-factor values in the non-matched positions as NA # this makes all the B-factor values in the non-matched positions as NA
df_duet$b = my_df$average_duet_scaled[match(df_duet$resno, my_df$position)] df_duet$b = my_df$averaged_duet_scaled[match(df_duet$resno, my_df$position)]
df_lig$b = my_df$average_affinity_scaled[match(df_lig$resno, my_df$position)] df_lig$b = my_df$averaged_affinity_scaled[match(df_lig$resno, my_df$position)]
#========= #=========
# step 2_P1 # step 2_P1
@ -209,36 +211,28 @@ df_duet$b[is.na(df_duet$b)] = 0
df_lig$b[is.na(df_lig$b)] = 0 df_lig$b[is.na(df_lig$b)] = 0
# sanity check: should be 0 and True # sanity check: should be 0 and True
# duet # duet and lig
if (sum(df_duet$b == 0) == b_na_duet){ if ( (sum(df_duet$b == 0) == b_na_duet) && (sum(df_lig$b == 0) == b_na_lig) ) {
print ("PASS: NA"s replaced with 0"s successfully in df_duet") print ("PASS: NA's replaced with 0s successfully in df_duet and df_lig")
} else { } else {
print("FAIL: NA replacement in df_duet NOT successful") print("FAIL: NA replacement in df_duet NOT successful")
quit() quit()
} }
max(df_duet$b); min(df_duet$b) max(df_duet$b); min(df_duet$b)
# lig
if (sum(df_lig$b == 0) == b_na_lig){
print ("PASS: NA"s replaced with 0"s successfully df_lig")
} else {
print("FAIL: NA replacement in df_lig NOT successful")
quit()
}
max(df_lig$b); min(df_lig$b)
# sanity checks: should be True # sanity checks: should be True
if( (max(df_duet$b) == max(my_df$average_duet_scaled)) & (min(df_duet$b) == min(my_df$average_duet_scaled)) ){ if( (max(df_duet$b) == max(my_df$averaged_duet_scaled)) & (min(df_duet$b) == min(my_df$averaged_duet_scaled)) ){
print("PASS: B-factors replaced correctly in df_duet") print("PASS: B-factors replaced correctly in df_duet")
} else { } else {
print ("FAIL: To replace B-factors in df_duet") print ("FAIL: To replace B-factors in df_duet")
quit() quit()
} }
if( (max(df_lig$b) == max(my_df$average_affinity_scaled)) & (min(df_lig$b) == min(my_df$average_affinity_scaled)) ){ if( (max(df_lig$b) == max(my_df$averaged_affinity_scaled)) & (min(df_lig$b) == min(my_df$averaged_affinity_scaled)) ){
print("PASS: B-factors replaced correctly in lig_duet") print("PASS: B-factors replaced correctly in df_lig")
} else { } else {
print ("FAIL: To replace B-factors in lig_duet") print ("FAIL: To replace B-factors in df_lig")
quit() quit()
} }
@ -259,10 +253,10 @@ if ( (dim(df_duet)[1] == dim(d2_duet)[1]) & (dim(df_lig)[1] == dim(d2_lig)[1]) &
# VERY important # VERY important
#========= #=========
# assign it back to the pdb file # assign it back to the pdb file
my_pdb_duet[[1]] = df_duet my_pdb_duet[['atom']] = df_duet
max(df_duet$b); min(df_duet$b) max(df_duet$b); min(df_duet$b)
my_pdb_lig[[1]] = df_lig my_pdb_lig[['atom']] = df_lig
max(df_lig$b); min(df_lig$b) max(df_lig$b); min(df_lig$b)
#========= #=========
@ -302,7 +296,7 @@ mtext(text = "Frequency"
, line = 0 , line = 0
, outer = TRUE) , outer = TRUE)
mtext(text = "stability distribution" mtext(text = "Stability Distribution"
, side = 3 , side = 3
, line = 0 , line = 0
, outer = TRUE) , outer = TRUE)