#!/usr/bin/env Rscript source("~/git/LSHTM_analysis/config/gid.R") source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") ######################################################### # TASK: Replace B-factors in the pdb file with the mean # normalised stability values. # read pdb file # read mcsm mean stability value files # extract the respective mean values and assign to the # b-factor column within their respective pdbs # generate some distribution plots for inspection ######################################################### # working dir and loading libraries #source("~/git/LSHTM_analysis/scripts/Header_TT.R") library(bio3d) require("getopt", quietly = TRUE) # cmd parse arguments #======================================================== #======================================================== cat(gene) gene_match = paste0(gene,"_p."); cat(gene_match) cat(gene_match) #============= # directories #============= datadir = paste0("~/git/Data") indir = paste0(datadir, "/", drug, "/input") outdir = paste0("~/git/Data", "/", drug, "/output") #outdir_plots = paste0("~/git/Writing/thesis/images/results/", tolower(gene)) #====== # input #====== in_filename_pdb = paste0(tolower(gene), "_complex.pdb") #in_filename_pdb = "~/git/Writing/thesis/images/results/gid/str_figures/gid_complex_copy_arpeg.pdb" infile_pdb = paste0(indir, "/", in_filename_pdb) cat(paste0("Input file:", infile_pdb) ) #======= # output #======= outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") cat("plots will output to:", outdir_images) #out_filename_duet_mspdb = paste0(tolower(gene), "_complex_bduet_ms.pdb") out_filename_duet_mspdb = paste0(tolower(gene), "_complex_b_stab_ms.pdb") outfile_duet_mspdb = paste0(outdir_images, out_filename_duet_mspdb) print(paste0("Output file:", outfile_duet_mspdb)) #%%=============================================================== #NOTE: duet here refers to the ensemble stability values ########################### # Read file: average stability values # or mcsm_normalised file ########################### my_df_raw = merged_df3[, c("position", "avg_stability", "avg_stability_scaled")] # avg by position on the SCALED values my_df <- my_df_raw %>% group_by(position) %>% summarize(avg_stab_sc_pos = mean(avg_stability_scaled)) max(my_df$avg_stab_sc_pos) min(my_df$avg_stab_sc_pos) #============================================================ # # scale b/w -1 and 1 # duet_min = min(my_df_by_position['avg_stab_sc_pos']) # duet_max = max(my_df_by_position['avg_stab_sc_pos']) # # # scale the averaged_duet values # my_df_by_position['avg_stab_sc_pos_scaled'] = lapply(my_df_by_position['avg_stab_sc_pos'] # , function(x) ifelse(x < 0, x/abs(duet_min), x/duet_max)) # # cat(paste0('Average duet scores:\n', head(my_df_by_position['avg_stab_sc_pos_scaled']) # , '\n---------------------------------------------------------------' # , '\nScaled duet scores:\n', head(my_df_by_position['avg_stab_sc_pos_scaled']))) # # min(my_df_by_position['avg_stab_sc_pos_scaled']) # max(my_df_by_position['avg_stab_sc_pos_scaled']) #============================================================ ############# # Read pdb ############# # list of 8 my_pdb = read.pdb(infile_pdb , maxlines = -1 , multi = FALSE , rm.insert = FALSE , rm.alt = TRUE , ATOM.only = FALSE , hex = FALSE , verbose = TRUE) # assign separately for duet and ligand my_pdb_duet = my_pdb #========================================================= # Replacing B factor with mean stability scores # within the respective dfs #========================================================== df_duet= my_pdb_duet[['atom']] # make a copy: required for downstream sanity checks d2_duet = df_duet # sanity checks: B factor max(df_duet$b); min(df_duet$b) #================================================== # histograms and density plots for inspection # 1: original B-factors # 2: original mean stability values # 3: replaced B-factors with mean stability values #================================================== # Set the margin on all sides par(oma = c(3,2,3,0) , mar = c(1,3,5,2) #, mfrow = c(3,2) #, mfrow = c(3,4)) , mfrow = c(3,2)) #============= # Row 1 plots: original B-factors # duet and affinity #============= hist(df_duet$b , xlab = "" , main = "Bfactor stability") plot(density(df_duet$b) , xlab = "" , main = "Bfactor stability") #============= # Row 2 plots: original mean stability values # duet and affinity #============= #hist(my_df$averaged_duet hist(my_df$avg_stab_sc_pos , xlab = "" , main = "mean stability values") #plot(density(my_df$averaged_duet) plot(density(my_df$avg_stab_sc_pos) , xlab = "" , main = "mean stability values") ################################################################ #========= # step 1_P1 #========= #df_duet$b = my_df$averaged_duet_scaled[match(df_duet$resno, my_df$position)] df_duet$b = my_df$avg_stab_sc_pos[match(df_duet$resno, my_df$position)] #========= # step 2_P1 #========= # count NA in Bfactor b_na_duet = sum(is.na(df_duet$b)) ; b_na_duet # count number of 0"s in Bactor sum(df_duet$b == 0) # replace all NA in b factor with 0 na_rep = 2 df_duet$b[is.na(df_duet$b)] = na_rep #========= # step 3_P1 #========= # sanity check: dim should be same before reassignment if ( (dim(df_duet)[1] == dim(d2_duet)[1]) & (dim(df_duet)[2] == dim(d2_duet)[2]) ){ print("PASS: Dims of both dfs as expected") } else { print ("FAIL: Dims mismatch") quit()} #========= # step 4_P1: # VERY important #========= # assign it back to the pdb file my_pdb_duet[['atom']] = df_duet max(df_duet$b); min(df_duet$b) table(df_duet$b) sum(is.na(df_duet$b)) #========= # step 5_P1 #========= cat(paste0("output file duet mean stability pdb:", outfile_duet_mspdb)) write.pdb(my_pdb_duet, outfile_duet_mspdb) #============================ # Add the 3rd histogram and density plots for comparisons #============================ # Plots continued... # Row 3 plots: hist and density of replaced B-factors with stability values hist(df_duet$b , xlab = "" , main = "repalcedB duet") plot(density(df_duet$b) , xlab = "" , main = "replacedB duet") # graph titles mtext(text = "Frequency" , side = 2 , line = 0 , outer = TRUE) mtext(text = paste0(tolower(gene), ": stability distribution") , side = 3 , line = 0 , outer = TRUE) #============================================