#!/usr/bin/env Rscript source("~/git/LSHTM_analysis/config/gid.R") source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") ######################################################### # TASK: Replace B-factors in the pdb file with the mean # normalised affinity values ######################################################### # working dir and loading libraries #source("~/git/LSHTM_analysis/scripts/Header_TT.R") library(bio3d) require("getopt", quietly = TRUE) # cmd parse arguments #======================================================== #======================================================== cat(gene) gene_match = paste0(gene,"_p."); cat(gene_match) cat(gene_match) #============= # directories #============= datadir = paste0("~/git/Data") indir = paste0(datadir, "/", drug, "/input") outdir = paste0("~/git/Data", "/", drug, "/output") #outdir_plots = paste0("~/git/Writing/thesis/images/results/", tolower(gene)) #======= # output #======= outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") cat("plots will output to:", outdir_images) #====== # input #====== in_filename_pdb = paste0(tolower(gene), "_complex.pdb") infile_pdb = paste0(indir, "/", in_filename_pdb) cat(paste0("Input file:", infile_pdb) ) #======= # output #======= #out_filename_duet_mspdb = paste0(tolower(gene), "_complex_bduet_ms.pdb") out_filename_lig_mspdb = paste0(tolower(gene), "_complex_b_lig_ms.pdb") outfile_lig_mspdb = paste0(outdir_images,out_filename_lig_mspdb) print(paste0("Output file:", outfile_lig_mspdb)) #%%=============================================================== #NOTE: duet here refers to the ensemble affinity values ########################### # Read file: average affinity values # or mcsm_normalised file ########################### my_df_raw = merged_df3[, c("position", "ligand_distance", "avg_lig_affinity_scaled", "avg_lig_affinity")] my_df_raw = my_df_raw[my_df_raw$ligand_distance<10,] # avg by position on the SCALED values my_df <- my_df_raw %>% group_by(position) %>% summarize(avg_ligaff_sc_pos = mean(avg_lig_affinity_scaled)) max(my_df$avg_ligaff_sc_pos) min(my_df$avg_ligaff_sc_pos) ############# # Read pdb ############# # list of 8 my_pdb = read.pdb(infile_pdb , maxlines = -1 , multi = FALSE , rm.insert = FALSE , rm.alt = TRUE , ATOM.only = FALSE , hex = FALSE , verbose = TRUE) # assign separately for duet and ligand my_pdb_duet = my_pdb #========================================================= # Replacing B factor with mean affinity scores # within the respective dfs #========================================================== # extract atom list into a variable # since in the list this corresponds to data frame, variable will be a df #df_duet = my_pdb_duet[[1]] df_duet= my_pdb_duet[['atom']] # make a copy: required for downstream sanity checks d2_duet = df_duet # sanity checks: B factor max(df_duet$b); min(df_duet$b) #================================================== # histograms and density plots for inspection # 1: original B-factors # 2: original mean affinity values # 3: replaced B-factors with mean affinity values #================================================== # Set the margin on all sides par(oma = c(3,2,3,0) , mar = c(1,3,5,2) #, mfrow = c(3,2) #, mfrow = c(3,4)) , mfrow = c(3,2)) #============= # Row 1 plots: original B-factors # duet and affinity #============= hist(df_duet$b , xlab = "" , main = "Bfactor affinity") plot(density(df_duet$b) , xlab = "" , main = "Bfactor affinity") #============= # Row 2 plots: original mean affinity values # affinity #============= #hist(my_df$averaged_duet hist(my_df$avg_ligaff_sc_pos , xlab = "" , main = "mean affinity values") #plot(density(my_df$averaged_duet) plot(density(my_df$avg_ligaff_sc_pos) , xlab = "" , main = "mean affinity values") ################################################################ #========= # step 1_P1 #========= df_duet$b = my_df$avg_ligaff_sc_pos[match(df_duet$resno, my_df$position)] #========= # step 2_P1 #========= # count NA in Bfactor b_na_duet = sum(is.na(df_duet$b)) ; b_na_duet # count number of 0"s in Bactor sum(df_duet$b == 0) # replace all NA in b factor with 0 na_rep = 2 df_duet$b[is.na(df_duet$b)] = na_rep #========= # step 3_P1 #========= # sanity check: dim should be same before reassignment if ( (dim(df_duet)[1] == dim(d2_duet)[1]) & (dim(df_duet)[2] == dim(d2_duet)[2]) ){ print("PASS: Dims of both dfs as expected") } else { print ("FAIL: Dims mismatch") quit()} #========= # step 4_P1: # VERY important #========= # assign it back to the pdb file my_pdb_duet[['atom']] = df_duet max(df_duet$b); min(df_duet$b) table(df_duet$b) sum(is.na(df_duet$b)) #========= # step 5_P1: OUTPUT #========= cat(paste0("output file duet mean affinity pdb:", outfile_lig_mspdb)) write.pdb(my_pdb_duet, outfile_lig_mspdb) # OUTPUT: position file poscsvF = paste0(outdir_images, tolower(gene), "_ligaff_positions.csv") cat(paste0("output file duet mean NA affinity POSITIONS:", poscsvF)) filtered_pos = toString(my_df$position) write.table(filtered_pos, poscsvF, row.names = F, col.names = F ) #============================ # Add the 3rd histogram and density plots for comparisons #============================ # Plots continued... # Row 3 plots: hist and density of replaced B-factors with affinity values hist(df_duet$b , xlab = "" , main = "repalcedB duet") plot(density(df_duet$b) , xlab = "" , main = "replacedB duet") # graph titles mtext(text = "Frequency" , side = 2 , line = 0 , outer = TRUE) mtext(text = paste0(tolower(gene), ": affinity distribution") , side = 3 , line = 0 , outer = TRUE) #============================================