From 54f9fd073ba2ea602904d2ecfe19208e8b29287a Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 24 Aug 2020 13:04:25 +0100 Subject: [PATCH] rectified mcsm_mean_stability to avereage on raw values and then scale --- scripts/plotting/mcsm_mean_stability.R | 151 +++++++++++++++---------- scripts/plotting/plotting_data.R | 2 +- 2 files changed, 94 insertions(+), 59 deletions(-) diff --git a/scripts/plotting/mcsm_mean_stability.R b/scripts/plotting/mcsm_mean_stability.R index 67d1385..217b183 100644 --- a/scripts/plotting/mcsm_mean_stability.R +++ b/scripts/plotting/mcsm_mean_stability.R @@ -6,41 +6,29 @@ getwd() # TASK: ######################################################### -source("Header_TT.R") -require(data.table) -require(dplyr) +#source("Header_TT.R") +#require(data.table) +#require(dplyr) + source("plotting_data.R") # should return #my_df #my_df_u #dup_muts +# cmd parse arguments +#require('getopt', quietly = TRUE) +#======================================================== + + #======================================================== # Read file: call script for combining df for PS #source("../combining_two_df.R") #======================================================== -#%% variable assignment: input and output paths & filenames -drug = "pyrazinamide" -gene = "pncA" -gene_match = paste0(gene,"_p.") -cat(gene_match) -#============= -# directories -#============= -datadir = paste0("~/git/Data") -indir = paste0(datadir, "/", drug, "/input") -outdir = paste0("~/git/Data", "/", drug, "/output") - -#====== -# input -#====== -#in_filename = "mcsm_complex1_normalised.csv" -in_filename_params = paste0(tolower(gene), "_all_params.csv") -infile_params = paste0(outdir, "/", in_filename_params) -cat(paste0("Input file 1:", infile_params) ) +# plotting_data.R imports all the dir names, etc #======= # output @@ -60,61 +48,108 @@ rm(my_df) ########################### # Data for bfactor figure -# PS average -# Lig average +# PS (duet) average +# Ligand affinity average ########################### head(df$position); head(df$mutationinformation) -head(df$duet_scaled) +head(df$duet_stability_change) # order data frame #df = df[order(df$position),] #already done - -head(df$position); head(df$mutationinformation) -head(df$duet_scaled) +#head(df$position); head(df$mutationinformation) +#head(df$duet_stability_change) #*********** -# PS: average by position +# PS(duet): average by position and then scale b/w -1 and 1 +# column to average: duet_stability_change (NOT scaled!) #*********** mean_duet_by_position <- df %>% group_by(position) %>% - summarize(averaged.duet = mean(duet_scaled)) + summarize(averaged_duet = mean(duet_stability_change)) + +# scale b/w -1 and 1 +duet_min = min(mean_duet_by_position['averaged_duet']) +duet_max = max(mean_duet_by_position['averaged_duet']) + +# scale the averaged_duet values +mean_duet_by_position['averaged_duet_scaled'] = lapply(mean_duet_by_position['averaged_duet'] + , function(x) ifelse(x < 0, x/abs(duet_min), x/duet_max)) + +cat(paste0('Average duet scores:\n', head(mean_duet_by_position['averaged_duet']) + , '\n---------------------------------------------------------------' + , '\nScaled duet scores:\n', head(mean_duet_by_position['averaged_duet_scaled']))) + +# sanity checks +l_bound_duet = min(mean_duet_by_position['averaged_duet_scaled']) +u_bound_duet = max(mean_duet_by_position['averaged_duet_scaled']) + +if ( (l_bound_duet == -1) && (u_bound_duet == 1) ){ + cat(paste0("PASS: duet scores averaged by position and then scaled" + , "\nmin averaged duet: ", l_bound_duet + , "\nmax averaged duet: ", u_bound_duet)) +}else{ + cat(paste0("FAIL: avergaed duet scores could not be scaled b/w -1 and 1" + , "\nmin averaged duet: ", l_bound_duet + , "\nmax averaged duet: ", u_bound_duet)) + quit() +} #*********** -# Lig: average by position +# Lig: average by position and then scale b/w -1 and 1 +# column: ligand_affinity_change (NOT scaled!) #*********** mean_affinity_by_position <- df %>% group_by(position) %>% - summarize(averaged.affinity = mean(affinity_scaled)) + summarize(averaged_affinity = mean(ligand_affinity_change)) + +# scale b/w -1 and 1 +affinity_min = min(mean_affinity_by_position['averaged_affinity']) +affinity_max = max(mean_affinity_by_position['averaged_affinity']) + +# scale the averaged_affinity values +mean_affinity_by_position['averaged_affinity_scaled'] = lapply(mean_affinity_by_position['averaged_affinity'] + , function(x) ifelse(x < 0, x/abs(affinity_min), x/affinity_max)) + +cat(paste0('Average affinity scores:\n', head(mean_affinity_by_position['averaged_affinity']) + , '\n---------------------------------------------------------------' + , '\nScaled affinity scores:\n', head(mean_affinity_by_position['averaged_affinity_scaled']))) + +# sanity checks +l_bound_affinity = min(mean_affinity_by_position['averaged_affinity_scaled']) +u_bound_affinity = max(mean_affinity_by_position['averaged_affinity_scaled']) + +if ( (l_bound_affinity == -1) && (u_bound_affinity == 1) ){ + cat(paste0("PASS: affinity scores averaged by position and then scaled" + , "\nmin averaged affintiy: ", l_bound_affinity + , "\nmax averaged affintiy: ", u_bound_affinity)) +}else{ + cat(paste0("FAIL: avergaed affinity scores could not be scaled b/w -1 and 1" + , "\nmin averaged affintiy: ", l_bound_affinity + , "\nmax averaged affintiy: ", u_bound_affinity)) + quit() +} #*********** -# cbind:mean_duet_by_position and mean_affinity_by_position +# merge: mean_duet_by_position and mean_affinity_by_position #*********** +common_cols = intersect(colnames(mean_duet_by_position), colnames(mean_affinity_by_position)) -combined = as.data.frame(cbind(mean_duet_by_position, mean_affinity_by_position )) - -# sanity check -# mean_PS_affinity_Bfactor - -colnames(combined) - -colnames(combined) = c("position" - , "average_duet_scaled" - , "position2" - , "average_affinity_scaled") - -colnames(combined) - -identical(combined$position, combined$position2) - -n = which(colnames(combined) == "position2"); n - -combined_df = combined[,-n] - -max(combined_df$average_duet_scaled) ; min(combined_df$average_duet_scaled) - -max(combined_df$average_affinity_scaled) ; min(combined_df$average_affinity_scaled) - -head(combined_df$position); tail(combined_df$position) +if (dim(mean_duet_by_position) && dim(mean_affinity_by_position)){ + print(paste0("PASS: dim's match, mering dfs by column :", common_cols)) + #combined = as.data.frame(cbind(mean_duet_by_position, mean_affinity_by_position )) + combined_df = as.data.frame(merge(mean_duet_by_position + , mean_affinity_by_position + , by = common_cols + , all = T)) + + cat(paste0("\nnrows combined_df:", nrow(combined_df) + , "\nnrows combined_df:", ncol(combined_df))) +}else{ + cat(paste0("FAIL: dim's mismatch, aborting cbind!" + , "\nnrows df1:", nrow(mean_duet_by_position) + , "\nnrows df2:", nrow(mean_affinity_by_position))) + quit() +} #%%============================================================ # output write.csv(combined_df, outfile_mean_stability diff --git a/scripts/plotting/plotting_data.R b/scripts/plotting/plotting_data.R index 3e3338c..8c6feb1 100644 --- a/scripts/plotting/plotting_data.R +++ b/scripts/plotting/plotting_data.R @@ -79,7 +79,7 @@ if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformati upos = unique(my_df_u$position) cat("Dim of clean df:"); cat(dim(my_df_u)) -cat("\nNo. of unique mutational positions:"); cat(length(upos)) +cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n") ######################################################################## # end of data extraction and cleaning for plots #