getwd() setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() ######################################################### # TASK: ######################################################### #source("Header_TT.R") #require(data.table) #require(dplyr) source("plotting_data.R") # should return #my_df #my_df_u #dup_muts # cmd parse arguments #require('getopt', quietly = TRUE) #======================================================== #======================================================== # Read file: call script for combining df for PS #source("../combining_two_df.R") #======================================================== # plotting_data.R imports all the dir names, etc #======= # output #======= out_filename_mean_stability = paste0(tolower(gene), "_mean_stability.csv") outfile_mean_stability = paste0(outdir, "/", out_filename_mean_stability) print(paste0("Output file:", outfile_mean_stability)) #%%=============================================================== #================ # Data for plots #================ # REASSIGNMENT as necessary df = my_df_u rm(my_df) ########################### # Data for bfactor figure # PS (duet) average # Ligand affinity average ########################### head(df$position); head(df$mutationinformation) head(df$duet_stability_change) # order data frame #df = df[order(df$position),] #already done #head(df$position); head(df$mutationinformation) #head(df$duet_stability_change) #*********** # PS(duet): average by position and then scale b/w -1 and 1 # column to average: duet_stability_change (NOT scaled!) #*********** mean_duet_by_position <- df %>% group_by(position) %>% summarize(averaged_duet = mean(duet_stability_change)) # scale b/w -1 and 1 duet_min = min(mean_duet_by_position['averaged_duet']) duet_max = max(mean_duet_by_position['averaged_duet']) # scale the averaged_duet values mean_duet_by_position['averaged_duet_scaled'] = lapply(mean_duet_by_position['averaged_duet'] , function(x) ifelse(x < 0, x/abs(duet_min), x/duet_max)) cat(paste0('Average duet scores:\n', head(mean_duet_by_position['averaged_duet']) , '\n---------------------------------------------------------------' , '\nScaled duet scores:\n', head(mean_duet_by_position['averaged_duet_scaled']))) # sanity checks l_bound_duet = min(mean_duet_by_position['averaged_duet_scaled']) u_bound_duet = max(mean_duet_by_position['averaged_duet_scaled']) if ( (l_bound_duet == -1) && (u_bound_duet == 1) ){ cat(paste0("PASS: duet scores averaged by position and then scaled" , "\nmin averaged duet: ", l_bound_duet , "\nmax averaged duet: ", u_bound_duet)) }else{ cat(paste0("FAIL: avergaed duet scores could not be scaled b/w -1 and 1" , "\nmin averaged duet: ", l_bound_duet , "\nmax averaged duet: ", u_bound_duet)) quit() } #*********** # Lig: average by position and then scale b/w -1 and 1 # column: ligand_affinity_change (NOT scaled!) #*********** mean_affinity_by_position <- df %>% group_by(position) %>% summarize(averaged_affinity = mean(ligand_affinity_change)) # scale b/w -1 and 1 affinity_min = min(mean_affinity_by_position['averaged_affinity']) affinity_max = max(mean_affinity_by_position['averaged_affinity']) # scale the averaged_affinity values mean_affinity_by_position['averaged_affinity_scaled'] = lapply(mean_affinity_by_position['averaged_affinity'] , function(x) ifelse(x < 0, x/abs(affinity_min), x/affinity_max)) cat(paste0('Average affinity scores:\n', head(mean_affinity_by_position['averaged_affinity']) , '\n---------------------------------------------------------------' , '\nScaled affinity scores:\n', head(mean_affinity_by_position['averaged_affinity_scaled']))) # sanity checks l_bound_affinity = min(mean_affinity_by_position['averaged_affinity_scaled']) u_bound_affinity = max(mean_affinity_by_position['averaged_affinity_scaled']) if ( (l_bound_affinity == -1) && (u_bound_affinity == 1) ){ cat(paste0("PASS: affinity scores averaged by position and then scaled" , "\nmin averaged affintiy: ", l_bound_affinity , "\nmax averaged affintiy: ", u_bound_affinity)) }else{ cat(paste0("FAIL: avergaed affinity scores could not be scaled b/w -1 and 1" , "\nmin averaged affintiy: ", l_bound_affinity , "\nmax averaged affintiy: ", u_bound_affinity)) quit() } #*********** # merge: mean_duet_by_position and mean_affinity_by_position #*********** common_cols = intersect(colnames(mean_duet_by_position), colnames(mean_affinity_by_position)) if (dim(mean_duet_by_position) && dim(mean_affinity_by_position)){ print(paste0("PASS: dim's match, mering dfs by column :", common_cols)) #combined = as.data.frame(cbind(mean_duet_by_position, mean_affinity_by_position )) combined_df = as.data.frame(merge(mean_duet_by_position , mean_affinity_by_position , by = common_cols , all = T)) cat(paste0("\nnrows combined_df:", nrow(combined_df) , "\nnrows combined_df:", ncol(combined_df))) }else{ cat(paste0("FAIL: dim's mismatch, aborting cbind!" , "\nnrows df1:", nrow(mean_duet_by_position) , "\nnrows df2:", nrow(mean_affinity_by_position))) quit() } #%%============================================================ # output write.csv(combined_df, outfile_mean_stability , row.names = F) cat("Finished writing file:\n" , outfile_mean_stability , "\nNo. of rows:", nrow(combined_df) , "\nNo. of cols:", ncol(combined_df)) # end of script #===============================================================