From 5c8a9e8f0013f0970cbf2d607dec3126ecadbc93 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Fri, 10 Sep 2021 18:16:41 +0100 Subject: [PATCH] sorted combining_dfs.py with all other data files and tidied up get_plotting_dfs.R --- scripts/combining_dfs.py | 71 ++++++++++++++++++++-------- scripts/functions/plotting_data.R | 72 +++++++++++++++-------------- scripts/plotting/get_plotting_dfs.R | 44 +++++++++--------- 3 files changed, 111 insertions(+), 76 deletions(-) diff --git a/scripts/combining_dfs.py b/scripts/combining_dfs.py index 4e2781e..faa9677 100755 --- a/scripts/combining_dfs.py +++ b/scripts/combining_dfs.py @@ -169,25 +169,31 @@ i_join = 'inner' #===================== # some preprocessing #===================== -#------------- + +#=========== # FoldX -#------------- +#=========== foldx_df.shape -#======================= + +#---------------------- # scale foldx values -#======================= +#---------------------- +# rename ddg column to ddg_foldx +foldx_df['ddg'] +foldx_df = foldx_df.rename(columns = {'ddg':'ddg_foldx'}) +foldx_df['ddg_foldx'] # Rescale values in Foldx_change col b/w -1 and 1 so negative numbers # stay neg and pos numbers stay positive -foldx_min = foldx_df['ddg'].min() -foldx_max = foldx_df['ddg'].max() +foldx_min = foldx_df['ddg_foldx'].min() +foldx_max = foldx_df['ddg_foldx'].max() foldx_min foldx_max foldx_scale = lambda x : x/abs(foldx_min) if x < 0 else (x/foldx_max if x >= 0 else 'failed') -foldx_df['foldx_scaled'] = foldx_df['ddg'].apply(foldx_scale) -print('Raw foldx scores:\n', foldx_df['ddg'] +foldx_df['foldx_scaled'] = foldx_df['ddg_foldx'].apply(foldx_scale) +print('Raw foldx scores:\n', foldx_df['ddg_foldx'] , '\n---------------------------------------------------------------' , '\nScaled foldx scores:\n', foldx_df['foldx_scaled']) @@ -195,8 +201,8 @@ print('Raw foldx scores:\n', foldx_df['ddg'] fsmi = foldx_df['foldx_scaled'].min() fsma = foldx_df['foldx_scaled'].max() -c = foldx_df[foldx_df['ddg']>=0].count() -foldx_pos = c.get(key = 'ddg') +c = foldx_df[foldx_df['ddg_foldx']>=0].count() +foldx_pos = c.get(key = 'ddg_foldx') c2 = foldx_df[foldx_df['foldx_scaled']>=0].count() foldx_pos2 = c2.get(key = 'foldx_scaled') @@ -209,20 +215,30 @@ else: , '\nGot:', foldx_pos2 , '\n======================================================') -# rename ddg column to ddg_foldx -foldx_df['ddg'] -foldx_df = foldx_df.rename(columns = {'ddg':'ddg_foldx'}) -foldx_df['ddg_foldx'] +#------------------------- +# foldx outcome category +#-------------------------- +foldx_df['foldx_outcome'] = foldx_df['ddg_foldx'].apply(lambda x: 'Stabilising' if x >= 0 else 'Destabilising') +foldx_df[foldx_df['ddg_foldx']>=0].count() +foc = foldx_df['foldx_outcome'].value_counts() -#------------- +if foc['Stabilising'] == foldx_pos and foc['Stabilising'] == foldx_pos2: + print('\nPASS: Foldx outcome category created') +else: + print('\nFAIL: Foldx outcome category could NOT be created' + , '\nExpected number:', foldx_pos + , '\nGot:', foc[0] + , '\n======================================================') + sys.exit() + +#======================= # Deepddg -#------------- +#======================= deepddg_df.shape -#======================= +#------------------------- # scale Deepddg values -#======================= - +#------------------------- # Rescale values in deepddg_change col b/w -1 and 1 so negative numbers # stay neg and pos numbers stay positive deepddg_min = deepddg_df['deepddg'].min() @@ -252,6 +268,23 @@ else: , '\nExpected number:', deepddg_pos , '\nGot:', deepddg_pos2 , '\n======================================================') + +#-------------------------- +# Deepddg outcome category +#-------------------------- +deepddg_df['deepddg_outcome'] = deepddg_df['deepddg'].apply(lambda x: 'Stabilising' if x >= 0 else 'Destabilising') +deepddg_df[deepddg_df['deepddg']>=0].count() +doc = deepddg_df['deepddg_outcome'].value_counts() + +if doc['Stabilising'] == deepddg_pos and doc['Stabilising'] == deepddg_pos2: + print('\nPASS: Deepddg outcome category created') +else: + print('\nFAIL: Deepddg outcome category could NOT be created' + , '\nExpected number:', deepddg_pos + , '\nGot:', doc[0] + , '\n======================================================') + sys.exit() + #%%============================================================================= # Now merges begin #%%============================================================================= diff --git a/scripts/functions/plotting_data.R b/scripts/functions/plotting_data.R index ddda207..5744faa 100755 --- a/scripts/functions/plotting_data.R +++ b/scripts/functions/plotting_data.R @@ -16,7 +16,9 @@ library(dplyr) ## my_df_u_lig ## dup_muts #======================================================== -plotting_data <- function(df, lig_dist_colname = 'ligand_distance', lig_dist_cutoff = 10) { +plotting_data <- function(df + , lig_dist_colname = 'ligand_distance' + , lig_dist_cutoff = 10) { my_df = data.frame() my_df_u = data.frame() my_df_u_lig = data.frame() @@ -38,51 +40,51 @@ cat("\nInput dimensions:", dim(df)) #================================== #------------------------------ -# adding foldx scaled values -# scale data b/w -1 and 1 -#------------------------------ -n = which(colnames(df) == "ddg"); n - -my_min = min(df[,n]); my_min -my_max = max(df[,n]); my_max - -df$foldx_scaled = ifelse(df[,n] < 0 - , df[,n]/abs(my_min) - , df[,n]/my_max) -# sanity check -my_min = min(df$foldx_scaled); my_min -my_max = max(df$foldx_scaled); my_max - -if (my_min == -1 && my_max == 1){ - cat("\nPASS: foldx ddg successfully scaled b/w -1 and 1" - , "\nProceeding with assigning foldx outcome category") -}else{ - cat("\nFAIL: could not scale foldx ddg values" - , "Aborting!\n") -} +# # adding foldx scaled values +# # scale data b/w -1 and 1 +# #------------------------------ +# n = which(colnames(df) == "ddg"); n +# +# my_min = min(df[,n]); my_min +# my_max = max(df[,n]); my_max +# +# df$foldx_scaled = ifelse(df[,n] < 0 +# , df[,n]/abs(my_min) +# , df[,n]/my_max) +# # sanity check +# my_min = min(df$foldx_scaled); my_min +# my_max = max(df$foldx_scaled); my_max +# +# if (my_min == -1 && my_max == 1){ +# cat("\nPASS: foldx ddg successfully scaled b/w -1 and 1" +# , "\nProceeding with assigning foldx outcome category") +# }else{ +# cat("\nFAIL: could not scale foldx ddg values" +# , "Aborting!\n") +# } #------------------------------ # adding foldx outcome category # ddg<0 = "Stabilising" (-ve) #------------------------------ -c1 = table(df$ddg < 0) -df$foldx_outcome = ifelse(df$ddg < 0, "Stabilising", "Destabilising") -c2 = table(df$ddg < 0) - -if ( all(c1 == c2) ){ - cat("\nPASS: foldx outcome successfully created") -}else{ - cat("\nFAIL: foldx outcome could not be created. Aborting!\n") - exit() -} +# c1 = table(df$ddg < 0) +# df$foldx_outcome = ifelse(df$ddg < 0, "Stabilising", "Destabilising") +# c2 = table(df$ddg < 0) +# +# if ( all(c1 == c2) ){ +# cat("\nPASS: foldx outcome successfully created") +# }else{ +# cat("\nFAIL: foldx outcome could not be created. Aborting!\n") +# exit() +# } #------------------------------ # renaming foldx column from # "ddg" --> "ddg_foldx" #------------------------------ -# change name to foldx -colnames(df)[n] <- "ddg_foldx" +# # change name to foldx +# colnames(df)[n] <- "ddg_foldx" #================================== # extract unique mutation entries diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R index f1a7620..c1ce5b2 100755 --- a/scripts/plotting/get_plotting_dfs.R +++ b/scripts/plotting/get_plotting_dfs.R @@ -97,33 +97,33 @@ merged_df3_comp = all_plot_dfs[[4]] # adding deepddg scaled values # scale data b/w -1 and 1 #============================ -n = which(colnames(merged_df3) == "deepddg"); n - -my_min = min(merged_df3[,n]); my_min -my_max = max(merged_df3[,n]); my_max - -merged_df3$deepddg_scaled = ifelse(merged_df3[,n] < 0 - , merged_df3[,n]/abs(my_min) - , merged_df3[,n]/my_max) -# sanity check -my_min = min(merged_df3$deepddg_scaled); my_min -my_max = max(merged_df3$deepddg_scaled); my_max - -if (my_min == -1 && my_max == 1){ - cat("\nPASS: DeepDDG successfully scaled b/w -1 and 1" - #, "\nProceeding with assigning deep outcome category") - , "\n") -}else{ - cat("\nFAIL: could not scale DeepDDG ddg values" - , "Aborting!") -} - +# n = which(colnames(merged_df3) == "deepddg"); n +# +# my_min = min(merged_df3[,n]); my_min +# my_max = max(merged_df3[,n]); my_max +# +# merged_df3$deepddg_scaled = ifelse(merged_df3[,n] < 0 +# , merged_df3[,n]/abs(my_min) +# , merged_df3[,n]/my_max) +# # sanity check +# my_min = min(merged_df3$deepddg_scaled); my_min +# my_max = max(merged_df3$deepddg_scaled); my_max +# +# if (my_min == -1 && my_max == 1){ +# cat("\nPASS: DeepDDG successfully scaled b/w -1 and 1" +# #, "\nProceeding with assigning deep outcome category") +# , "\n") +# }else{ +# cat("\nFAIL: could not scale DeepDDG ddg values" +# , "Aborting!") +# } +# #################################################################### # Data for combining other dfs #################################################################### -source("other_dfs_data.R") +#source("other_dfs_data.R") #################################################################### # Data for subcols barplot (~heatmap)