sorted combining_dfs.py with all other data files and tidied up get_plotting_dfs.R
This commit is contained in:
parent
4339976002
commit
e28be8bf0d
3 changed files with 111 additions and 76 deletions
|
@ -169,25 +169,31 @@ i_join = 'inner'
|
||||||
#=====================
|
#=====================
|
||||||
# some preprocessing
|
# some preprocessing
|
||||||
#=====================
|
#=====================
|
||||||
#-------------
|
|
||||||
|
#===========
|
||||||
# FoldX
|
# FoldX
|
||||||
#-------------
|
#===========
|
||||||
foldx_df.shape
|
foldx_df.shape
|
||||||
#=======================
|
|
||||||
|
#----------------------
|
||||||
# scale foldx values
|
# scale foldx values
|
||||||
#=======================
|
#----------------------
|
||||||
|
# rename ddg column to ddg_foldx
|
||||||
|
foldx_df['ddg']
|
||||||
|
foldx_df = foldx_df.rename(columns = {'ddg':'ddg_foldx'})
|
||||||
|
foldx_df['ddg_foldx']
|
||||||
|
|
||||||
# Rescale values in Foldx_change col b/w -1 and 1 so negative numbers
|
# Rescale values in the ddg_foldx column to between -1 and 1 so negative numbers
|
||||||
# stay neg and pos numbers stay positive
|
# stay neg and pos numbers stay positive
|
||||||
foldx_min = foldx_df['ddg'].min()
|
foldx_min = foldx_df['ddg_foldx'].min()
|
||||||
foldx_max = foldx_df['ddg'].max()
|
foldx_max = foldx_df['ddg_foldx'].max()
|
||||||
foldx_min
|
foldx_min
|
||||||
foldx_max
|
foldx_max
|
||||||
|
|
||||||
foldx_scale = lambda x : x/abs(foldx_min) if x < 0 else (x/foldx_max if x >= 0 else 'failed')
|
foldx_scale = lambda x : x/abs(foldx_min) if x < 0 else (x/foldx_max if x >= 0 else 'failed')
|
||||||
|
|
||||||
foldx_df['foldx_scaled'] = foldx_df['ddg'].apply(foldx_scale)
|
foldx_df['foldx_scaled'] = foldx_df['ddg_foldx'].apply(foldx_scale)
|
||||||
print('Raw foldx scores:\n', foldx_df['ddg']
|
print('Raw foldx scores:\n', foldx_df['ddg_foldx']
|
||||||
, '\n---------------------------------------------------------------'
|
, '\n---------------------------------------------------------------'
|
||||||
, '\nScaled foldx scores:\n', foldx_df['foldx_scaled'])
|
, '\nScaled foldx scores:\n', foldx_df['foldx_scaled'])
|
||||||
|
|
||||||
|
@ -195,8 +201,8 @@ print('Raw foldx scores:\n', foldx_df['ddg']
|
||||||
fsmi = foldx_df['foldx_scaled'].min()
|
fsmi = foldx_df['foldx_scaled'].min()
|
||||||
fsma = foldx_df['foldx_scaled'].max()
|
fsma = foldx_df['foldx_scaled'].max()
|
||||||
|
|
||||||
c = foldx_df[foldx_df['ddg']>=0].count()
|
c = foldx_df[foldx_df['ddg_foldx']>=0].count()
|
||||||
foldx_pos = c.get(key = 'ddg')
|
foldx_pos = c.get(key = 'ddg_foldx')
|
||||||
|
|
||||||
c2 = foldx_df[foldx_df['foldx_scaled']>=0].count()
|
c2 = foldx_df[foldx_df['foldx_scaled']>=0].count()
|
||||||
foldx_pos2 = c2.get(key = 'foldx_scaled')
|
foldx_pos2 = c2.get(key = 'foldx_scaled')
|
||||||
|
@ -209,20 +215,30 @@ else:
|
||||||
, '\nGot:', foldx_pos2
|
, '\nGot:', foldx_pos2
|
||||||
, '\n======================================================')
|
, '\n======================================================')
|
||||||
|
|
||||||
# rename ddg column to ddg_foldx
|
#-------------------------
|
||||||
foldx_df['ddg']
|
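# foldx outcome category -- NOTE(review): the R code this replaces labelled ddg < 0 as "Stabilising", but the line below labels ddg_foldx >= 0 as 'Stabilising'; confirm the sign convention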
|
||||||
foldx_df = foldx_df.rename(columns = {'ddg':'ddg_foldx'})
|
#--------------------------
|
||||||
foldx_df['ddg_foldx']
|
foldx_df['foldx_outcome'] = foldx_df['ddg_foldx'].apply(lambda x: 'Stabilising' if x >= 0 else 'Destabilising')
|
||||||
|
foldx_df[foldx_df['ddg_foldx']>=0].count()
|
||||||
|
foc = foldx_df['foldx_outcome'].value_counts()
|
||||||
|
|
||||||
#-------------
|
if foc['Stabilising'] == foldx_pos and foc['Stabilising'] == foldx_pos2:
|
||||||
|
print('\nPASS: Foldx outcome category created')
|
||||||
|
else:
|
||||||
|
print('\nFAIL: Foldx outcome category could NOT be created'
|
||||||
|
, '\nExpected number:', foldx_pos
|
||||||
|
, '\nGot:', foc[0]
|
||||||
|
, '\n======================================================')
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
|
#=======================
|
||||||
# Deepddg
|
# Deepddg
|
||||||
#-------------
|
#=======================
|
||||||
deepddg_df.shape
|
deepddg_df.shape
|
||||||
|
|
||||||
#=======================
|
#-------------------------
|
||||||
# scale Deepddg values
|
# scale Deepddg values
|
||||||
#=======================
|
#-------------------------
|
||||||
|
|
||||||
# Rescale values in deepddg_change col b/w -1 and 1 so negative numbers
|
# Rescale values in the deepddg column to between -1 and 1 so negative numbers
|
||||||
# stay neg and pos numbers stay positive
|
# stay neg and pos numbers stay positive
|
||||||
deepddg_min = deepddg_df['deepddg'].min()
|
deepddg_min = deepddg_df['deepddg'].min()
|
||||||
|
@ -252,6 +268,23 @@ else:
|
||||||
, '\nExpected number:', deepddg_pos
|
, '\nExpected number:', deepddg_pos
|
||||||
, '\nGot:', deepddg_pos2
|
, '\nGot:', deepddg_pos2
|
||||||
, '\n======================================================')
|
, '\n======================================================')
|
||||||
|
|
||||||
|
#--------------------------
|
||||||
|
# Deepddg outcome category
|
||||||
|
#--------------------------
|
||||||
|
deepddg_df['deepddg_outcome'] = deepddg_df['deepddg'].apply(lambda x: 'Stabilising' if x >= 0 else 'Destabilising')
|
||||||
|
deepddg_df[deepddg_df['deepddg']>=0].count()
|
||||||
|
doc = deepddg_df['deepddg_outcome'].value_counts()
|
||||||
|
|
||||||
|
if doc['Stabilising'] == deepddg_pos and doc['Stabilising'] == deepddg_pos2:
|
||||||
|
print('\nPASS: Deepddg outcome category created')
|
||||||
|
else:
|
||||||
|
print('\nFAIL: Deepddg outcome category could NOT be created'
|
||||||
|
, '\nExpected number:', deepddg_pos
|
||||||
|
, '\nGot:', doc[0]
|
||||||
|
, '\n======================================================')
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
#%%=============================================================================
|
#%%=============================================================================
|
||||||
# Now merges begin
|
# Now merges begin
|
||||||
#%%=============================================================================
|
#%%=============================================================================
|
||||||
|
|
|
@ -16,7 +16,9 @@ library(dplyr)
|
||||||
## my_df_u_lig
|
## my_df_u_lig
|
||||||
## dup_muts
|
## dup_muts
|
||||||
#========================================================
|
#========================================================
|
||||||
plotting_data <- function(df, lig_dist_colname = 'ligand_distance', lig_dist_cutoff = 10) {
|
plotting_data <- function(df
|
||||||
|
, lig_dist_colname = 'ligand_distance'
|
||||||
|
, lig_dist_cutoff = 10) {
|
||||||
my_df = data.frame()
|
my_df = data.frame()
|
||||||
my_df_u = data.frame()
|
my_df_u = data.frame()
|
||||||
my_df_u_lig = data.frame()
|
my_df_u_lig = data.frame()
|
||||||
|
@ -38,51 +40,51 @@ cat("\nInput dimensions:", dim(df))
|
||||||
#==================================
|
#==================================
|
||||||
|
|
||||||
#------------------------------
|
#------------------------------
|
||||||
# adding foldx scaled values
|
# # adding foldx scaled values
|
||||||
# scale data b/w -1 and 1
|
# # scale data b/w -1 and 1
|
||||||
#------------------------------
|
# #------------------------------
|
||||||
n = which(colnames(df) == "ddg"); n
|
# n = which(colnames(df) == "ddg"); n
|
||||||
|
#
|
||||||
my_min = min(df[,n]); my_min
|
# my_min = min(df[,n]); my_min
|
||||||
my_max = max(df[,n]); my_max
|
# my_max = max(df[,n]); my_max
|
||||||
|
#
|
||||||
df$foldx_scaled = ifelse(df[,n] < 0
|
# df$foldx_scaled = ifelse(df[,n] < 0
|
||||||
, df[,n]/abs(my_min)
|
# , df[,n]/abs(my_min)
|
||||||
, df[,n]/my_max)
|
# , df[,n]/my_max)
|
||||||
# sanity check
|
# # sanity check
|
||||||
my_min = min(df$foldx_scaled); my_min
|
# my_min = min(df$foldx_scaled); my_min
|
||||||
my_max = max(df$foldx_scaled); my_max
|
# my_max = max(df$foldx_scaled); my_max
|
||||||
|
#
|
||||||
if (my_min == -1 && my_max == 1){
|
# if (my_min == -1 && my_max == 1){
|
||||||
cat("\nPASS: foldx ddg successfully scaled b/w -1 and 1"
|
# cat("\nPASS: foldx ddg successfully scaled b/w -1 and 1"
|
||||||
, "\nProceeding with assigning foldx outcome category")
|
# , "\nProceeding with assigning foldx outcome category")
|
||||||
}else{
|
# }else{
|
||||||
cat("\nFAIL: could not scale foldx ddg values"
|
# cat("\nFAIL: could not scale foldx ddg values"
|
||||||
, "Aborting!\n")
|
# , "Aborting!\n")
|
||||||
}
|
# }
|
||||||
|
|
||||||
#------------------------------
|
#------------------------------
|
||||||
# adding foldx outcome category
|
# adding foldx outcome category
|
||||||
# ddg<0 = "Stabilising" (-ve)
|
# ddg<0 = "Stabilising" (-ve)
|
||||||
#------------------------------
|
#------------------------------
|
||||||
c1 = table(df$ddg < 0)
|
# c1 = table(df$ddg < 0)
|
||||||
df$foldx_outcome = ifelse(df$ddg < 0, "Stabilising", "Destabilising")
|
# df$foldx_outcome = ifelse(df$ddg < 0, "Stabilising", "Destabilising")
|
||||||
c2 = table(df$ddg < 0)
|
# c2 = table(df$ddg < 0)
|
||||||
|
#
|
||||||
if ( all(c1 == c2) ){
|
# if ( all(c1 == c2) ){
|
||||||
cat("\nPASS: foldx outcome successfully created")
|
# cat("\nPASS: foldx outcome successfully created")
|
||||||
}else{
|
# }else{
|
||||||
cat("\nFAIL: foldx outcome could not be created. Aborting!\n")
|
# cat("\nFAIL: foldx outcome could not be created. Aborting!\n")
|
||||||
exit()
|
# exit()
|
||||||
}
|
# }
|
||||||
|
|
||||||
#------------------------------
|
#------------------------------
|
||||||
# renaming foldx column from
|
# renaming foldx column from
|
||||||
# "ddg" --> "ddg_foldx"
|
# "ddg" --> "ddg_foldx"
|
||||||
#------------------------------
|
#------------------------------
|
||||||
|
|
||||||
# change name to foldx
|
# # change name to foldx
|
||||||
colnames(df)[n] <- "ddg_foldx"
|
# colnames(df)[n] <- "ddg_foldx"
|
||||||
|
|
||||||
#==================================
|
#==================================
|
||||||
# extract unique mutation entries
|
# extract unique mutation entries
|
||||||
|
|
|
@ -97,33 +97,33 @@ merged_df3_comp = all_plot_dfs[[4]]
|
||||||
# adding deepddg scaled values
|
# adding deepddg scaled values
|
||||||
# scale data b/w -1 and 1
|
# scale data b/w -1 and 1
|
||||||
#============================
|
#============================
|
||||||
n = which(colnames(merged_df3) == "deepddg"); n
|
# n = which(colnames(merged_df3) == "deepddg"); n
|
||||||
|
#
|
||||||
my_min = min(merged_df3[,n]); my_min
|
# my_min = min(merged_df3[,n]); my_min
|
||||||
my_max = max(merged_df3[,n]); my_max
|
# my_max = max(merged_df3[,n]); my_max
|
||||||
|
#
|
||||||
merged_df3$deepddg_scaled = ifelse(merged_df3[,n] < 0
|
# merged_df3$deepddg_scaled = ifelse(merged_df3[,n] < 0
|
||||||
, merged_df3[,n]/abs(my_min)
|
# , merged_df3[,n]/abs(my_min)
|
||||||
, merged_df3[,n]/my_max)
|
# , merged_df3[,n]/my_max)
|
||||||
# sanity check
|
# # sanity check
|
||||||
my_min = min(merged_df3$deepddg_scaled); my_min
|
# my_min = min(merged_df3$deepddg_scaled); my_min
|
||||||
my_max = max(merged_df3$deepddg_scaled); my_max
|
# my_max = max(merged_df3$deepddg_scaled); my_max
|
||||||
|
#
|
||||||
if (my_min == -1 && my_max == 1){
|
# if (my_min == -1 && my_max == 1){
|
||||||
cat("\nPASS: DeepDDG successfully scaled b/w -1 and 1"
|
# cat("\nPASS: DeepDDG successfully scaled b/w -1 and 1"
|
||||||
#, "\nProceeding with assigning deep outcome category")
|
# #, "\nProceeding with assigning deep outcome category")
|
||||||
, "\n")
|
# , "\n")
|
||||||
}else{
|
# }else{
|
||||||
cat("\nFAIL: could not scale DeepDDG ddg values"
|
# cat("\nFAIL: could not scale DeepDDG ddg values"
|
||||||
, "Aborting!")
|
# , "Aborting!")
|
||||||
}
|
# }
|
||||||
|
#
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
# Data for combining other dfs
|
# Data for combining other dfs
|
||||||
####################################################################
|
####################################################################
|
||||||
|
|
||||||
source("other_dfs_data.R")
|
#source("other_dfs_data.R")
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
# Data for subcols barplot (~heatmap)
|
# Data for subcols barplot (~heatmap)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue