added colnames to plot as names
This commit is contained in:
parent
4315adc556
commit
2f7558a883
3 changed files with 314 additions and 168 deletions
|
@ -327,7 +327,6 @@ combining_dfs_plotting <- function( my_df_u
|
|||
stop("Cannot generate merged_df3")
|
||||
}
|
||||
##################################################################
|
||||
|
||||
head(merged_df3$position); tail(merged_df3$position) # should be sorted
|
||||
|
||||
# sanity check
|
||||
|
@ -391,6 +390,301 @@ combining_dfs_plotting <- function( my_df_u
|
|||
stop("Abort: merged_df3 or merged_df2 can't be created because of lable mismatch")
|
||||
}
|
||||
|
||||
##########################################################################
|
||||
# MERGED_df2: average cols #
|
||||
# Average stability + lig-affinity columns #
|
||||
##########################################################################
|
||||
|
||||
#=====================================
|
||||
# merged_df2: Stability values: average
|
||||
#====================================
|
||||
#------------------------------
|
||||
# foldx sign reverse
|
||||
# for consistency with other tools
|
||||
#----------------------------------
|
||||
head(merged_df2$ddg_foldx)
|
||||
|
||||
# foldx values: reverse signs
|
||||
#merged_df2['ddg_foldxC'] = abs(merged_df2$ddg_foldx)
|
||||
#head(merged_df2[, c("ddg_foldx", "ddg_foldxC")])
|
||||
|
||||
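# foldx scaled: sign-adjusted copy (NOTE(review): abs() makes all values non-negative rather than reversing the sign; confirm this is intended)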
|
||||
merged_df2['foldx_scaled_signC'] = abs(merged_df2$foldx_scaled)
|
||||
head(merged_df2[, c("foldx_scaled", "foldx_scaled_signC")])
|
||||
|
||||
# find which stability cols to average: should contain revised foldx
|
||||
scaled_cols_stab = c("duet_scaled"
|
||||
, "deepddg_scaled"
|
||||
, "ddg_dynamut2_scaled"
|
||||
, "foldx_scaled_signC" # needed to get avg stability
|
||||
)
|
||||
|
||||
#-----------------------------------------------
|
||||
# merged_df2: ADD col: average across predictors: stability
|
||||
#-----------------------------------------------
|
||||
if (all((scaled_cols_stab%in%colnames(merged_df2)))){
|
||||
cat("\nPASS: finding stability cols to average")
|
||||
cols2avg_stab = scaled_cols_stab
|
||||
cat("\nAveraging", length(cols2avg_stab), "stability columns:"
|
||||
, "\nThese are:", cols2avg_stab)
|
||||
|
||||
merged_df2['avg_stability'] = rowMeans(merged_df2[, cols2avg_stab])
|
||||
}else{
|
||||
stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.")
|
||||
}
|
||||
|
||||
head(merged_df2[, c("mutationinformation"
|
||||
, "position"
|
||||
, "foldx_scaled"
|
||||
, scaled_cols_stab
|
||||
, "avg_stability")])
|
||||
#--------------------------------------
|
||||
# merged_df2: ADD col: average stability outcome
|
||||
#--------------------------------------
|
||||
merged_df2["avg_stability_outcome"] = ifelse(merged_df2["avg_stability"] < 0, "Destabilising", "Stabilising")
|
||||
|
||||
head(merged_df2[, c("mutationinformation"
|
||||
, "position"
|
||||
, "avg_stability"
|
||||
, "avg_stability_outcome")])
|
||||
|
||||
table(merged_df2["avg_stability_outcome"] )
|
||||
|
||||
#--------------------------------------
|
||||
# merged_df2: ADD col: average stability scaled
|
||||
#--------------------------------------
|
||||
merged_df2["avg_stability_scaled"] = lapply(merged_df2["avg_stability"]
|
||||
, function(x) {
|
||||
scales::rescale_mid(x
|
||||
, to = c(-1,1)
|
||||
, from = c( min(merged_df2["avg_stability"])
|
||||
, max(merged_df2["avg_stability"]))
|
||||
, mid = 0)
|
||||
})
|
||||
|
||||
if ( all(table(merged_df2["avg_stability"]<0) == table(merged_df2["avg_stability_scaled"]<0)) ){
|
||||
cat("\nPASS: Avergae stability column successfully averaged, scaled and categorised")
|
||||
|
||||
}else{
|
||||
cat("\nAbort:Avergae stability column could not be processed")
|
||||
}
|
||||
|
||||
head(merged_df2["avg_stability_scaled"])
|
||||
|
||||
##########################################################################################
|
||||
#=====================================
|
||||
# merged_df2: Affinity values: average
|
||||
#======================================
|
||||
|
||||
common_scaled_cols_affinity = c("affinity_scaled"
|
||||
, "mmcsm_lig_scaled")
|
||||
|
||||
#------------------------------------------------------
|
||||
# merged_df2: ADD col: ensemble average across predictors: affinity
|
||||
#------------------------------------------------------
|
||||
if (all((common_scaled_cols_affinity%in%colnames(merged_df2)))){
|
||||
cat("\nPASS: finding affinity cols to average")
|
||||
cols2avg_aff = common_scaled_cols_affinity
|
||||
merged_df2['avg_lig_affinity'] = rowMeans(merged_df2[, cols2avg_aff])
|
||||
}else{
|
||||
stop("\nAbort: cols to average not found.")
|
||||
}
|
||||
|
||||
head(merged_df2[, c("mutationinformation"
|
||||
, "position"
|
||||
, cols2avg_aff
|
||||
, "avg_lig_affinity")])
|
||||
|
||||
table(merged_df2$affinity_scaled<0 )
|
||||
table(merged_df2$mmcsm_lig_scaled<0 )
|
||||
|
||||
#--------------------------------------
|
||||
# merged_df2: ADD col: average affinity outcome
|
||||
#--------------------------------------
|
||||
merged_df2["avg_lig_affinity_outcome"] = ifelse(merged_df2["avg_lig_affinity"] < 0, "Destabilising", "Stabilising")
|
||||
|
||||
head(merged_df2[, c("mutationinformation"
|
||||
, "position"
|
||||
, "avg_lig_affinity"
|
||||
, "avg_lig_affinity_outcome")])
|
||||
|
||||
table(merged_df2["avg_lig_affinity_outcome"] )
|
||||
|
||||
min( merged_df2['avg_lig_affinity']); max( merged_df2['avg_lig_affinity'])
|
||||
|
||||
#--------------------------------------
|
||||
# merged_df2: ADD col: average affinity scaled
|
||||
#--------------------------------------
|
||||
merged_df2["avg_lig_affinity_scaled"] = lapply(merged_df2["avg_lig_affinity"]
|
||||
, function(x) {
|
||||
scales::rescale_mid(x
|
||||
, to = c(-1,1)
|
||||
, from = c( min(merged_df2["avg_lig_affinity"])
|
||||
, max(merged_df2["avg_lig_affinity"]))
|
||||
, mid = 0)
|
||||
})
|
||||
|
||||
if ( all(table(merged_df2["avg_lig_affinity"]<0) == table(merged_df2["avg_lig_affinity_scaled"]<0)) ){
|
||||
cat("\nPASS: Avergae affinity column successfully averaged, scaled and categorised")
|
||||
|
||||
}else{
|
||||
cat("\nAbort:Avergae affinity column could not be processed")
|
||||
}
|
||||
|
||||
min( merged_df2['avg_lig_affinity_scaled']); max( merged_df2['avg_lig_affinity_scaled'])
|
||||
|
||||
######################################################################################
|
||||
|
||||
##########################################################################
|
||||
# MERGED_df3: average cols #
|
||||
# Average stability + lig-affinity columns #
|
||||
##########################################################################
|
||||
|
||||
#==========================================
|
||||
# merged_df3: Stability values: average
|
||||
#==========================================
|
||||
#-------------------
|
||||
# foldx sign reverse
|
||||
# for consistency with other tools
|
||||
#-------------------
|
||||
head(merged_df3$ddg_foldx)
|
||||
|
||||
# foldx values: reverse signs
|
||||
#merged_df3['ddg_foldxC'] = abs(merged_df3$ddg_foldx)
|
||||
#head(merged_df3[, c("ddg_foldx", "ddg_foldxC")])
|
||||
|
||||
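# foldx scaled: sign-adjusted copy (NOTE(review): abs() makes all values non-negative rather than reversing the sign; confirm this is intended)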
|
||||
merged_df3['foldx_scaled_signC'] = abs(merged_df3$foldx_scaled)
|
||||
head(merged_df3[, c("foldx_scaled", "foldx_scaled_signC")])
|
||||
|
||||
# find which stability cols to average: should contain revised foldx
|
||||
scaled_cols_stab = c("duet_scaled"
|
||||
, "deepddg_scaled"
|
||||
, "ddg_dynamut2_scaled"
|
||||
#, "foldx_scaled"
|
||||
, "foldx_scaled_signC" # needed to get avg stability
|
||||
)
|
||||
|
||||
#--------------------------------------------------------
|
||||
# merged_df3: ADD col: ensemble average across predictors: stability
|
||||
#---------------------------------------------------------
|
||||
if (all((scaled_cols_stab%in%colnames(merged_df3)))){
|
||||
cat("\nPASS: finding stability cols to average")
|
||||
cols2avg_stab = scaled_cols_stab
|
||||
cat("\nAveraging", length(cols2avg_stab), "stability columns:"
|
||||
, "\nThese are:", cols2avg_stab)
|
||||
|
||||
merged_df3['avg_stability'] = rowMeans(merged_df3[, cols2avg_stab])
|
||||
}else{
|
||||
stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.")
|
||||
}
|
||||
|
||||
head(merged_df3[, c("mutationinformation"
|
||||
, "position"
|
||||
, "foldx_scaled"
|
||||
, scaled_cols_stab
|
||||
, "avg_stability")])
|
||||
#--------------------------------------
|
||||
# merged_df3: ADD col: average stability outcome
|
||||
#--------------------------------------
|
||||
merged_df3["avg_stability_outcome"] = ifelse(merged_df3["avg_stability"] < 0, "Destabilising", "Stabilising")
|
||||
|
||||
head(merged_df3[, c("mutationinformation"
|
||||
, "position"
|
||||
, "avg_stability"
|
||||
, "avg_stability_outcome")])
|
||||
|
||||
table(merged_df3["avg_stability_outcome"] )
|
||||
|
||||
#--------------------------------------
|
||||
# merged_df3: ADD col: average stability scaled
|
||||
#--------------------------------------
|
||||
merged_df3["avg_stability_scaled"] = lapply(merged_df3["avg_stability"]
|
||||
, function(x) {
|
||||
scales::rescale_mid(x
|
||||
, to = c(-1,1)
|
||||
, from = c( min(merged_df3["avg_stability"])
|
||||
, max(merged_df3["avg_stability"]))
|
||||
, mid = 0)
|
||||
})
|
||||
|
||||
if ( all(table(merged_df3["avg_stability"]<0) == table(merged_df3["avg_stability_scaled"]<0)) ){
|
||||
cat("\nPASS: Avergae stability column successfully averaged, scaled and categorised")
|
||||
|
||||
}else{
|
||||
cat("\nAbort:Avergae stability column could not be processed")
|
||||
}
|
||||
|
||||
head(merged_df3["avg_stability_scaled"])
|
||||
|
||||
##########################################################################################
|
||||
#=====================================
|
||||
# merged_df3: Affinity values: average
|
||||
#======================================
|
||||
|
||||
common_scaled_cols_affinity = c("affinity_scaled"
|
||||
, "mmcsm_lig_scaled")
|
||||
|
||||
#------------------------------------------------------
|
||||
# merged_df3: ADD col: ensemble average across predictors: affinity
|
||||
#------------------------------------------------------
|
||||
if (all((common_scaled_cols_affinity%in%colnames(merged_df3)))){
|
||||
cat("\nPASS: finding affinity cols to average")
|
||||
cols2avg_aff = common_scaled_cols_affinity
|
||||
merged_df3['avg_lig_affinity'] = rowMeans(merged_df3[, cols2avg_aff])
|
||||
}else{
|
||||
stop("\nAbort: cols to average not found.")
|
||||
}
|
||||
|
||||
head(merged_df3[, c("mutationinformation"
|
||||
, "position"
|
||||
, cols2avg_aff
|
||||
, "avg_lig_affinity")])
|
||||
|
||||
table(merged_df3$affinity_scaled<0 )
|
||||
table(merged_df3$mmcsm_lig_scaled<0 )
|
||||
|
||||
#--------------------------------------
|
||||
# merged_df3: ADD col: average affinity outcome
|
||||
#--------------------------------------
|
||||
merged_df3["avg_lig_affinity_outcome"] = ifelse(merged_df3["avg_lig_affinity"] < 0, "Destabilising", "Stabilising")
|
||||
|
||||
head(merged_df3[, c("mutationinformation"
|
||||
, "position"
|
||||
, "avg_lig_affinity"
|
||||
, "avg_lig_affinity_outcome")])
|
||||
|
||||
table(merged_df3["avg_lig_affinity_outcome"] )
|
||||
|
||||
min( merged_df3['avg_lig_affinity']); max( merged_df3['avg_lig_affinity'])
|
||||
|
||||
#--------------------------------------
|
||||
# merged_df3: ADD col: average affinity scaled
|
||||
#--------------------------------------
|
||||
merged_df3["avg_lig_affinity_scaled"] = lapply(merged_df3["avg_lig_affinity"]
|
||||
, function(x) {
|
||||
scales::rescale_mid(x
|
||||
, to = c(-1,1)
|
||||
, from = c( min(merged_df3["avg_lig_affinity"])
|
||||
, max(merged_df3["avg_lig_affinity"]))
|
||||
, mid = 0)
|
||||
})
|
||||
|
||||
if ( all(table(merged_df3["avg_lig_affinity"]<0) == table(merged_df3["avg_lig_affinity_scaled"]<0)) ){
|
||||
cat("\nPASS: Avergae affinity column successfully averaged, scaled and categorised")
|
||||
|
||||
}else{
|
||||
cat("\nAbort:Avergae affinity column could not be processed")
|
||||
}
|
||||
|
||||
min( merged_df3['avg_lig_affinity_scaled']); max( merged_df3['avg_lig_affinity_scaled'])
|
||||
|
||||
|
||||
####################################################################
|
||||
#TODO
|
||||
# Choose few columns to return as plot_df
|
||||
|
||||
####################################################################
|
||||
return(list( merged_df2
|
||||
, merged_df3
|
||||
))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue