added colnames to plot as names

This commit is contained in:
Tanushree Tunstall 2022-08-10 14:06:13 +01:00
parent 4315adc556
commit 2f7558a883
3 changed files with 314 additions and 168 deletions

View file

@ -327,7 +327,6 @@ combining_dfs_plotting <- function( my_df_u
stop("Cannot generate merged_df3")
}
##################################################################
head(merged_df3$position); tail(merged_df3$position) # should be sorted
# sanity check
@ -391,6 +390,301 @@ combining_dfs_plotting <- function( my_df_u
stop("Abort: merged_df3 or merged_df2 can't be created because of lable mismatch")
}
##########################################################################
# MERGED_df2: average cols #
# Average stability + lig-affinity columns #
##########################################################################
#=====================================
# merged_df2: Stability values: average
#====================================
#------------------------------
# foldx sign reverse
# for consistency with other tools
#----------------------------------
head(merged_df2$ddg_foldx)
# foldx values: reverse signs
#merged_df2['ddg_foldxC'] = abs(merged_df2$ddg_foldx)
#head(merged_df2[, c("ddg_foldx", "ddg_foldxC")])
# foldx scaled: sign-adjusted copy (NOTE(review): abs() drops the sign rather than reversing it -- confirm this is intended)
merged_df2['foldx_scaled_signC'] = abs(merged_df2$foldx_scaled)
head(merged_df2[, c("foldx_scaled", "foldx_scaled_signC")])
# find which stability cols to average: should contain revised foldx
scaled_cols_stab = c("duet_scaled"
, "deepddg_scaled"
, "ddg_dynamut2_scaled"
, "foldx_scaled_signC" # needed to get avg stability
)
#-----------------------------------------------
# merged_df2: ADD col: average across predictors: stability
#-----------------------------------------------
if (all((scaled_cols_stab%in%colnames(merged_df2)))){
cat("\nPASS: finding stability cols to average")
cols2avg_stab = scaled_cols_stab
cat("\nAveraging", length(cols2avg_stab), "stability columns:"
, "\nThese are:", cols2avg_stab)
merged_df2['avg_stability'] = rowMeans(merged_df2[, cols2avg_stab])
}else{
stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.")
}
head(merged_df2[, c("mutationinformation"
, "position"
, "foldx_scaled"
, scaled_cols_stab
, "avg_stability")])
#--------------------------------------
# merged_df2: ADD col: average stability outcome
#--------------------------------------
merged_df2["avg_stability_outcome"] = ifelse(merged_df2["avg_stability"] < 0, "Destabilising", "Stabilising")
head(merged_df2[, c("mutationinformation"
, "position"
, "avg_stability"
, "avg_stability_outcome")])
table(merged_df2["avg_stability_outcome"] )
#--------------------------------------
# merged_df2: ADD col: average stability scaled
#--------------------------------------
merged_df2["avg_stability_scaled"] = lapply(merged_df2["avg_stability"]
, function(x) {
scales::rescale_mid(x
, to = c(-1,1)
, from = c( min(merged_df2["avg_stability"])
, max(merged_df2["avg_stability"]))
, mid = 0)
})
if ( all(table(merged_df2["avg_stability"]<0) == table(merged_df2["avg_stability_scaled"]<0)) ){
cat("\nPASS: Avergae stability column successfully averaged, scaled and categorised")
}else{
cat("\nAbort:Avergae stability column could not be processed")
}
head(merged_df2["avg_stability_scaled"])
##########################################################################################
#=====================================
# merged_df2: Affinity values: average
#======================================
common_scaled_cols_affinity = c("affinity_scaled"
, "mmcsm_lig_scaled")
#------------------------------------------------------
# merged_df2: ADD col: ensemble average across predictors: affinity
#------------------------------------------------------
if (all((common_scaled_cols_affinity%in%colnames(merged_df2)))){
cat("\nPASS: finding affinity cols to average")
cols2avg_aff = common_scaled_cols_affinity
merged_df2['avg_lig_affinity'] = rowMeans(merged_df2[, cols2avg_aff])
}else{
stop("\nAbort: cols to average not found.")
}
head(merged_df2[, c("mutationinformation"
, "position"
, cols2avg_aff
, "avg_lig_affinity")])
table(merged_df2$affinity_scaled<0 )
table(merged_df2$mmcsm_lig_scaled<0 )
#--------------------------------------
# merged_df2: ADD col: average affinity outcome
#--------------------------------------
merged_df2["avg_lig_affinity_outcome"] = ifelse(merged_df2["avg_lig_affinity"] < 0, "Destabilising", "Stabilising")
head(merged_df2[, c("mutationinformation"
, "position"
, "avg_lig_affinity"
, "avg_lig_affinity_outcome")])
table(merged_df2["avg_lig_affinity_outcome"] )
min( merged_df2['avg_lig_affinity']); max( merged_df2['avg_lig_affinity'])
#--------------------------------------
# merged_df2: ADD col: average affinity scaled
#--------------------------------------
merged_df2["avg_lig_affinity_scaled"] = lapply(merged_df2["avg_lig_affinity"]
, function(x) {
scales::rescale_mid(x
, to = c(-1,1)
, from = c( min(merged_df2["avg_lig_affinity"])
, max(merged_df2["avg_lig_affinity"]))
, mid = 0)
})
if ( all(table(merged_df2["avg_lig_affinity"]<0) == table(merged_df2["avg_lig_affinity_scaled"]<0)) ){
cat("\nPASS: Avergae affinity column successfully averaged, scaled and categorised")
}else{
cat("\nAbort:Avergae affinity column could not be processed")
}
min( merged_df2['avg_lig_affinity_scaled']); max( merged_df2['avg_lig_affinity_scaled'])
######################################################################################
##########################################################################
# MERGED_df3: average cols #
# Average stability + lig-affinity columns #
##########################################################################
#==========================================
# merged_df3: Stability values: average
#==========================================
#-------------------
# foldx sign reverse
# for consistency with other tools
#-------------------
head(merged_df3$ddg_foldx)
# foldx values: reverse signs
#merged_df3['ddg_foldxC'] = abs(merged_df3$ddg_foldx)
#head(merged_df3[, c("ddg_foldx", "ddg_foldxC")])
# foldx scaled: sign-adjusted copy (NOTE(review): abs() drops the sign rather than reversing it -- confirm this is intended)
merged_df3['foldx_scaled_signC'] = abs(merged_df3$foldx_scaled)
head(merged_df3[, c("foldx_scaled", "foldx_scaled_signC")])
# find which stability cols to average: should contain revised foldx
scaled_cols_stab = c("duet_scaled"
, "deepddg_scaled"
, "ddg_dynamut2_scaled"
#, "foldx_scaled"
, "foldx_scaled_signC" # needed to get avg stability
)
#--------------------------------------------------------
# merged_df3: ADD col: ensemble average across predictors: stability
#---------------------------------------------------------
if (all((scaled_cols_stab%in%colnames(merged_df3)))){
cat("\nPASS: finding stability cols to average")
cols2avg_stab = scaled_cols_stab
cat("\nAveraging", length(cols2avg_stab), "stability columns:"
, "\nThese are:", cols2avg_stab)
merged_df3['avg_stability'] = rowMeans(merged_df3[, cols2avg_stab])
}else{
stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.")
}
head(merged_df3[, c("mutationinformation"
, "position"
, "foldx_scaled"
, scaled_cols_stab
, "avg_stability")])
#--------------------------------------
# merged_df3: ADD col: average stability outcome
#--------------------------------------
merged_df3["avg_stability_outcome"] = ifelse(merged_df3["avg_stability"] < 0, "Destabilising", "Stabilising")
head(merged_df3[, c("mutationinformation"
, "position"
, "avg_stability"
, "avg_stability_outcome")])
table(merged_df3["avg_stability_outcome"] )
#--------------------------------------
# merged_df3: ADD col: average stability scaled
#--------------------------------------
merged_df3["avg_stability_scaled"] = lapply(merged_df3["avg_stability"]
, function(x) {
scales::rescale_mid(x
, to = c(-1,1)
, from = c( min(merged_df3["avg_stability"])
, max(merged_df3["avg_stability"]))
, mid = 0)
})
if ( all(table(merged_df3["avg_stability"]<0) == table(merged_df3["avg_stability_scaled"]<0)) ){
cat("\nPASS: Avergae stability column successfully averaged, scaled and categorised")
}else{
cat("\nAbort:Avergae stability column could not be processed")
}
head(merged_df3["avg_stability_scaled"])
##########################################################################################
#=====================================
# merged_df3: Affinity values: average
#======================================
common_scaled_cols_affinity = c("affinity_scaled"
, "mmcsm_lig_scaled")
#------------------------------------------------------
# merged_df3: ADD col: ensemble average across predictors: affinity
#------------------------------------------------------
if (all((common_scaled_cols_affinity%in%colnames(merged_df3)))){
cat("\nPASS: finding affinity cols to average")
cols2avg_aff = common_scaled_cols_affinity
merged_df3['avg_lig_affinity'] = rowMeans(merged_df3[, cols2avg_aff])
}else{
stop("\nAbort: cols to average not found.")
}
head(merged_df3[, c("mutationinformation"
, "position"
, cols2avg_aff
, "avg_lig_affinity")])
table(merged_df3$affinity_scaled<0 )
table(merged_df3$mmcsm_lig_scaled<0 )
#--------------------------------------
# merged_df3: ADD col: average affinity outcome
#--------------------------------------
merged_df3["avg_lig_affinity_outcome"] = ifelse(merged_df3["avg_lig_affinity"] < 0, "Destabilising", "Stabilising")
head(merged_df3[, c("mutationinformation"
, "position"
, "avg_lig_affinity"
, "avg_lig_affinity_outcome")])
table(merged_df3["avg_lig_affinity_outcome"] )
min( merged_df3['avg_lig_affinity']); max( merged_df3['avg_lig_affinity'])
#--------------------------------------
# merged_df3: ADD col: average affinity scaled
#--------------------------------------
merged_df3["avg_lig_affinity_scaled"] = lapply(merged_df3["avg_lig_affinity"]
, function(x) {
scales::rescale_mid(x
, to = c(-1,1)
, from = c( min(merged_df3["avg_lig_affinity"])
, max(merged_df3["avg_lig_affinity"]))
, mid = 0)
})
if ( all(table(merged_df3["avg_lig_affinity"]<0) == table(merged_df3["avg_lig_affinity_scaled"]<0)) ){
cat("\nPASS: Avergae affinity column successfully averaged, scaled and categorised")
}else{
cat("\nAbort:Avergae affinity column could not be processed")
}
min( merged_df3['avg_lig_affinity_scaled']); max( merged_df3['avg_lig_affinity_scaled'])
####################################################################
#TODO
# Choose a few columns to return as plot_df
####################################################################
return(list( merged_df2
, merged_df3
))