diff --git a/scripts/functions/combining_dfs_plotting.R b/scripts/functions/combining_dfs_plotting.R index ee9df5e..7564962 100644 --- a/scripts/functions/combining_dfs_plotting.R +++ b/scripts/functions/combining_dfs_plotting.R @@ -327,7 +327,6 @@ combining_dfs_plotting <- function( my_df_u stop("Cannot generate merged_df3") } ################################################################## - head(merged_df3$position); tail(merged_df3$position) # should be sorted # sanity check @@ -391,6 +390,301 @@ combining_dfs_plotting <- function( my_df_u stop("Abort: merged_df3 or merged_df2 can't be created because of lable mismatch") } + ########################################################################## + # MERGED_df2: average cols # + # Average stability + lig-affinity columns # + ########################################################################## + + #===================================== + # merged_df2: Stability values: average + #==================================== + #------------------------------ + # foldx sign reverse + # for consistency with other tools + #---------------------------------- + head(merged_df2$ddg_foldx) + + # foldx values: reverse signs + #merged_df2['ddg_foldxC'] = abs(merged_df2$ddg_foldx) + #head(merged_df2[, c("ddg_foldx", "ddg_foldxC")]) + + # foldx scaled: reverse signs fs + merged_df2['foldx_scaled_signC'] = abs(merged_df2$foldx_scaled) + head(merged_df2[, c("foldx_scaled", "foldx_scaled_signC")]) + + # find which stability cols to average: should contain revised foldx + scaled_cols_stab = c("duet_scaled" + , "deepddg_scaled" + , "ddg_dynamut2_scaled" + , "foldx_scaled_signC" # needed to get avg stability + ) + + #----------------------------------------------- + # merged_df2: ADD col: average across predictors: stability + #----------------------------------------------- + if (all((scaled_cols_stab%in%colnames(merged_df2)))){ + cat("\nPASS: finding stability cols to average") + cols2avg_stab = scaled_cols_stab + cat("\nAveraging", length(cols2avg_stab), "stability columns:" + , "\nThese are:", cols2avg_stab) + + merged_df2['avg_stability'] = rowMeans(merged_df2[, cols2avg_stab]) + }else{ + stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.") + } + + head(merged_df2[, c("mutationinformation" + , "position" + , "foldx_scaled" + , scaled_cols_stab + , "avg_stability")]) + #-------------------------------------- + # merged_df2: ADD col: average stability outcome + #-------------------------------------- + merged_df2["avg_stability_outcome"] = ifelse(merged_df2["avg_stability"] < 0, "Destabilising", "Stabilising") + + head(merged_df2[, c("mutationinformation" + , "position" + , "avg_stability" + , "avg_stability_outcome")]) + + table(merged_df2["avg_stability_outcome"] ) + + #-------------------------------------- + # merged_df2: ADD col: average stability scaled + #-------------------------------------- + merged_df2["avg_stability_scaled"] = lapply(merged_df2["avg_stability"] + , function(x) { + scales::rescale_mid(x + , to = c(-1,1) + , from = c( min(merged_df2["avg_stability"]) + , max(merged_df2["avg_stability"])) + , mid = 0) + }) + + if ( all(table(merged_df2["avg_stability"]<0) == table(merged_df2["avg_stability_scaled"]<0)) ){ + cat("\nPASS: Avergae stability column successfully averaged, scaled and categorised") + + }else{ + cat("\nAbort:Avergae stability column could not be processed") + } + + head(merged_df2["avg_stability_scaled"]) + + ########################################################################################## + #===================================== + # merged_df2: Affinity values: average + #====================================== + + common_scaled_cols_affinity = c("affinity_scaled" + , "mmcsm_lig_scaled") + + #------------------------------------------------------ + # merged_df2: ADD col: ensemble average across predictors: affinity + #------------------------------------------------------ + if (all((common_scaled_cols_affinity%in%colnames(merged_df2)))){ + cat("\nPASS: finding affinity cols to average") + cols2avg_aff = common_scaled_cols_affinity + merged_df2['avg_lig_affinity'] = rowMeans(merged_df2[, cols2avg_aff]) + }else{ + stop("\nAbort: cols to average not found.") + } + + head(merged_df2[, c("mutationinformation" + , "position" + , cols2avg_aff + , "avg_lig_affinity")]) + + table(merged_df2$affinity_scaled<0 ) + table(merged_df2$mmcsm_lig_scaled<0 ) + + #-------------------------------------- + # merged_df2: ADD col: average affinity outcome + #-------------------------------------- + merged_df2["avg_lig_affinity_outcome"] = ifelse(merged_df2["avg_lig_affinity"] < 0, "Destabilising", "Stabilising") + + head(merged_df2[, c("mutationinformation" + , "position" + , "avg_lig_affinity" + , "avg_lig_affinity_outcome")]) + + table(merged_df2["avg_lig_affinity_outcome"] ) + + min( merged_df2['avg_lig_affinity']); max( merged_df2['avg_lig_affinity']) + + #-------------------------------------- + # merged_df2: ADD col: average affinity scaled + #-------------------------------------- + merged_df2["avg_lig_affinity_scaled"] = lapply(merged_df2["avg_lig_affinity"] + , function(x) { + scales::rescale_mid(x + , to = c(-1,1) + , from = c( min(merged_df2["avg_lig_affinity"]) + , max(merged_df2["avg_lig_affinity"])) + , mid = 0) + }) + + if ( all(table(merged_df2["avg_lig_affinity"]<0) == table(merged_df2["avg_lig_affinity_scaled"]<0)) ){ + cat("\nPASS: Avergae affinity column successfully averaged, scaled and categorised") + + }else{ + cat("\nAbort:Avergae affinity column could not be processed") + } + + min( merged_df2['avg_lig_affinity_scaled']); max( merged_df2['avg_lig_affinity_scaled']) + + ###################################################################################### + + ########################################################################## + # MERGED_d3: average cols # + # Average stability + lig-affinity columns # + ########################################################################## + + #========================================== + # merged_df3: Stability values: average + #========================================== + #------------------- + # foldx sign reverse + # for consistency with other tools + #------------------- + head(merged_df3$ddg_foldx) + + # foldx values: reverse signs + #merged_df3['ddg_foldxC'] = abs(merged_df3$ddg_foldx) + #head(merged_df3[, c("ddg_foldx", "ddg_foldxC")]) + + # foldx scaled: reverse signs fs + merged_df3['foldx_scaled_signC'] = abs(merged_df3$foldx_scaled) + head(merged_df3[, c("foldx_scaled", "foldx_scaled_signC")]) + + # find which stability cols to average: should contain revised foldx + scaled_cols_stab = c("duet_scaled" + , "deepddg_scaled" + , "ddg_dynamut2_scaled" + #, "foldx_scaled" + , "foldx_scaled_signC" # needed to get avg stability + ) + + #-------------------------------------------------------- + # merged_df3: ADD col: ensemble average across predictors: stability + #--------------------------------------------------------- + if (all((scaled_cols_stab%in%colnames(merged_df3)))){ + cat("\nPASS: finding stability cols to average") + cols2avg_stab = scaled_cols_stab + cat("\nAveraging", length(cols2avg_stab), "stability columns:" + , "\nThese are:", cols2avg_stab) + + merged_df3['avg_stability'] = rowMeans(merged_df3[, cols2avg_stab]) + }else{ + stop("\nAbort: Foldx column has opposing sign. Can't proceed to avergae.") + } + + head(merged_df3[, c("mutationinformation" + , "position" + , "foldx_scaled" + , scaled_cols_stab + , "avg_stability")]) + #-------------------------------------- + # merged_df3: ADD col: average stability outcome + #-------------------------------------- + merged_df3["avg_stability_outcome"] = ifelse(merged_df3["avg_stability"] < 0, "Destabilising", "Stabilising") + + head(merged_df3[, c("mutationinformation" + , "position" + , "avg_stability" + , "avg_stability_outcome")]) + + table(merged_df3["avg_stability_outcome"] ) + + #-------------------------------------- + # merged_df3: ADD col: average stability scaled + #-------------------------------------- + merged_df3["avg_stability_scaled"] = lapply(merged_df3["avg_stability"] + , function(x) { + scales::rescale_mid(x + , to = c(-1,1) + , from = c( min(merged_df3["avg_stability"]) + , max(merged_df3["avg_stability"])) + , mid = 0) + }) + + if ( all(table(merged_df3["avg_stability"]<0) == table(merged_df3["avg_stability_scaled"]<0)) ){ + cat("\nPASS: Avergae stability column successfully averaged, scaled and categorised") + + }else{ + cat("\nAbort:Avergae stability column could not be processed") + } + + head(merged_df3["avg_stability_scaled"]) + + ########################################################################################## + #===================================== + # merged_df3: Affinity values: average + #====================================== + + common_scaled_cols_affinity = c("affinity_scaled" + , "mmcsm_lig_scaled") + + #------------------------------------------------------ + # merged_df3: ADD col: ensemble average across predictors: affinity + #------------------------------------------------------ + if (all((common_scaled_cols_affinity%in%colnames(merged_df3)))){ + cat("\nPASS: finding affinity cols to average") + cols2avg_aff = common_scaled_cols_affinity + merged_df3['avg_lig_affinity'] = rowMeans(merged_df3[, cols2avg_aff]) + }else{ + stop("\nAbort: cols to average not found.") + } + + head(merged_df3[, c("mutationinformation" + , "position" + , cols2avg_aff + , "avg_lig_affinity")]) + + table(merged_df3$affinity_scaled<0 ) + table(merged_df3$mmcsm_lig_scaled<0 ) + + #-------------------------------------- + # merged_df3: ADD col: average affinity outcome + #-------------------------------------- + merged_df3["avg_lig_affinity_outcome"] = ifelse(merged_df3["avg_lig_affinity"] < 0, "Destabilising", "Stabilising") + + head(merged_df3[, c("mutationinformation" + , "position" + , "avg_lig_affinity" + , "avg_lig_affinity_outcome")]) + + table(merged_df3["avg_lig_affinity_outcome"] ) + + min( merged_df3['avg_lig_affinity']); max( merged_df3['avg_lig_affinity']) + + #-------------------------------------- + # merged_df3: ADD col: average affinity scaled + #-------------------------------------- + merged_df3["avg_lig_affinity_scaled"] = lapply(merged_df3["avg_lig_affinity"] + , function(x) { + scales::rescale_mid(x + , to = c(-1,1) + , from = c( min(merged_df3["avg_lig_affinity"]) + , max(merged_df3["avg_lig_affinity"])) + , mid = 0) + }) + + if ( all(table(merged_df3["avg_lig_affinity"]<0) == table(merged_df3["avg_lig_affinity_scaled"]<0)) ){ + cat("\nPASS: Avergae affinity column successfully averaged, scaled and categorised") + + }else{ + cat("\nAbort:Avergae affinity column could not be processed") + } + + min( merged_df3['avg_lig_affinity_scaled']); max( merged_df3['avg_lig_affinity_scaled']) + + + #################################################################### + #TODO + # Choose few columns to return as plot_df + + #################################################################### return(list( merged_df2 , merged_df3 )) diff --git a/scripts/plotting/plotting_thesis/linage_bp_dist.R b/scripts/plotting/plotting_thesis/linage_bp_dist.R index 38d2d05..eb7c2ec 100644 --- a/scripts/plotting/plotting_thesis/linage_bp_dist.R +++ b/scripts/plotting/plotting_thesis/linage_bp_dist.R @@ -150,11 +150,11 @@ linP_dm_om # , "mmcsm_lig_scaled" # , "mcsm_ppi2_scaled" # , "mcsm_na_scaled" -# , "avg_affinity_scaled") +# , "avg_lig_affinity_scaled") # lineage_distP(merged_df2 # , with_facet = F -# , x_axis = "avg_affinity_scaled" +# , x_axis = "avg_lig_affinity_scaled" # , y_axis = "lineage_labels" # , x_lab = my_xlabel # , use_lineages = c("L1", "L2", "L3", "L4") diff --git a/scripts/plotting/plotting_thesis/preformatting.R b/scripts/plotting/plotting_thesis/preformatting.R index 3f08099..b5a8bed 100644 --- a/scripts/plotting/plotting_thesis/preformatting.R +++ b/scripts/plotting/plotting_thesis/preformatting.R @@ -33,17 +33,21 @@ common_cols = c("mutationinformation" raw_cols_stability = c("duet_stability_change" , "deepddg" , "ddg_dynamut2" - , "ddg_foldx") + , "ddg_foldx" + , "avg_stability") scaled_cols_stability = c("duet_scaled" , "deepddg_scaled" , "ddg_dynamut2_scaled" - , "foldx_scaled") + , "foldx_scaled" + , "foldx_scaled_signC" # needed to get avg stability + , "avg_stability_scaled") outcome_cols_stability = c("duet_outcome" , "deepddg_outcome" , "ddg_dynamut2_outcome" - , "foldx_outcome") + , "foldx_outcome" + , "avg_stability_outcome") all_stability_cols = c(raw_cols_stability , scaled_cols_stability @@ -54,17 +58,20 @@ all_stability_cols = c(raw_cols_stability raw_cols_affinity = c("ligand_affinity_change" , "mmcsm_lig" , "mcsm_ppi2_affinity" - , "mcsm_na_affinity") + , "mcsm_na_affinity" + , "avg_lig_affinity") scaled_cols_affinity = c("affinity_scaled" , "mmcsm_lig_scaled" , "mcsm_ppi2_scaled" - , "mcsm_na_scaled" ) + , "mcsm_na_scaled" + , "avg_lig_affinity_scaled") outcome_cols_affinity = c( "ligand_outcome" , "mmcsm_lig_outcome" , "mcsm_ppi2_outcome" - , "mcsm_na_outcome") + , "mcsm_na_outcome" + , "avg_lig_affinity_outcome") all_affinity_cols = c(raw_cols_affinity , scaled_cols_affinity @@ -90,10 +97,6 @@ all_conserv_cols = c(raw_cols_conservation , outcome_cols_conservation) -all_cols = c(common_cols - , all_stability_cols - , all_affinity_cols - , all_conserv_cols) ######################################## categ_cols_to_factor = grep( "_outcome|_info", colnames(merged_df3) ) @@ -118,161 +121,10 @@ cat("\ncols changed to factor are:\n", colnames(merged_df3)[categ_cols_to_factor plot_cols = c("mutationinformation", "mutation_info_labels", "position", "dst_mode" , all_cols) -df3 = merged_df3[, colnames(merged_df3)%in%plot_cols] -#================= -# PREFORMATTING: for consistency -#================= -# DONE: combining_dfs.R -# df3$sensitivity = ifelse(df3$dst_mode == 1, "R", "S") -# table(df3$sensitivity) -# ConSurf labels -#consurf_colOld = "consurf_colour_rev" -#consurf_colNew = "consurf_outcome" -#df3[[consurf_colNew]] = df3[[consurf_colOld]] -#df3[[consurf_colNew]] = as.factor(df3[[consurf_colNew]]) -#df3[[consurf_colNew]] -# not this bit -#!!!!!!!!!!!!!1 -#levels(df3$consurf_outcome) = c( "nsd", 1, 2, 3, 4, 5, 6, 7, 8, 9) - -#levels(df3$consurf_outcome) - -# SNAP2 labels -#snap2_colname = "snap2_outcome" -#df3[[snap2_colname]] <- str_replace(df3[[snap2_colname]], "effect", "Effect") -#df3[[snap2_colname]] <- str_replace(df3[[snap2_colname]], "neutral", "Neutral") - -# for ref: not needed perse as function already does this and assigns labels for barplots -# labels_duet = levels(as.factor(df3$duet_outcome)) -# labels_foldx = levels(as.factor(df3$foldx_outcome)) -# labels_deepddg = levels(as.factor(df3$deepddg_outcome)) -# labels_ddg_dynamut2_outcome = levels(as.factor(df3$ddg_dynamut2_outcome)) -# -# labels_lig = levels(as.factor(df3_lig$ligand_outcome)) -# labels_mmlig = levels(as.factor(df3_lig$mmcsm_lig_outcome)) -# labels_ppi2 = levels(as.factor(df3_ppi2$mcsm_ppi2_outcome)) -# -# labels_provean = levels(as.factor(df3$provean_outcome)) -# labels_snap2 = levels(as.factor(df3$snap2_outcome)) -# labels_consurf = levels(as.factor(df3$consurf_colour_rev)) -# df3$consurf_colour_rev = as.factor(df3$consurf_colour_rev ) -############################################################################## -####################################### -# merged_df2: NECESSARY pre-processing -###################################### -df2 = merged_df2 - -#================= -# PREFORMATTING: for consistency -#================= -# DONE: combining_dfs.R -# df2$sensitivity = ifelse(df2$dst_mode == 1, "R", "S") -# table(df2$sensitivity) - -#---------------------------------------------------- -# Create dst2: fill na in dst with value of dst_mode -# for epistasis -#---------------------------------------------------- -# DONE: combining_dfs.R -# df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2f$dst) - -#---------------------------------------------------- -# reverse signs for foldx scaled values for -# to allow average with other tools -#---------------------------------------------------- -head(df2['ddg_foldx']) -df2['ddg_foldxC'] = abs(df2$ddg_foldx) -head(df2['ddg_foldxC']) - -head(df2['foldx_scaled']) -df2['foldx_scaled_signC'] = abs(df2$foldx_scaled) -head(df2['foldx_scaled_signC']) - -rm_foldx_cols = c("ddg_foldx","foldx_scaled") -raw_cols_stab_revised = raw_cols_stability[!raw_cols_stability%in%rm_foldx_cols] -raw_cols_stab_revised = c(raw_cols_stab_revised,"ddg_foldxC") - -scaled_cols_stab_revised = scaled_cols_stability[!scaled_cols_stability%in%rm_foldx_cols] -scaled_cols_stab_revised = c(scaled_cols_stab_revised, "foldx_scaled_signC") - -###################################################### -# Affinity related variables -# DONE:in plotting_globals.R -# DistCutOff = 10 -# LigDist_colname # = "ligand_distance" # from globals -# ppi2Dist_colname = "interface_dist" -# naDist_colname = "TBC" - -###################################################### -# corr colnames -# drug -# "dst_mode" -# "ligand_distance" -# "DUET" -# "mCSM-lig" -# "FoldX" -# "DeepDDG" -# "ASA" -# "RSA" -# "KD" -# "RD" -# "Consurf" -# "SNAP2" -# "MAF" -# "Log (OR)" -# "-Log (P)" -# "Dynamut2" -# "mCSM-PPI2" -# "interface_dist" - -corr_ps_colnames = c("DUET" -, "FoldX" -, "DeepDDG" -, "Dynamut2" - -, "MAF" -, "Log (OR)" -, "-Log (P)" - -# , "ASA" -# , "RSA" -# , "KD" -# , "RD" -# , "Consurf" -# , "SNAP2" - -#, "mCSM-lig" -#, "ligand_distance" -#, "mCSM-PPI2" -#, "interface_dist" -, "dst_mode" -, drug -) - -corr_lig_colnames = c("mCSM-lig" - , "MAF" - , "Log (OR)" - , "-Log (P)" - , "ligand_distance" - , "dst_mode" - , drug) - -corr_ppi2_colnames = c("mCSM-PPI2" - , "SNAP2" - , "Log (OR)" - , "-Log (P)" - , "interface_dist" - , "dst_mode" - , drug) - -corr_conservation_cols = c("ConSurf" - , "SNAP2" - , "PROVEAN" - , "MAF" - , "Log (OR)" - , "-Log (P)" - , "dst_mode" - , drug) +all_cols = c(common_cols + , all_stability_cols + , all_affinity_cols + , all_conserv_cols)