diff --git a/scripts/plotting/mcsm_mean_stability.R b/scripts/plotting/mcsm_mean_stability.R new file mode 100755 index 0000000..7e35e79 --- /dev/null +++ b/scripts/plotting/mcsm_mean_stability.R @@ -0,0 +1,163 @@ +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting") +getwd() + +######################################################### +# TASK: + +######################################################### +#source("~/git/LSHTM_analysis/scripts/Header_TT.R") +#require(data.table) +#require(dplyr) + +source("plotting_data.R") +# should return +#my_df +#my_df_u +#dup_muts + +# cmd parse arguments +#require('getopt', quietly = TRUE) +#======================================================== + + +#======================================================== +# Read file: call script for combining df for PS + +#source("../combining_two_df.R") + +#======================================================== + +# plotting_data.R imports all the dir names, etc + +#======= +# output +#======= +out_filename_mean_stability = paste0(tolower(gene), "_mean_stability.csv") +outfile_mean_stability = paste0(outdir, "/", out_filename_mean_stability) +print(paste0("Output file:", outfile_mean_stability)) + +#%%=============================================================== + +#================ +# Data for plots +#================ +# REASSIGNMENT as necessary +df = my_df_u +rm(my_df) + +########################### +# Data for bfactor figure +# PS (duet) average +# Ligand affinity average +########################### +head(df$position); head(df$mutationinformation) +head(df$duet_stability_change) + +# order data frame +#df = df[order(df$position),] #already done +#head(df$position); head(df$mutationinformation) +#head(df$duet_stability_change) + +#*********** +# PS(duet): average by position and then scale b/w -1 and 1 +# column to average: duet_stability_change (NOT scaled!) +#*********** +mean_duet_by_position <- df %>% + group_by(position) %>% + summarize(averaged_duet = mean(duet_stability_change)) + +# scale b/w -1 and 1 +duet_min = min(mean_duet_by_position['averaged_duet']) +duet_max = max(mean_duet_by_position['averaged_duet']) + +# scale the averaged_duet values +mean_duet_by_position['averaged_duet_scaled'] = lapply(mean_duet_by_position['averaged_duet'] + , function(x) ifelse(x < 0, x/abs(duet_min), x/duet_max)) + +cat(paste0('Average duet scores:\n', head(mean_duet_by_position['averaged_duet']) + , '\n---------------------------------------------------------------' + , '\nScaled duet scores:\n', head(mean_duet_by_position['averaged_duet_scaled']))) + +# sanity checks +l_bound_duet = min(mean_duet_by_position['averaged_duet_scaled']) +u_bound_duet = max(mean_duet_by_position['averaged_duet_scaled']) + +if ( (l_bound_duet == -1) && (u_bound_duet == 1) ){ + cat(paste0("PASS: duet scores averaged by position and then scaled" + , "\nmin averaged duet: ", l_bound_duet + , "\nmax averaged duet: ", u_bound_duet)) +}else{ + cat(paste0("FAIL: avergaed duet scores could not be scaled b/w -1 and 1" + , "\nmin averaged duet: ", l_bound_duet + , "\nmax averaged duet: ", u_bound_duet)) + quit() +} + +#*********** +# Lig: average by position and then scale b/w -1 and 1 +# column: ligand_affinity_change (NOT scaled!) +#*********** +mean_affinity_by_position <- df %>% + group_by(position) %>% + summarize(averaged_affinity = mean(ligand_affinity_change)) + +# scale b/w -1 and 1 +affinity_min = min(mean_affinity_by_position['averaged_affinity']) +affinity_max = max(mean_affinity_by_position['averaged_affinity']) + +# scale the averaged_affinity values +mean_affinity_by_position['averaged_affinity_scaled'] = lapply(mean_affinity_by_position['averaged_affinity'] + , function(x) ifelse(x < 0, x/abs(affinity_min), x/affinity_max)) + +cat(paste0('Average affinity scores:\n', head(mean_affinity_by_position['averaged_affinity']) + , '\n---------------------------------------------------------------' + , '\nScaled affinity scores:\n', head(mean_affinity_by_position['averaged_affinity_scaled']))) + +# sanity checks +l_bound_affinity = min(mean_affinity_by_position['averaged_affinity_scaled']) +u_bound_affinity = max(mean_affinity_by_position['averaged_affinity_scaled']) + +if ( (l_bound_affinity == -1) && (u_bound_affinity == 1) ){ + cat(paste0("PASS: affinity scores averaged by position and then scaled" + , "\nmin averaged affintiy: ", l_bound_affinity + , "\nmax averaged affintiy: ", u_bound_affinity)) +}else{ + cat(paste0("FAIL: avergaed affinity scores could not be scaled b/w -1 and 1" + , "\nmin averaged affintiy: ", l_bound_affinity + , "\nmax averaged affintiy: ", u_bound_affinity)) + quit() +} + +#*********** +# merge: mean_duet_by_position and mean_affinity_by_position +#*********** +common_cols = intersect(colnames(mean_duet_by_position), colnames(mean_affinity_by_position)) + +if (dim(mean_duet_by_position) && dim(mean_affinity_by_position)){ + print(paste0("PASS: dim's match, mering dfs by column :", common_cols)) + #combined = as.data.frame(cbind(mean_duet_by_position, mean_affinity_by_position )) + combined_df = as.data.frame(merge(mean_duet_by_position + , mean_affinity_by_position + , by = common_cols + , all = T)) + + cat(paste0("\nnrows combined_df:", nrow(combined_df) + , "\nnrows combined_df:", ncol(combined_df))) +}else{ + cat(paste0("FAIL: dim's mismatch, aborting cbind!" + , "\nnrows df1:", nrow(mean_duet_by_position) + , "\nnrows df2:", nrow(mean_affinity_by_position))) + quit() +} +#%%============================================================ +# output +write.csv(combined_df, outfile_mean_stability + , row.names = F) +cat("Finished writing file:\n" + , outfile_mean_stability + , "\nNo. of rows:", nrow(combined_df) + , "\nNo. of cols:", ncol(combined_df)) + +# end of script +#=============================================================== diff --git a/scripts/plotting/plotting_thesis/basic_barplots.R b/scripts/plotting/plotting_thesis/basic_barplots.R new file mode 100755 index 0000000..f706c85 --- /dev/null +++ b/scripts/plotting/plotting_thesis/basic_barplots.R @@ -0,0 +1,406 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Barplots for mCSM DUET, ligand affinity, and foldX +# basic barplots with count of mutations +# basic barplots with frequency of count of mutations + +# , df_colname = "" +# , leg_title = "" +# , ats = 25 # axis text size +# , als = 22 # axis label size +# , lts = 20 # legend text size +# , ltis = 22 # label title size +# , geom_ls = 10 # geom_label size +# , yaxis_title = "Number of nsSNPs" +# , bp_plot_title = "" +# , label_categories = c("Destabilising", "Stabilising") +# , title_colour = "chocolate4" +# , subtitle_text = NULL +# , sts = 20 +# , subtitle_colour = "pink" +# #, leg_position = c(0.73,0.8) # within plot area +# , leg_position = "top" +# , bar_fill_values = c("#F8766D", "#00BFC4") +######################################################### + +#======================================================================= +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/" + , tolower(gene), "/") +cat("plots will output to:", outdir_images) + +########################################################### +df3 = merged_df3 +# FIXME: port to a common script +#================= +# PREFORMATTING: for consistency +#================= +df3$sensitivity = ifelse(df3$dst_mode == 1, "R", "S") +table(df3$sensitivity) + +# ConSurf labels +consurf_colOld = "consurf_colour_rev" +consurf_colNew = "consurf_outcome" +df3[[consurf_colNew]] = df3[[consurf_colOld]] +df3[[consurf_colNew]] = as.factor(df3[[consurf_colNew]]) +df3[[consurf_colNew]] +levels(df3$consurf_outcome) = c( "nsd", 1, 2, 3, 4, 5, 6, 7, 8, 9) +levels(df3$consurf_outcome) + +# SNAP2 labels +snap2_colname = "snap2_outcome" +df3[[snap2_colname]] <- str_replace(df3[[snap2_colname]], "effect", "Effect") +df3[[snap2_colname]] <- str_replace(df3[[snap2_colname]], "neutral", "Neutral") + +############################################################## +gene_all_cols = colnames(df3)[colnames(df3)%in%all_cols] + +gene_outcome_cols = colnames(df3)[colnames(df3)%in%c(outcome_cols_stability + , outcome_cols_affinity + , outcome_cols_conservation)] +gene_outcome_cols + + +#======================================================================= +#------------------------------ +# stability barplots: +outcome_cols_stability +# label_categories should be = levels(as.factor(plot_df[[df_colname]])) +#------------------------------ +sts = 22 +subtitle_colour = "black" +geom_ls = 10 + +# duetP +duetP = stability_count_bp(plotdf = df3 + , df_colname = "duet_outcome" + , leg_title = "mCSM-DUET" + #, label_categories = labels_duet + , yaxis_title = "Number of nsSNPs" + , leg_position = "none" + , subtitle_text = "mCSM-DUET" + , geom_ls = geom_ls + , bar_fill_values = c("#F8766D", "#00BFC4") + , sts = sts + , subtitle_colour= subtitle_colour) + +# foldx +foldxP = stability_count_bp(plotdf = df3 + , df_colname = "foldx_outcome" + #, leg_title = "FoldX" + #, label_categories = labels_foldx + , yaxis_title = "" + , leg_position = "none" + , subtitle_text = "FoldX" + , geom_ls = geom_ls + , bar_fill_values = c("#F8766D", "#00BFC4") + , sts = sts + , subtitle_colour= subtitle_colour) + + +# deepddg +deepddgP = stability_count_bp(plotdf = df3 + , df_colname = "deepddg_outcome" + #, leg_title = "DeepDDG" + #, label_categories = labels_deepddg + , yaxis_title = "Number of nsSNPs" + , leg_position = "none" + , subtitle_text = "DeepDDG" + , geom_ls = geom_ls + , bar_fill_values = c("#F8766D", "#00BFC4") + , sts = sts + , subtitle_colour= subtitle_colour) + + +# deepddg +dynamut2P = stability_count_bp(plotdf = df3 + , df_colname = "ddg_dynamut2_outcome" + #, leg_title = "Dynamut2" + #, label_categories = labels_ddg_dynamut2_outcome + , yaxis_title = "" + , leg_position = "none" + , subtitle_text = "Dynamut2" + , geom_ls = geom_ls + , bar_fill_values = c("#F8766D", "#00BFC4") + , sts = sts + , subtitle_colour= subtitle_colour) + +dynamut2P + +# extract common legend +common_legend = get_legend(duetP + + guides(color = guide_legend(nrow = 1)) + + theme(legend.position = "top")) + +#========================== +# output: STABILITY PLOTS +#=========================== +bp_stability_CLP = paste0(outdir_images + , tolower(gene) + ,"_bp_stability_CL.svg") + +svg(bp_stability_CLP, width = 15, height = 12) +print(paste0("plot filename:", bp_stability_CLP)) + +cowplot::plot_grid( + common_legend, + cowplot::plot_grid(duetP, foldxP + , deepddgP, dynamut2P + , nrow = 2 + , ncol = 2 + #, labels = c("(a)", "(b)", "(c)", "(d)") + , labels = "AUTO" + , label_size = 25) + , ncol = 1 + , nrow = 2 + , rel_heights = c(0.4/10,9/10)) + +dev.off() +########################################################### +#========================= +# Affinity outcome +# check this var: outcome_cols_affinity +# get from preformatting or put in globals +#========================== +DistCutOff = 10 +LigDist_colname # = "ligand_distance" # from globals +ppi2Dist_colname = "interface_dist" +naDist_colname = "TBC" + +########################################################### +# get plotting data within the distance +df3_lig = df3[df3[[LigDist_colname]]= 2) { + output$corrplot <- renderPlot({ + + #--------------------- + # My correlation plot: user selects columns + #--------------------- + c_plot <- my_corr_pairs(corr_data_all = corrplot_df + , corr_cols = corr_cols_s + , dot_size = 2 + , ats = 1.5 + , corr_lab_size = length(cBCorrNames)/length(corr_cols_s) * 1.3 + , corr_value_size = 1) + + }) + } else{ output$txt = renderText({"Argh, common! It's a correlation plot. Select >=2 vars!"}) + + } + + }) + + #================================== + # Add button: Select All checkbox + #================================== + observeEvent( + input$select_all,{ + + updateCheckboxGroupInput(session, "variable", selected = cBCorrVals) + } +) + + #================ + # Reset render + #================ + observeEvent( + input$reset_graph,{ + + # reset checkboxes to default selection + updateCheckboxGroupInput(session, "variable", selected = cBCorrSelected) + + + # render plot + output$corrplot <- renderPlot({ + + #--------------------- + # My correlation plot: reset plot + #--------------------- + c_plot <- my_corr_pairs(corr_data_all = corrplot_df + , corr_cols = cBCorrSelected + , dot_size = 1.2 + , ats = 1.5 + , corr_lab_size = length(cBCorrNames)/length(cBCorrSelected) * 1.3 + , corr_value_size = 1) + }) + } + ) +} +) + +shinyApp(ui = u_corr, server = s_corr) diff --git a/scripts/plotting/plotting_thesis/corr/corr_plots_gc_lig_i.R b/scripts/plotting/plotting_thesis/corr/corr_plots_gc_lig_i.R new file mode 100644 index 0000000..6ca6337 --- /dev/null +++ b/scripts/plotting/plotting_thesis/corr/corr_plots_gc_lig_i.R @@ -0,0 +1,220 @@ +#!/usr/bin/env Rscript + +source("~/git/LSHTM_analysis/config/gid.R") +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +#=================================================================== +corr_data = corr_data_extract(merged_df3, drug_name = drug) +#corr_data = corr_data_extract(merged_df2, drug_name = drug) +#================================================================ +#other globals +dist_colname <- LigDist_colname # ligand_distance (from globals) +dist_cutoff <- LigDist_cutoff # 10 (from globals) + +cat("\nLigand distance cut off, colname:", dist_colname + , "\nThe max distance", gene, "structure df" , ":", max_ang, "\u212b" + , "\nThe min distance", gene, "structure df" , ":", min_ang, "\u212b") + +######################################################################## + +#========================================== +##################### +# Correlation plot +##################### +colnames(corr_df_m3_f) + +corrplot_cols_lig <- c( "Log (OR)" + , "MAF" + , "-Log (P)" + , "mCSM-lig" + , "mCSM-NA" + , "ASA" + , "RSA" + , "RD" + , "KD" + , dist_colname + , "mutation_info_labels" + ) + +corr_df_lig <- corr_df_m3_f[, corrplot_cols_lig] +head(corr_df_lig) + +corrplot_df_lig <- corr_df_lig + +# static df +# stat_df = corrplot_df_lig[, c("Log (OR)" +# , "MAF" +# , "-Log (P)" +# )] + +plot_title_lig <- "Correlation plots (ligand affinity)" + +# Checkbox Names +cCorrNames = c( "Odds Ratio" + , "Allele Frequency" + , "P-value" + , "Ligand affinity" + , "Nucleic Acid affinity" + , "ASA" + , "RSA" + , "RD" + , "KD" + , "Ligand Distance") + +# Checkbox Values (aka Column Names that are in corrplot_df_lig) +cCorrVals = c("Log (OR)" + , "MAF" + , "-Log (P)" + , "mCSM-lig" + , "mCSM-NA" + , "ASA" + , "RSA" + , "RD" + , "KD" + , dist_colname) + +# Pre-selected checkboxes +cCorrSelected = c("Log (OR)" + , "MAF" + , "-Log (P)") +#============ +# Define UI +#============ +u_corr_lig<- fluidPage( + headerPanel(plot_title_lig), + sidebarLayout(position = "left" + , sidebarPanel("Correlations: Filtered data data" + , numericInput(inputId = "lig_dist" + , label = "Ligand distance cutoff" + , value = dist_cutoff # 10 default from globals + , min = min_ang + , max = max_ang) + , checkboxGroupInput("variable", "Choose parameter:" + , choiceNames = cCorrNames + , choiceValues = cCorrVals + , selected = cCorrSelected + ) + # could be a fluid Row + , actionButton("add_col" , "Render") + , actionButton("reset_graph" , "Reset Graphs") + , actionButton("select_all" , "Select All") + + ) + + # output/display + , mainPanel(plotOutput(outputId = 'corrplot' + , height = "1000px" + , width = "1200px") + # , height = "800px" + # , width = "600px") + , textOutput("txt") + ) + ) +) + +#=============== +# Define server +#=============== +s_corr_lig <- shinyServer(function(input, output, session) + +{ + + #================ + # Initial render + #================ + output$corrplot <- renderPlot({ + + # get the user-specified lig_list + dist_cutoff_ini = input$lig_dist + + # subset data for plot + corrplot_df_lig_ini = corrplot_df_lig[corrplot_df_lig[[dist_colname]] < dist_cutoff_ini,] + + #--------------------- + # My correlation plot: initial plot + #--------------------- + c_plot <- my_corr_pairs( + #corr_data_all = corrplot_df_lig + corr_data_all = corrplot_df_lig_ini + , corr_cols = cCorrSelected + , dot_size = 2 + , ats = 1.5 + , corr_lab_size = length(cCorrNames)/length(cCorrSelected) * 1.3 + , corr_value_size = 1) + + }) + + #==================== + # Interactive render + #==================== + observeEvent( + input$add_col, { + + # get the user-specified lig_list + dist_cutoff_user = input$lig_dist + + # subset data for plot + corrplot_df_lig_s = corrplot_df_lig[corrplot_df_lig[[dist_colname]] < dist_cutoff_user,] + + # select cols for corrplot + corr_cols_s = c(input$variable) + + # render plot + if (length(c(input$variable)) >= 2) { + + output$corrplot <- renderPlot({ + + #--------------------- + # My correlation plot: user selects columns + #--------------------- + c_plot <- my_corr_pairs(corr_data_all = corrplot_df_lig_s + , corr_cols = corr_cols_s + , dot_size = 1.6 + , ats = 1.5 + , corr_lab_size = length(cCorrNames)/length(corr_cols_s) * 1.3 + , corr_value_size = 1) + }) + } else { output$txt = renderText({"Fuddu! It's a correlation plot. Select >=2 vars bewakoof!"})} + + }) + + #================================== + # Add button: Select All checkbox + #================================== + observeEvent( + input$select_all,{ + + updateCheckboxGroupInput(session, "variable", selected = cCorrVals) + } + ) + + #================ + # Reset render + #================ + observeEvent( + input$reset_graph,{ + + # reset checkboxes + updateCheckboxGroupInput(session, "variable", selected = cCorrSelected) + + # render plot + output$corrplot <- renderPlot({ + + #--------------------- + # My correlation plot: reset plot + #--------------------- + c_plot <- my_corr_pairs(corr_data_all = corrplot_df_lig + , corr_cols = cCorrSelected + , dot_size = 2 + , ats = 1.5 + , corr_lab_size = length(cCorrNames)/length(cCorrSelected) * 1.3 + , corr_value_size = 1) + + }) + } + ) +} +) + +shinyApp(ui = u_corr_lig, server = s_corr_lig) + diff --git a/scripts/plotting/plotting_thesis/corr/ggcorr_all_PS_LIG.R b/scripts/plotting/plotting_thesis/corr/ggcorr_all_PS_LIG.R new file mode 100644 index 0000000..e7701e2 --- /dev/null +++ b/scripts/plotting/plotting_thesis/corr/ggcorr_all_PS_LIG.R @@ -0,0 +1,323 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Corr plots for PS and Lig + +# Output: 1 svg + +#======================================================================= +# working dir and loading libraries +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting/") +getwd() + +source("~/git/LSHTM_analysis/scripts/Header_TT.R") +require(cowplot) +source("combining_dfs_plotting.R") +#source("my_pairs_panel.R") +# should return the following dfs, directories and variables + +# FIXME: Can't output from here + +# PS combined: +# 1) merged_df2 +# 2) merged_df2_comp +# 3) merged_df3 +# 4) merged_df3_comp + +# LIG combined: +# 5) merged_df2_lig +# 6) merged_df2_comp_lig +# 7) merged_df3_lig +# 8) merged_df3_comp_lig + +# 9) my_df_u +# 10) my_df_u_lig + +cat(paste0("Directories imported:" + , "\ndatadir:", datadir + , "\nindir:", indir + , "\noutdir:", outdir + , "\nplotdir:", plotdir)) + +cat(paste0("Variables imported:" + , "\ndrug:", drug + , "\ngene:", gene + , "\ngene_match:", gene_match + , "\nAngstrom symbol:", angstroms_symbol + , "\nNo. of duplicated muts:", dup_muts_nu + , "\nNA count for ORs:", na_count + , "\nNA count in df2:", na_count_df2 + , "\nNA count in df3:", na_count_df3)) + +#======= +# output +#======= +# can't combine by cowplot because not ggplots +#corr_plot_combined = "corr_combined.svg" +#plot_corr_plot_combined = paste0(plotdir,"/", corr_plot_combined) + +# PS +#ggcorr_all_ps = "ggcorr_all_PS.svg" +ggcorr_all_ps = "ggcorr_all_PS.png" +plot_ggcorr_all_ps = paste0(plotdir,"/", ggcorr_all_ps) + +# LIG +#ggcorr_all_lig = "ggcorr_all_LIG.svg" +ggcorr_all_lig = "ggcorr_all_LIG.png" +plot_ggcorr_all_lig = paste0(plotdir,"/", ggcorr_all_lig ) + +# combined +ggcorr_all_combined_labelled = "ggcorr_all_combined_labelled.png" +plot_ggcorr_all_combined_labelled = paste0(plotdir,"/", ggcorr_all_combined_labelled) + +#################################################################### +# end of loading libraries and functions # +######################################################################## + +#%%%%%%%%%%%%%%%%%%%%%%%%% +#df_ps = merged_df3_comp +#df_lig = merged_df3_comp_lig +merged_df3 = as.data.frame(merged_df3) +df_ps = merged_df3 +df_lig = merged_df3_lig + +#%%%%%%%%%%%%%%%%%%%%%%%%% + +rm( merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig, my_df_u, my_df_u_lig) + +######################################################################## +# end of data extraction and cleaning for plots # +######################################################################## + + +#====================== +# adding log cols +#====================== +# subset data to generate pairwise correlations +cols_to_select = c("duet_scaled" + + , "foldx_scaled" + + , "log10_or_mychisq" + , "neglog_pval_fisher" + + #, "or_kin" + #, "neglog_pwald_kin" + + , "af" + + , "asa" + , "rsa" + , "kd_values" + , "rd_values" + + , "duet_outcome" + , drug) + +corr_data_ps = df_ps[, cols_to_select] + +dim(corr_data_ps) + +#p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic +#p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic + +# assign nice colnames (for display) +my_corr_colnames = c("DUET" + + , "Foldx" + + , "Log (OR)" + , "-Log (P)" + + #, "OR (adjusted)" + #, "-Log (P wald)" + + , "AF" + + , "ASA" + , "RSA" + , "KD" + , "RD" + + , "duet_outcome" + , drug) + +length(my_corr_colnames) + +colnames(corr_data_ps) +colnames(corr_data_ps) <- my_corr_colnames +colnames(corr_data_ps) + +#------------------------ +# Data for ggcorr PS plot +#------------------------ +start = 1 +end_ggcorr = which(colnames(corr_data_ps) == "duet_outcome"); end_ggcorr # should be the last column +offset = 1 + +my_ggcorr_ps = corr_data_ps[start:(end_ggcorr-1)] +head(my_ggcorr_ps) + +# correlation matrix +corr1 <- round(cor(my_ggcorr_ps, method = "spearman", use = "pairwise.complete.obs"), 1) + +# p-value matrix +pmat1 <- cor_pmat(my_ggcorr_ps, method = "spearman", use = "pairwise.complete.obs" + , conf.level = 0.99) + +corr2 = psych::corr.test(my_ggcorr_ps + , method = "spearman" + , use = "pairwise.complete.obs")$r +corr2 = round(corr2, 1) + +pmat2 = psych::corr.test(my_ggcorr_ps + , method = "spearman" + , adjust = "none" + , use = "pairwise.complete.obs")$p + +corr1== corr2 +pmat1==pmat2 + +#------------------------ +# Generate ggcorr PS plot +#------------------------ +cat("ggCorr plot PS:", plot_ggcorr_all_ps) +#png(filename = plot_ggcorr_all_ps, width = 1024, height = 768, units = "px", pointsize = 20) +ggcorr_ps = ggcorrplot(corr1 + , p.mat = pmat1 + , hc.order = TRUE + , outline.col = "black" + , ggtheme = ggplot2::theme_gray + , colors = c("#6D9EC1", "white", "#E46726") + , title = "DUET and Foldx stability") + + +ggcorr_ps +#dev.off() + +#=========================== +# Data for Correlation plots: LIG +#=========================== +table(df_lig$ligand_outcome) + +df_lig$log10_or_mychisq = log10(df_lig$or_mychisq) +df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher) + + +df_lig$log10_or_kin = log10(df_lig$or_kin) +df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin) + +# subset data to generate pairwise correlations +cols_to_select_lig = c("affinity_scaled" + + , "log10_or_mychisq" + , "neglog_pval_fisher" + + , "or_kin" + , "neglog_pwald_kin" + + , "af" + + , "asa" + , "rsa" + , "kd_values" + , "rd_values" + + , "ligand_outcome" + , drug) + +corr_data_lig = df_lig[, cols_to_select_lig] + +dim(corr_data_lig) + +# assign nice colnames (for display) +my_corr_colnames_lig = c("Ligand Affinity" + + , "Log (OR)" + , "-Log (P)" + + , "OR (adjusted)" + , "-Log(P wald)" + + , "AF" + + , "ASA" + , "RSA" + , "KD" + , "RD" + + , "ligand_outcome" + , drug) + +length(my_corr_colnames) + +colnames(corr_data_lig) +colnames(corr_data_lig) <- my_corr_colnames_lig +colnames(corr_data_lig) + +#------------------------ +# Data for ggcorr LIG plot +#------------------------ + +start = 1 +end_ggcorr_lig = which(colnames(corr_data_lig) == "ligand_outcome"); end_ggcorr_lig # should be the last column +offset = 1 + +my_ggcorr_lig = corr_data_lig[start:(end_ggcorr_lig-1)] +head(my_ggcorr_lig); str(my_ggcorr_lig) + +# correlation matrix +corr1_lig <- round(cor(my_ggcorr_lig, method = "spearman", use = "pairwise.complete.obs"), 1) + +# p-value matrix +pmat1_lig <- cor_pmat(my_ggcorr_lig, method = "spearman", use = "pairwise.complete.obs") + +corr2_lig = psych::corr.test(my_ggcorr_lig + , method = "spearman" + , use = "pairwise.complete.obs")$r + +corr2_lig = round(corr2_lig, 1) + +pmat2_lig = psych::corr.test(my_ggcorr_lig + , method = "spearman" + , adjust = "none" + , use = "pairwise.complete.obs")$p + +corr1_lig == corr2_lig +pmat1_lig == pmat2_lig + + +# for display order columns by hc order of ps + +#col_order = levels(ggcorr_ps$data[2]) + +#col_order <- c("Species", "Petal.Width", "Sepal.Length", + #"Sepal.Width", "Petal.Length") +#my_data2 <- my_data[, col_order] +#my_data2 + +#------------------------ +# Generate ggcorr LIG plot +#------------------------ +cat("ggCorr LIG plot:", plot_ggcorr_all_lig) +#svg(plot_ggcorr_all_lig, width = 15, height = 15) +#png(plot_ggcorr_all_lig, width = 1024, height = 768, units = "px", pointsize = 20) + +ggcorr_lig = ggcorrplot(corr1_lig + , p.mat = pmat1_lig + , hc.order = TRUE + , outline.col = "black" + + , ggtheme = ggplot2::theme_gray + , colors = c("#6D9EC1", "white", "#E46726") + , title = "Ligand affinty") + + +ggcorr_lig +#dev.off() + +####################################################### +#============================= +# combine plots for output +#============================= ++ \ No newline at end of file diff --git a/scripts/plotting/plotting_thesis/corr_plots_thesis.R b/scripts/plotting/plotting_thesis/corr_plots_thesis.R new file mode 100644 index 0000000..e9480d5 --- /dev/null +++ b/scripts/plotting/plotting_thesis/corr_plots_thesis.R @@ -0,0 +1,141 @@ +merged_df3 = as.data.frame(merged_df3) +corr_plotdf = corr_data_extract(merged_df3, extract_scaled_cols = F) + +#================ +# stability +#================ +corr_ps_colnames = c("DUET" + , "FoldX" + , "DeepDDG" + , "Dynamut2" + , "MAF" + , "Log (OR)" + , "-Log (P)" + #, "ligand_distance" + , "dst_mode" + , drug) + +corr_df_ps = corr_plotdf[, corr_ps_colnames] + +color_coln = which(colnames(corr_df_ps) == "dst_mode") +end = which(colnames(corr_df_ps) == drug) +ncol_omit = 2 +corr_end = end-ncol_omit + +#------------------------ +# Output: stability corrP +#------------------------ +corr_psP = paste0(outdir_images + ,tolower(gene) + ,"_corr_stability.svg" ) + +cat("Corr plot stability with coloured dots:", corr_psP) +svg(corr_psP, width = 15, height = 15) + +my_corr_pairs(corr_data_all = corr_df_ps + , corr_cols = colnames(corr_df_ps[1:corr_end]) + , corr_method = "spearman" # other options: "pearson" or "kendall" + , colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode" + , categ_colour = c("red", "blue") + , density_show = F + , hist_col = "coral4" + , dot_size = 1.6 + , ats = 1.5 + , corr_lab_size = 3 + , corr_value_size = 1) + +dev.off() +##################################################### +DistCutOff = 10 +LigDist_colname # = "ligand_distance" # from globals +ppi2Dist_colname = "interface_dist" +naDist_colname = "TBC" +##################################################### + +#================ +# ligand affinity +#================ +corr_lig_colnames = c("mCSM-lig" + , "MAF" + , "Log (OR)" + , "-Log (P)" + , "ligand_distance" + , "dst_mode" + , drug) + +corr_df_lig = corr_plotdf[, corr_lig_colnames] +corr_df_lig = corr_df_lig[corr_df_lig[[LigDist_colname]]