diff --git a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_adjusted_PS_LIG.R b/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_adjusted_PS_LIG.R deleted file mode 100644 index 4355a23..0000000 --- a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_adjusted_PS_LIG.R +++ /dev/null @@ -1,330 +0,0 @@ -#!/usr/bin/env Rscript -######################################################### -# TASK: Corr plots for PS and Lig - -# Output: 1 svg - -#======================================================================= -# working dir and loading libraries -getwd() -setwd("~/git/LSHTM_analysis/scripts/plotting/") -getwd() - - -source("~/git/LSHTM_analysis/scripts/Header_TT.R") -require(cowplot) -source("combining_dfs_plotting.R") -source("my_pairs_panel.R") -# should return the following dfs, directories and variables - -# PS combined: -# 1) merged_df2 -# 2) merged_df2_comp -# 3) merged_df3 -# 4) merged_df3_comp - -# LIG combined: -# 5) merged_df2_lig -# 6) merged_df2_comp_lig -# 7) merged_df3_lig -# 8) merged_df3_comp_lig - -# 9) my_df_u -# 10) my_df_u_lig - -cat(paste0("Directories imported:" - , "\ndatadir:", datadir - , "\nindir:", indir - , "\noutdir:", outdir - , "\nplotdir:", plotdir)) - -cat(paste0("Variables imported:" - , "\ndrug:", drug - , "\ngene:", gene - , "\ngene_match:", gene_match - , "\nAngstrom symbol:", angstroms_symbol - , "\nNo. of duplicated muts:", dup_muts_nu - , "\nNA count for ORs:", na_count - , "\nNA count in df2:", na_count_df2 - , "\nNA count in df3:", na_count_df3)) - -#======= -# output -#======= -# can't combine by cowplot because not ggplots -#corr_plot_combined = "corr_combined.svg" -#plot_corr_plot_combined = paste0(plotdir,"/", corr_plot_combined) - -# PS -corr_ps_adjusted = "corr_PS_adjusted.svg" -plot_corr_ps_adjusted = paste0(plotdir,"/", corr_ps) - -# LIG -corr_lig_adjusted = "corr_LIG_adjusted.svg" -plot_corr_lig_adjusted = paste0(plotdir,"/", corr_lig) - -#################################################################### -# end of loading libraries and functions # -######################################################################## - -#%%%%%%%%%%%%%%%%%%%%%%%%% -df_ps = merged_df3_comp -df_lig = merged_df3_comp_lig -#%%%%%%%%%%%%%%%%%%%%%%%%% - -rm( merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig, my_df_u, my_df_u_lig) - -######################################################################## -# end of data extraction and cleaning for plots # -######################################################################## - -#=========================== -# Data for Correlation plots:PS -#=========================== -table(df_ps$duet_outcome) - - -#=========================== -# Data for Correlation plots:foldx -#=========================== -#============================ -# adding foldx scaled values -# scale data b/w -1 and 1 -#============================ -n = which(colnames(df_ps) == "ddg"); n - -my_min = min(df_ps[,n]); my_min -my_max = max(df_ps[,n]); my_max - -df_ps$foldx_scaled = ifelse(df_ps[,n] < 0 - , df_ps[,n]/abs(my_min) - , df_ps[,n]/my_max) -# sanity check -my_min = min(df_ps$foldx_scaled); my_min -my_max = max(df_ps$foldx_scaled); my_max - -if (my_min == -1 && my_max == 1){ - cat("PASS: foldx ddg successfully scaled b/w -1 and 1" - , "\nProceeding with assigning foldx outcome category") -}else{ - cat("FAIL: could not scale foldx ddg values" - , "Aborting!") -} - - -#================================ -# adding foldx outcome category -# ddg<0 = "Stabilising" (-ve) -#================================= - -c1 = table(df_ps$ddg < 0) -df_ps$foldx_outcome = ifelse(df_ps$ddg < 0, "Stabilising", "Destabilising") -c2 = table(df_ps$ddg < 0) - -if ( all(c1 == c2) ){ - cat("PASS: foldx outcome successfully created") -}else{ - cat("FAIL: foldx outcome could not be created. Aborting!") - exit() -} - -table(df_ps$foldx_outcome) - - -#====================== -# adding log cols -#====================== - -df_ps$log10_or_mychisq = log10(df_ps$or_mychisq) -df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher) - -df_ps$log10_or_kin = log10(df_ps$or_kin) -df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin) - -# subset data to generate pairwise correlations -cols_to_select = c("duet_scaled" - - , "foldx_scaled" - - #, "log10_or_mychisq" - #, "neglog_pval_fisher" - - , "or_kin" - , "neglog_pwald_kin" - - , "af" - - , "asa" - , "rsa" - , "kd_values" - , "rd_values" - - , "duet_outcome" - , drug) - -corr_data_ps = df_ps[, cols_to_select] - -dim(corr_data_ps) - -#p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic -#p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic - -# assign nice colnames (for display) -my_corr_colnames = c("DUET" - - , "Foldx" - #, "Log(OR)" - #, "-Log(P)" - - , "OR adjusted" - , "-Log(P wald)" - - , "AF" - - , "ASA" - , "RSA" - , "KD" - , "RD" - - , "duet_outcome" - , drug) - -length(my_corr_colnames) - -colnames(corr_data_ps) -colnames(corr_data_ps) <- my_corr_colnames -colnames(corr_data_ps) - -#----------------- -# generate corr PS plot -#----------------- -start = 1 -end = which(colnames(corr_data_ps) == drug); end # should be the last column -offset = 1 - -my_corr_ps = corr_data_ps[start:(end-offset)] -head(my_corr_ps) - -#my_cols = c("#f8766d", "#00bfc4") -# deep blue :#007d85 -# deep red: #ae301e - -cat("Corr plot PS:", plot_corr_ps_adjusted) -svg(plot_corr_ps_adjusted, width = 15, height = 15) - -OutPlot1 = pairs.panels(my_corr_ps[1:(length(my_corr_ps)-1)] - , method = "spearman" # correlation method - , hist.col = "grey" ##00AFBB - , density = TRUE # show density plots - , ellipses = F # show correlation ellipses - , stars = T - , rug = F - , breaks = "Sturges" - , show.points = T - , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_ps$duet_outcome))] - , pch = 21 - , jitter = T - #, alpha = .05 - #, points(pch = 19, col = c("#f8766d", "#00bfc4")) - , cex = 2 - , cex.axis = 1.5 - , cex.labels = 1.5 - , cex.cor = 1 - , smooth = F -) - -print(OutPlot1) -dev.off() - -#=========================== -# Data for Correlation plots: LIG -#=========================== -table(df_lig$ligand_outcome) - -df_lig$log10_or_mychisq = log10(df_lig$or_mychisq) -df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher) - - -df_lig$log10_or_kin = log10(df_lig$or_kin) -df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin) - - -# subset data to generate pairwise correlations -cols_to_select = c("affinity_scaled" - - , "log10_or_mychisq" - , "neglog_pval_fisher" - - #, "or_kin" - #, "neglog_pwald_kin" - - , "af" - - , "ligand_outcome" - , drug) - -corr_data_lig = df_lig[, cols_to_select] - - -dim(corr_data_lig) - -# assign nice colnames (for display) -my_corr_colnames = c("Ligand Affinity" - - , "Log(OR)" - , "-Log(P)" - - #, "OR adjusted" - #, "-Log(P wald)" - - , "AF" - - , "ligand_outcome" - , drug) - -length(my_corr_colnames) - -colnames(corr_data_lig) -colnames(corr_data_lig) <- my_corr_colnames -colnames(corr_data_lig) - -#----------------- -# generate corr LIG plot -#----------------- - -start = 1 -end = which(colnames(corr_data_lig) == drug); end # should be the last column -offset = 1 - -my_corr_lig = corr_data_lig[start:(end-offset)] -head(my_corr_lig) - -cat("Corr LIG plot:", plot_corr_lig_adjusted) -svg(plot_corr_lig_adjusted, width = 15, height = 15) - -OutPlot2 = pairs.panels(my_corr_lig[1:(length(my_corr_lig)-1)] - , method = "spearman" # correlation method - , hist.col = "grey" ##00AFBB - , density = TRUE # show density plots - , ellipses = F # show correlation ellipses - , stars = T - , rug = F - , breaks = "Sturges" - , show.points = T - , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_lig$ligand_outcome))] - , pch = 21 - , jitter = T - #, alpha = .05 - #, points(pch = 19, col = c("#f8766d", "#00bfc4")) - , cex = 3 - , cex.axis = 2.5 - , cex.labels = 2.1 - , cex.cor = 1 - , smooth = F -) - -print(OutPlot2) -dev.off() -####################################################### - - diff --git a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots.R b/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots.R deleted file mode 100755 index ed5979d..0000000 --- a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots.R +++ /dev/null @@ -1,242 +0,0 @@ -#!/usr/bin/env Rscript -getwd() -setwd("~/git/LSHTM_analysis/scripts/plotting") -getwd() -source("~/git/LSHTM_analysis/scripts/Header_TT.R") - -spec = matrix(c( - "drug" , "d", 1, "character", - "gene" , "g", 1, "character", - "data_file1" , "fa", 2, "character", - "data_file2" , "fb", 2, "character" -), byrow = TRUE, ncol = 4) - -opt = getopt(spec) - -drug = opt$drug -gene = opt$gene -infile_params = opt$data_file1 -infile_metadata = opt$data_file2 - -if(is.null(drug)|is.null(gene)) { - stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)") -} - -#=========== -# Input -#=========== - -source("get_plotting_dfs.R") - -#=========== -# output -#=========== -# PS -corr_ps = "corr_PS.svg" -plot_corr_ps = paste0(plotdir,"/", corr_ps) - -corr_ps_all = "corr_PS_all.svg" -plot_corr_ps_all = paste0(plotdir,"/", corr_ps_all) - - -# LIG -corr_lig = "corr_LIG.svg" -plot_corr_lig = paste0(plotdir,"/", corr_lig) - -corr_lig_all = "corr_LIG_all.svg" -plot_corr_lig_all = paste0(plotdir,"/", corr_lig_all) - -############################################################################## -foo = corr_ps_df3 -#foo2 = corr_ps_df2 - -bar = corr_lig_df3 -#bar2 = corr_lig_df2 - -#================================ -# Data for Correlation plots: PS -#================================ -# subset data to generate pairwise correlations -cols_to_select = c("DUET" - , "Foldx" - , "Log (OR)" - , "-Log (P)" - , "MAF" - , "duet_outcome" - , drug) -corr_data_ps = foo[names(foo)%in%cols_to_select] -length(cols_to_select) - -colnames(corr_data_ps) - -start = 1 -end = which(colnames(corr_data_ps) == drug); end # should be the last column -offset = 1 - -my_corr_ps = corr_data_ps[start:(end - offset)] -head(my_corr_ps) - -#--------------------- -# Corr plot PS: short -# data: corr_ps_df3 -# cols: 7 -#--------------------- -cat("Corr plot PS DUET with coloured dots:", plot_corr_ps) -svg(plot_corr_ps, width = 15, height = 15) - -pairs.panels(my_corr_ps[1:(length(my_corr_ps)-1)] - , method = "spearman" # correlation method - , hist.col = "grey" ##00AFBB - , density = TRUE # show density plots - , ellipses = F # show correlation ellipses - , stars = T - , rug = F - , breaks = "Sturges" - , show.points = T - , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_ps$duet_outcome))] # foldx colours are reveresed - , pch = 21 # for bg - , jitter = T - , alpha = 1 - , cex = 1.8 - , cex.axis = 2 - , cex.labels = 4 - , cex.cor = 1 - , smooth = F -) -dev.off() - -corr_ps_rho = corr.test(my_corr_ps[1:5], method = "spearman")$r -corr_ps_p = corr.test(my_corr_ps[1:5], method = "spearman")$p - -#--------------------- -# Corr plot PS: ALL -# data: corr_ps_df3 -# cols: 10 -#--------------------- -end_ps_all = which(colnames(foo) == drug); end_ps_all # should be the last column - -my_corr_ps_all = foo[start:(end_ps_all - offset)] -cols_to_drop = "Mutation" -my_corr_ps_all = my_corr_ps_all[, !(names(my_corr_ps_all)%in%cols_to_drop)] -head(my_corr_ps_all) -length(colnames(my_corr_ps_all)) - -cat("Corr plot PS DUET with coloured dots:", plot_corr_ps_all) -svg(plot_corr_ps_all, width = 15, height = 15) - -pairs.panels(my_corr_ps_all[1:(length(my_corr_ps_all)-1)] - , method = "spearman" # correlation method - , hist.col = "grey" ##00AFBB - , density = TRUE # show density plots - , ellipses = F # show correlation ellipses - , stars = T - , rug = F - , breaks = "Sturges" - , show.points = T - , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_ps_all$duet_outcome))] # foldx colours are reveresed - , pch = 21 # for bg - , jitter = T - , alpha = 1 - , cex = 1.5 - , cex.axis = 2 - , cex.labels = 2.5 - , cex.cor = 1 - , smooth = F -) -dev.off() - -#================================== -# Data for Correlation plots: LIG -#================================== -cols_to_select_lig = c("Ligand Affinity" - , "Log (OR)" - , "-Log (P)" - , "MAF" - , "ligand_outcome" - , drug) - -corr_data_lig = bar[names(bar)%in%cols_to_select_lig] -length(cols_to_select_lig) - -colnames(corr_data_lig) - -start_lig = 1 -end_lig = which(colnames(corr_data_lig) == drug); end_lig # should be the last column -offset_lig = 1 - -my_corr_lig = corr_data_lig[start_lig:(end_lig-offset_lig)] -head(my_corr_lig) - -#--------------------- -# Corr plot LIG: short -# data: corr_lig_df3 -# cols: 7 -#--------------------- -cat("Corr LIG plot with coloured dots:", plot_corr_lig) -svg(plot_corr_lig, width = 15, height = 15) - -pairs.panels(my_corr_lig[1:(length(my_corr_lig)-1)] - , method = "spearman" # correlation method - , hist.col = "grey" ##00AFBB - , density = TRUE # show density plots - , ellipses = F # show correlation ellipses - , stars = T - , rug = F - , breaks = "Sturges" - , show.points = T - , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_lig$ligand_outcome))] - , pch = 21 # for bg - , jitter = T - , cex = 2 - , cex.axis = 2 - , cex.labels = 4 - , cex.cor = 1 - , smooth = F -) - -dev.off() - -corr_lig_rho = corr.test(my_corr_lig[1:4], method = "spearman")$r -corr_lig_p = corr.test(my_corr_lig[1:4], method = "spearman")$p - -#--------------------- -# Corr plot LIG: ALL -# data: corr_lig_df3 -# cols: 9 -#--------------------- -end_lig_all = which(colnames(bar) == drug); end_lig_all # should be the last column - -my_corr_lig_all = bar[start_lig:(end_lig_all - offset_lig)] -cols_to_drop = "Mutation" -my_corr_lig_all = my_corr_lig_all[, !(names(my_corr_lig_all)%in%cols_to_drop)] -head(my_corr_lig_all) -length(colnames(my_corr_lig_all)) - -cat("Corr plot LIG with coloured dots:", plot_corr_lig_all) -svg(plot_corr_lig_all, width = 15, height = 15) - -pairs.panels(my_corr_lig_all[1:(length(my_corr_lig_all)-1)] - , method = "spearman" # correlation method - , hist.col = "grey" ##00AFBB - , density = TRUE # show density plots - , ellipses = F # show correlation ellipses - , stars = T - , rug = F - , breaks = "Sturges" - , show.points = T - , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_lig_all$ligand_outcome))] # foldx colours are reveresed - , pch = 21 # for bg - , jitter = T - , alpha = 1 - , cex = 1.5 - , cex.axis = 2 - , cex.labels = 2.2 - , cex.cor = 1 - , smooth = F -) -dev.off() - - -######################################################################= -# End of script -######################################################################= \ No newline at end of file diff --git a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_i.R b/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_i.R deleted file mode 100644 index 64cf7ff..0000000 --- a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_i.R +++ /dev/null @@ -1,276 +0,0 @@ -#!/usr/bin/env Rscript -source("~/git/LSHTM_analysis/config/gid.R") -source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") - -#=================================================================== -corr_data = corr_data_extract(merged_df3, drug_name = drug) -#corr_data = corr_data_extract(merged_df2, drug_name = drug) - -geneL_normal = c("pnca") -geneL_na_dy = c("gid") -geneL_na = c("rpob") -geneL_ppi2 = c("alr", "embb", "katg", "rpob") - -core_cols <- c( "Log (OR)" , "MAF", "-Log (P)" - , "DUET", "FoldX" - , "DeepDDG", "Dynamut2" - , "ASA", "RSA", "RD", "KD" - , "Consurf", "SNAP2" - #, "mutation_info_labels" -) - - -if (tolower(gene)%in%geneL_normal){ - corrplot_cols = core_cols -} - -if (tolower(gene)%in%geneL_na_dy){ - additional_cols = c("mCSM-NA" - , "Dynamut" - , "ENCoM-DDG" - , "ENCoM-DDS" - , "mCSM" - , "SDM" - , "DUET-d" - , "mutation_info_labels") - corrplot_cols = c(core_cols, additional_cols) -} -if (tolower(gene)%in%geneL_na){ - additional_cols = c("mCSM-NA" - , "mutation_info_labels") - corrplot_cols = c(core_cols, additional_cols) - -} - -if (tolower(gene)%in%geneL_ppi2){ - additional_cols = c("mCSM-PPI2" - , "mutation_info_labels") - corrplot_cols = c(core_cols, additional_cols) -} - -#======================================== -# corrplot_cols <- c( "Log (OR)" -# , "MAF" -# , "-Log (P)" -# , "DUET" -# , "FoldX" -# , "DeepDDG" -# , "Dynamut2" -# , "mCSM-NA" -# , "Dynamut" -# , "ENCoM-DDG" -# , "ENCoM-DDS" -# , "mCSM" -# , "SDM" -# , "DUET-d" -# , "ASA" -# , "RSA" -# , "RD" -# , "KD" -# , "mutation_info_labels" -# ) - -corr_df <- corr_data[, corrplot_cols] # col order is according to corrplot_cols -head(corr_df); names(corr_df) - -if ( all( corrplot_cols%in%names(corr_df) ) ){ - cat("\nPASS: Successfully selected" - , length(corrplot_cols) - , "columns for building correlation df") -} else { - cat("\nFAIl: Something went wrong, numbers mismatch" - , "\nExpected cols:", length(corrplot_cols) - , "\nGot:", length(corr_df) ) -} - -#===================================================== -corrplot_df <- corr_df - -# stat_df = corrplot_df[, c("Log (OR)" -# , "MAF" -# , "-Log (P)")] - -plot_title <- "Correlation plots (stability)" - -# Checkbox Names -# FIXME: select columns conditionally based on gene and grey out the ones that are not present! - -cBCorrNames = c( "Odds Ratio" - , "Allele Frequency" - , "P-value" - , "DUET" - , "FoldX" - , "DeepDDG" - , "Dynamut2" - , "ASA" - , "RSA" - , "RD" - , "KD" - , "Consurf" - , "SNAP2" - , "Nucleic Acid affinity" - , "PPi2 affinity" - - #, "Dynamut" - #, "ENCoM-Stability" - #, "ENCoM-Flexibility" - #, "mCSM" - #, "SDM" - #, "DUET-d" -) - -# Checkbox Values (aka Column Names that are in corrplot_df) -cBCorrVals = c("Log (OR)" - , "MAF" - , "-Log (P)" - , "DUET" - , "FoldX" - , "DeepDDG" - , "Dynamut2" - , "ASA" - , "RSA" - , "RD" - , "KD" - , "Consurf" - , "SNAP2" - , "mCSM-NA" - , "mCSM-PPI2" - # , "Dynamut" - # , "ENCoM-DDG" - # , "ENCoM-DDS" - # , "mCSM" - # , "SDM" - # , "DUET-d" - ) - -# Pre-selected checkboxes -cBCorrSelected = c("Log (OR)" - , "MAF" - , "-Log (P)") - -################# -# Define UI -################# -u_corr <- fluidPage( - - headerPanel(plot_title), - - sidebarLayout(position = "left" - , sidebarPanel( - checkboxGroupInput("variable", "Choose parameter:" - , choiceNames = cBCorrNames - , choiceValues = cBCorrVals - , selected = cBCorrSelected - ) - - # could be a fluid Row - , actionButton("add_col" , "Render") - , actionButton("reset_graph" , "Reset Graphs") - , actionButton("select_all" , "Select All") - - ) - - # output/display - , mainPanel(plotOutput(outputId = 'corrplot' - , height = "1200px" - , width = "1500px") -# , height = "800px" -# , width = "600px") - , textOutput("txt") - ) - ) -) - -################# -# Define server -################# -s_corr <- shinyServer(function(input, output, session) - -{ - - #================ - # Initial render - #================ - output$corrplot <- renderPlot({ - - #--------------------- - # My correlation plot: initial plot - #--------------------- - c_plot <- my_corr_pairs(corr_data_all = corrplot_df - , corr_cols = cBCorrSelected - , corr_method = "spearman" - , dot_size = 2 - , ats = 1.5 - , corr_lab_size = length(cBCorrNames)/length(cBCorrSelected) * 1.3 - , corr_value_size = 1) - }) - - #==================== - # Interactive render - #==================== - observeEvent( - input$add_col, { - - # select cols for corrplot - corr_cols_s <- c(input$variable) - - # render plot - if (length(c(input$variable)) >= 2) { - output$corrplot <- renderPlot({ - - #--------------------- - # My correlation plot: user selects columns - #--------------------- - c_plot <- my_corr_pairs(corr_data_all = corrplot_df - , corr_cols = corr_cols_s - , dot_size = 2 - , ats = 1.5 - , corr_lab_size = length(cBCorrNames)/length(corr_cols_s) * 1.3 - , corr_value_size = 1) - - }) - } else{ output$txt = renderText({"Argh, common! It's a correlation plot. Select >=2 vars!"}) - - } - - }) - - #================================== - # Add button: Select All checkbox - #================================== - observeEvent( - input$select_all,{ - - updateCheckboxGroupInput(session, "variable", selected = cBCorrVals) - } -) - - #================ - # Reset render - #================ - observeEvent( - input$reset_graph,{ - - # reset checkboxes to default selection - updateCheckboxGroupInput(session, "variable", selected = cBCorrSelected) - - - # render plot - output$corrplot <- renderPlot({ - - #--------------------- - # My correlation plot: reset plot - #--------------------- - c_plot <- my_corr_pairs(corr_data_all = corrplot_df - , corr_cols = cBCorrSelected - , dot_size = 1.2 - , ats = 1.5 - , corr_lab_size = length(cBCorrNames)/length(cBCorrSelected) * 1.3 - , corr_value_size = 1) - }) - } - ) -} -) - -shinyApp(ui = u_corr, server = s_corr) diff --git a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_lig_i.R b/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_lig_i.R deleted file mode 100644 index 6ca6337..0000000 --- a/scripts/plotting/plotting_thesis/corr_thesis_ref/corr_plots_gc_lig_i.R +++ /dev/null @@ -1,220 +0,0 @@ -#!/usr/bin/env Rscript - -source("~/git/LSHTM_analysis/config/gid.R") -source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") - -#=================================================================== -corr_data = corr_data_extract(merged_df3, drug_name = drug) -#corr_data = corr_data_extract(merged_df2, drug_name = drug) -#================================================================ -#other globals -dist_colname <- LigDist_colname # ligand_distance (from globals) -dist_cutoff <- LigDist_cutoff # 10 (from globals) - -cat("\nLigand distance cut off, colname:", dist_colname - , "\nThe max distance", gene, "structure df" , ":", max_ang, "\u212b" - , "\nThe min distance", gene, "structure df" , ":", min_ang, "\u212b") - -######################################################################## - -#========================================== -##################### -# Correlation plot -##################### -colnames(corr_df_m3_f) - -corrplot_cols_lig <- c( "Log (OR)" - , "MAF" - , "-Log (P)" - , "mCSM-lig" - , "mCSM-NA" - , "ASA" - , "RSA" - , "RD" - , "KD" - , dist_colname - , "mutation_info_labels" - ) - -corr_df_lig <- corr_df_m3_f[, corrplot_cols_lig] -head(corr_df_lig) - -corrplot_df_lig <- corr_df_lig - -# static df -# stat_df = corrplot_df_lig[, c("Log (OR)" -# , "MAF" -# , "-Log (P)" -# )] - -plot_title_lig <- "Correlation plots (ligand affinity)" - -# Checkbox Names -cCorrNames = c( "Odds Ratio" - , "Allele Frequency" - , "P-value" - , "Ligand affinity" - , "Nucleic Acid affinity" - , "ASA" - , "RSA" - , "RD" - , "KD" - , "Ligand Distance") - -# Checkbox Values (aka Column Names that are in corrplot_df_lig) -cCorrVals = c("Log (OR)" - , "MAF" - , "-Log (P)" - , "mCSM-lig" - , "mCSM-NA" - , "ASA" - , "RSA" - , "RD" - , "KD" - , dist_colname) - -# Pre-selected checkboxes -cCorrSelected = c("Log (OR)" - , "MAF" - , "-Log (P)") -#============ -# Define UI -#============ -u_corr_lig<- fluidPage( - headerPanel(plot_title_lig), - sidebarLayout(position = "left" - , sidebarPanel("Correlations: Filtered data data" - , numericInput(inputId = "lig_dist" - , label = "Ligand distance cutoff" - , value = dist_cutoff # 10 default from globals - , min = min_ang - , max = max_ang) - , checkboxGroupInput("variable", "Choose parameter:" - , choiceNames = cCorrNames - , choiceValues = cCorrVals - , selected = cCorrSelected - ) - # could be a fluid Row - , actionButton("add_col" , "Render") - , actionButton("reset_graph" , "Reset Graphs") - , actionButton("select_all" , "Select All") - - ) - - # output/display - , mainPanel(plotOutput(outputId = 'corrplot' - , height = "1000px" - , width = "1200px") - # , height = "800px" - # , width = "600px") - , textOutput("txt") - ) - ) -) - -#=============== -# Define server -#=============== -s_corr_lig <- shinyServer(function(input, output, session) - -{ - - #================ - # Initial render - #================ - output$corrplot <- renderPlot({ - - # get the user-specified lig_list - dist_cutoff_ini = input$lig_dist - - # subset data for plot - corrplot_df_lig_ini = corrplot_df_lig[corrplot_df_lig[[dist_colname]] < dist_cutoff_ini,] - - #--------------------- - # My correlation plot: initial plot - #--------------------- - c_plot <- my_corr_pairs( - #corr_data_all = corrplot_df_lig - corr_data_all = corrplot_df_lig_ini - , corr_cols = cCorrSelected - , dot_size = 2 - , ats = 1.5 - , corr_lab_size = length(cCorrNames)/length(cCorrSelected) * 1.3 - , corr_value_size = 1) - - }) - - #==================== - # Interactive render - #==================== - observeEvent( - input$add_col, { - - # get the user-specified lig_list - dist_cutoff_user = input$lig_dist - - # subset data for plot - corrplot_df_lig_s = corrplot_df_lig[corrplot_df_lig[[dist_colname]] < dist_cutoff_user,] - - # select cols for corrplot - corr_cols_s = c(input$variable) - - # render plot - if (length(c(input$variable)) >= 2) { - - output$corrplot <- renderPlot({ - - #--------------------- - # My correlation plot: user selects columns - #--------------------- - c_plot <- my_corr_pairs(corr_data_all = corrplot_df_lig_s - , corr_cols = corr_cols_s - , dot_size = 1.6 - , ats = 1.5 - , corr_lab_size = length(cCorrNames)/length(corr_cols_s) * 1.3 - , corr_value_size = 1) - }) - } else { output$txt = renderText({"Fuddu! It's a correlation plot. Select >=2 vars bewakoof!"})} - - }) - - #================================== - # Add button: Select All checkbox - #================================== - observeEvent( - input$select_all,{ - - updateCheckboxGroupInput(session, "variable", selected = cCorrVals) - } - ) - - #================ - # Reset render - #================ - observeEvent( - input$reset_graph,{ - - # reset checkboxes - updateCheckboxGroupInput(session, "variable", selected = cCorrSelected) - - # render plot - output$corrplot <- renderPlot({ - - #--------------------- - # My correlation plot: reset plot - #--------------------- - c_plot <- my_corr_pairs(corr_data_all = corrplot_df_lig - , corr_cols = cCorrSelected - , dot_size = 2 - , ats = 1.5 - , corr_lab_size = length(cCorrNames)/length(cCorrSelected) * 1.3 - , corr_value_size = 1) - - }) - } - ) -} -) - -shinyApp(ui = u_corr_lig, server = s_corr_lig) - diff --git a/scripts/plotting/plotting_thesis/corr_thesis_ref/ggcorr_all_PS_LIG.R b/scripts/plotting/plotting_thesis/corr_thesis_ref/ggcorr_all_PS_LIG.R deleted file mode 100644 index e7701e2..0000000 --- a/scripts/plotting/plotting_thesis/corr_thesis_ref/ggcorr_all_PS_LIG.R +++ /dev/null @@ -1,323 +0,0 @@ -#!/usr/bin/env Rscript -######################################################### -# TASK: Corr plots for PS and Lig - -# Output: 1 svg - -#======================================================================= -# working dir and loading libraries -getwd() -setwd("~/git/LSHTM_analysis/scripts/plotting/") -getwd() - -source("~/git/LSHTM_analysis/scripts/Header_TT.R") -require(cowplot) -source("combining_dfs_plotting.R") -#source("my_pairs_panel.R") -# should return the following dfs, directories and variables - -# FIXME: Can't output from here - -# PS combined: -# 1) merged_df2 -# 2) merged_df2_comp -# 3) merged_df3 -# 4) merged_df3_comp - -# LIG combined: -# 5) merged_df2_lig -# 6) merged_df2_comp_lig -# 7) merged_df3_lig -# 8) merged_df3_comp_lig - -# 9) my_df_u -# 10) my_df_u_lig - -cat(paste0("Directories imported:" - , "\ndatadir:", datadir - , "\nindir:", indir - , "\noutdir:", outdir - , "\nplotdir:", plotdir)) - -cat(paste0("Variables imported:" - , "\ndrug:", drug - , "\ngene:", gene - , "\ngene_match:", gene_match - , "\nAngstrom symbol:", angstroms_symbol - , "\nNo. of duplicated muts:", dup_muts_nu - , "\nNA count for ORs:", na_count - , "\nNA count in df2:", na_count_df2 - , "\nNA count in df3:", na_count_df3)) - -#======= -# output -#======= -# can't combine by cowplot because not ggplots -#corr_plot_combined = "corr_combined.svg" -#plot_corr_plot_combined = paste0(plotdir,"/", corr_plot_combined) - -# PS -#ggcorr_all_ps = "ggcorr_all_PS.svg" -ggcorr_all_ps = "ggcorr_all_PS.png" -plot_ggcorr_all_ps = paste0(plotdir,"/", ggcorr_all_ps) - -# LIG -#ggcorr_all_lig = "ggcorr_all_LIG.svg" -ggcorr_all_lig = "ggcorr_all_LIG.png" -plot_ggcorr_all_lig = paste0(plotdir,"/", ggcorr_all_lig ) - -# combined -ggcorr_all_combined_labelled = "ggcorr_all_combined_labelled.png" -plot_ggcorr_all_combined_labelled = paste0(plotdir,"/", ggcorr_all_combined_labelled) - -#################################################################### -# end of loading libraries and functions # -######################################################################## - -#%%%%%%%%%%%%%%%%%%%%%%%%% -#df_ps = merged_df3_comp -#df_lig = merged_df3_comp_lig -merged_df3 = as.data.frame(merged_df3) -df_ps = merged_df3 -df_lig = merged_df3_lig - -#%%%%%%%%%%%%%%%%%%%%%%%%% - -rm( merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig, my_df_u, my_df_u_lig) - -######################################################################## -# end of data extraction and cleaning for plots # -######################################################################## - - -#====================== -# adding log cols -#====================== -# subset data to generate pairwise correlations -cols_to_select = c("duet_scaled" - - , "foldx_scaled" - - , "log10_or_mychisq" - , "neglog_pval_fisher" - - #, "or_kin" - #, "neglog_pwald_kin" - - , "af" - - , "asa" - , "rsa" - , "kd_values" - , "rd_values" - - , "duet_outcome" - , drug) - -corr_data_ps = df_ps[, cols_to_select] - -dim(corr_data_ps) - -#p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic -#p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic - -# assign nice colnames (for display) -my_corr_colnames = c("DUET" - - , "Foldx" - - , "Log (OR)" - , "-Log (P)" - - #, "OR (adjusted)" - #, "-Log (P wald)" - - , "AF" - - , "ASA" - , "RSA" - , "KD" - , "RD" - - , "duet_outcome" - , drug) - -length(my_corr_colnames) - -colnames(corr_data_ps) -colnames(corr_data_ps) <- my_corr_colnames -colnames(corr_data_ps) - -#------------------------ -# Data for ggcorr PS plot -#------------------------ -start = 1 -end_ggcorr = which(colnames(corr_data_ps) == "duet_outcome"); end_ggcorr # should be the last column -offset = 1 - -my_ggcorr_ps = corr_data_ps[start:(end_ggcorr-1)] -head(my_ggcorr_ps) - -# correlation matrix -corr1 <- round(cor(my_ggcorr_ps, method = "spearman", use = "pairwise.complete.obs"), 1) - -# p-value matrix -pmat1 <- cor_pmat(my_ggcorr_ps, method = "spearman", use = "pairwise.complete.obs" - , conf.level = 0.99) - -corr2 = psych::corr.test(my_ggcorr_ps - , method = "spearman" - , use = "pairwise.complete.obs")$r -corr2 = round(corr2, 1) - -pmat2 = psych::corr.test(my_ggcorr_ps - , method = "spearman" - , adjust = "none" - , use = "pairwise.complete.obs")$p - -corr1== corr2 -pmat1==pmat2 - -#------------------------ -# Generate ggcorr PS plot -#------------------------ -cat("ggCorr plot PS:", plot_ggcorr_all_ps) -#png(filename = plot_ggcorr_all_ps, width = 1024, height = 768, units = "px", pointsize = 20) -ggcorr_ps = ggcorrplot(corr1 - , p.mat = pmat1 - , hc.order = TRUE - , outline.col = "black" - , ggtheme = ggplot2::theme_gray - , colors = c("#6D9EC1", "white", "#E46726") - , title = "DUET and Foldx stability") - - -ggcorr_ps -#dev.off() - -#=========================== -# Data for Correlation plots: LIG -#=========================== -table(df_lig$ligand_outcome) - -df_lig$log10_or_mychisq = log10(df_lig$or_mychisq) -df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher) - - -df_lig$log10_or_kin = log10(df_lig$or_kin) -df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin) - -# subset data to generate pairwise correlations -cols_to_select_lig = c("affinity_scaled" - - , "log10_or_mychisq" - , "neglog_pval_fisher" - - , "or_kin" - , "neglog_pwald_kin" - - , "af" - - , "asa" - , "rsa" - , "kd_values" - , "rd_values" - - , "ligand_outcome" - , drug) - -corr_data_lig = df_lig[, cols_to_select_lig] - -dim(corr_data_lig) - -# assign nice colnames (for display) -my_corr_colnames_lig = c("Ligand Affinity" - - , "Log (OR)" - , "-Log (P)" - - , "OR (adjusted)" - , "-Log(P wald)" - - , "AF" - - , "ASA" - , "RSA" - , "KD" - , "RD" - - , "ligand_outcome" - , drug) - -length(my_corr_colnames) - -colnames(corr_data_lig) -colnames(corr_data_lig) <- my_corr_colnames_lig -colnames(corr_data_lig) - -#------------------------ -# Data for ggcorr LIG plot -#------------------------ - -start = 1 -end_ggcorr_lig = which(colnames(corr_data_lig) == "ligand_outcome"); end_ggcorr_lig # should be the last column -offset = 1 - -my_ggcorr_lig = corr_data_lig[start:(end_ggcorr_lig-1)] -head(my_ggcorr_lig); str(my_ggcorr_lig) - -# correlation matrix -corr1_lig <- round(cor(my_ggcorr_lig, method = "spearman", use = "pairwise.complete.obs"), 1) - -# p-value matrix -pmat1_lig <- cor_pmat(my_ggcorr_lig, method = "spearman", use = "pairwise.complete.obs") - -corr2_lig = psych::corr.test(my_ggcorr_lig - , method = "spearman" - , use = "pairwise.complete.obs")$r - -corr2_lig = round(corr2_lig, 1) - -pmat2_lig = psych::corr.test(my_ggcorr_lig - , method = "spearman" - , adjust = "none" - , use = "pairwise.complete.obs")$p - -corr1_lig == corr2_lig -pmat1_lig == pmat2_lig - - -# for display order columns by hc order of ps - -#col_order = levels(ggcorr_ps$data[2]) - -#col_order <- c("Species", "Petal.Width", "Sepal.Length", - #"Sepal.Width", "Petal.Length") -#my_data2 <- my_data[, col_order] -#my_data2 - -#------------------------ -# Generate ggcorr LIG plot -#------------------------ -cat("ggCorr LIG plot:", plot_ggcorr_all_lig) -#svg(plot_ggcorr_all_lig, width = 15, height = 15) -#png(plot_ggcorr_all_lig, width = 1024, height = 768, units = "px", pointsize = 20) - -ggcorr_lig = ggcorrplot(corr1_lig - , p.mat = pmat1_lig - , hc.order = TRUE - , outline.col = "black" - - , ggtheme = ggplot2::theme_gray - , colors = c("#6D9EC1", "white", "#E46726") - , title = "Ligand affinty") - - -ggcorr_lig -#dev.off() - -####################################################### -#============================= -# combine plots for output -#============================= -+ \ No newline at end of file diff --git a/scripts/plotting/plotting_thesis/preformatting.R b/scripts/plotting/plotting_thesis/preformatting.R index f88afa8..74fbbf7 100644 --- a/scripts/plotting/plotting_thesis/preformatting.R +++ b/scripts/plotting/plotting_thesis/preformatting.R @@ -88,7 +88,7 @@ all_cols = c(common_cols #======= # output #======= -outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene)) +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") #################################### # merged_df3: NECESSARY pre-processing @@ -228,6 +228,15 @@ corr_lig_colnames = c("mCSM-lig" , drug) corr_ppi2_colnames = c("mCSM-PPI2" + , "SNAP2" + , "Log (OR)" + , "-Log (P)" + , "interface_dist" + , "dst_mode" + , drug) + + +corr_conservation = c("Consurf" , "MAF" , "Log (OR)" , "-Log (P)"