diff --git a/scripts/functions/corr_plot_df.R b/scripts/functions/corr_plot_df.R
new file mode 100644
index 0000000..37b6b49
--- /dev/null
+++ b/scripts/functions/corr_plot_df.R
@@ -0,0 +1,133 @@
+#' Extract, rename and order the columns used for correlation plots.
+#'
+#' Keeps the columns of `corr_plot_df` named in `colnames_to_extract`,
+#' renames them with `colnames_display_key`, and moves the drug column
+#' (when it is among the kept columns) to the last position.
+#'
+#' @param corr_plot_df Data frame to extract columns from.
+#' @param gene_name Gene name; only printed in the progress message.
+#'   Defaults to the variable `gene` visible from this function.
+#' @param drug_name Name of the drug column. Defaults to the variable
+#'   `drug` visible from this function.
+#' @param colnames_to_extract Character vector of column names to keep.
+#'   When missing, a built-in default set (including `drug_name`) is used.
+#' @param colnames_display_key Named character vector mapping column names
+#'   to display names, e.g. c(duet_stability_change = "DUET"). When
+#'   missing, a built-in default key is used.
+#' @param extract_scaled_cols If TRUE, also keep every column of
+#'   `corr_plot_df` whose name contains "scaled".
+#' @return The extracted data frame, columns renamed, drug column last.
+corr_data_extract <- function(corr_plot_df,
+                              gene_name = gene,
+                              drug_name = drug,
+                              colnames_to_extract,
+                              colnames_display_key,
+                              extract_scaled_cols = FALSE) {
+  # Each optional argument falls back to its own default; previously the
+  # function returned NULL whenever both were supplied.
+  if (missing(colnames_to_extract)) {
+    cat("\n==========================================",
+        "\nCORR PLOTS data: ALL params",
+        "\n=========================================")
+
+    cat("\nExtracting default columns for",
+        "\nGene name:", gene_name,
+        "\nDrug name:", drug_name)
+
+    colnames_to_extract <- c(
+      drug_name,
+      "mutation_info_labels",
+      "duet_stability_change",
+      "ligand_affinity_change",
+      "ligand_distance",
+      "ddg_foldx",
+      "deepddg",
+      "asa",
+      "rsa",
+      "kd_values",
+      "rd_values",
+      "af",
+      "log10_or_mychisq",
+      "neglog_pval_fisher",
+      "ddg_dynamut2",
+      "consurf_score",
+      "snap2_score",
+      "ddg_dynamut",
+      "ddg_encom",
+      "dds_encom",
+      "ddg_mcsm",
+      "ddg_sdm",
+      "ddg_duet",
+      "mcsm_na_affinity",
+      "mcsm_ppi2_affinity"
+    )
+  }
+
+  if (missing(colnames_display_key)) {
+    colnames_display_key <- c(
+      duet_stability_change = "DUET",
+      ligand_affinity_change = "mCSM-lig",
+      ligand_distance = "ligand_distance",
+      ddg_foldx = "FoldX",
+      deepddg = "DeepDDG",
+      asa = "ASA",
+      rsa = "RSA",
+      kd_values = "KD",
+      rd_values = "RD",
+      af = "MAF",
+      log10_or_mychisq = "Log (OR)",
+      neglog_pval_fisher = "-Log (P)",
+      ddg_dynamut2 = "Dynamut2",
+      consurf_score = "Consurf",
+      snap2_score = "SNAP2",
+      ddg_dynamut = "Dynamut",
+      ddg_encom = "ENCoM-DDG",
+      ddg_mcsm = "mCSM",
+      ddg_sdm = "SDM",
+      ddg_duet = "DUET-d",
+      dds_encom = "ENCoM-DDS",
+      mcsm_na_affinity = "mCSM-NA",
+      mcsm_ppi2_affinity = "mCSM-PPI2"
+    )
+  }
+
+  # [optional] arg: extract_scaled_cols. Look the scaled columns up in the
+  # data frame that was passed in, not in a global data frame.
+  if (extract_scaled_cols) {
+    cat("\nExtracting scaled columns as well...\n")
+    scaled_cols <- grep("scaled", colnames(corr_plot_df), value = TRUE)
+    colnames_to_extract <- union(colnames_to_extract, scaled_cols)
+  }
+
+  # A requested drug column that is absent is an error; a drug column that
+  # was not requested is simply not moved.
+  move_drug <- drug_name %in% colnames_to_extract
+  if (move_drug && !(drug_name %in% colnames(corr_plot_df))) {
+    stop("Drug column '", drug_name, "' not found in corr_plot_df",
+         call. = FALSE)
+  }
+
+  # drop = FALSE keeps a data frame even when a single column matches.
+  keep <- colnames(corr_plot_df) %in% colnames_to_extract
+  corr_df <- corr_plot_df[, keep, drop = FALSE]
+
+  # Move the drug column to the end before renaming, so it is still
+  # addressable by drug_name.
+  if (move_drug) {
+    corr_df <- dplyr::relocate(corr_df,
+                               dplyr::all_of(drug_name),
+                               .after = dplyr::last_col())
+  }
+
+  # plyr::rename() leaves columns that are absent from the key untouched,
+  # whereas names(df) <- key[names(df)] would silently rename them to NA.
+  corr_df <- plyr::rename(corr_df,
+                          replace = colnames_display_key,
+                          warn_missing = TRUE,
+                          warn_duplicated = TRUE)
+
+  cat("\nExtracted ncols:", ncol(corr_df),
+      "\nRenaming successful\n")
+
+  corr_df
+}
diff --git a/scripts/functions/tests/test_corr_plot_df.R b/scripts/functions/tests/test_corr_plot_df.R
new file mode 100644
index 0000000..4376a58
--- /dev/null
+++ b/scripts/functions/tests/test_corr_plot_df.R
@@ -0,0 +1,10 @@
+#!/usr/bin/env Rscript
+source("~/git/LSHTM_analysis/config/gid.R")
+source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
+
+m3 <- corr_data_extract(merged_df3)
+head(m3)
+m2 <- corr_data_extract(merged_df2)
+head(m2)
+m3S <- corr_data_extract(merged_df3, extract_scaled_cols = TRUE)
+head(m3S)