diff --git a/scripts/Header_TT.R b/scripts/Header_TT.R index 8d64042..7b76e3e 100755 --- a/scripts/Header_TT.R +++ b/scripts/Header_TT.R @@ -223,20 +223,20 @@ consurf_palette2 = c("0" = "yellow2" , "9" = "darkorchid4") -consurf_colours = c(#"0" = rgb(1.00,1.00,0.59) - "nsd" = rgb(1.00,1.00,0.59) - , "1" = rgb(0.63,0.16,0.37) - , "2" = rgb(0.94,0.49,0.67) - , "3" = rgb(0.98,0.78,0.86) - , "4" = rgb(0.98,0.92,0.96) - , "5" = rgb(1.00,1.00,1.00) - , "6" = rgb(0.84,0.94,0.94) - , "7" = rgb(0.65,0.86,0.90) - , "8" = rgb(0.29,0.69,0.75) - , "9" = rgb(0.04,0.49,0.51) - ) +consurf_colours = c( + "nsd" = rgb(1.00,1.00,0.59) + , "1" = rgb(0.63,0.16,0.37) + , "2" = rgb(0.94,0.49,0.67) + , "3" = rgb(0.98,0.78,0.86) + , "4" = rgb(0.98,0.92,0.96) + , "5" = rgb(1.00,1.00,1.00) + , "6" = rgb(0.84,0.94,0.94) + , "7" = rgb(0.65,0.86,0.90) + , "8" = rgb(0.29,0.69,0.75) + , "9" = rgb(0.04,0.49,0.51) + ) -consurf_bp_colours = c(#"0" = rgb(1.00,1.00,0.59) +consurf_bp_colours = c( "0" = rgb(1.00,1.00,0.59) , "1" = rgb(0.63,0.16,0.37) , "2" = rgb(0.94,0.49,0.67) diff --git a/scripts/functions/corr_plot_data.R b/scripts/functions/corr_plot_data.R index c456950..29399c2 100644 --- a/scripts/functions/corr_plot_data.R +++ b/scripts/functions/corr_plot_data.R @@ -22,9 +22,12 @@ #1) Corr type? #2) ################################################################## +# LigDist_colname #from globals: plotting_globals.R +# ppi2Dist_colname #from globals: plotting_globals.R +# naDist_colname #from globals: plotting_globals.R corr_data_extract <- function(df - #, gene_name = gene - , drug_name = drug + , gene + , drug #, ligand_dist_colname = LigDist_colname , colnames_to_extract , colnames_display_key @@ -38,34 +41,48 @@ corr_data_extract <- function(df , "\n=========================================") cat("\nExtracting default columns for" - #, "\nGene name:", gene + , "\nGene name:", gene , "\nDrug name:", drug) - colnames_to_extract = c(drug - #, "mutationinformation" - #, "mutation_info_labels" - , "dst_mode" - , "duet_stability_change" - , "ligand_affinity_change" - , "ligand_distance" - #, ligand_dist_colname - , "interface_dist" - , "ddg_foldx" - , "deepddg" - , "asa" - , "rsa" - , "kd_values" - , "rd_values" - , "af" - , "log10_or_mychisq" - , "neglog_pval_fisher" - , "ddg_dynamut2" - , "consurf_score" - , "snap2_score" - , "ddg_dynamut", "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet" - , "mcsm_na_affinity" - , "mcsm_ppi2_affinity" - ) + geneL_normal = c("pnca") + geneL_na = c("gid", "rpob") + geneL_ppi2 = c("alr", "embb", "katg", "rpob") + + common_colnames = c(drug, "dst_mode" + , "duet_stability_change" , "ddg_foldx" , "deepddg" , "ddg_dynamut2" + , "asa" , "rsa" , "kd_values" , "rd_values" + , "maf" , "log10_or_mychisq" , "neglog_pval_fisher" + , LigDist_colname + , "consurf_score" , "snap2_score" , "provean_score" + , "ligand_affinity_change" + #, "ddg_dynamut", "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet" + ) + + display_common_colnames = c( drug, "dst_mode" + , "DUET" , "FoldX" , "DeepDDG", "Dynamut2" + , "ASA" , "RSA" , "KD" , "RD" + , "MAF" , "Log(OR)" , "-Log(P)" + , "Lig-Dist" + , "ConSurf" , "SNAP2" , "PROVEAN" + , "mCSM-lig" + # , "Dynamut" , "ENCoM-DDG" , "mCSM" , "SDM" , "DUET-d" , "ENCoM-DDS" + ) + + if (tolower(gene)%in%geneL_normal){ + colnames_to_extract = c(common_colnames) + display_colnames = c(display_common_colnames) + + } + + if (tolower(gene)%in%geneL_ppi2){ + colnames_to_extract = c(common_colnames ,"mcsm_ppi2_affinity", ppi2Dist_colname) + display_colnames = c(display_common_colnames,"mCSM-PPI2" , "PPI-Dist") + } + + if (tolower(gene)%in%geneL_na){ + colnames_to_extract = c(common_colnames,"mcsm_na_affinity", naDist_colname) + display_colnames = c(display_common_colnames, "mCSM-NA", "NA-Dist") + } # [optional] arg: extract_scaled_cols if (extract_scaled_cols){ @@ -77,46 +94,15 @@ corr_data_extract <- function(df colnames_to_extract = colnames_to_extract } - corr_df = df[, colnames(df)%in%colnames_to_extract] - + # extract df based on gene + corr_df = df[,colnames_to_extract] + colnames(corr_df) + display_colnames + # arg: colnames_display_key - colnames_display_key = c(duet_stability_change = "DUET" - , ligand_affinity_change = "mCSM-lig" - , ligand_distance = "ligand_distance" - #, ligand_dist_colname = "ligand_distance" - , interface_dist = "interface_dist" - , ddg_foldx = "FoldX" - , deepddg = "DeepDDG" - , asa = "ASA" - , rsa = "RSA" - , kd_values = "KD" - , rd_values = "RD" - , af = "MAF" - , log10_or_mychisq = "Log (OR)" - , neglog_pval_fisher = "-Log (P)" - , ddg_dynamut2 = "Dynamut2" - , consurf_score = "Consurf" - , snap2_score = "SNAP2" - , ddg_dynamut = "Dynamut" - , ddg_encom = "ENCoM-DDG" - , ddg_mcsm = "mCSM" - , ddg_sdm = "SDM" - , ddg_duet = "DUET-d" - , dds_encom = "ENCoM-DDS" - , mcsm_na_affinity = "mCSM-NA" - , mcsm_ppi2_affinity = "mCSM-PPI2") - - # COMMENT: This only works when all the columns are in the namekey vector. - # If one is missing, there is no error, but it also renamed as "NA. - #names(corr_df) <- colnames_display_key[names(corr_df)] - - # Solution: to use plyr::rename() - # Consider using requireNamespace() instead of library() so its function names doesn't collide with dplyr's. - corr_df = plyr::rename(corr_df - , replace = colnames_display_key - , warn_missing = T - , warn_duplicated = T) - + colnames(corr_df)[colnames(corr_df)%in%colnames_to_extract] <- display_colnames + colnames(corr_df) + cat("\nExtracted ncols:", ncol(corr_df) ,"\nRenaming successful") diff --git a/scripts/functions/plotting_globals.R b/scripts/functions/plotting_globals.R index 0dc1a78..75c8944 100644 --- a/scripts/functions/plotting_globals.R +++ b/scripts/functions/plotting_globals.R @@ -39,9 +39,9 @@ resistance_col <<- "drtype" LigDist_colname <<- "ligand_distance" LigDist_cutoff <<- 10 -DistCutOff = 10 -ppi2Dist_colname = "interface_dist" -naDist_colname = "TBC" +DistCutOff <<- 10 +ppi2Dist_colname <<- "interface_dist" +naDist_colname <<- "TBC" #================== # Angstroms symbol diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R index 0f7d6d4..2b8c6ba 100644 --- a/scripts/plotting/get_plotting_dfs.R +++ b/scripts/plotting/get_plotting_dfs.R @@ -186,10 +186,16 @@ cat(s3) # make sure the above script works because merged_df2_combined is needed merged_df3 = as.data.frame(merged_df3) -corr_df_m3_f = corr_data_extract(merged_df3, extract_scaled_cols = F) +corr_df_m3_f = corr_data_extract(merged_df3 + , gene = gene + , drug = drug + , extract_scaled_cols = F) head(corr_df_m3_f) -corr_df_m2_f = corr_data_extract(merged_df2, extract_scaled_cols = F) +corr_df_m2_f = corr_data_extract(merged_df2 + , gene = gene + , drug = drug + , extract_scaled_cols = F) head(corr_df_m2_f) s4 = c("\nSuccessfully sourced Corr_data.R") diff --git a/scripts/plotting/plotting_thesis/corr_plots_thesis.R b/scripts/plotting/plotting_thesis/corr_plots_thesis.R index c282341..fef87cd 100644 --- a/scripts/plotting/plotting_thesis/corr_plots_thesis.R +++ b/scripts/plotting/plotting_thesis/corr_plots_thesis.R @@ -1,5 +1,9 @@ merged_df3 = as.data.frame(merged_df3) -corr_plotdf = corr_data_extract(merged_df3, extract_scaled_cols = F) +corr_plotdf = corr_data_extract(merged_df3 + , gene = gene + , drug = drug + , extract_scaled_cols = F) +colnames(corr_plotdf) #================ # stability @@ -9,12 +13,13 @@ corr_ps_colnames = c("DUET" , "DeepDDG" , "Dynamut2" , "MAF" - , "Log (OR)" - , "-Log (P)" + , "Log(OR)" + , "-Log(P)" #, "ligand_distance" , "dst_mode" , drug) +corr_ps_colnames%in%colnames(corr_plotdf) corr_df_ps = corr_plotdf[, corr_ps_colnames] color_coln = which(colnames(corr_df_ps) == "dst_mode") @@ -46,10 +51,10 @@ my_corr_pairs(corr_data_all = corr_df_ps dev.off() ##################################################### -DistCutOff = 10 -LigDist_colname # = "ligand_distance" # from globals -ppi2Dist_colname = "interface_dist" -naDist_colname = "TBC" +#DistCutOff = 10 +#LigDist_colname # = "ligand_distance" # from globals +#ppi2Dist_colname = "interface_dist" +#naDist_colname = "TBC" ##################################################### #================ @@ -57,14 +62,15 @@ naDist_colname = "TBC" #================ corr_lig_colnames = c("mCSM-lig" , "MAF" - , "Log (OR)" - , "-Log (P)" - , "ligand_distance" + , "Log(OR)" + , "-Log(P)" + , "Lig-Dist" , "dst_mode" , drug) +corr_lig_colnames%in%colnames(corr_plotdf) corr_df_lig = corr_plotdf[, corr_lig_colnames] -corr_df_lig = corr_df_lig[corr_df_lig[[LigDist_colname]]