diff --git a/scripts/functions/corr_plot_data.R b/scripts/functions/corr_plot_data.R index b23b90e..970910f 100644 --- a/scripts/functions/corr_plot_data.R +++ b/scripts/functions/corr_plot_data.R @@ -74,17 +74,23 @@ corr_data_extract <- function(df if (tolower(gene)%in%geneL_normal){ colnames_to_extract = c(common_colnames) display_colnames = c(display_common_colnames) + corr_df = df[,colnames_to_extract] + colnames(corr_df) = display_colnames } if (tolower(gene)%in%geneL_ppi2){ colnames_to_extract = c(common_colnames ,"mcsm_ppi2_affinity", ppi2Dist_colname) display_colnames = c(display_common_colnames,"mCSM-PPI2" , "PPI-Dist") + corr_df = df[,colnames_to_extract] + colnames(corr_df) = display_colnames } if (tolower(gene)%in%geneL_na){ colnames_to_extract = c(common_colnames,"mcsm_na_affinity", naDist_colname) display_colnames = c(display_common_colnames, "mCSM-NA", "NA-Dist") + corr_df = df[,colnames_to_extract] + colnames(corr_df) = display_colnames } # [optional] arg: extract_scaled_cols @@ -92,19 +98,23 @@ corr_data_extract <- function(df cat("\nExtracting scaled columns as well...\n") all_scaled_cols = colnames(merged_df3)[grep(".*scaled", colnames(merged_df3))] colnames_to_extract = c(colnames_to_extract, all_scaled_cols) - + corr_df = df[,colnames_to_extract] + colnames(corr_df) = display_colnames }else{ colnames_to_extract = colnames_to_extract + corr_df = df[,colnames_to_extract] + colnames(corr_df) = display_colnames } - # extract df based on gene - corr_df = df[,colnames_to_extract] - colnames(corr_df) - display_colnames - - # arg: colnames_display_key - colnames(corr_df)[colnames(corr_df)%in%colnames_to_extract] <- display_colnames - colnames(corr_df) + # WORKED: + # # extract df based on gene + # corr_df = df[,colnames_to_extract] + # colnames(corr_df) + # display_colnames + # + # # arg: colnames_display_key + # colnames(corr_df)[colnames(corr_df)%in%colnames_to_extract] <- display_colnames + # colnames(corr_df) cat("\nExtracted ncols:", ncol(corr_df) ,"\nRenaming successful") diff --git a/scripts/functions/dm_om_data.R b/scripts/functions/dm_om_data.R index 1a3a526..c9f5333 100644 --- a/scripts/functions/dm_om_data.R +++ b/scripts/functions/dm_om_data.R @@ -8,11 +8,11 @@ ################################################################## # from plotting_globals.R # DistCutOff, LigDist_colname, ppi2Dist_colname, naDist_colname +gene dm_om_wf_lf_data <- function(df , gene # from globals , colnames_to_extract - #, ligand_dist_colname = LigDist_colname # from globals #, LigDist_colname # from globals used #, ppi2Dist_colname #from globals used #, naDist_colname #from globals used @@ -21,13 +21,13 @@ dm_om_wf_lf_data <- function(df , snp_colname = "mutationinformation" , aa_pos_colname = "position" # to sort df by , mut_colname = "mutation" - , mut_info_colname = "mutation_info" - , mut_info_label_colname = "mutation_info_labels" # if empty, below used - #, dr_other_muts_labels = c("DM", "OM") # only used if ^^ = "" + , mut_info_colname = "dst_mode" + , mut_info_label_colname = "mutation_info_labels" , categ_cols_to_factor){ df = as.data.frame(df) - df$maf = log10(df$maf) # can't see otherwise + df$maf2 = log10(df$maf) # can't see otherwise + sum(is.na(df$maf2)) # Initialise the required dfs based on gene name geneL_normal = c("pnca") @@ -50,6 +50,8 @@ dm_om_wf_lf_data <- function(df , lf_consurf = data.frame() , wf_snap2 = data.frame() , lf_snap2 = data.frame() + , wf_dist_gen = data.frame() # NEW + , lf_dist_gen = data.frame() # NEW ) # additional dfs @@ -76,132 +78,170 @@ dm_om_wf_lf_data <- function(df , length(wf_lf_dataL)) #======================================================================= - if (missing(colnames_to_extract)){ + # display names + stability_suffix <- paste0(delta_symbol, delta_symbol, "G") - colnames_to_extract = c(snp_colname - , mut_colname, mut_info_colname, mut_info_label_colname - , aa_pos_colname - , LigDist_colname # from globals - , ppi2Dist_colname # from globals - , naDist_colname # from globals - , "duet_stability_change" , "duet_scaled" , "duet_outcome" - , "ligand_affinity_change", "affinity_scaled" , "ligand_outcome" - , "ddg_foldx" , "foldx_scaled" , "foldx_outcome" - , "deepddg" , "deepddg_scaled" , "deepddg_outcome" - , "asa" , "rsa" - , "rd_values" , "kd_values" - , "log10_or_mychisq" , "neglog_pval_fisher" , "maf" #"af" - , "ddg_dynamut2" , "ddg_dynamut2_scaled", "ddg_dynamut2_outcome" - , "mcsm_ppi2_affinity" , "mcsm_ppi2_scaled" , "mcsm_ppi2_outcome" - , "consurf_score" , "consurf_scaled" , "consurf_outcome" # exists now - , "consurf_colour_rev" - , "snap2_score" , "snap2_scaled" , "snap2_outcome" - , "mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome" - , "provean_score" , "provean_scaled" , "provean_outcome") - - }else{ - colnames_to_extract = c(mut_colname, mut_info_colname, mut_info_label_colname - , aa_pos_colname, LigDist_colname - , colnames_to_extract) - } - comb_df = df[, colnames(df)%in%colnames_to_extract] - comb_df_s = dplyr::arrange(comb_df, aa_pos_colname) + duet_dn = paste0("DUET ", stability_suffix); duet_dn + foldx_dn = paste0("FoldX ", stability_suffix); foldx_dn + deepddg_dn = paste0("Deepddg " , stability_suffix); deepddg_dn + dynamut2_dn = paste0("Dynamut2 " , stability_suffix); dynamut2_dn -#======================================================================= - if(missing(categ_cols_to_factor)){ - categ_cols_to_factor = grep( "_outcome|_info", colnames(comb_df_s) ) - }else{ - categ_cols_to_factor = categ_cols_to_factor - } - #fact_cols = colnames(comb_df_s)[grepl( "_outcome|_info", colnames(comb_df_s) )] - fact_cols = colnames(comb_df_s)[categ_cols_to_factor] + consurf_dn = "ConSurf" + snap2_dn = "SNAP2" + provean_dn = "PROVEAN" + + or_dn = "Log10(OR)" + pval_dn = "-Log10(P)" + maf2_dn = "Log10(MAF)" + + asa_dn = "ASA" + rsa_dn = "RSA" + rd_dn = "RD" + kd_dn = "KD" + + lig_dist_dn = paste0("Lig Dist(", angstroms_symbol, ")"); lig_dist_dn + mcsm_lig_dn = paste0("mCSM-lig"); mcsm_lig_dn + mmcsm_lig_dn2 = paste0("mmCSM-lig"); mmcsm_lig_dn2 + + + na_dist_dn = paste0("NA Dist(", angstroms_symbol, ")"); na_dist_dn + mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn + + ppi2_dist_dn = paste0("PPI Dist(", angstroms_symbol, ")"); ppi2_dist_dn + mcsm_ppi2_dn = paste0("mCSM-PPI2 ", stability_suffix); mcsm_ppi2_dn - if (any(lapply(comb_df_s[, fact_cols], class) == "character")){ + #======================================================================= + if(missing(categ_cols_to_factor)){ + categ_cols_to_factor = grep( "_outcome|_info", colnames(df) ) + }else{ + categ_cols_to_factor = categ_cols_to_factor + } + #fact_cols = colnames(comb_df_s)[grepl( "_outcome|_info", colnames(comb_df_s) )] + fact_cols = colnames(df)[categ_cols_to_factor] + + if (any(lapply(df[, fact_cols], class) == "character")){ cat("\nChanging", length(categ_cols_to_factor), "cols to factor") - comb_df_s[, fact_cols] <- lapply(comb_df_s[, fact_cols], as.factor) - if (all(lapply(comb_df_s[, fact_cols], class) == "factor")){ + df[, fact_cols] <- lapply(df[, fact_cols], as.factor) + if (all(lapply(df[, fact_cols], class) == "factor")){ cat("\nSuccessful: cols changed to factor") } }else{ cat("\nRequested cols aready factors") } -#======================================================================= -table(comb_df_s[[mut_info_colname]]) + + cat("\ncols changed to factor are:\n", colnames(df)[categ_cols_to_factor] ) -# pretty display names i.e. labels to reduce major code duplication later -foo_cnames = data.frame(colnames(comb_df_s)) -names(foo_cnames) <- "old_name" + #======================================================================= + if (missing(colnames_to_extract)){ + # NOTE: these vars are from globals + #LigDist_colname, ppi2Dist_colname, naDist_colname + + common_colnames = c(snp_colname + , mut_colname , "dst_mode" , mut_info_label_colname + , aa_pos_colname + + , "duet_stability_change" , "duet_scaled" , "duet_outcome" + , "ddg_foldx" , "foldx_scaled" , "foldx_outcome" + , "deepddg" , "deepddg_scaled" , "deepddg_outcome" + , "ddg_dynamut2" , "ddg_dynamut2_scaled" , "ddg_dynamut2_outcome" + + , "consurf_score" , "consurf_scaled" , "consurf_outcome" , "consurf_colour_rev" + , "snap2_score" , "snap2_scaled" , "snap2_outcome" + , "provean_score" , "provean_scaled" , "provean_outcome" + + , "log10_or_mychisq" , "neglog_pval_fisher" , "maf2" + , "asa" , "rsa" , "rd_values" , "kd_values" + + , "mmcsm_lig" , "mmcsm_lig_scaled" , "mmcsm_lig_outcome" + , "ligand_affinity_change", "affinity_scaled" , "ligand_outcome" , LigDist_colname + ) -stability_suffix <- paste0(delta_symbol, delta_symbol, "G") -#flexibility_suffix <- paste0(delta_symbol, delta_symbol, "S") + display_common_colnames = c(snp_colname + , mut_colname , "dst_mode" , mut_info_label_colname + , aa_pos_colname + + , "duet_stability_change" , duet_dn , "duet_outcome" + , "ddg_foldx" , foldx_dn , "foldx_outcome" + , "deepddg" , deepddg_dn , "deepddg_outcome" + , "ddg_dynamut2" , dynamut2_dn , "ddg_dynamut2_outcome" + , consurf_dn , "consurf_scaled" , "consurf_outcome" , "consurf_colour_rev" + , snap2_dn , "snap2_scaled" , "snap2_outcome" + , provean_dn , "provean_scaled" , "provean_outcome" + + , or_dn , pval_dn , maf2_dn + , asa_dn , rsa_dn , rd_dn , kd_dn + + , "mmcsm_lig" , mmcsm_lig_dn2 , "mmcsm_lig_outcome" + , "ligand_affinity_change", mcsm_lig_dn , "ligand_outcome" , lig_dist_dn + ) + + if (length(common_colnames) == length(display_common_colnames)){ + cat("\nLength match: Proceeding to extracting end cols") + }else{ + stop("Abort: Length mismatch: b/w ncols to extract and disply name") + } + + # ordering is important! + # static_cols_end = c(lig_dist_dn + # , "ASA" + # , "RSA" + # , "RD" + # , "KD" + # , "Log10(MAF)" + # #, "Log10(OR)" + # #, "-Log(P)" + # ) + static_cols_end_common = c(lig_dist_dn, "Log10(MAF)"); static_cols_end_common + + if (tolower(gene)%in%geneL_normal){ + colnames_to_extract = c(common_colnames) + display_colnames = c(display_common_colnames) + comb_df_sl = df[, colnames_to_extract] + + # Rename cols: display names + colnames(comb_df_sl) = display_colnames + #colnames(comb_df)[colnames(comb_df)%in%colnames_to_extract] <- display_colnames -#lig_dn = paste0("Ligand distance (", angstroms_symbol, ")"); lig_dn -#mcsm_lig_dn = paste0("Ligand affinity (log fold change)"); mcsm_lig_dn - -lig_dn = paste0("Lig Dist(", angstroms_symbol, ")"); lig_dn -mcsm_lig_dn = paste0("mCSM-lig"); mcsm_lig_dn - -duet_dn = paste0("DUET ", stability_suffix); duet_dn -foldx_dn = paste0("FoldX ", stability_suffix); foldx_dn -deepddg_dn = paste0("Deepddg " , stability_suffix); deepddg_dn -dynamut2_dn = paste0("Dynamut2 " , stability_suffix); dynamut2_dn - -mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn -mcsm_ppi2_dn = paste0("mCSM-PPI2 ", stability_suffix); mcsm_ppi2_dn -consurf_dn = paste0("ConSurf"); consurf_dn -snap2_dn = paste0("SNAP2"); snap2_dn -provean_dn = paste0("PROVEAN"); provean_dn - -# change column names: plyr -new_colnames = c(asa = "ASA" - , rsa = "RSA" - , rd_values = "RD" - , kd_values = "KD" - #, log10_or_mychisq = "Log10(OR)" - #, neglog_pval_fisher = "-Log(P)" - #, af = "MAF" - , maf = "Log10(MAF)" - #, ligand_dist_colname= lig_dn # cannot handle variable name 'ligand_dist_colname' - , affinity_scaled = mcsm_lig_dn - , duet_scaled = duet_dn - , foldx_scaled = foldx_dn - , deepddg_scaled = deepddg_dn - , ddg_dynamut2_scaled = dynamut2_dn - , mcsm_na_scaled = mcsm_na_dn - , mcsm_ppi2_scaled = mcsm_ppi2_dn - #, consurf_scaled = consurf_dn - , consurf_score = consurf_dn - #, consurf_colour_rev = consurf_dn - #, snap2_scaled = snap2_dn - , snap2_score = snap2_dn - , provean_score = provean_dn) - - -comb_df_sl1 = plyr::rename(comb_df_s - , replace = new_colnames - , warn_missing = T - , warn_duplicated = T) - -# renaming colname using variable i.e ligand_dist_colname: dplyr -#comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(ligand_dist_colname)) -comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(LigDist_colname)) # NEW -names(comb_df_sl) - -#======================= -# NEW: Affinity filtered data -#======================== -# mcsm-lig --> LigDist_colname -comb_df_sl_lig = comb_df_sl[comb_df_sl[[lig_dn]] ppi2Dist_colname -comb_df_sl_ppi2 = comb_df_sl[comb_df_sl[[ppi2Dist_colname]] naDist_colname -comb_df_sl_na = comb_df_sl[comb_df_sl[[naDist_colname]] ppi2Dist_colname + comb_df_sl_ppi2 = comb_df_sl[comb_df_sl[[ppi2_dist_dn]] naDist_colname + comb_df_sl_na = comb_df_sl[comb_df_sl[[na_dist_dn]] LigDist_colname + comb_df_sl_lig = comb_df_sl[comb_df_sl[[lig_dist_dn]] 2) { + if (length(levels(lf_df[[colour_categ]])) > 2) { lf_bp_colours = consurf_bp_colours } else { - lf_bp_colours = hue_pal()(2) + #lf_bp_colours = hue_pal()(2) + lf_bp_colours = dot_colours + } } @@ -58,7 +62,6 @@ lf_bp2 <- function(lf_df = lf_duet ggplot(lf_df, aes_string(x = x_grp, y = y_var)) + - facet_wrap(fwv , nrow = n_facet_row , scales = y_scales) + @@ -67,6 +70,7 @@ lf_bp2 <- function(lf_df = lf_duet geom_violin(trim = T , scale = "width" + , colour = "black" #, position = position_dodge(width = 0.9) , draw_quantiles = violin_quantiles) + diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R index cd7c0a4..ea59f22 100644 --- a/scripts/plotting/get_plotting_dfs.R +++ b/scripts/plotting/get_plotting_dfs.R @@ -118,39 +118,43 @@ geneL_na = c("gid", "rpob") geneL_ppi2 = c("alr", "embb", "katg", "rpob") all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene) - + wf_duet = all_dm_om_df[['wf_duet']] lf_duet = all_dm_om_df[['lf_duet']] - + wf_mcsm_lig = all_dm_om_df[['wf_mcsm_lig']] lf_mcsm_lig = all_dm_om_df[['lf_mcsm_lig']] - + wf_foldx = all_dm_om_df[['wf_foldx']] lf_foldx = all_dm_om_df[['lf_foldx']] - + wf_deepddg = all_dm_om_df[['wf_deepddg']] lf_deepddg = all_dm_om_df[['lf_deepddg']] - + wf_dynamut2 = all_dm_om_df[['wf_dynamut2']] lf_dynamut2 = all_dm_om_df[['lf_dynamut2']] - + wf_consurf = all_dm_om_df[['wf_consurf']] lf_consurf = all_dm_om_df[['lf_consurf']] - + wf_snap2 = all_dm_om_df[['wf_snap2']] lf_snap2 = all_dm_om_df[['lf_snap2']] - + wf_provean = all_dm_om_df[['wf_provean']] lf_provean = all_dm_om_df[['lf_provean']] +# NEW +wf_dist_gen = all_dm_om_df[['wf_dist_gen']] +lf_dist_gen = all_dm_om_df[['lf_dist_gen']] + if (tolower(gene)%in%geneL_na){ - wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']] - lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']] + wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']] + lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']] } - + if (tolower(gene)%in%geneL_ppi2){ - wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']] - lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']] + wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']] + lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']] } s2 = c("\nSuccessfully sourced other_plots_data.R") diff --git a/scripts/plotting/plotting_thesis/corr_plots_thesis.R b/scripts/plotting/plotting_thesis/corr_plots_thesis.R index 65f0217..1470076 100644 --- a/scripts/plotting/plotting_thesis/corr_plots_thesis.R +++ b/scripts/plotting/plotting_thesis/corr_plots_thesis.R @@ -24,7 +24,7 @@ corr_plotdf = corr_data_extract(merged_df3 , extract_scaled_cols = F) colnames(corr_plotdf) colnames(corr_df_m3_f) -corr_plotdf = corr_df_m3_f # for downstream code +corr_plotdf = corr_df_m3_f #for downstream code #================ # stability #================ @@ -61,7 +61,7 @@ svg(corr_psP, width = 15, height = 15) my_corr_pairs(corr_data_all = corr_df_ps , corr_cols = colnames(corr_df_ps[1:corr_end]) - , corr_method = "spearman" # other options: "pearson" or "kendall" + , corr_method = "spearman" , colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode" , categ_colour = c("red", "blue") , density_show = F @@ -113,7 +113,7 @@ svg(corr_ligP, width = 10, height = 10) my_corr_pairs(corr_data_all = corr_df_lig , corr_cols = colnames(corr_df_lig[1:corr_end]) - , corr_method = "spearman" # other options: "pearson" or "kendall" + , corr_method = "spearman" , colour_categ_col = colnames(corr_df_lig[color_coln]) #"dst_mode" , categ_colour = c("red", "blue") , density_show = F @@ -158,7 +158,7 @@ svg(corr_ppi2P, width = 10, height = 10) my_corr_pairs(corr_data_all = corr_df_ppi2 , corr_cols = colnames(corr_df_ppi2[1:corr_end]) - , corr_method = "spearman" # other options: "pearson" or "kendall" + , corr_method = "spearman" , colour_categ_col = colnames(corr_df_ppi2[color_coln]) #"dst_mode" , categ_colour = c("red", "blue") , density_show = F @@ -181,7 +181,7 @@ corr_na_colnames = c("mCSM-NA" , "MAF" , "Log(OR)" , "-Log(P)" - , "NA-Dist" # "interface_dist" + , "NA-Dist" # "NA_dist" , "dst_mode" , drug) @@ -254,7 +254,7 @@ svg(corr_consP, width = 10, height = 10) my_corr_pairs(corr_data_all = corr_df_cons , corr_cols = colnames(corr_df_cons[1:corr_end]) - , corr_method = "spearman" # other options: "pearson" or "kendall" + , corr_method = "spearman" , colour_categ_col = colnames(corr_df_cons[color_coln]) #"dst_mode" , categ_colour = c("red", "blue") , density_show = F diff --git a/scripts/plotting/plotting_thesis/dm_om_plots.R b/scripts/plotting/plotting_thesis/dm_om_plots.R index 733e77f..b14d032 100644 --- a/scripts/plotting/plotting_thesis/dm_om_plots.R +++ b/scripts/plotting/plotting_thesis/dm_om_plots.R @@ -1,237 +1,230 @@ ################# # Numbers ################## -nrow(wf_mcsm_lig) -table(wf_mcsm_lig$mutation_info_labels) - -nrow(wf_mcsm_ppi2) -table(wf_mcsm_ppi2$mutation_info_labels) +all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene) +# +# lf_duet = all_dm_om_df[['lf_duet']] +# table(lf_duet$param_type) ################################################################ -geneL_normal = c("pnca") -geneL_na = c("gid", "rpob") -geneL_ppi2 = c("alr", "embb", "katg", "rpob") +#====================== +# Data: Dist+genomics +#====================== +lf_dist_genP = all_dm_om_df[['lf_dist_gen']] +wf_dist_genP = all_dm_om_df[['wf_dist_gen']] +levels(lf_dist_genP$param_type) +#lf_dist_genP$param_type <- factor(lf_dist_genP$param_type, levels=c("Log10(MAF)", "Lig Dist(Å)", "PPI Dist(Å)")) +table(lf_dist_genP$param_type) -if (tolower(gene)%in%geneL_na){ - lf_mcsm_na - } +genomics_param = c("Log10(MAF)") -if (tolower(gene)%in%geneL_ppi2){ - lf_mcsm_ppi2 -} +dist_genP = lf_bp2(lf_dist_genP + #, p_title + , violin_quantiles = c(0.5), monochrome = F) -colnames(lf_duet) -table(lf_duet$param_type) +#------------------- +# Genomics data plot +#------------------- +genomics_dataP = lf_dist_genP[lf_dist_genP$param_type%in%genomics_param,] +genomics_dataP$param_type = factor(genomics_dataP$param_type) +table(genomics_dataP$param_type) -static_colsP = c("Lig Dist(Å)","ASA", "RSA","RD","KD","Log10(MAF)") +genomicsP = lf_bp2(genomics_dataP + #, p_title = "" + , violin_quantiles = c(0.5), monochrome = F) -stability_suffix <- paste0(delta_symbol, delta_symbol, "G") - -lf_commonP = lf_duet[!lf_duet$param_type%in%c("DUET ΔΔG"),] -lf_commonP$param_type = levels(droplevels(lf_commonP$param_type)) -table(lf_commonP$param_type); colnames(lf_commonP) -lf_commonP$outcome = lf_commonP$duet_outcome -lf_commonP$duet_outcome = NULL +genomicsP +#check +wilcox.test(wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="R"] + , wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE) -lf_duet$outcome = lf_duet$duet_outcome -lf_duet$duet_outcome = NULL -lf_duetP = lf_duet[!lf_duet$param_type%in%c(static_colsP, "outcome"),] -lf_duetP$param_type = levels(droplevels(lf_duetP$param_type)) -table(lf_duetP$param_type); colnames(lf_duetP) -colnames(lf_duetP) +tapply(wf_dist_genP$`Log10(MAF)`, wf_dist_genP$mutation_info_labels, summary) -lf_foldx$outcome = lf_foldx$foldx_outcome -lf_foldx$foldx_outcome = NULL -lf_foldxP = lf_foldx[!lf_foldx$param_type%in%c(static_colsP,"outcome"),] -lf_foldxP$param_type = levels(droplevels(lf_foldxP$param_type)) -table(lf_foldxP$param_type) -colnames(lf_foldxP) +#------------------- +# Distance data plot: not genomics data +#------------------- +dist_dataP = lf_dist_genP[!lf_dist_genP$param_type%in%genomics_param,] +#dist_dataP$param_type = factor(dist_dataP$param_type) +table(dist_dataP$param_type) +distanceP = lf_bp2(dist_dataP + #, p_title = "" + , violin_quantiles = c(0.5), monochrome = F) -lf_deepddg$outcome = lf_deepddg$deepddg_outcome -lf_deepddg$deepddg_outcome = NULL -lf_deepddgP = lf_deepddg[!lf_deepddg$param_type%in%c(static_colsP, "outcome"),] -lf_deepddgP$param_type = levels(droplevels(lf_deepddgP$param_type)) +distanceP + +# check +wilcox.test(wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"] +, wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE) + +wilcox.test(wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"] + , wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE) + +tapply(wf_dist_genP$`PPI Dist(Å)`, wf_dist_genP$mutation_info_labels, summary) + +tapply(wf_dist_genP$`Lig Dist(Å)`, wf_dist_genP$mutation_info_labels, summary) + +#============== +# Plot:DUET +#============== +lf_duetP = all_dm_om_df[['lf_duet']] +#lf_duetP = lf_duet[!lf_duet$param_type%in%c(static_colsP),] +table(lf_duetP$param_type) +lf_duetP$param_type = factor(lf_duetP$param_type) +table(lf_duetP$param_type) + +duetP = lf_bp2(lf_duetP + #, p_title = paste0("Stability",stability_suffix) + , violin_quantiles = c(0.5), monochrome = F) + +#============== +# Plot:FoldX +#============== +lf_foldxP = all_dm_om_df[['lf_foldx']] +#lf_foldxP = lf_foldx[!lf_foldx$param_type%in%c(static_colsP),] +table(lf_foldxP$param_type) +lf_foldxP$param_type = factor(lf_foldxP$param_type) +table(lf_foldxP$param_type) + +foldxP = lf_bp2(lf_foldxP + #, p_title = paste0("Stability",stability_suffix) + , violin_quantiles = c(0.5), monochrome = F) + +#============== +# Plot:DeepDDG +#============== +lf_deepddgP = all_dm_om_df[['lf_deepddg']] +#lf_deepddgP = lf_deepddg[!lf_deepddg$param_type%in%c(static_colsP),] +table(lf_deepddgP$param_type) +lf_deepddgP$param_type = factor(lf_deepddgP$param_type) table(lf_deepddgP$param_type) -colnames(lf_deepddgP) +deepddgP = lf_bp2(lf_deepddgP + #, p_title = paste0("Stability",stability_suffix) + , violin_quantiles = c(0.5), monochrome = F + , dot_transparency = 0.3) +deepddgP -lf_dynamut2$outcome = lf_dynamut2$ddg_dynamut2_outcome -lf_dynamut2$ddg_dynamut2_outcome = NULL -lf_dynamut2P = lf_dynamut2[!lf_dynamut2$param_type%in%c(static_colsP, "outcome"),] -lf_dynamut2P$param_type = levels(droplevels(lf_dynamut2P$param_type)) +#============== +# Plot: Dynamut2 +#============== +lf_dynamut2P = all_dm_om_df[['lf_dynamut2']] +#lf_dynamut2P = lf_dynamut2[!lf_dynamut2$param_type%in%c(static_colsP),] +table(lf_dynamut2P$param_type) +lf_dynamut2P$param_type = factor(lf_dynamut2P$param_type) table(lf_dynamut2P$param_type) -colnames(lf_dynamut2P) +dynamut2P = lf_bp2(lf_dynamut2P + #, p_title = paste0("Stability",stability_suffix) + , violin_quantiles = c(0.5), monochrome = F) -lf_consurf$outcome = lf_consurf$consurf_outcome -lf_consurf$consurf_outcome = NULL -lf_consurfP = lf_consurf[!lf_consurf$param_type%in%c(static_colsP),] -lf_consurfP$param_type = levels(droplevels(lf_consurfP$param_type)) +#============== +# Plot:ConSurf +#============== +lf_consurfP = all_dm_om_df[['lf_consurf']] +#lf_consurfP = lf_consurf[!lf_consurf$param_type%in%c(static_colsP),] +table(lf_consurfP$param_type) +lf_consurfP$param_type = factor(lf_consurfP$param_type) table(lf_consurfP$param_type) -colnames(lf_consurfP) +consurfP = lf_bp2(lf_consurfP + #, p_title = paste0("Stability",stability_suffix) + , violin_quantiles = c(0.5), monochrome = F) -lf_snap2$outcome = lf_snap2$snap2_outcome -lf_snap2$snap2_outcome = NULL -lf_snap2P = lf_snap2[!lf_snap2$param_type%in%c(static_colsP),] -lf_snap2P$param_type = levels(droplevels(lf_snap2P$param_type)) +#============== +# Plot:SNAP2 +#============== +lf_snap2P = all_dm_om_df[['lf_snap2']] +#lf_snap2P = lf_snap2[!lf_snap2$param_type%in%c(static_colsP),] +table(lf_snap2P$param_type) +lf_snap2P$param_type = factor(lf_snap2P$param_type) table(lf_snap2P$param_type) -colnames(lf_snap2P) +snap2P = lf_bp2(lf_snap2P + #, p_title = paste0("Stability",stability_suffix) + , violin_quantiles = c(0.5), monochrome = F) -lf_provean$outcome = lf_provean$provean_outcome -lf_provean$provean_outcome = NULL -lf_proveanP = lf_provean[!lf_provean$param_type%in%c(static_colsP),] -lf_proveanP$param_type = levels(droplevels(lf_proveanP$param_type)) +#============== +# Plot:PROVEAN +#============== +lf_proveanP = all_dm_om_df[['lf_provean']] +#lf_proveanP = lf_provean[!lf_provean$param_type%in%c(static_colsP),] +table(lf_proveanP$param_type) +lf_proveanP$param_type = factor(lf_proveanP$param_type) table(lf_proveanP$param_type) -colnames(lf_proveanP) -bar = rbind(colnames(lf_duetP) - , colnames(lf_foldxP) - , colnames(lf_deepddgP) - , colnames(lf_dynamut2P) - , colnames(lf_consurfP) - , colnames(lf_snap2P) - , colnames(lf_proveanP) -) -bar - -lf_df_stabP = rbind((lf_duetP) - , (lf_foldxP) - , (lf_deepddgP) - , (lf_dynamut2P)) - -lf_df_consP = rbind((lf_consurfP) - , (lf_snap2P) - , (lf_proveanP)) - -table(lf_df_stabP$param_type) -# VERY USEFUL for seeing numbers for param types -table(lf_df_stabP$param_type,lf_df_stabP$outcome) -table(lf_df_consP$param_type,lf_df_consP$outcome) - -#============== -# Plot:BP -#============== -stability_suffix <- paste0(delta_symbol, delta_symbol, "G") - -# lf_bp(lf_df_stabP, p_title = paste0("Stability",stability_suffix) -# , violin_quantiles = c(0.5)) -# lf_bp(lf_df_consP, p_title = "Evolutionary Conservation" -# , violin_quantiles = c(0.5)) - -lf_bp2(lf_df_stabP, p_title = paste0("Stability",stability_suffix) +proveanP = lf_bp2(lf_proveanP + #, p_title = paste0("Stability",stability_suffix) , violin_quantiles = c(0.5), monochrome = F) -lf_bp2(lf_duet, p_title = paste0("Stability",stability_suffix) - , violin_quantiles = c(0.5), monochrome = F) - - - -lf_bp2(lf_df_consP, p_title = "Evolutionary Conservation" - , violin_quantiles = c(0.5), monochrome = F) - -#HMMM: Bollocks! -lf_bp2(lf_commonP, p_title = paste0("Residue level properties") - , violin_quantiles = c(0.5) - , monochrome = T) # doesn't plot stat bars - -lf_bp(lf_commonP, p_title = paste0("Residue level properties") - , violin_quantiles = c(0.5)) #plots stat bars but incorrect result - -lf_unpaired_stats(lf_duet) -wilcox.test(wf_duet$`Lig Dist(Å)`[wf_duet$mutation_info_labels=="R"] - , wf_duet$`Lig Dist(Å)`[wf_duet$mutation_info_labels=="S"]) - -wilcox.test(wf_duet$ASA[wf_duet$mutation_info_labels=="R"] - , wf_duet$ASA[wf_duet$mutation_info_labels=="S"]) - - -# 1: variable -# 9: conserved - -# CHECK THESE -foo = merged_df3[c("dst_mode", "mutation_info_labels", "consurf_colour_rev" - , "consurf_scaled" - , "consurf_score" - , "consurf_outcome" - , "snap2_score" - , "snap2_scaled" - , "snap2_outcome" - , "provean_score" - , "provean_scaled" - , "provean_outcome")] - -################ -# Affinity -################ -# ligand -lf_mcsm_lig$outcome = lf_mcsm_lig$ligand_outcome -lf_mcsm_lig$ligand_outcome = NULL -colnames(lf_mcsm_lig) -table(lf_mcsm_lig$param_type) - - -lf_mcsm_lig$outcome = lf_mcsm_lig$ligand_outcome -lf_mcsm_ligP = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_colsP, "outcome"),] -#lf_mcsm_ligP$param_type = levels(droplevels(lf_mcsm_ligP)) +#============== +# Plot: mCSM-lig +#============== +lf_mcsm_ligP = all_dm_om_df[['lf_mcsm_lig']] +#lf_mcsm_ligP = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_colsP),] +table(lf_mcsm_ligP$param_type) +lf_mcsm_ligP$param_type = factor(lf_mcsm_ligP$param_type) table(lf_mcsm_ligP$param_type) -lf_mcsm_ligP$ligand_outcome = NULL -colnames(lf_mcsm_ligP) -if (tolower(gene)%in%geneL_na){ - lf_mcsm_na$outcome = lf_mcsm_na$mcsm_na_outcome - #lf_mcsm_na$mcsm_na_outcome = NULL - lf_mcsm_naP = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_colsP, "outcome"),] - #lf_mcsm_naP$param_type = levels(droplevels(lf_mcsm_naP)) - table(lf_mcsm_naP$param_type) - lf_mcsm_naP$mcsm_na_outcome = NULL - colnames(lf_mcsm_naP) -} +mcsmligP = lf_bp2(lf_mcsm_ligP + #, p_title = paste0("Stability",stability_suffix) + , violin_quantiles = c(0.5), monochrome = F) +#============== +# Plot: mCSM-ppi2 +#============== if (tolower(gene)%in%geneL_ppi2){ - - lf_mcsm_ppi2$outcome = lf_mcsm_ppi2$mcsm_ppi2_outcome - colnames(lf_mcsm_ppi2) - #lf_mcsm_ppi2$mcsm_ppi2_outcome = NULL - lf_mcsm_ppi2P = lf_mcsm_ppi2[!lf_mcsm_ppi2$param_type%in%c(static_colsP, "outcome"),] - #lf_mcsm_ppi2P$param_type = levels(droplevels(lf_mcsm_ppi2P)) + lf_mcsm_ppi2P = all_dm_om_df[['lf_mcsm_ppi2']] + #lf_mcsm_ppi2P = lf_mcsm_ppi2[!lf_mcsm_ppi2$param_type%in%c(static_colsP),] + table(lf_mcsm_ppi2P$param_type) + lf_mcsm_ppi2P$param_type = factor(lf_mcsm_ppi2P$param_type) table(lf_mcsm_ppi2P$param_type) - lf_mcsm_ppi2P$mcsm_ppi2_outcome = NULL - colnames(lf_mcsm_ppi2P) + mcsmppi2P = lf_bp2(lf_mcsm_ppi2P + #, p_title = paste0("Stability",stability_suffix) + , violin_quantiles = c(0.5), monochrome = F) } +#============== +# Plot: mCSM-NA +#============== +if (tolower(gene)%in%geneL_na){ + lf_mcsm_naP = all_dm_om_df[['lf_mcsm_na']] + #lf_mcsm_naP = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_colsP),] + table(lf_mcsm_naP$param_type) + lf_mcsm_naP$param_type = factor(lf_mcsm_naP$param_type) + table(lf_mcsm_naP$param_type) + + mcsmnaP = lf_bp2(lf_mcsm_naP + #, p_title = paste0("Stability",stability_suffix) + , violin_quantiles = c(0.5), monochrome = F) +} -bar = rbind(colnames(lf_mcsm_ligP) - #, colnames(lf_mcsm_naP) - , colnames(lf_mcsm_ppi2P)) -bar +###################################### +# Outplot with stats +###################################### -lf_df_affP = rbind((lf_mcsm_ligP) - , (lf_mcsm_ppi2P)) +cowplot::plot_grid( + cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P, genomicsP, distanceP + , nrow=1), + # cowplot::plot_grid(genomicsP, distanceP + # , nrow = 1), + cowplot::plot_grid(consurfP, snap2P, proveanP + , mcsmligP + , mcsmppi2P + #, mcsmnaP + , nrow=1), + nrow=2) -lf_bp(lf_df_affP, p_title = paste0("Affinity changes") - , violin_quantiles = c(0.5)) - #, monochrome = T) # doesn't plot stat bars -wilcox.test(wf_mcsm_lig$ASA[wf_mcsm_lig$mutation_info_labels=="R"] - , wf_mcsm_lig$ASA[wf_mcsm_lig$mutation_info_labels=="S"]) +foo = lf_consurfP -wilcox.test(wf_mcsm_ppi2$ASA[wf_mcsm_ppi2$mutation_info_labels=="R"] - , wf_mcsm_ppi2$ASA[wf_mcsm_ppi2$mutation_info_labels=="S"]) +# proveanP = lf_bp2(lf_proveanP, colour_categ = "mutation_info_labels" +# , p_title = paste0("Evolutionary conservation") +# , dot_transparency = 1 +# , violin_quantiles = c(0.5), monochrome = F) +# +# proveanP -#=============================== -p1 = lf_bp2(lf_df_stabP, p_title = paste0("Stability",stability_suffix) - , violin_quantiles = c(0.5), monochrome = F) - -p2 = lf_bp2(lf_df_consP, p_title = "Evolutionary Conservation" - , violin_quantiles = c(0.5), monochrome = F) - -p3 = lf_bp2(lf_df_affP, p_title = paste0("Affinity changes") - , violin_quantiles = c(0.5), monochrome = F) - -cowplot::plot_grid(p1,cowplot::plot_grid(p2, p3), nrow=2)