diff --git a/scripts/functions/dm_om_data.R b/scripts/functions/dm_om_data.R index c9f5333..d03e170 100644 --- a/scripts/functions/dm_om_data.R +++ b/scripts/functions/dm_om_data.R @@ -40,6 +40,8 @@ dm_om_wf_lf_data <- function(df , lf_duet = data.frame() , wf_mcsm_lig = data.frame() , lf_mcsm_lig = data.frame() + , wf_mmcsm_lig2 = data.frame() # NEW + , lf_mmcsm_lig2 = data.frame() # NEW , wf_foldx = data.frame() , lf_foldx = data.frame() , wf_deepddg = data.frame() @@ -638,52 +640,48 @@ table(lf_mcsm_lig$param_type); colnames(lf_mcsm_lig) wf_lf_dataL[['wf_mcsm_lig']] = wf_mcsm_lig wf_lf_dataL[['lf_mcsm_lig']] = lf_mcsm_lig -#==================== -# mcsm-NA affinity +#========================= +# mmCSM-lig2: # data filtered by cut off -#==================== -if (tolower(gene)%in%geneL_na){ - #--------------- - # mCSM-NA: WF and lF - #----------------- - # WF data: mcsm-na - cols_to_select_mcsm_na = c(static_cols_start, c("mcsm_na_outcome", mcsm_na_dn), static_cols_end) - #wf_mcsm_na = comb_df_sl[, cols_to_select_mcsm_na] - wf_mcsm_na = comb_df_sl_na[, cols_to_select_mcsm_na] - - pivot_cols_mcsm_na = cols_to_select_mcsm_na[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_na - expected_rows_lf = nrow(wf_mcsm_na) * (length(wf_mcsm_na) - length(pivot_cols_mcsm_na)) - expected_rows_lf - - # LF data: mcsm-na - lf_mcsm_na = gather(wf_mcsm_na - , key = param_type - , value = param_value - , all_of(mcsm_na_dn):tail(static_cols_end,1) - , factor_key = TRUE) - - if (nrow(lf_mcsm_na) == expected_rows_lf){ - cat("\nPASS: long format data created for ", mcsm_na_dn) - }else{ - cat("\nFAIL: long format data could not be created for duet") - quit() - } - - # NEW columns [outcome and outcome colname] - lf_mcsm_na$outcome_colname = "mcsm_na_outcome" - lf_mcsm_na$outcome = lf_mcsm_na$mcsm_na_outcome - - # DROP static cols - lf_mcsm_na = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_cols_end),] - lf_mcsm_na$param_type = factor(lf_mcsm_na$param_type) - table(lf_mcsm_na$param_type); colnames(lf_mcsm_na) - - # Assign them to the output list - wf_lf_dataL[['wf_mcsm_na']] = wf_mcsm_na - wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na +#========================= +#--------------------- +# mmCSM-lig2: WF and lF +#---------------------- +# WF data: mmcsm_lig2 +cols_to_select_mmcsm_lig2 = c(static_cols_start, c("mmcsm_lig_outcome", mmcsm_lig_dn2), static_cols_end) +wf_mmcsm_lig2 = comb_df_sl_lig[, cols_to_select_mmcsm_lig2] # filtered df +pivot_cols_mmcsm_lig2 = cols_to_select_mmcsm_lig2[1: (length(static_cols_start) + 1)]; pivot_cols_mmcsm_lig2 +expected_rows_lf = nrow(wf_mmcsm_lig2) * (length(wf_mmcsm_lig2) - length(pivot_cols_mmcsm_lig2)) +expected_rows_lf + +# LF data: mmcsm_lig2 +lf_mmcsm_lig2 = gather(wf_mmcsm_lig2 + , key = param_type + , value = param_value + , all_of(mmcsm_lig_dn2):tail(static_cols_end,1) + , factor_key = TRUE) + +if (nrow(lf_mmcsm_lig2) == expected_rows_lf){ + cat("\nPASS: long format data created for ", mmcsm_lig_dn2) +}else{ + cat("\nFAIL: long format data could not be created for mmcsm_lig2") + quit() } +# NEW columns [outcome and outcome colname] +lf_mmcsm_lig2$outcome_colname = "mmcsm_lig_outcome" +lf_mmcsm_lig2$outcome = lf_mmcsm_lig2$mmcsm_lig_outcome + +# DROP static cols +lf_mmcsm_lig2 = lf_mmcsm_lig2[!lf_mmcsm_lig2$param_type%in%c(static_cols_end),] +lf_mmcsm_lig2$param_type = factor(lf_mmcsm_lig2$param_type) +table(lf_mmcsm_lig2$param_type); colnames(lf_mmcsm_lig2) + +# Assign them to the output list +wf_lf_dataL[['wf_mmcsm_lig2']] = wf_mmcsm_lig2 +wf_lf_dataL[['lf_mmcsm_lig2']] = lf_mmcsm_lig2 + #========================= # mcsm-ppi2 affinity # data filtered by cut off @@ -730,6 +728,54 @@ if (tolower(gene)%in%geneL_ppi2){ } + + +#==================== +# mcsm-NA affinity +# data filtered by cut off +#==================== +if (tolower(gene)%in%geneL_na){ + #--------------- + # mCSM-NA: WF and lF + #----------------- + # WF data: mcsm-na + cols_to_select_mcsm_na = c(static_cols_start, c("mcsm_na_outcome", mcsm_na_dn), static_cols_end) + #wf_mcsm_na = comb_df_sl[, cols_to_select_mcsm_na] + wf_mcsm_na = comb_df_sl_na[, cols_to_select_mcsm_na] + + pivot_cols_mcsm_na = cols_to_select_mcsm_na[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_na + expected_rows_lf = nrow(wf_mcsm_na) * (length(wf_mcsm_na) - length(pivot_cols_mcsm_na)) + expected_rows_lf + + # LF data: mcsm-na + lf_mcsm_na = gather(wf_mcsm_na + , key = param_type + , value = param_value + , all_of(mcsm_na_dn):tail(static_cols_end,1) + , factor_key = TRUE) + + if (nrow(lf_mcsm_na) == expected_rows_lf){ + cat("\nPASS: long format data created for ", mcsm_na_dn) + }else{ + cat("\nFAIL: long format data could not be created for duet") + quit() + } + + # NEW columns [outcome and outcome colname] + lf_mcsm_na$outcome_colname = "mcsm_na_outcome" + lf_mcsm_na$outcome = lf_mcsm_na$mcsm_na_outcome + + # DROP static cols + lf_mcsm_na = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_cols_end),] + lf_mcsm_na$param_type = factor(lf_mcsm_na$param_type) + table(lf_mcsm_na$param_type); colnames(lf_mcsm_na) + + # Assign them to the output list + wf_lf_dataL[['wf_mcsm_na']] = wf_mcsm_na + wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na + +} + return(wf_lf_dataL) } ############################################################################ diff --git a/scripts/functions/lf_bp2.R b/scripts/functions/lf_bp2.R index e3b1140..b768b21 100644 --- a/scripts/functions/lf_bp2.R +++ b/scripts/functions/lf_bp2.R @@ -18,8 +18,9 @@ lf_bp2 <- function(lf_df #lf_duet , y_scales = "free_y" , colour_bp_strip = "khaki2" , dot_size = 3 - , dot_transparency = 1 #0.3: lighter + , dot_transparency = 0.3 #0.3: lighter , violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL + , line_thickness = 0.65 , my_ats = 22 # axis text size , my_als = 20 # axis label size , my_fls = 20 # facet label size @@ -69,6 +70,7 @@ lf_bp2 <- function(lf_df #lf_duet ggplot2::scale_color_manual(values = lf_bp_colours) + geom_violin(trim = T + , size = line_thickness , scale = "width" , colour = "black" #, position = position_dodge(width = 0.9) @@ -98,11 +100,11 @@ lf_bp2 <- function(lf_df #lf_duet , x = "" , y = "") + - stat_compare_means(comparisons = my_comparisonsL , method = stat_method , paired = my_paired - , label = stat_label[2]) + + , label = stat_label[2] + , size = 5) + geom_quasirandom( size = dot_size , alpha = dot_transparency diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R index ea59f22..53ca49a 100644 --- a/scripts/plotting/get_plotting_dfs.R +++ b/scripts/plotting/get_plotting_dfs.R @@ -196,11 +196,11 @@ corr_df_m3_f = corr_data_extract(merged_df3 , extract_scaled_cols = F) head(corr_df_m3_f) -corr_df_m2_f = corr_data_extract(merged_df2 - , gene = gene - , drug = drug - , extract_scaled_cols = F) -head(corr_df_m2_f) +# corr_df_m2_f = corr_data_extract(merged_df2 +# , gene = gene +# , drug = drug +# , extract_scaled_cols = F) +# head(corr_df_m2_f) s4 = c("\nSuccessfully sourced Corr_data.R") cat(s4) diff --git a/scripts/plotting/plotting_thesis/dm_om_plots.R b/scripts/plotting/plotting_thesis/dm_om_plots.R index b14d032..c703157 100644 --- a/scripts/plotting/plotting_thesis/dm_om_plots.R +++ b/scripts/plotting/plotting_thesis/dm_om_plots.R @@ -76,8 +76,9 @@ lf_duetP$param_type = factor(lf_duetP$param_type) table(lf_duetP$param_type) duetP = lf_bp2(lf_duetP - #, p_title = paste0("Stability",stability_suffix) - , violin_quantiles = c(0.5), monochrome = F) + #, p_title = "" + , violin_quantiles = c(0.5), monochrome = F + , dot_transparency = 0.2) #============== # Plot:FoldX @@ -89,8 +90,9 @@ lf_foldxP$param_type = factor(lf_foldxP$param_type) table(lf_foldxP$param_type) foldxP = lf_bp2(lf_foldxP - #, p_title = paste0("Stability",stability_suffix) - , violin_quantiles = c(0.5), monochrome = F) + #, p_title = "" + , violin_quantiles = c(0.5), monochrome = F + , dot_transparency = 0.1) #============== # Plot:DeepDDG @@ -102,9 +104,10 @@ lf_deepddgP$param_type = factor(lf_deepddgP$param_type) table(lf_deepddgP$param_type) deepddgP = lf_bp2(lf_deepddgP - #, p_title = paste0("Stability",stability_suffix) - , violin_quantiles = c(0.5), monochrome = F - , dot_transparency = 0.3) + #, p_title = "" + , violin_quantiles = c(0.5), monochrome = F + , dot_transparency = 0.2) + deepddgP #============== @@ -117,8 +120,10 @@ lf_dynamut2P$param_type = factor(lf_dynamut2P$param_type) table(lf_dynamut2P$param_type) dynamut2P = lf_bp2(lf_dynamut2P - #, p_title = paste0("Stability",stability_suffix) - , violin_quantiles = c(0.5), monochrome = F) + #, p_title = "" + , violin_quantiles = c(0.5), monochrome = F + , dot_transparency = 0.2) + #============== # Plot:ConSurf @@ -130,8 +135,8 @@ lf_consurfP$param_type = factor(lf_consurfP$param_type) table(lf_consurfP$param_type) consurfP = lf_bp2(lf_consurfP - #, p_title = paste0("Stability",stability_suffix) - , violin_quantiles = c(0.5), monochrome = F) + #, p_title = "" + , violin_quantiles = c(0.5), monochrome = F) #============== # Plot:SNAP2 @@ -143,8 +148,8 @@ lf_snap2P$param_type = factor(lf_snap2P$param_type) table(lf_snap2P$param_type) snap2P = lf_bp2(lf_snap2P - #, p_title = paste0("Stability",stability_suffix) - , violin_quantiles = c(0.5), monochrome = F) + #, p_title = "" + , violin_quantiles = c(0.5), monochrome = F) #============== # Plot:PROVEAN @@ -156,12 +161,13 @@ lf_proveanP$param_type = factor(lf_proveanP$param_type) table(lf_proveanP$param_type) proveanP = lf_bp2(lf_proveanP - #, p_title = paste0("Stability",stability_suffix) - , violin_quantiles = c(0.5), monochrome = F) + #, p_title = "" + , violin_quantiles = c(0.5), monochrome = F) -#============== +############################################################################ +#================ # Plot: mCSM-lig -#============== +#================ lf_mcsm_ligP = all_dm_om_df[['lf_mcsm_lig']] #lf_mcsm_ligP = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_colsP),] table(lf_mcsm_ligP$param_type) @@ -169,12 +175,30 @@ lf_mcsm_ligP$param_type = factor(lf_mcsm_ligP$param_type) table(lf_mcsm_ligP$param_type) mcsmligP = lf_bp2(lf_mcsm_ligP - #, p_title = paste0("Stability",stability_suffix) - , violin_quantiles = c(0.5), monochrome = F) + #, p_title = "" + , violin_quantiles = c(0.5), monochrome = F + , dot_transparency = 1) -#============== + +#================= +# Plot: mmCSM-lig2 +#================= +lf_mmcsm_lig2P = all_dm_om_df[['lf_mmcsm_lig2']] +#lf_mmcsm_lig2P = lf_mmcsm_lig2P[!lf_mmcsm_lig2P$param_type%in%c(static_colsP),] +table(lf_mmcsm_lig2P$param_type) +lf_mmcsm_lig2P$param_type = factor(lf_mmcsm_lig2P$param_type) +table(lf_mmcsm_lig2P$param_type) + +mcsmlig2P = lf_bp2(lf_mmcsm_lig2P + #, p_title = "" + , violin_quantiles = c(0.5), monochrome = F + , dot_transparency = 1) + +mcsmlig2P + +#================ # Plot: mCSM-ppi2 -#============== +#================ if (tolower(gene)%in%geneL_ppi2){ lf_mcsm_ppi2P = all_dm_om_df[['lf_mcsm_ppi2']] #lf_mcsm_ppi2P = lf_mcsm_ppi2[!lf_mcsm_ppi2$param_type%in%c(static_colsP),] @@ -183,8 +207,10 @@ if (tolower(gene)%in%geneL_ppi2){ table(lf_mcsm_ppi2P$param_type) mcsmppi2P = lf_bp2(lf_mcsm_ppi2P - #, p_title = paste0("Stability",stability_suffix) - , violin_quantiles = c(0.5), monochrome = F) + #, p_title = "" + , violin_quantiles = c(0.5), monochrome = F + , dot_transparency = 1) + } #============== @@ -198,28 +224,39 @@ if (tolower(gene)%in%geneL_na){ table(lf_mcsm_naP$param_type) mcsmnaP = lf_bp2(lf_mcsm_naP - #, p_title = paste0("Stability",stability_suffix) - , violin_quantiles = c(0.5), monochrome = F) + #, p_title = "" + , violin_quantiles = c(0.5), monochrome = F + , dot_transparency = 1) + } ###################################### # Outplot with stats ###################################### +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +dm_om_combinedP = paste0(outdir_images + ,tolower(gene) + ,"_dm_om_all.svg" ) + +cat("DM OM plots with stats:", dm_om_combinedP) +svg(dm_om_combinedP, width = 32, height = 18) cowplot::plot_grid( cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P, genomicsP, distanceP - , nrow=1), - # cowplot::plot_grid(genomicsP, distanceP - # , nrow = 1), + , nrow=1 + , rel_widths = c(1/7, 1/7,1/7,1/7, 1/7, 1.75/7)), + #, rel_widths = c(1/8, 1/8,1/8,1/8, 1/8, 2.75/8)), # for 3 distances cowplot::plot_grid(consurfP, snap2P, proveanP , mcsmligP + , mcsmlig2P , mcsmppi2P #, mcsmnaP , nrow=1), nrow=2) +dev.off() -foo = lf_consurfP +#foo = lf_consurfP # proveanP = lf_bp2(lf_proveanP, colour_categ = "mutation_info_labels" # , p_title = paste0("Evolutionary conservation") diff --git a/scripts/plotting/plotting_thesis/preformatting.R b/scripts/plotting/plotting_thesis/preformatting.R index faf9b89..903f23c 100644 --- a/scripts/plotting/plotting_thesis/preformatting.R +++ b/scripts/plotting/plotting_thesis/preformatting.R @@ -8,6 +8,11 @@ source("~/git/LSHTM_analysis/config/embb.R") # get plottting dfs source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") ################################################################### # FIXME: ADD distance to NA when SP replies # DONE: plotting_globals.R @@ -86,12 +91,6 @@ all_cols = c(common_cols , scaled_cols_conservation , outcome_cols_conservation) - -#======= -# output -#======= -outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") - #################################### # merged_df3: NECESSARY pre-processing ###################################