refactored dm om plots and generated the final layout

This commit is contained in:
Tanushree Tunstall 2022-08-08 16:45:55 +01:00
parent 28510471f0
commit 5bdfd03443
5 changed files with 169 additions and 85 deletions

View file

@ -40,6 +40,8 @@ dm_om_wf_lf_data <- function(df
, lf_duet = data.frame()
, wf_mcsm_lig = data.frame()
, lf_mcsm_lig = data.frame()
, wf_mmcsm_lig2 = data.frame() # NEW
, lf_mmcsm_lig2 = data.frame() # NEW
, wf_foldx = data.frame()
, lf_foldx = data.frame()
, wf_deepddg = data.frame()
@ -638,52 +640,48 @@ table(lf_mcsm_lig$param_type); colnames(lf_mcsm_lig)
wf_lf_dataL[['wf_mcsm_lig']] = wf_mcsm_lig
wf_lf_dataL[['lf_mcsm_lig']] = lf_mcsm_lig
#====================
# mcsm-NA affinity
#=========================
# mmCSM-lig2:
# data filtered by cut off
#====================
if (tolower(gene)%in%geneL_na){
#---------------
# mCSM-NA: WF and lF
#-----------------
# WF data: mcsm-na
cols_to_select_mcsm_na = c(static_cols_start, c("mcsm_na_outcome", mcsm_na_dn), static_cols_end)
#wf_mcsm_na = comb_df_sl[, cols_to_select_mcsm_na]
wf_mcsm_na = comb_df_sl_na[, cols_to_select_mcsm_na]
pivot_cols_mcsm_na = cols_to_select_mcsm_na[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_na
expected_rows_lf = nrow(wf_mcsm_na) * (length(wf_mcsm_na) - length(pivot_cols_mcsm_na))
expected_rows_lf
# LF data: mcsm-na
lf_mcsm_na = gather(wf_mcsm_na
, key = param_type
, value = param_value
, all_of(mcsm_na_dn):tail(static_cols_end,1)
, factor_key = TRUE)
# Sanity check: the long-format table must contain exactly the number of rows
# computed in expected_rows_lf from the wide-format table.
if (nrow(lf_mcsm_na) == expected_rows_lf){
  cat("\nPASS: long format data created for ", mcsm_na_dn)
}else{
  # Raise an error rather than calling quit(): quit() would end the whole R
  # session (with exit status 0 by default) and cannot be handled by a caller.
  # The message now names mcsm_na; it previously said "duet" (copy-paste slip).
  stop("FAIL: long format data could not be created for mcsm_na", call. = FALSE)
}
# NEW columns [outcome and outcome colname]
lf_mcsm_na$outcome_colname = "mcsm_na_outcome"
lf_mcsm_na$outcome = lf_mcsm_na$mcsm_na_outcome
# DROP static cols
lf_mcsm_na = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_cols_end),]
lf_mcsm_na$param_type = factor(lf_mcsm_na$param_type)
table(lf_mcsm_na$param_type); colnames(lf_mcsm_na)
# Assign them to the output list
wf_lf_dataL[['wf_mcsm_na']] = wf_mcsm_na
wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na
#=========================
#---------------------
# mmCSM-lig2: WF and LF
#----------------------
# Build the wide-format (WF) and long-format (LF) mmCSM-lig2 tables and store
# both in the output list wf_lf_dataL.
# NOTE(review): static_cols_start, static_cols_end, mmcsm_lig_dn2 and
# comb_df_sl_lig are defined outside this hunk -- confirm their contents.

# WF data: mmcsm_lig2
cols_to_select_mmcsm_lig2 = c(static_cols_start
                              , c("mmcsm_lig_outcome", mmcsm_lig_dn2)
                              , static_cols_end)
wf_mmcsm_lig2 = comb_df_sl_lig[, cols_to_select_mmcsm_lig2] # filtered df

# Columns that are NOT gathered: the leading static columns plus the outcome
# column. seq_len() is used instead of 1:n so the index can never run backwards.
pivot_cols_mmcsm_lig2 = cols_to_select_mmcsm_lig2[seq_len(length(static_cols_start) + 1)]; pivot_cols_mmcsm_lig2

# Each gathered column contributes one LF row per WF row.
expected_rows_lf = nrow(wf_mmcsm_lig2) * (length(wf_mmcsm_lig2) - length(pivot_cols_mmcsm_lig2))
expected_rows_lf

# LF data: mmcsm_lig2
# Gathers every column from mmcsm_lig_dn2 through the last static end column
# into param_type (factor, in column order) / param_value pairs.
lf_mmcsm_lig2 = gather(wf_mmcsm_lig2
                       , key = param_type
                       , value = param_value
                       , all_of(mmcsm_lig_dn2):tail(static_cols_end,1)
                       , factor_key = TRUE)

if (nrow(lf_mmcsm_lig2) == expected_rows_lf){
  cat("\nPASS: long format data created for ", mmcsm_lig_dn2)
}else{
  # Raise an error rather than calling quit(): quit() would end the whole R
  # session (with exit status 0 by default) and cannot be handled by a caller.
  stop("FAIL: long format data could not be created for mmcsm_lig2", call. = FALSE)
}

# NEW columns [outcome and outcome colname]
lf_mmcsm_lig2$outcome_colname = "mmcsm_lig_outcome"
lf_mmcsm_lig2$outcome = lf_mmcsm_lig2$mmcsm_lig_outcome

# DROP static cols: remove the rows that came from the static end columns,
# then rebuild the factor so those levels are dropped as well.
lf_mmcsm_lig2 = lf_mmcsm_lig2[!lf_mmcsm_lig2$param_type%in%c(static_cols_end),]
lf_mmcsm_lig2$param_type = factor(lf_mmcsm_lig2$param_type)
table(lf_mmcsm_lig2$param_type); colnames(lf_mmcsm_lig2)

# Assign them to the output list
wf_lf_dataL[['wf_mmcsm_lig2']] = wf_mmcsm_lig2
wf_lf_dataL[['lf_mmcsm_lig2']] = lf_mmcsm_lig2
#=========================
# mcsm-ppi2 affinity
# data filtered by cut off
@ -730,6 +728,54 @@ if (tolower(gene)%in%geneL_ppi2){
}
#====================
# mcsm-NA affinity
# data filtered by cut off
#====================
# Build the wide-format (WF) and long-format (LF) mCSM-NA tables and store both
# in the output list wf_lf_dataL. Runs only when the lower-cased gene name is
# listed in geneL_na.
# NOTE(review): static_cols_start, static_cols_end, mcsm_na_dn and
# comb_df_sl_na are defined outside this hunk -- confirm their contents.
if (tolower(gene)%in%geneL_na){
  #---------------
  # mCSM-NA: WF and LF
  #-----------------
  # WF data: mcsm-na
  cols_to_select_mcsm_na = c(static_cols_start
                             , c("mcsm_na_outcome", mcsm_na_dn)
                             , static_cols_end)
  #wf_mcsm_na = comb_df_sl[, cols_to_select_mcsm_na]
  wf_mcsm_na = comb_df_sl_na[, cols_to_select_mcsm_na]

  # Columns that are NOT gathered: the leading static columns plus the outcome
  # column. seq_len() is used instead of 1:n so the index can never run backwards.
  pivot_cols_mcsm_na = cols_to_select_mcsm_na[seq_len(length(static_cols_start) + 1)]; pivot_cols_mcsm_na

  # Each gathered column contributes one LF row per WF row.
  expected_rows_lf = nrow(wf_mcsm_na) * (length(wf_mcsm_na) - length(pivot_cols_mcsm_na))
  expected_rows_lf

  # LF data: mcsm-na
  # Gathers every column from mcsm_na_dn through the last static end column
  # into param_type (factor, in column order) / param_value pairs.
  lf_mcsm_na = gather(wf_mcsm_na
                      , key = param_type
                      , value = param_value
                      , all_of(mcsm_na_dn):tail(static_cols_end,1)
                      , factor_key = TRUE)

  if (nrow(lf_mcsm_na) == expected_rows_lf){
    cat("\nPASS: long format data created for ", mcsm_na_dn)
  }else{
    # Raise an error rather than calling quit(): quit() would end the whole R
    # session (with exit status 0 by default) and cannot be handled by a caller.
    # The message now names mcsm_na; it previously said "duet" (copy-paste slip).
    stop("FAIL: long format data could not be created for mcsm_na", call. = FALSE)
  }

  # NEW columns [outcome and outcome colname]
  lf_mcsm_na$outcome_colname = "mcsm_na_outcome"
  lf_mcsm_na$outcome = lf_mcsm_na$mcsm_na_outcome

  # DROP static cols: remove the rows that came from the static end columns,
  # then rebuild the factor so those levels are dropped as well.
  lf_mcsm_na = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_cols_end),]
  lf_mcsm_na$param_type = factor(lf_mcsm_na$param_type)
  table(lf_mcsm_na$param_type); colnames(lf_mcsm_na)

  # Assign them to the output list
  wf_lf_dataL[['wf_mcsm_na']] = wf_mcsm_na
  wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na
}
return(wf_lf_dataL)
}
############################################################################

View file

@ -18,8 +18,9 @@ lf_bp2 <- function(lf_df #lf_duet
, y_scales = "free_y"
, colour_bp_strip = "khaki2"
, dot_size = 3
, dot_transparency = 1 #0.3: lighter
, dot_transparency = 0.3 #0.3: lighter
, violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL
, line_thickness = 0.65
, my_ats = 22 # axis text size
, my_als = 20 # axis label size
, my_fls = 20 # facet label size
@ -69,6 +70,7 @@ lf_bp2 <- function(lf_df #lf_duet
ggplot2::scale_color_manual(values = lf_bp_colours) +
geom_violin(trim = T
, size = line_thickness
, scale = "width"
, colour = "black"
#, position = position_dodge(width = 0.9)
@ -98,11 +100,11 @@ lf_bp2 <- function(lf_df #lf_duet
, x = ""
, y = "") +
stat_compare_means(comparisons = my_comparisonsL
, method = stat_method
, paired = my_paired
, label = stat_label[2]) +
, label = stat_label[2]
, size = 5) +
geom_quasirandom(
size = dot_size
, alpha = dot_transparency

View file

@ -196,11 +196,11 @@ corr_df_m3_f = corr_data_extract(merged_df3
, extract_scaled_cols = F)
head(corr_df_m3_f)
corr_df_m2_f = corr_data_extract(merged_df2
, gene = gene
, drug = drug
, extract_scaled_cols = F)
head(corr_df_m2_f)
# corr_df_m2_f = corr_data_extract(merged_df2
# , gene = gene
# , drug = drug
# , extract_scaled_cols = F)
# head(corr_df_m2_f)
s4 = c("\nSuccessfully sourced Corr_data.R")
cat(s4)

View file

@ -76,8 +76,9 @@ lf_duetP$param_type = factor(lf_duetP$param_type)
table(lf_duetP$param_type)
duetP = lf_bp2(lf_duetP
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 0.2)
#==============
# Plot:FoldX
@ -89,8 +90,9 @@ lf_foldxP$param_type = factor(lf_foldxP$param_type)
table(lf_foldxP$param_type)
foldxP = lf_bp2(lf_foldxP
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 0.1)
#==============
# Plot:DeepDDG
@ -102,9 +104,10 @@ lf_deepddgP$param_type = factor(lf_deepddgP$param_type)
table(lf_deepddgP$param_type)
deepddgP = lf_bp2(lf_deepddgP
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 0.3)
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 0.2)
deepddgP
#==============
@ -117,8 +120,10 @@ lf_dynamut2P$param_type = factor(lf_dynamut2P$param_type)
table(lf_dynamut2P$param_type)
dynamut2P = lf_bp2(lf_dynamut2P
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 0.2)
#==============
# Plot:ConSurf
@ -130,8 +135,8 @@ lf_consurfP$param_type = factor(lf_consurfP$param_type)
table(lf_consurfP$param_type)
consurfP = lf_bp2(lf_consurfP
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F)
#==============
# Plot:SNAP2
@ -143,8 +148,8 @@ lf_snap2P$param_type = factor(lf_snap2P$param_type)
table(lf_snap2P$param_type)
snap2P = lf_bp2(lf_snap2P
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F)
#==============
# Plot:PROVEAN
@ -156,12 +161,13 @@ lf_proveanP$param_type = factor(lf_proveanP$param_type)
table(lf_proveanP$param_type)
proveanP = lf_bp2(lf_proveanP
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F)
#==============
############################################################################
#================
# Plot: mCSM-lig
#==============
#================
lf_mcsm_ligP = all_dm_om_df[['lf_mcsm_lig']]
#lf_mcsm_ligP = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_colsP),]
table(lf_mcsm_ligP$param_type)
@ -169,12 +175,30 @@ lf_mcsm_ligP$param_type = factor(lf_mcsm_ligP$param_type)
table(lf_mcsm_ligP$param_type)
mcsmligP = lf_bp2(lf_mcsm_ligP
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 1)
#==============
#=================
# Plot: mmCSM-lig2
#=================
# Fetch the long-format mmCSM-lig2 data from the all_dm_om_df list and draw
# its plot with lf_bp2().
lf_mmcsm_lig2P = all_dm_om_df[['lf_mmcsm_lig2']]
#lf_mmcsm_lig2P = lf_mmcsm_lig2P[!lf_mmcsm_lig2P$param_type%in%c(static_colsP),]

# Level counts before and after re-factoring; factor() drops unused levels.
table(lf_mmcsm_lig2P$param_type)
lf_mmcsm_lig2P$param_type = factor(lf_mmcsm_lig2P$param_type)
table(lf_mmcsm_lig2P$param_type)

# FALSE is spelled out: F is an ordinary variable that can be reassigned.
mcsmlig2P = lf_bp2(lf_mmcsm_lig2P
                   #, p_title = ""
                   , violin_quantiles = c(0.5), monochrome = FALSE
                   , dot_transparency = 1)
mcsmlig2P
#================
# Plot: mCSM-ppi2
#==============
#================
if (tolower(gene)%in%geneL_ppi2){
lf_mcsm_ppi2P = all_dm_om_df[['lf_mcsm_ppi2']]
#lf_mcsm_ppi2P = lf_mcsm_ppi2[!lf_mcsm_ppi2$param_type%in%c(static_colsP),]
@ -183,8 +207,10 @@ if (tolower(gene)%in%geneL_ppi2){
table(lf_mcsm_ppi2P$param_type)
mcsmppi2P = lf_bp2(lf_mcsm_ppi2P
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 1)
}
#==============
@ -198,28 +224,39 @@ if (tolower(gene)%in%geneL_na){
table(lf_mcsm_naP$param_type)
mcsmnaP = lf_bp2(lf_mcsm_naP
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 1)
}
######################################
# Outplot with stats
######################################
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
dm_om_combinedP = paste0(outdir_images
,tolower(gene)
,"_dm_om_all.svg" )
cat("DM OM plots with stats:", dm_om_combinedP)
svg(dm_om_combinedP, width = 32, height = 18)
cowplot::plot_grid(
cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P, genomicsP, distanceP
, nrow=1),
# cowplot::plot_grid(genomicsP, distanceP
# , nrow = 1),
, nrow=1
, rel_widths = c(1/7, 1/7,1/7,1/7, 1/7, 1.75/7)),
#, rel_widths = c(1/8, 1/8,1/8,1/8, 1/8, 2.75/8)), # for 3 distances
cowplot::plot_grid(consurfP, snap2P, proveanP
, mcsmligP
, mcsmlig2P
, mcsmppi2P
#, mcsmnaP
, nrow=1),
nrow=2)
dev.off()
foo = lf_consurfP
#foo = lf_consurfP
# proveanP = lf_bp2(lf_proveanP, colour_categ = "mutation_info_labels"
# , p_title = paste0("Evolutionary conservation")

View file

@ -8,6 +8,11 @@ source("~/git/LSHTM_analysis/config/embb.R")
# get plotting dfs
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
###################################################################
# FIXME: ADD distance to NA when SP replies
# DONE: plotting_globals.R
@ -86,12 +91,6 @@ all_cols = c(common_cols
, scaled_cols_conservation
, outcome_cols_conservation)
#=======
# output
#=======
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
####################################
# merged_df3: NECESSARY pre-processing
###################################