This commit is contained in:
Tanushree Tunstall 2022-08-05 16:13:57 +01:00
parent 6cb9998c4c
commit 33925dafe9
4 changed files with 142 additions and 99 deletions

View file

@ -26,6 +26,7 @@ dm_om_wf_lf_data <- function(df
, categ_cols_to_factor){
df = as.data.frame(df)
df$maf = log10(df$maf) # can't see otherwise
# Initialise the required dfs based on gene name
geneL_normal = c("pnca")
@ -93,7 +94,9 @@ dm_om_wf_lf_data <- function(df
, "mcsm_ppi2_affinity" , "mcsm_ppi2_scaled" , "mcsm_ppi2_outcome"
, "consurf_score" , "consurf_scaled" , "consurf_outcome" # exists now
, "snap2_score" , "snap2_scaled" , "snap2_outcome"
, "mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome")
, "mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome"
, "provean_score" , "provean_scaled" , "provean_outcome")
}else{
colnames_to_extract = c(mut_colname, mut_info_colname, mut_info_label_colname
, aa_pos_colname, LigDist_colname
@ -143,19 +146,19 @@ dynamut2_dn = paste0("Dynamut2 " , stability_suffix); dynamut2_dn
mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn
mcsm_ppi2_dn = paste0("mCSM-PPI2 ", stability_suffix); mcsm_ppi2_dn
consurf_dn = paste0("Consurf"); consurf_dn
consurf_dn = paste0("ConSurf"); consurf_dn
snap2_dn = paste0("SNAP2"); snap2_dn
provean_dn = paste0("PROVEAN"); provean_dn
# change column names: plyr
new_colnames = c(asa = "ASA"
, rsa = "RSA"
, rd_values = "RD"
, kd_values = "KD"
, log10_or_mychisq = "Log10 (OR)"
, neglog_pval_fisher = "-Log (P)"
#, log10_or_mychisq = "Log10(OR)"
#, neglog_pval_fisher = "-Log(P)"
#, af = "MAF"
, maf = "MAF"
, maf = "Log10(MAF)"
#, ligand_dist_colname= lig_dn # cannot handle variable name 'ligand_dist_colname'
, affinity_scaled = mcsm_lig_dn
, duet_scaled = duet_dn
@ -163,9 +166,11 @@ new_colnames = c(asa = "ASA"
, deepddg_scaled = deepddg_dn
, ddg_dynamut2_scaled = dynamut2_dn
, mcsm_na_scaled = mcsm_na_dn
, mcsm_ppi2_affinity = mcsm_ppi2_dn
, consurf_score = consurf_dn
, snap2_score = snap2_dn)
, mcsm_ppi2_scaled = mcsm_ppi2_dn
, consurf_scaled = consurf_dn
, snap2_scaled = snap2_dn
, provean_scaled = provean_dn)
comb_df_sl1 = plyr::rename(comb_df_s
, replace = new_colnames
@ -173,7 +178,8 @@ comb_df_sl1 = plyr::rename(comb_df_s
, warn_duplicated = T)
# renaming colname using variable i.e ligand_dist_colname: dplyr
comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(ligand_dist_colname))
#comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(ligand_dist_colname))
comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(LigDist_colname)) # NEW
names(comb_df_sl)
#=======================
@ -206,8 +212,8 @@ static_cols_end = c(lig_dn
, "RSA"
, "RD"
, "KD"
, "MAF"
, "Log10 (OR)"
, "Log10(MAF)"
#, "Log10(OR)"
#, "-Log(P)"
)
@ -344,19 +350,22 @@ wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2
#1--> "most_variable", 2--> "", 3-->"", 4-->""
#5-->"", 6-->"", 7-->"", 8-->"", 9-->"most_conserved"
#====================
# FIXME: if you add category column to consurf
# WF data: consurf
cols_to_select_consurf = c(static_cols_start, c("consurf_outcome", consurf_dn), static_cols_end)
wf_consurf = comb_df_sl[, cols_to_select_consurf]
pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start) + 1)]; pivot_cols_consurf
# WF data: consurf
cols_to_select_consurf = c(static_cols_start, c(consurf_dn), static_cols_end)
wf_consurf = comb_df_sl[, cols_to_select_consurf]
pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start))]; pivot_cols_consurf
expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf))
expected_rows_lf
# when outcome didn't exist
#cols_to_select_consurf = c(static_cols_start, c(consurf_dn), static_cols_end)
#wf_consurf = comb_df_sl[, cols_to_select_consurf]
#
# pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start))]; pivot_cols_consurf
# expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf))
# expected_rows_lf
# LF data: consurf
lf_consurf = gather(wf_consurf
, key = param_type
@ -403,6 +412,37 @@ if (nrow(lf_snap2) == expected_rows_lf){
# Assign them to the output list
wf_lf_dataL[['wf_snap2']] = wf_snap2
wf_lf_dataL[['lf_snap2']] = lf_snap2
#==============
# PROVEAN: LF
#==============
# WF data: provean
# Columns kept, in order: the leading static columns, the PROVEAN outcome
# column and the renamed PROVEAN score column (provean_dn), then the trailing
# static columns.
cols_to_select_provean = c(static_cols_start, c("provean_outcome", provean_dn), static_cols_end)
wf_provean = comb_df_sl[, cols_to_select_provean]

# Columns that are NOT gathered: every leading static column plus
# "provean_outcome" (hence the + 1).
pivot_cols_provean = cols_to_select_provean[1: (length(static_cols_start) + 1)]; pivot_cols_provean

# Each gathered column contributes one long-format row per wide-format row.
expected_rows_lf = nrow(wf_provean) * (length(wf_provean) - length(pivot_cols_provean))
expected_rows_lf

# LF data: provean
# Gather everything from the PROVEAN score column through the last trailing
# static column into param_type / param_value pairs.
lf_provean = gather(wf_provean
                    , key = param_type
                    , value = param_value
                    , all_of(provean_dn):tail(static_cols_end,1)
                    , factor_key = TRUE)

# Sanity check: the long-format table must have the expected number of rows.
if (nrow(lf_provean) == expected_rows_lf){
  cat("\nPASS: long format data created for ", provean_dn)
}else{
  # Report the dataset that actually failed (PROVEAN), not another tool's name.
  cat("\nFAIL: long format data could not be created for", provean_dn)
  # NOTE(review): quit() ends the whole R session and, by default, exits with
  # status 0; consider stop() so callers can detect the failure.
  quit()
}

# Assign them to the output list
wf_lf_dataL[['wf_provean']] = wf_provean
wf_lf_dataL[['lf_provean']] = lf_provean
###########################################################################
# AFFINITY cols
###########################################################################

View file

@ -25,7 +25,7 @@ lf_bp <- function(lf_df = lf_duet
, make_boxplot = FALSE
, bp_width = c("auto", 0.5)
, add_stats = TRUE
, stat_grp_comp = c("DM", "OM")
, stat_grp_comp = c("R", "S")
, stat_method = "wilcox.test"
, my_paired = FALSE
, stat_label = c("p.format", "p.signif") ){
@ -129,77 +129,77 @@ lf_bp <- function(lf_df = lf_duet
# TODO: plot_ly()
#############################
# lf_bp_plotly(): interactive violin + beeswarm plot built from long-format data.
#
# Draws violins with beeswarm points overlaid, one facet per value of
# `facet_var`, then converts the ggplot object with ggplotly().
#
# Arguments:
#   lf_df            : data frame handed to ggplot()
#   p_title          : plot title
#   colour_categ     : text naming the column used to colour the points
#   x_grp, y_var     : text naming the x and y columns; each is evaluated via
#                      eval(parse(text = ...))
#   facet_var        : text naming the faceting column (evaluated the same way)
#   n_facet_row      : number of facet rows (facet_wrap nrow)
#   y_scales         : facet_wrap `scales` value
#   colour_bp_strip  : fill colour of the facet strip background
#   dot_size         : point size for the beeswarm layer
#   dot_transparency : point alpha for the beeswarm layer
#   violin_quantiles : quantiles drawn on the violins (can be NULL)
#   my_ats, my_als, my_fls, my_pts : axis text, axis label, facet label and
#                      plot title sizes
#   bp_width         : accepted but not used in the body
#
# Returns: the object produced by ggplotly().
#
# NOTE(review): the defaults for x_grp, y_var and facet_var are unquoted names,
# so they are looked up as variables when the argument is not supplied, while
# parse(text = ...) works on text -- confirm callers always pass strings.
lf_bp_plotly <- function(lf_df
                         , p_title = ""
                         , colour_categ = ""
                         , x_grp = mutation_info
                         , y_var = param_value
                         , facet_var = param_type
                         , n_facet_row = 1
                         , y_scales = "free_y"
                         , colour_bp_strip = "khaki2"
                         , dot_size = 3
                         , dot_transparency = 0.3
                         , violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL
                         , my_ats = 20 # axis text size
                         , my_als = 18 # axis label size
                         , my_fls = 18 # facet label size
                         , my_pts = 22 # plot title size)
                         #, make_boxplot = FALSE
                         , bp_width = c("auto", 0.5)
                         #, add_stats = FALSE
                         #, stat_grp_comp = c("DM", "OM")
                         #, stat_method = "wilcox.test"
                         #, my_paired = FALSE
                         #, stat_label = c("p.format", "p.signif")
){
  # label1..label3 are extra aesthetics carried along with the plot; the third
  # one is spelled `label3` to match its siblings.
  OutPlotly = ggplot(lf_df, aes(x = eval(parse(text = x_grp))
                                , y = eval(parse(text = y_var))
                                , label1 = x_grp
                                , label2 = y_var
                                , label3 = colour_categ) ) +
    facet_wrap(~ eval(parse(text = facet_var))
               , nrow = n_facet_row
               , scales = y_scales) +
    # TRUE rather than T: T is an ordinary variable and can be reassigned.
    geom_violin(trim = TRUE
                , scale = "width"
                , draw_quantiles = violin_quantiles) +
    geom_beeswarm(priority = "density"
                  , size = dot_size
                  , alpha = dot_transparency
                  , show.legend = FALSE
                  , cex = 0.8
                  , aes(colour = factor(eval(parse(text = colour_categ) ) ) ) ) +
    # NOTE(review): the axis titles use my_ats although my_als is described as
    # the axis label size -- confirm which one is intended.
    theme(axis.text.x = element_text(size = my_ats)
          , axis.text.y = element_text(size = my_ats
                                       , angle = 0
                                       , hjust = 1
                                       , vjust = 0)
          , axis.title.x = element_text(size = my_ats)
          , axis.title.y = element_text(size = my_ats)
          , plot.title = element_text(size = my_pts
                                      , hjust = 0.5
                                      , colour = "black"
                                      , face = "bold")
          , strip.background = element_rect(fill = colour_bp_strip)
          , strip.text.x = element_text(size = my_fls
                                        , colour = "black")
          , legend.title = element_text(color = "black"
                                        , size = my_als)
          , legend.text = element_text(size = my_ats)
          , legend.position = "none")+
    labs(title = p_title
         , x = ""
         , y = "")

  # Convert the static ggplot into an interactive plotly object.
  OutPlotly = ggplotly(OutPlotly
                       #, tooltip = c("label")
  )
  return(OutPlotly)
}
# lf_bp_plotly <- function(lf_df
# , p_title = ""
# , colour_categ = ""
# , x_grp = mutation_info
# , y_var = param_value
# , facet_var = param_type
# , n_facet_row = 1
# , y_scales = "free_y"
# , colour_bp_strip = "khaki2"
# , dot_size = 3
# , dot_transparency = 0.3
# , violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL
# , my_ats = 20 # axis text size
# , my_als = 18 # axis label size
# , my_fls = 18 # facet label size
# , my_pts = 22 # plot title size)
# #, make_boxplot = FALSE
# , bp_width = c("auto", 0.5)
# #, add_stats = FALSE
# #, stat_grp_comp = c("DM", "OM")
# #, stat_method = "wilcox.test"
# #, my_paired = FALSE
# #, stat_label = c("p.format", "p.signif")
# ){
#
# OutPlotly = ggplot(lf_df, aes(x = eval(parse(text = x_grp))
# , y = eval(parse(text = y_var))
# , label1 = x_grp
# , label2 = y_var
# , lable3 = colour_categ) ) +
#
# facet_wrap(~ eval(parse(text = facet_var))
# , nrow = n_facet_row
# , scales = y_scales) +
#
# geom_violin(trim = T
# , scale = "width"
# , draw_quantiles = violin_quantiles) +
#
# geom_beeswarm(priority = "density"
# , size = dot_size
# , alpha = dot_transparency
# , show.legend = FALSE
# , cex = 0.8
# , aes(colour = factor(eval(parse(text = colour_categ) ) ) ) ) +
# theme(axis.text.x = element_text(size = my_ats)
# , axis.text.y = element_text(size = my_ats
# , angle = 0
# , hjust = 1
# , vjust = 0)
# , axis.title.x = element_text(size = my_ats)
# , axis.title.y = element_text(size = my_ats)
# , plot.title = element_text(size = my_pts
# , hjust = 0.5
# , colour = "black"
# , face = "bold")
# , strip.background = element_rect(fill = colour_bp_strip)
# , strip.text.x = element_text(size = my_fls
# , colour = "black")
# , legend.title = element_text(color = "black"
# , size = my_als)
# , legend.text = element_text(size = my_ats)
# , legend.position = "none")+
#
# labs(title = p_title
# , x = ""
# , y = "")
#
# OutPlotly = ggplotly(OutPlotly
# #, tooltip = c("label")
# )
# return(OutPlotly)
#
# }

View file

@ -3,7 +3,7 @@ library(ggpubr)
lf_unpaired_stats <- function(lf_data
, lf_stat_value = "param_value"
, lf_stat_group = "mutation_info"
, lf_stat_group = "mutation_info_labels"
, lf_col_statvars = "param_type"
, my_paired = FALSE
, stat_adj = "none"){

View file

@ -140,6 +140,9 @@ lf_consurf = all_dm_om_df[['lf_consurf']]
# SNAP2 and PROVEAN tables (wf_* and lf_* entries), extracted by name from
# all_dm_om_df.
wf_snap2 = all_dm_om_df[['wf_snap2']]
lf_snap2 = all_dm_om_df[['lf_snap2']]
wf_provean = all_dm_om_df[['wf_provean']]
lf_provean = all_dm_om_df[['lf_provean']]
if (tolower(gene)%in%geneL_na){
wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']]
lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]