More plot modifications, mainly to the dm and om plots

This commit is contained in:
Tanushree Tunstall 2022-08-08 15:32:16 +01:00
parent 4e6f10d1ba
commit 0234a8f77b
6 changed files with 501 additions and 358 deletions

View file

@ -74,17 +74,23 @@ corr_data_extract <- function(df
if (tolower(gene)%in%geneL_normal){ if (tolower(gene)%in%geneL_normal){
colnames_to_extract = c(common_colnames) colnames_to_extract = c(common_colnames)
display_colnames = c(display_common_colnames) display_colnames = c(display_common_colnames)
corr_df = df[,colnames_to_extract]
colnames(corr_df) = display_colnames
} }
if (tolower(gene)%in%geneL_ppi2){ if (tolower(gene)%in%geneL_ppi2){
colnames_to_extract = c(common_colnames ,"mcsm_ppi2_affinity", ppi2Dist_colname) colnames_to_extract = c(common_colnames ,"mcsm_ppi2_affinity", ppi2Dist_colname)
display_colnames = c(display_common_colnames,"mCSM-PPI2" , "PPI-Dist") display_colnames = c(display_common_colnames,"mCSM-PPI2" , "PPI-Dist")
corr_df = df[,colnames_to_extract]
colnames(corr_df) = display_colnames
} }
if (tolower(gene)%in%geneL_na){ if (tolower(gene)%in%geneL_na){
colnames_to_extract = c(common_colnames,"mcsm_na_affinity", naDist_colname) colnames_to_extract = c(common_colnames,"mcsm_na_affinity", naDist_colname)
display_colnames = c(display_common_colnames, "mCSM-NA", "NA-Dist") display_colnames = c(display_common_colnames, "mCSM-NA", "NA-Dist")
corr_df = df[,colnames_to_extract]
colnames(corr_df) = display_colnames
} }
# [optional] arg: extract_scaled_cols # [optional] arg: extract_scaled_cols
@ -92,19 +98,23 @@ corr_data_extract <- function(df
cat("\nExtracting scaled columns as well...\n") cat("\nExtracting scaled columns as well...\n")
all_scaled_cols = colnames(merged_df3)[grep(".*scaled", colnames(merged_df3))] all_scaled_cols = colnames(merged_df3)[grep(".*scaled", colnames(merged_df3))]
colnames_to_extract = c(colnames_to_extract, all_scaled_cols) colnames_to_extract = c(colnames_to_extract, all_scaled_cols)
corr_df = df[,colnames_to_extract]
colnames(corr_df) = display_colnames
}else{ }else{
colnames_to_extract = colnames_to_extract colnames_to_extract = colnames_to_extract
corr_df = df[,colnames_to_extract]
colnames(corr_df) = display_colnames
} }
# extract df based on gene # WORKED:
corr_df = df[,colnames_to_extract] # # extract df based on gene
colnames(corr_df) # corr_df = df[,colnames_to_extract]
display_colnames # colnames(corr_df)
# display_colnames
# arg: colnames_display_key #
colnames(corr_df)[colnames(corr_df)%in%colnames_to_extract] <- display_colnames # # arg: colnames_display_key
colnames(corr_df) # colnames(corr_df)[colnames(corr_df)%in%colnames_to_extract] <- display_colnames
# colnames(corr_df)
cat("\nExtracted ncols:", ncol(corr_df) cat("\nExtracted ncols:", ncol(corr_df)
,"\nRenaming successful") ,"\nRenaming successful")

View file

@ -8,11 +8,11 @@
################################################################## ##################################################################
# from plotting_globals.R # from plotting_globals.R
# DistCutOff, LigDist_colname, ppi2Dist_colname, naDist_colname # DistCutOff, LigDist_colname, ppi2Dist_colname, naDist_colname
gene
dm_om_wf_lf_data <- function(df dm_om_wf_lf_data <- function(df
, gene # from globals , gene # from globals
, colnames_to_extract , colnames_to_extract
#, ligand_dist_colname = LigDist_colname # from globals
#, LigDist_colname # from globals used #, LigDist_colname # from globals used
#, ppi2Dist_colname #from globals used #, ppi2Dist_colname #from globals used
#, naDist_colname #from globals used #, naDist_colname #from globals used
@ -21,13 +21,13 @@ dm_om_wf_lf_data <- function(df
, snp_colname = "mutationinformation" , snp_colname = "mutationinformation"
, aa_pos_colname = "position" # to sort df by , aa_pos_colname = "position" # to sort df by
, mut_colname = "mutation" , mut_colname = "mutation"
, mut_info_colname = "mutation_info" , mut_info_colname = "dst_mode"
, mut_info_label_colname = "mutation_info_labels" # if empty, below used , mut_info_label_colname = "mutation_info_labels"
#, dr_other_muts_labels = c("DM", "OM") # only used if ^^ = ""
, categ_cols_to_factor){ , categ_cols_to_factor){
df = as.data.frame(df) df = as.data.frame(df)
df$maf = log10(df$maf) # can't see otherwise df$maf2 = log10(df$maf) # can't see otherwise
sum(is.na(df$maf2))
# Initialise the required dfs based on gene name # Initialise the required dfs based on gene name
geneL_normal = c("pnca") geneL_normal = c("pnca")
@ -50,6 +50,8 @@ dm_om_wf_lf_data <- function(df
, lf_consurf = data.frame() , lf_consurf = data.frame()
, wf_snap2 = data.frame() , wf_snap2 = data.frame()
, lf_snap2 = data.frame() , lf_snap2 = data.frame()
, wf_dist_gen = data.frame() # NEW
, lf_dist_gen = data.frame() # NEW
) )
# additional dfs # additional dfs
@ -76,132 +78,170 @@ dm_om_wf_lf_data <- function(df
, length(wf_lf_dataL)) , length(wf_lf_dataL))
#======================================================================= #=======================================================================
if (missing(colnames_to_extract)){ # display names
colnames_to_extract = c(snp_colname
, mut_colname, mut_info_colname, mut_info_label_colname
, aa_pos_colname
, LigDist_colname # from globals
, ppi2Dist_colname # from globals
, naDist_colname # from globals
, "duet_stability_change" , "duet_scaled" , "duet_outcome"
, "ligand_affinity_change", "affinity_scaled" , "ligand_outcome"
, "ddg_foldx" , "foldx_scaled" , "foldx_outcome"
, "deepddg" , "deepddg_scaled" , "deepddg_outcome"
, "asa" , "rsa"
, "rd_values" , "kd_values"
, "log10_or_mychisq" , "neglog_pval_fisher" , "maf" #"af"
, "ddg_dynamut2" , "ddg_dynamut2_scaled", "ddg_dynamut2_outcome"
, "mcsm_ppi2_affinity" , "mcsm_ppi2_scaled" , "mcsm_ppi2_outcome"
, "consurf_score" , "consurf_scaled" , "consurf_outcome" # exists now
, "consurf_colour_rev"
, "snap2_score" , "snap2_scaled" , "snap2_outcome"
, "mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome"
, "provean_score" , "provean_scaled" , "provean_outcome")
}else{
colnames_to_extract = c(mut_colname, mut_info_colname, mut_info_label_colname
, aa_pos_colname, LigDist_colname
, colnames_to_extract)
}
comb_df = df[, colnames(df)%in%colnames_to_extract]
comb_df_s = dplyr::arrange(comb_df, aa_pos_colname)
#=======================================================================
if(missing(categ_cols_to_factor)){
categ_cols_to_factor = grep( "_outcome|_info", colnames(comb_df_s) )
}else{
categ_cols_to_factor = categ_cols_to_factor
}
#fact_cols = colnames(comb_df_s)[grepl( "_outcome|_info", colnames(comb_df_s) )]
fact_cols = colnames(comb_df_s)[categ_cols_to_factor]
if (any(lapply(comb_df_s[, fact_cols], class) == "character")){
cat("\nChanging", length(categ_cols_to_factor), "cols to factor")
comb_df_s[, fact_cols] <- lapply(comb_df_s[, fact_cols], as.factor)
if (all(lapply(comb_df_s[, fact_cols], class) == "factor")){
cat("\nSuccessful: cols changed to factor")
}
}else{
cat("\nRequested cols aready factors")
}
#=======================================================================
table(comb_df_s[[mut_info_colname]])
# pretty display names i.e. labels to reduce major code duplication later
foo_cnames = data.frame(colnames(comb_df_s))
names(foo_cnames) <- "old_name"
stability_suffix <- paste0(delta_symbol, delta_symbol, "G") stability_suffix <- paste0(delta_symbol, delta_symbol, "G")
#flexibility_suffix <- paste0(delta_symbol, delta_symbol, "S")
#lig_dn = paste0("Ligand distance (", angstroms_symbol, ")"); lig_dn
#mcsm_lig_dn = paste0("Ligand affinity (log fold change)"); mcsm_lig_dn
lig_dn = paste0("Lig Dist(", angstroms_symbol, ")"); lig_dn
mcsm_lig_dn = paste0("mCSM-lig"); mcsm_lig_dn
duet_dn = paste0("DUET ", stability_suffix); duet_dn duet_dn = paste0("DUET ", stability_suffix); duet_dn
foldx_dn = paste0("FoldX ", stability_suffix); foldx_dn foldx_dn = paste0("FoldX ", stability_suffix); foldx_dn
deepddg_dn = paste0("Deepddg " , stability_suffix); deepddg_dn deepddg_dn = paste0("Deepddg " , stability_suffix); deepddg_dn
dynamut2_dn = paste0("Dynamut2 " , stability_suffix); dynamut2_dn dynamut2_dn = paste0("Dynamut2 " , stability_suffix); dynamut2_dn
consurf_dn = "ConSurf"
snap2_dn = "SNAP2"
provean_dn = "PROVEAN"
or_dn = "Log10(OR)"
pval_dn = "-Log10(P)"
maf2_dn = "Log10(MAF)"
asa_dn = "ASA"
rsa_dn = "RSA"
rd_dn = "RD"
kd_dn = "KD"
lig_dist_dn = paste0("Lig Dist(", angstroms_symbol, ")"); lig_dist_dn
mcsm_lig_dn = paste0("mCSM-lig"); mcsm_lig_dn
mmcsm_lig_dn2 = paste0("mmCSM-lig"); mmcsm_lig_dn2
na_dist_dn = paste0("NA Dist(", angstroms_symbol, ")"); na_dist_dn
mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn
ppi2_dist_dn = paste0("PPI Dist(", angstroms_symbol, ")"); ppi2_dist_dn
mcsm_ppi2_dn = paste0("mCSM-PPI2 ", stability_suffix); mcsm_ppi2_dn mcsm_ppi2_dn = paste0("mCSM-PPI2 ", stability_suffix); mcsm_ppi2_dn
consurf_dn = paste0("ConSurf"); consurf_dn
snap2_dn = paste0("SNAP2"); snap2_dn
provean_dn = paste0("PROVEAN"); provean_dn
# change column names: plyr #=======================================================================
new_colnames = c(asa = "ASA" if(missing(categ_cols_to_factor)){
, rsa = "RSA" categ_cols_to_factor = grep( "_outcome|_info", colnames(df) )
, rd_values = "RD" }else{
, kd_values = "KD" categ_cols_to_factor = categ_cols_to_factor
#, log10_or_mychisq = "Log10(OR)" }
#, neglog_pval_fisher = "-Log(P)" #fact_cols = colnames(comb_df_s)[grepl( "_outcome|_info", colnames(comb_df_s) )]
#, af = "MAF" fact_cols = colnames(df)[categ_cols_to_factor]
, maf = "Log10(MAF)"
#, ligand_dist_colname= lig_dn # cannot handle variable name 'ligand_dist_colname'
, affinity_scaled = mcsm_lig_dn
, duet_scaled = duet_dn
, foldx_scaled = foldx_dn
, deepddg_scaled = deepddg_dn
, ddg_dynamut2_scaled = dynamut2_dn
, mcsm_na_scaled = mcsm_na_dn
, mcsm_ppi2_scaled = mcsm_ppi2_dn
#, consurf_scaled = consurf_dn
, consurf_score = consurf_dn
#, consurf_colour_rev = consurf_dn
#, snap2_scaled = snap2_dn
, snap2_score = snap2_dn
, provean_score = provean_dn)
if (any(lapply(df[, fact_cols], class) == "character")){
cat("\nChanging", length(categ_cols_to_factor), "cols to factor")
df[, fact_cols] <- lapply(df[, fact_cols], as.factor)
if (all(lapply(df[, fact_cols], class) == "factor")){
cat("\nSuccessful: cols changed to factor")
}
}else{
cat("\nRequested cols aready factors")
}
comb_df_sl1 = plyr::rename(comb_df_s cat("\ncols changed to factor are:\n", colnames(df)[categ_cols_to_factor] )
, replace = new_colnames
, warn_missing = T
, warn_duplicated = T)
# renaming colname using variable i.e ligand_dist_colname: dplyr #=======================================================================
#comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(ligand_dist_colname)) if (missing(colnames_to_extract)){
comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(LigDist_colname)) # NEW # NOTE: these vars are from globals
names(comb_df_sl) #LigDist_colname, ppi2Dist_colname, naDist_colname
#======================= common_colnames = c(snp_colname
# NEW: Affinity filtered data , mut_colname , "dst_mode" , mut_info_label_colname
#======================== , aa_pos_colname
# mcsm-lig --> LigDist_colname
comb_df_sl_lig = comb_df_sl[comb_df_sl[[lig_dn]]<DistCutOff,]
# mcsm-ppi2 --> ppi2Dist_colname , "duet_stability_change" , "duet_scaled" , "duet_outcome"
comb_df_sl_ppi2 = comb_df_sl[comb_df_sl[[ppi2Dist_colname]]<DistCutOff,] , "ddg_foldx" , "foldx_scaled" , "foldx_outcome"
, "deepddg" , "deepddg_scaled" , "deepddg_outcome"
, "ddg_dynamut2" , "ddg_dynamut2_scaled" , "ddg_dynamut2_outcome"
# mcsm-na --> naDist_colname , "consurf_score" , "consurf_scaled" , "consurf_outcome" , "consurf_colour_rev"
comb_df_sl_na = comb_df_sl[comb_df_sl[[naDist_colname]]<DistCutOff,] , "snap2_score" , "snap2_scaled" , "snap2_outcome"
, "provean_score" , "provean_scaled" , "provean_outcome"
, "log10_or_mychisq" , "neglog_pval_fisher" , "maf2"
, "asa" , "rsa" , "rd_values" , "kd_values"
, "mmcsm_lig" , "mmcsm_lig_scaled" , "mmcsm_lig_outcome"
, "ligand_affinity_change", "affinity_scaled" , "ligand_outcome" , LigDist_colname
)
display_common_colnames = c(snp_colname
, mut_colname , "dst_mode" , mut_info_label_colname
, aa_pos_colname
, "duet_stability_change" , duet_dn , "duet_outcome"
, "ddg_foldx" , foldx_dn , "foldx_outcome"
, "deepddg" , deepddg_dn , "deepddg_outcome"
, "ddg_dynamut2" , dynamut2_dn , "ddg_dynamut2_outcome"
, consurf_dn , "consurf_scaled" , "consurf_outcome" , "consurf_colour_rev"
, snap2_dn , "snap2_scaled" , "snap2_outcome"
, provean_dn , "provean_scaled" , "provean_outcome"
, or_dn , pval_dn , maf2_dn
, asa_dn , rsa_dn , rd_dn , kd_dn
, "mmcsm_lig" , mmcsm_lig_dn2 , "mmcsm_lig_outcome"
, "ligand_affinity_change", mcsm_lig_dn , "ligand_outcome" , lig_dist_dn
)
if (length(common_colnames) == length(display_common_colnames)){
cat("\nLength match: Proceeding to extracting end cols")
}else{
stop("Abort: Length mismatch: b/w ncols to extract and disply name")
}
# ordering is important!
# static_cols_end = c(lig_dist_dn
# , "ASA"
# , "RSA"
# , "RD"
# , "KD"
# , "Log10(MAF)"
# #, "Log10(OR)"
# #, "-Log(P)"
# )
static_cols_end_common = c(lig_dist_dn, "Log10(MAF)"); static_cols_end_common
if (tolower(gene)%in%geneL_normal){
colnames_to_extract = c(common_colnames)
display_colnames = c(display_common_colnames)
comb_df_sl = df[, colnames_to_extract]
# Rename cols: display names
colnames(comb_df_sl) = display_colnames
#colnames(comb_df)[colnames(comb_df)%in%colnames_to_extract] <- display_colnames
static_cols_end = static_cols_end_common
cat("\nend colnames for gene:", static_cols_end)
}
if (tolower(gene)%in%geneL_ppi2){
colnames_to_extract = c(common_colnames, "mcsm_ppi2_affinity" ,"mcsm_ppi2_scaled" , "mcsm_ppi2_outcome" , ppi2Dist_colname)
display_colnames = c(display_common_colnames,"mcsm_ppi2_affinity", mcsm_ppi2_dn , "mcsm_ppi2_outcome" , ppi2_dist_dn )
comb_df_sl = df[, colnames_to_extract]
# Rename cols: display names
colnames(comb_df_sl) = display_colnames
# Affinity filtered data: mcsm-ppi2 --> ppi2Dist_colname
comb_df_sl_ppi2 = comb_df_sl[comb_df_sl[[ppi2_dist_dn]]<DistCutOff,]
# ordering is important!
static_cols_end = c(ppi2_dist_dn, static_cols_end_common)
cat("\nend colnames for gene:", static_cols_end)
}
# Genes with nucleic-acid affinity data: extract the common columns plus the
# mCSM-NA columns and the NA distance column, then switch to display names.
if (tolower(gene)%in%geneL_na){
  # colnames_to_extract and display_colnames must stay position-aligned:
  # the rename below is purely positional.
  colnames_to_extract = c(common_colnames,"mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome" , naDist_colname)
  display_colnames    = c(display_common_colnames, "mcsm_na_affinity" , mcsm_na_dn , "mcsm_na_outcome" , na_dist_dn)

  comb_df_sl = df[, colnames_to_extract]

  # Rename cols: display names
  # FIX: rename the data frame that was just extracted (comb_df_sl), as the
  # geneL_normal and geneL_ppi2 branches do. The previous target, comb_df, is
  # not created anywhere in this code path, so the rename either failed or
  # left comb_df_sl with its raw column names and broke the filter below.
  colnames(comb_df_sl) = display_colnames

  # Affinity filtered data: mcsm-na --> naDist_colname
  # (rows whose NA distance is below DistCutOff; relies on the renamed column)
  comb_df_sl_na = comb_df_sl[comb_df_sl[[na_dist_dn]]<DistCutOff,]

  # ordering is important!
  static_cols_end = c(na_dist_dn, static_cols_end_common)
  cat("\nend colnames for gene:", static_cols_end)
}
# Affinity filtered data: mcsm-lig: COMMON for all genes, mcsm-lig --> LigDist_colname
comb_df_sl_lig = comb_df_sl[comb_df_sl[[lig_dist_dn]]<DistCutOff,]
}
#####################################################################
static_cols1 = mut_info_label_colname
#######################################################################
#====================== #======================
# Selecting dfs # Selecting dfs
# with appropriate cols # with appropriate cols
@ -209,20 +249,61 @@ static_cols1 = mut_info_label_colname
static_cols_start = c(snp_colname static_cols_start = c(snp_colname
, aa_pos_colname , aa_pos_colname
, mut_colname , mut_colname
, static_cols1) , mut_info_label_colname)
# ordering is important! # static_cols_end
static_cols_end = c(lig_dn cat("\nEnd colnames for gene:", static_cols_end)
, "ASA"
, "RSA"
, "RD"
, "KD"
, "Log10(MAF)"
#, "Log10(OR)"
#, "-Log(P)"
)
######################################################################### #########################################################################
#==============
# Distance and genomics
#==============
# WF data: dist + genomics
cols_to_select_dist_gen = c(static_cols_start, c("duet_outcome", duet_dn), static_cols_end)
wf_dist_gen = comb_df_sl[, cols_to_select_dist_gen]; head(wf_dist_gen)
#pivot_cols_ps = cols_to_select_ps[1:5]; pivot_cols_ps
pivot_cols_dist_gen = cols_to_select_dist_gen[1: (length(static_cols_start) + 1)]; pivot_cols_dist_gen
expected_rows_lf = nrow(wf_dist_gen) * (length(wf_dist_gen) - length(pivot_cols_dist_gen))
expected_rows_lf
# LF dist and genomics
lf_dist_gen = tidyr::gather(wf_dist_gen
, key = param_type
, value = param_value
, all_of(duet_dn):tail(static_cols_end,1)
, factor_key = TRUE)
# Sanity check: the long-format frame must have exactly expected_rows_lf rows
# (computed above from the wide frame's row count and the number of gathered
# columns).
if (nrow(lf_dist_gen) == expected_rows_lf){
  cat("\nPASS: long format data created for Distance and Genomics")
}else{
  # Signal an error instead of calling quit(): quit() terminates the whole R
  # session from inside this helper, whereas stop() lets the caller see and
  # handle the failure. This matches the stop() used for the column-length
  # check earlier in this function.
  stop("FAIL: long format data could not be created for Distance and Genomics")
}
# DROP duet cols
drop_cols = c(duet_dn, "duet_outcome"); drop_cols
table(lf_dist_gen$param_type)
lf_dist_gen = lf_dist_gen[!lf_dist_gen$param_type%in%drop_cols,]
lf_dist_gen$param_type = factor(lf_dist_gen$param_type)
table(lf_dist_gen$param_type)
# NEW columns [outcome and outcome colname]
lf_dist_gen$outcome_colname = mut_info_colname
lf_dist_gen$outcome = lf_dist_gen[[mut_info_label_colname]]
head(lf_dist_gen)
wf_dist_gen = subset(wf_dist_gen, select = !(names(wf_dist_gen) %in% drop_cols))
colnames(wf_dist_gen)
colnames(lf_dist_gen)
# Assign them to the output list
wf_lf_dataL[['wf_dist_gen']] = wf_dist_gen
wf_lf_dataL[['lf_dist_gen']] = lf_dist_gen
##########################################################
#============== #==============
# DUET # DUET
#============== #==============
@ -249,10 +330,17 @@ if (nrow(lf_duet) == expected_rows_lf){
quit() quit()
} }
table(lf_duet$param_type)
# NEW columns [outcome and outcome colname] # NEW columns [outcome and outcome colname]
lf_duet$outcome_colname = "duet_outcome" lf_duet$outcome_colname = "duet_outcome"
lf_duet$outcome = lf_duet$duet_outcome lf_duet$outcome = lf_duet$duet_outcome
# DROP static cols
lf_duet = lf_duet[!lf_duet$param_type%in%c(static_cols_end),]
lf_duet$param_type = factor(lf_duet$param_type)
table(lf_duet$param_type); colnames(lf_duet)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_duet']] = wf_duet wf_lf_dataL[['wf_duet']] = wf_duet
wf_lf_dataL[['lf_duet']] = lf_duet wf_lf_dataL[['lf_duet']] = lf_duet
@ -287,6 +375,11 @@ if (nrow(lf_foldx) == expected_rows_lf){
lf_foldx$outcome_colname = "foldx_outcome" lf_foldx$outcome_colname = "foldx_outcome"
lf_foldx$outcome = lf_foldx$foldx_outcome lf_foldx$outcome = lf_foldx$foldx_outcome
# DROP static cols
lf_foldx = lf_foldx[!lf_foldx$param_type%in%c(static_cols_end),]
lf_foldx$param_type = factor(lf_foldx$param_type)
table(lf_foldx$param_type); colnames(lf_foldx)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_foldx']] = wf_foldx wf_lf_dataL[['wf_foldx']] = wf_foldx
wf_lf_dataL[['lf_foldx']] = lf_foldx wf_lf_dataL[['lf_foldx']] = lf_foldx
@ -321,6 +414,11 @@ if (nrow(lf_deepddg) == expected_rows_lf){
lf_deepddg$outcome_colname = "deepddg_outcome" lf_deepddg$outcome_colname = "deepddg_outcome"
lf_deepddg$outcome = lf_deepddg$deepddg_outcome lf_deepddg$outcome = lf_deepddg$deepddg_outcome
# DROP static cols
lf_deepddg = lf_deepddg[!lf_deepddg$param_type%in%c(static_cols_end),]
lf_deepddg$param_type = factor(lf_deepddg$param_type)
table(lf_deepddg$param_type); colnames(lf_deepddg)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_deepddg']] = wf_deepddg wf_lf_dataL[['wf_deepddg']] = wf_deepddg
wf_lf_dataL[['lf_deepddg']] = lf_deepddg wf_lf_dataL[['lf_deepddg']] = lf_deepddg
@ -354,11 +452,15 @@ if (nrow(lf_dynamut2) == expected_rows_lf){
lf_dynamut2$outcome_colname = "ddg_dynamut2_outcome" lf_dynamut2$outcome_colname = "ddg_dynamut2_outcome"
lf_dynamut2$outcome = lf_dynamut2$ddg_dynamut2_outcome lf_dynamut2$outcome = lf_dynamut2$ddg_dynamut2_outcome
# DROP static cols
lf_dynamut2 = lf_dynamut2[!lf_dynamut2$param_type%in%c(static_cols_end),]
lf_dynamut2$param_type = factor(lf_dynamut2$param_type)
table(lf_dynamut2$param_type); colnames(lf_dynamut2)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_dynamut2']] = wf_dynamut2 wf_lf_dataL[['wf_dynamut2']] = wf_dynamut2
wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2 wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2
###################################################################################### ######################################################################################
#================== #==================
# Consurf: LF # Consurf: LF
@ -405,6 +507,11 @@ if (nrow(lf_consurf) == expected_rows_lf){
lf_consurf$outcome_colname = "consurf_outcome" lf_consurf$outcome_colname = "consurf_outcome"
lf_consurf$outcome = lf_consurf$consurf_outcome lf_consurf$outcome = lf_consurf$consurf_outcome
# DROP static cols
lf_consurf = lf_consurf[!lf_consurf$param_type%in%c(static_cols_end),]
lf_consurf$param_type = factor(lf_consurf$param_type)
table(lf_consurf$param_type); colnames(lf_consurf)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_consurf']] = wf_consurf wf_lf_dataL[['wf_consurf']] = wf_consurf
wf_lf_dataL[['lf_consurf']] = lf_consurf wf_lf_dataL[['lf_consurf']] = lf_consurf
@ -438,6 +545,11 @@ if (nrow(lf_snap2) == expected_rows_lf){
lf_snap2$outcome_colname = "snap2_outcome" lf_snap2$outcome_colname = "snap2_outcome"
lf_snap2$outcome = lf_snap2$snap2_outcome lf_snap2$outcome = lf_snap2$snap2_outcome
# DROP static cols
lf_snap2 = lf_snap2[!lf_snap2$param_type%in%c(static_cols_end),]
lf_snap2$param_type = factor(lf_snap2$param_type)
table(lf_snap2$param_type); colnames(lf_snap2)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_snap2']] = wf_snap2 wf_lf_dataL[['wf_snap2']] = wf_snap2
wf_lf_dataL[['lf_snap2']] = lf_snap2 wf_lf_dataL[['lf_snap2']] = lf_snap2
@ -471,6 +583,11 @@ if (nrow(lf_provean) == expected_rows_lf){
lf_provean$outcome_colname = "provean_outcome" lf_provean$outcome_colname = "provean_outcome"
lf_provean$outcome = lf_provean$provean_outcome lf_provean$outcome = lf_provean$provean_outcome
# DROP static cols
lf_provean = lf_provean[!lf_provean$param_type%in%c(static_cols_end),]
lf_provean$param_type = factor(lf_provean$param_type)
table(lf_provean$param_type); colnames(lf_provean)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_provean']] = wf_provean wf_lf_dataL[['wf_provean']] = wf_provean
wf_lf_dataL[['lf_provean']] = lf_provean wf_lf_dataL[['lf_provean']] = lf_provean
@ -512,6 +629,11 @@ if (nrow(lf_mcsm_lig) == expected_rows_lf){
lf_mcsm_lig$outcome_colname = "ligand_outcome" lf_mcsm_lig$outcome_colname = "ligand_outcome"
lf_mcsm_lig$outcome = lf_mcsm_lig$ligand_outcome lf_mcsm_lig$outcome = lf_mcsm_lig$ligand_outcome
# DROP static cols
lf_mcsm_lig = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_cols_end),]
lf_mcsm_lig$param_type = factor(lf_mcsm_lig$param_type)
table(lf_mcsm_lig$param_type); colnames(lf_mcsm_lig)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_mcsm_lig']] = wf_mcsm_lig wf_lf_dataL[['wf_mcsm_lig']] = wf_mcsm_lig
wf_lf_dataL[['lf_mcsm_lig']] = lf_mcsm_lig wf_lf_dataL[['lf_mcsm_lig']] = lf_mcsm_lig
@ -551,6 +673,11 @@ if (tolower(gene)%in%geneL_na){
lf_mcsm_na$outcome_colname = "mcsm_na_outcome" lf_mcsm_na$outcome_colname = "mcsm_na_outcome"
lf_mcsm_na$outcome = lf_mcsm_na$mcsm_na_outcome lf_mcsm_na$outcome = lf_mcsm_na$mcsm_na_outcome
# DROP static cols
lf_mcsm_na = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_cols_end),]
lf_mcsm_na$param_type = factor(lf_mcsm_na$param_type)
table(lf_mcsm_na$param_type); colnames(lf_mcsm_na)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_mcsm_na']] = wf_mcsm_na wf_lf_dataL[['wf_mcsm_na']] = wf_mcsm_na
wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na
@ -592,6 +719,11 @@ if (tolower(gene)%in%geneL_ppi2){
lf_mcsm_ppi2$outcome_colname = "mcsm_ppi2_outcome" lf_mcsm_ppi2$outcome_colname = "mcsm_ppi2_outcome"
lf_mcsm_ppi2$outcome = lf_mcsm_ppi2$mcsm_ppi2_outcome lf_mcsm_ppi2$outcome = lf_mcsm_ppi2$mcsm_ppi2_outcome
# DROP static cols
lf_mcsm_ppi2 = lf_mcsm_ppi2[!lf_mcsm_ppi2$param_type%in%c(static_cols_end),]
lf_mcsm_ppi2$param_type = factor(lf_mcsm_ppi2$param_type)
table(lf_mcsm_ppi2$param_type); colnames(lf_mcsm_ppi2)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_mcsm_ppi2']] = wf_mcsm_ppi2 wf_lf_dataL[['wf_mcsm_ppi2']] = wf_mcsm_ppi2
wf_lf_dataL[['lf_mcsm_ppi2']] = lf_mcsm_ppi2 wf_lf_dataL[['lf_mcsm_ppi2']] = lf_mcsm_ppi2

View file

@ -6,9 +6,11 @@
# beeswarm # beeswarm
############################# #############################
lf_bp2 <- function(lf_df = lf_duet lf_bp2 <- function(lf_df #lf_duet
, p_title = "" , p_title = ""
, colour_categ = "outcome" #, colour_categ = "outcome"
, colour_categ = "mutation_info_labels"
, dot_colours = c("red", "blue")
, x_grp = "mutation_info_labels" , x_grp = "mutation_info_labels"
, y_var = "param_value" , y_var = "param_value"
, facet_var = "param_type" , facet_var = "param_type"
@ -16,7 +18,7 @@ lf_bp2 <- function(lf_df = lf_duet
, y_scales = "free_y" , y_scales = "free_y"
, colour_bp_strip = "khaki2" , colour_bp_strip = "khaki2"
, dot_size = 3 , dot_size = 3
, dot_transparency = 0.5 , dot_transparency = 1 #0.3: lighter
, violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL , violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL
, my_ats = 22 # axis text size , my_ats = 22 # axis text size
, my_als = 20 # axis label size , my_als = 20 # axis label size
@ -38,14 +40,16 @@ lf_bp2 <- function(lf_df = lf_duet
# Only use the longer colour palette if there are many outcomes # Only use the longer colour palette if there are many outcomes
if (monochrome) { if (monochrome) {
lf_bp_colours = c(1:length(levels(lf_df$outcome))) lf_bp_colours = c(1:length(levels(lf_df[[colour_categ]])))
lf_bp_colours[c(1:length(levels(lf_df$outcome)))] = rgb(0,0,0) lf_bp_colours[c(1:length(levels(lf_df[[colour_categ]])))] = rgb(0,0,0)
} else { } else {
if (length(levels(lf_df$outcome)) > 2) { if (length(levels(lf_df[[colour_categ]])) > 2) {
lf_bp_colours = consurf_bp_colours lf_bp_colours = consurf_bp_colours
} }
else { else {
lf_bp_colours = hue_pal()(2) #lf_bp_colours = hue_pal()(2)
lf_bp_colours = dot_colours
} }
} }
@ -58,7 +62,6 @@ lf_bp2 <- function(lf_df = lf_duet
ggplot(lf_df, aes_string(x = x_grp, y = y_var)) + ggplot(lf_df, aes_string(x = x_grp, y = y_var)) +
facet_wrap(fwv facet_wrap(fwv
, nrow = n_facet_row , nrow = n_facet_row
, scales = y_scales) + , scales = y_scales) +
@ -67,6 +70,7 @@ lf_bp2 <- function(lf_df = lf_duet
geom_violin(trim = T geom_violin(trim = T
, scale = "width" , scale = "width"
, colour = "black"
#, position = position_dodge(width = 0.9) #, position = position_dodge(width = 0.9)
, draw_quantiles = violin_quantiles) + , draw_quantiles = violin_quantiles) +

View file

@ -143,6 +143,10 @@ lf_snap2 = all_dm_om_df[['lf_snap2']]
wf_provean = all_dm_om_df[['wf_provean']] wf_provean = all_dm_om_df[['wf_provean']]
lf_provean = all_dm_om_df[['lf_provean']] lf_provean = all_dm_om_df[['lf_provean']]
# NEW
wf_dist_gen = all_dm_om_df[['wf_dist_gen']]
lf_dist_gen = all_dm_om_df[['lf_dist_gen']]
if (tolower(gene)%in%geneL_na){ if (tolower(gene)%in%geneL_na){
wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']] wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']]
lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']] lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]

View file

@ -61,7 +61,7 @@ svg(corr_psP, width = 15, height = 15)
my_corr_pairs(corr_data_all = corr_df_ps my_corr_pairs(corr_data_all = corr_df_ps
, corr_cols = colnames(corr_df_ps[1:corr_end]) , corr_cols = colnames(corr_df_ps[1:corr_end])
, corr_method = "spearman" # other options: "pearson" or "kendall" , corr_method = "spearman"
, colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode" , colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue") , categ_colour = c("red", "blue")
, density_show = F , density_show = F
@ -113,7 +113,7 @@ svg(corr_ligP, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_lig my_corr_pairs(corr_data_all = corr_df_lig
, corr_cols = colnames(corr_df_lig[1:corr_end]) , corr_cols = colnames(corr_df_lig[1:corr_end])
, corr_method = "spearman" # other options: "pearson" or "kendall" , corr_method = "spearman"
, colour_categ_col = colnames(corr_df_lig[color_coln]) #"dst_mode" , colour_categ_col = colnames(corr_df_lig[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue") , categ_colour = c("red", "blue")
, density_show = F , density_show = F
@ -158,7 +158,7 @@ svg(corr_ppi2P, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_ppi2 my_corr_pairs(corr_data_all = corr_df_ppi2
, corr_cols = colnames(corr_df_ppi2[1:corr_end]) , corr_cols = colnames(corr_df_ppi2[1:corr_end])
, corr_method = "spearman" # other options: "pearson" or "kendall" , corr_method = "spearman"
, colour_categ_col = colnames(corr_df_ppi2[color_coln]) #"dst_mode" , colour_categ_col = colnames(corr_df_ppi2[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue") , categ_colour = c("red", "blue")
, density_show = F , density_show = F
@ -181,7 +181,7 @@ corr_na_colnames = c("mCSM-NA"
, "MAF" , "MAF"
, "Log(OR)" , "Log(OR)"
, "-Log(P)" , "-Log(P)"
, "NA-Dist" # "interface_dist" , "NA-Dist" # "NA_dist"
, "dst_mode" , "dst_mode"
, drug) , drug)
@ -254,7 +254,7 @@ svg(corr_consP, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_cons my_corr_pairs(corr_data_all = corr_df_cons
, corr_cols = colnames(corr_df_cons[1:corr_end]) , corr_cols = colnames(corr_df_cons[1:corr_end])
, corr_method = "spearman" # other options: "pearson" or "kendall" , corr_method = "spearman"
, colour_categ_col = colnames(corr_df_cons[color_coln]) #"dst_mode" , colour_categ_col = colnames(corr_df_cons[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue") , categ_colour = c("red", "blue")
, density_show = F , density_show = F

View file

@ -1,237 +1,230 @@
################# #################
# Numbers # Numbers
################## ##################
nrow(wf_mcsm_lig) all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)
table(wf_mcsm_lig$mutation_info_labels) #
# lf_duet = all_dm_om_df[['lf_duet']]
nrow(wf_mcsm_ppi2) # table(lf_duet$param_type)
table(wf_mcsm_ppi2$mutation_info_labels)
################################################################ ################################################################
geneL_normal = c("pnca") #======================
geneL_na = c("gid", "rpob") # Data: Dist+genomics
geneL_ppi2 = c("alr", "embb", "katg", "rpob") #======================
lf_dist_genP = all_dm_om_df[['lf_dist_gen']]
wf_dist_genP = all_dm_om_df[['wf_dist_gen']]
levels(lf_dist_genP$param_type)
#lf_dist_genP$param_type <- factor(lf_dist_genP$param_type, levels=c("Log10(MAF)", "Lig Dist(Å)", "PPI Dist(Å)"))
table(lf_dist_genP$param_type)
if (tolower(gene)%in%geneL_na){ genomics_param = c("Log10(MAF)")
lf_mcsm_na
}
if (tolower(gene)%in%geneL_ppi2){ dist_genP = lf_bp2(lf_dist_genP
lf_mcsm_ppi2 #, p_title
} , violin_quantiles = c(0.5), monochrome = F)
colnames(lf_duet) #-------------------
table(lf_duet$param_type) # Genomics data plot
#-------------------
genomics_dataP = lf_dist_genP[lf_dist_genP$param_type%in%genomics_param,]
genomics_dataP$param_type = factor(genomics_dataP$param_type)
table(genomics_dataP$param_type)
static_colsP = c("Lig Dist(Å)","ASA", "RSA","RD","KD","Log10(MAF)") genomicsP = lf_bp2(genomics_dataP
#, p_title = ""
, violin_quantiles = c(0.5), monochrome = F)
stability_suffix <- paste0(delta_symbol, delta_symbol, "G") genomicsP
lf_commonP = lf_duet[!lf_duet$param_type%in%c("DUET ΔΔG"),] #check
lf_commonP$param_type = levels(droplevels(lf_commonP$param_type)) wilcox.test(wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="R"]
table(lf_commonP$param_type); colnames(lf_commonP) , wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
lf_commonP$outcome = lf_commonP$duet_outcome
lf_commonP$duet_outcome = NULL
tapply(wf_dist_genP$`Log10(MAF)`, wf_dist_genP$mutation_info_labels, summary)
lf_duet$outcome = lf_duet$duet_outcome #-------------------
lf_duet$duet_outcome = NULL # Distance data plot: not genomics data
lf_duetP = lf_duet[!lf_duet$param_type%in%c(static_colsP, "outcome"),] #-------------------
lf_duetP$param_type = levels(droplevels(lf_duetP$param_type)) dist_dataP = lf_dist_genP[!lf_dist_genP$param_type%in%genomics_param,]
table(lf_duetP$param_type); colnames(lf_duetP) #dist_dataP$param_type = factor(dist_dataP$param_type)
colnames(lf_duetP) table(dist_dataP$param_type)
lf_foldx$outcome = lf_foldx$foldx_outcome distanceP = lf_bp2(dist_dataP
lf_foldx$foldx_outcome = NULL #, p_title = ""
lf_foldxP = lf_foldx[!lf_foldx$param_type%in%c(static_colsP,"outcome"),] , violin_quantiles = c(0.5), monochrome = F)
lf_foldxP$param_type = levels(droplevels(lf_foldxP$param_type))
distanceP
# check
wilcox.test(wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
, wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
wilcox.test(wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
, wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
tapply(wf_dist_genP$`PPI Dist(Å)`, wf_dist_genP$mutation_info_labels, summary)
tapply(wf_dist_genP$`Lig Dist(Å)`, wf_dist_genP$mutation_info_labels, summary)
#==============
# Plot:DUET
#==============
lf_duetP = all_dm_om_df[['lf_duet']]
#lf_duetP = lf_duet[!lf_duet$param_type%in%c(static_colsP),]
table(lf_duetP$param_type)
lf_duetP$param_type = factor(lf_duetP$param_type)
table(lf_duetP$param_type)
duetP = lf_bp2(lf_duetP
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
#==============
# Plot:FoldX
#==============
lf_foldxP = all_dm_om_df[['lf_foldx']]
#lf_foldxP = lf_foldx[!lf_foldx$param_type%in%c(static_colsP),]
table(lf_foldxP$param_type)
lf_foldxP$param_type = factor(lf_foldxP$param_type)
table(lf_foldxP$param_type) table(lf_foldxP$param_type)
colnames(lf_foldxP)
foldxP = lf_bp2(lf_foldxP
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
lf_deepddg$outcome = lf_deepddg$deepddg_outcome #==============
lf_deepddg$deepddg_outcome = NULL # Plot:DeepDDG
lf_deepddgP = lf_deepddg[!lf_deepddg$param_type%in%c(static_colsP, "outcome"),] #==============
lf_deepddgP$param_type = levels(droplevels(lf_deepddgP$param_type)) lf_deepddgP = all_dm_om_df[['lf_deepddg']]
#lf_deepddgP = lf_deepddg[!lf_deepddg$param_type%in%c(static_colsP),]
table(lf_deepddgP$param_type)
lf_deepddgP$param_type = factor(lf_deepddgP$param_type)
table(lf_deepddgP$param_type) table(lf_deepddgP$param_type)
colnames(lf_deepddgP)
deepddgP = lf_bp2(lf_deepddgP
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F
, dot_transparency = 0.3)
deepddgP
lf_dynamut2$outcome = lf_dynamut2$ddg_dynamut2_outcome #==============
lf_dynamut2$ddg_dynamut2_outcome = NULL # Plot: Dynamut2
lf_dynamut2P = lf_dynamut2[!lf_dynamut2$param_type%in%c(static_colsP, "outcome"),] #==============
lf_dynamut2P$param_type = levels(droplevels(lf_dynamut2P$param_type)) lf_dynamut2P = all_dm_om_df[['lf_dynamut2']]
#lf_dynamut2P = lf_dynamut2[!lf_dynamut2$param_type%in%c(static_colsP),]
table(lf_dynamut2P$param_type)
lf_dynamut2P$param_type = factor(lf_dynamut2P$param_type)
table(lf_dynamut2P$param_type) table(lf_dynamut2P$param_type)
colnames(lf_dynamut2P)
dynamut2P = lf_bp2(lf_dynamut2P
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
lf_consurf$outcome = lf_consurf$consurf_outcome #==============
lf_consurf$consurf_outcome = NULL # Plot:ConSurf
lf_consurfP = lf_consurf[!lf_consurf$param_type%in%c(static_colsP),] #==============
lf_consurfP$param_type = levels(droplevels(lf_consurfP$param_type)) lf_consurfP = all_dm_om_df[['lf_consurf']]
#lf_consurfP = lf_consurf[!lf_consurf$param_type%in%c(static_colsP),]
table(lf_consurfP$param_type)
lf_consurfP$param_type = factor(lf_consurfP$param_type)
table(lf_consurfP$param_type) table(lf_consurfP$param_type)
colnames(lf_consurfP)
consurfP = lf_bp2(lf_consurfP
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
lf_snap2$outcome = lf_snap2$snap2_outcome #==============
lf_snap2$snap2_outcome = NULL # Plot:SNAP2
lf_snap2P = lf_snap2[!lf_snap2$param_type%in%c(static_colsP),] #==============
lf_snap2P$param_type = levels(droplevels(lf_snap2P$param_type)) lf_snap2P = all_dm_om_df[['lf_snap2']]
#lf_snap2P = lf_snap2[!lf_snap2$param_type%in%c(static_colsP),]
table(lf_snap2P$param_type)
lf_snap2P$param_type = factor(lf_snap2P$param_type)
table(lf_snap2P$param_type) table(lf_snap2P$param_type)
colnames(lf_snap2P)
snap2P = lf_bp2(lf_snap2P
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
lf_provean$outcome = lf_provean$provean_outcome #==============
lf_provean$provean_outcome = NULL # Plot:PROVEAN
lf_proveanP = lf_provean[!lf_provean$param_type%in%c(static_colsP),] #==============
lf_proveanP$param_type = levels(droplevels(lf_proveanP$param_type)) lf_proveanP = all_dm_om_df[['lf_provean']]
#lf_proveanP = lf_provean[!lf_provean$param_type%in%c(static_colsP),]
table(lf_proveanP$param_type)
lf_proveanP$param_type = factor(lf_proveanP$param_type)
table(lf_proveanP$param_type) table(lf_proveanP$param_type)
colnames(lf_proveanP)
bar = rbind(colnames(lf_duetP) proveanP = lf_bp2(lf_proveanP
, colnames(lf_foldxP) #, p_title = paste0("Stability",stability_suffix)
, colnames(lf_deepddgP) , violin_quantiles = c(0.5), monochrome = F)
, colnames(lf_dynamut2P)
, colnames(lf_consurfP)
, colnames(lf_snap2P)
, colnames(lf_proveanP)
)
bar
lf_df_stabP = rbind((lf_duetP)
, (lf_foldxP)
, (lf_deepddgP)
, (lf_dynamut2P))
lf_df_consP = rbind((lf_consurfP)
, (lf_snap2P)
, (lf_proveanP))
table(lf_df_stabP$param_type)
# VERY USEFUL for seeing numbers for param types
table(lf_df_stabP$param_type,lf_df_stabP$outcome)
table(lf_df_consP$param_type,lf_df_consP$outcome)
#============== #==============
# Plot:BP # Plot: mCSM-lig
#============== #==============
stability_suffix <- paste0(delta_symbol, delta_symbol, "G") lf_mcsm_ligP = all_dm_om_df[['lf_mcsm_lig']]
#lf_mcsm_ligP = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_colsP),]
# lf_bp(lf_df_stabP, p_title = paste0("Stability",stability_suffix) table(lf_mcsm_ligP$param_type)
# , violin_quantiles = c(0.5)) lf_mcsm_ligP$param_type = factor(lf_mcsm_ligP$param_type)
# lf_bp(lf_df_consP, p_title = "Evolutionary Conservation"
# , violin_quantiles = c(0.5))
lf_bp2(lf_df_stabP, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
lf_bp2(lf_duet, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
lf_bp2(lf_df_consP, p_title = "Evolutionary Conservation"
, violin_quantiles = c(0.5), monochrome = F)
#HMMM: Bollocks!
lf_bp2(lf_commonP, p_title = paste0("Residue level properties")
, violin_quantiles = c(0.5)
, monochrome = T) # doesn't plot stat bars
lf_bp(lf_commonP, p_title = paste0("Residue level properties")
, violin_quantiles = c(0.5)) #plots stat bars but incorrect result
lf_unpaired_stats(lf_duet)
wilcox.test(wf_duet$`Lig Dist(Å)`[wf_duet$mutation_info_labels=="R"]
, wf_duet$`Lig Dist(Å)`[wf_duet$mutation_info_labels=="S"])
wilcox.test(wf_duet$ASA[wf_duet$mutation_info_labels=="R"]
, wf_duet$ASA[wf_duet$mutation_info_labels=="S"])
# 1: variable
# 9: conserved
# CHECK THESE
foo = merged_df3[c("dst_mode", "mutation_info_labels", "consurf_colour_rev"
, "consurf_scaled"
, "consurf_score"
, "consurf_outcome"
, "snap2_score"
, "snap2_scaled"
, "snap2_outcome"
, "provean_score"
, "provean_scaled"
, "provean_outcome")]
################
# Affinity
################
# ligand
lf_mcsm_lig$outcome = lf_mcsm_lig$ligand_outcome
lf_mcsm_lig$ligand_outcome = NULL
colnames(lf_mcsm_lig)
table(lf_mcsm_lig$param_type)
lf_mcsm_lig$outcome = lf_mcsm_lig$ligand_outcome
lf_mcsm_ligP = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_colsP, "outcome"),]
#lf_mcsm_ligP$param_type = levels(droplevels(lf_mcsm_ligP))
table(lf_mcsm_ligP$param_type) table(lf_mcsm_ligP$param_type)
lf_mcsm_ligP$ligand_outcome = NULL
colnames(lf_mcsm_ligP)
if (tolower(gene)%in%geneL_na){ mcsmligP = lf_bp2(lf_mcsm_ligP
lf_mcsm_na$outcome = lf_mcsm_na$mcsm_na_outcome #, p_title = paste0("Stability",stability_suffix)
#lf_mcsm_na$mcsm_na_outcome = NULL , violin_quantiles = c(0.5), monochrome = F)
lf_mcsm_naP = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_colsP, "outcome"),]
#lf_mcsm_naP$param_type = levels(droplevels(lf_mcsm_naP))
table(lf_mcsm_naP$param_type)
lf_mcsm_naP$mcsm_na_outcome = NULL
colnames(lf_mcsm_naP)
}
#==============
# Plot: mCSM-ppi2
#==============
if (tolower(gene)%in%geneL_ppi2){ if (tolower(gene)%in%geneL_ppi2){
lf_mcsm_ppi2P = all_dm_om_df[['lf_mcsm_ppi2']]
lf_mcsm_ppi2$outcome = lf_mcsm_ppi2$mcsm_ppi2_outcome #lf_mcsm_ppi2P = lf_mcsm_ppi2[!lf_mcsm_ppi2$param_type%in%c(static_colsP),]
colnames(lf_mcsm_ppi2) table(lf_mcsm_ppi2P$param_type)
#lf_mcsm_ppi2$mcsm_ppi2_outcome = NULL lf_mcsm_ppi2P$param_type = factor(lf_mcsm_ppi2P$param_type)
lf_mcsm_ppi2P = lf_mcsm_ppi2[!lf_mcsm_ppi2$param_type%in%c(static_colsP, "outcome"),]
#lf_mcsm_ppi2P$param_type = levels(droplevels(lf_mcsm_ppi2P))
table(lf_mcsm_ppi2P$param_type) table(lf_mcsm_ppi2P$param_type)
lf_mcsm_ppi2P$mcsm_ppi2_outcome = NULL
colnames(lf_mcsm_ppi2P)
mcsmppi2P = lf_bp2(lf_mcsm_ppi2P
#, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F)
} }
#==============
# Plot: mCSM-NA
#==============
if (tolower(gene)%in%geneL_na){
lf_mcsm_naP = all_dm_om_df[['lf_mcsm_na']]
#lf_mcsm_naP = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_colsP),]
table(lf_mcsm_naP$param_type)
lf_mcsm_naP$param_type = factor(lf_mcsm_naP$param_type)
table(lf_mcsm_naP$param_type)
bar = rbind(colnames(lf_mcsm_ligP) mcsmnaP = lf_bp2(lf_mcsm_naP
#, colnames(lf_mcsm_naP) #, p_title = paste0("Stability",stability_suffix)
, colnames(lf_mcsm_ppi2P))
bar
lf_df_affP = rbind((lf_mcsm_ligP)
, (lf_mcsm_ppi2P))
lf_bp(lf_df_affP, p_title = paste0("Affinity changes")
, violin_quantiles = c(0.5))
#, monochrome = T) # doesn't plot stat bars
wilcox.test(wf_mcsm_lig$ASA[wf_mcsm_lig$mutation_info_labels=="R"]
, wf_mcsm_lig$ASA[wf_mcsm_lig$mutation_info_labels=="S"])
wilcox.test(wf_mcsm_ppi2$ASA[wf_mcsm_ppi2$mutation_info_labels=="R"]
, wf_mcsm_ppi2$ASA[wf_mcsm_ppi2$mutation_info_labels=="S"])
#===============================
p1 = lf_bp2(lf_df_stabP, p_title = paste0("Stability",stability_suffix)
, violin_quantiles = c(0.5), monochrome = F) , violin_quantiles = c(0.5), monochrome = F)
}
p2 = lf_bp2(lf_df_consP, p_title = "Evolutionary Conservation" ######################################
, violin_quantiles = c(0.5), monochrome = F) # Outplot with stats
######################################
p3 = lf_bp2(lf_df_affP, p_title = paste0("Affinity changes") cowplot::plot_grid(
, violin_quantiles = c(0.5), monochrome = F) cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P, genomicsP, distanceP
, nrow=1),
# cowplot::plot_grid(genomicsP, distanceP
# , nrow = 1),
cowplot::plot_grid(consurfP, snap2P, proveanP
, mcsmligP
, mcsmppi2P
#, mcsmnaP
, nrow=1),
nrow=2)
foo = lf_consurfP
# proveanP = lf_bp2(lf_proveanP, colour_categ = "mutation_info_labels"
# , p_title = paste0("Evolutionary conservation")
# , dot_transparency = 1
# , violin_quantiles = c(0.5), monochrome = F)
#
# proveanP
cowplot::plot_grid(p1,cowplot::plot_grid(p2, p3), nrow=2)