more plot modifications dm and om plots mainly
This commit is contained in:
parent
4e6f10d1ba
commit
0234a8f77b
6 changed files with 501 additions and 358 deletions
|
@ -74,17 +74,23 @@ corr_data_extract <- function(df
|
|||
if (tolower(gene)%in%geneL_normal){
|
||||
colnames_to_extract = c(common_colnames)
|
||||
display_colnames = c(display_common_colnames)
|
||||
corr_df = df[,colnames_to_extract]
|
||||
colnames(corr_df) = display_colnames
|
||||
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
colnames_to_extract = c(common_colnames ,"mcsm_ppi2_affinity", ppi2Dist_colname)
|
||||
display_colnames = c(display_common_colnames,"mCSM-PPI2" , "PPI-Dist")
|
||||
corr_df = df[,colnames_to_extract]
|
||||
colnames(corr_df) = display_colnames
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
colnames_to_extract = c(common_colnames,"mcsm_na_affinity", naDist_colname)
|
||||
display_colnames = c(display_common_colnames, "mCSM-NA", "NA-Dist")
|
||||
corr_df = df[,colnames_to_extract]
|
||||
colnames(corr_df) = display_colnames
|
||||
}
|
||||
|
||||
# [optional] arg: extract_scaled_cols
|
||||
|
@ -92,19 +98,23 @@ corr_data_extract <- function(df
|
|||
cat("\nExtracting scaled columns as well...\n")
|
||||
all_scaled_cols = colnames(merged_df3)[grep(".*scaled", colnames(merged_df3))]
|
||||
colnames_to_extract = c(colnames_to_extract, all_scaled_cols)
|
||||
|
||||
corr_df = df[,colnames_to_extract]
|
||||
colnames(corr_df) = display_colnames
|
||||
}else{
|
||||
colnames_to_extract = colnames_to_extract
|
||||
corr_df = df[,colnames_to_extract]
|
||||
colnames(corr_df) = display_colnames
|
||||
}
|
||||
|
||||
# extract df based on gene
|
||||
corr_df = df[,colnames_to_extract]
|
||||
colnames(corr_df)
|
||||
display_colnames
|
||||
|
||||
# arg: colnames_display_key
|
||||
colnames(corr_df)[colnames(corr_df)%in%colnames_to_extract] <- display_colnames
|
||||
colnames(corr_df)
|
||||
# WORKED:
|
||||
# # extract df based on gene
|
||||
# corr_df = df[,colnames_to_extract]
|
||||
# colnames(corr_df)
|
||||
# display_colnames
|
||||
#
|
||||
# # arg: colnames_display_key
|
||||
# colnames(corr_df)[colnames(corr_df)%in%colnames_to_extract] <- display_colnames
|
||||
# colnames(corr_df)
|
||||
|
||||
cat("\nExtracted ncols:", ncol(corr_df)
|
||||
,"\nRenaming successful")
|
||||
|
|
|
@ -8,11 +8,11 @@
|
|||
##################################################################
|
||||
# from plotting_globals.R
|
||||
# DistCutOff, LigDist_colname, ppi2Dist_colname, naDist_colname
|
||||
gene
|
||||
|
||||
dm_om_wf_lf_data <- function(df
|
||||
, gene # from globals
|
||||
, colnames_to_extract
|
||||
#, ligand_dist_colname = LigDist_colname # from globals
|
||||
#, LigDist_colname # from globals used
|
||||
#, ppi2Dist_colname #from globals used
|
||||
#, naDist_colname #from globals used
|
||||
|
@ -21,13 +21,13 @@ dm_om_wf_lf_data <- function(df
|
|||
, snp_colname = "mutationinformation"
|
||||
, aa_pos_colname = "position" # to sort df by
|
||||
, mut_colname = "mutation"
|
||||
, mut_info_colname = "mutation_info"
|
||||
, mut_info_label_colname = "mutation_info_labels" # if empty, below used
|
||||
#, dr_other_muts_labels = c("DM", "OM") # only used if ^^ = ""
|
||||
, mut_info_colname = "dst_mode"
|
||||
, mut_info_label_colname = "mutation_info_labels"
|
||||
, categ_cols_to_factor){
|
||||
|
||||
df = as.data.frame(df)
|
||||
df$maf = log10(df$maf) # can't see otherwise
|
||||
df$maf2 = log10(df$maf) # can't see otherwise
|
||||
sum(is.na(df$maf2))
|
||||
|
||||
# Initialise the required dfs based on gene name
|
||||
geneL_normal = c("pnca")
|
||||
|
@ -50,6 +50,8 @@ dm_om_wf_lf_data <- function(df
|
|||
, lf_consurf = data.frame()
|
||||
, wf_snap2 = data.frame()
|
||||
, lf_snap2 = data.frame()
|
||||
, wf_dist_gen = data.frame() # NEW
|
||||
, lf_dist_gen = data.frame() # NEW
|
||||
)
|
||||
|
||||
# additional dfs
|
||||
|
@ -76,132 +78,170 @@ dm_om_wf_lf_data <- function(df
|
|||
, length(wf_lf_dataL))
|
||||
|
||||
#=======================================================================
|
||||
if (missing(colnames_to_extract)){
|
||||
# display names
|
||||
stability_suffix <- paste0(delta_symbol, delta_symbol, "G")
|
||||
|
||||
colnames_to_extract = c(snp_colname
|
||||
, mut_colname, mut_info_colname, mut_info_label_colname
|
||||
, aa_pos_colname
|
||||
, LigDist_colname # from globals
|
||||
, ppi2Dist_colname # from globals
|
||||
, naDist_colname # from globals
|
||||
, "duet_stability_change" , "duet_scaled" , "duet_outcome"
|
||||
, "ligand_affinity_change", "affinity_scaled" , "ligand_outcome"
|
||||
, "ddg_foldx" , "foldx_scaled" , "foldx_outcome"
|
||||
, "deepddg" , "deepddg_scaled" , "deepddg_outcome"
|
||||
, "asa" , "rsa"
|
||||
, "rd_values" , "kd_values"
|
||||
, "log10_or_mychisq" , "neglog_pval_fisher" , "maf" #"af"
|
||||
, "ddg_dynamut2" , "ddg_dynamut2_scaled", "ddg_dynamut2_outcome"
|
||||
, "mcsm_ppi2_affinity" , "mcsm_ppi2_scaled" , "mcsm_ppi2_outcome"
|
||||
, "consurf_score" , "consurf_scaled" , "consurf_outcome" # exists now
|
||||
, "consurf_colour_rev"
|
||||
, "snap2_score" , "snap2_scaled" , "snap2_outcome"
|
||||
, "mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome"
|
||||
, "provean_score" , "provean_scaled" , "provean_outcome")
|
||||
duet_dn = paste0("DUET ", stability_suffix); duet_dn
|
||||
foldx_dn = paste0("FoldX ", stability_suffix); foldx_dn
|
||||
deepddg_dn = paste0("Deepddg " , stability_suffix); deepddg_dn
|
||||
dynamut2_dn = paste0("Dynamut2 " , stability_suffix); dynamut2_dn
|
||||
|
||||
consurf_dn = "ConSurf"
|
||||
snap2_dn = "SNAP2"
|
||||
provean_dn = "PROVEAN"
|
||||
|
||||
or_dn = "Log10(OR)"
|
||||
pval_dn = "-Log10(P)"
|
||||
maf2_dn = "Log10(MAF)"
|
||||
|
||||
asa_dn = "ASA"
|
||||
rsa_dn = "RSA"
|
||||
rd_dn = "RD"
|
||||
kd_dn = "KD"
|
||||
|
||||
lig_dist_dn = paste0("Lig Dist(", angstroms_symbol, ")"); lig_dist_dn
|
||||
mcsm_lig_dn = paste0("mCSM-lig"); mcsm_lig_dn
|
||||
mmcsm_lig_dn2 = paste0("mmCSM-lig"); mmcsm_lig_dn2
|
||||
|
||||
|
||||
na_dist_dn = paste0("NA Dist(", angstroms_symbol, ")"); na_dist_dn
|
||||
mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn
|
||||
|
||||
ppi2_dist_dn = paste0("PPI Dist(", angstroms_symbol, ")"); ppi2_dist_dn
|
||||
mcsm_ppi2_dn = paste0("mCSM-PPI2 ", stability_suffix); mcsm_ppi2_dn
|
||||
|
||||
#=======================================================================
|
||||
if(missing(categ_cols_to_factor)){
|
||||
categ_cols_to_factor = grep( "_outcome|_info", colnames(df) )
|
||||
}else{
|
||||
colnames_to_extract = c(mut_colname, mut_info_colname, mut_info_label_colname
|
||||
, aa_pos_colname, LigDist_colname
|
||||
, colnames_to_extract)
|
||||
categ_cols_to_factor = categ_cols_to_factor
|
||||
}
|
||||
comb_df = df[, colnames(df)%in%colnames_to_extract]
|
||||
comb_df_s = dplyr::arrange(comb_df, aa_pos_colname)
|
||||
|
||||
#=======================================================================
|
||||
if(missing(categ_cols_to_factor)){
|
||||
categ_cols_to_factor = grep( "_outcome|_info", colnames(comb_df_s) )
|
||||
}else{
|
||||
categ_cols_to_factor = categ_cols_to_factor
|
||||
}
|
||||
#fact_cols = colnames(comb_df_s)[grepl( "_outcome|_info", colnames(comb_df_s) )]
|
||||
fact_cols = colnames(comb_df_s)[categ_cols_to_factor]
|
||||
fact_cols = colnames(df)[categ_cols_to_factor]
|
||||
|
||||
if (any(lapply(comb_df_s[, fact_cols], class) == "character")){
|
||||
if (any(lapply(df[, fact_cols], class) == "character")){
|
||||
cat("\nChanging", length(categ_cols_to_factor), "cols to factor")
|
||||
comb_df_s[, fact_cols] <- lapply(comb_df_s[, fact_cols], as.factor)
|
||||
if (all(lapply(comb_df_s[, fact_cols], class) == "factor")){
|
||||
df[, fact_cols] <- lapply(df[, fact_cols], as.factor)
|
||||
if (all(lapply(df[, fact_cols], class) == "factor")){
|
||||
cat("\nSuccessful: cols changed to factor")
|
||||
}
|
||||
}else{
|
||||
cat("\nRequested cols aready factors")
|
||||
}
|
||||
#=======================================================================
|
||||
table(comb_df_s[[mut_info_colname]])
|
||||
|
||||
# pretty display names i.e. labels to reduce major code duplication later
|
||||
foo_cnames = data.frame(colnames(comb_df_s))
|
||||
names(foo_cnames) <- "old_name"
|
||||
cat("\ncols changed to factor are:\n", colnames(df)[categ_cols_to_factor] )
|
||||
|
||||
stability_suffix <- paste0(delta_symbol, delta_symbol, "G")
|
||||
#flexibility_suffix <- paste0(delta_symbol, delta_symbol, "S")
|
||||
#=======================================================================
|
||||
if (missing(colnames_to_extract)){
|
||||
# NOTE: these vars are from globals
|
||||
#LigDist_colname, ppi2Dist_colname, naDist_colname
|
||||
|
||||
#lig_dn = paste0("Ligand distance (", angstroms_symbol, ")"); lig_dn
|
||||
#mcsm_lig_dn = paste0("Ligand affinity (log fold change)"); mcsm_lig_dn
|
||||
common_colnames = c(snp_colname
|
||||
, mut_colname , "dst_mode" , mut_info_label_colname
|
||||
, aa_pos_colname
|
||||
|
||||
lig_dn = paste0("Lig Dist(", angstroms_symbol, ")"); lig_dn
|
||||
mcsm_lig_dn = paste0("mCSM-lig"); mcsm_lig_dn
|
||||
, "duet_stability_change" , "duet_scaled" , "duet_outcome"
|
||||
, "ddg_foldx" , "foldx_scaled" , "foldx_outcome"
|
||||
, "deepddg" , "deepddg_scaled" , "deepddg_outcome"
|
||||
, "ddg_dynamut2" , "ddg_dynamut2_scaled" , "ddg_dynamut2_outcome"
|
||||
|
||||
duet_dn = paste0("DUET ", stability_suffix); duet_dn
|
||||
foldx_dn = paste0("FoldX ", stability_suffix); foldx_dn
|
||||
deepddg_dn = paste0("Deepddg " , stability_suffix); deepddg_dn
|
||||
dynamut2_dn = paste0("Dynamut2 " , stability_suffix); dynamut2_dn
|
||||
, "consurf_score" , "consurf_scaled" , "consurf_outcome" , "consurf_colour_rev"
|
||||
, "snap2_score" , "snap2_scaled" , "snap2_outcome"
|
||||
, "provean_score" , "provean_scaled" , "provean_outcome"
|
||||
|
||||
mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn
|
||||
mcsm_ppi2_dn = paste0("mCSM-PPI2 ", stability_suffix); mcsm_ppi2_dn
|
||||
consurf_dn = paste0("ConSurf"); consurf_dn
|
||||
snap2_dn = paste0("SNAP2"); snap2_dn
|
||||
provean_dn = paste0("PROVEAN"); provean_dn
|
||||
, "log10_or_mychisq" , "neglog_pval_fisher" , "maf2"
|
||||
, "asa" , "rsa" , "rd_values" , "kd_values"
|
||||
|
||||
# change column names: plyr
|
||||
new_colnames = c(asa = "ASA"
|
||||
, rsa = "RSA"
|
||||
, rd_values = "RD"
|
||||
, kd_values = "KD"
|
||||
#, log10_or_mychisq = "Log10(OR)"
|
||||
#, neglog_pval_fisher = "-Log(P)"
|
||||
#, af = "MAF"
|
||||
, maf = "Log10(MAF)"
|
||||
#, ligand_dist_colname= lig_dn # cannot handle variable name 'ligand_dist_colname'
|
||||
, affinity_scaled = mcsm_lig_dn
|
||||
, duet_scaled = duet_dn
|
||||
, foldx_scaled = foldx_dn
|
||||
, deepddg_scaled = deepddg_dn
|
||||
, ddg_dynamut2_scaled = dynamut2_dn
|
||||
, mcsm_na_scaled = mcsm_na_dn
|
||||
, mcsm_ppi2_scaled = mcsm_ppi2_dn
|
||||
#, consurf_scaled = consurf_dn
|
||||
, consurf_score = consurf_dn
|
||||
#, consurf_colour_rev = consurf_dn
|
||||
#, snap2_scaled = snap2_dn
|
||||
, snap2_score = snap2_dn
|
||||
, provean_score = provean_dn)
|
||||
, "mmcsm_lig" , "mmcsm_lig_scaled" , "mmcsm_lig_outcome"
|
||||
, "ligand_affinity_change", "affinity_scaled" , "ligand_outcome" , LigDist_colname
|
||||
)
|
||||
|
||||
display_common_colnames = c(snp_colname
|
||||
, mut_colname , "dst_mode" , mut_info_label_colname
|
||||
, aa_pos_colname
|
||||
|
||||
comb_df_sl1 = plyr::rename(comb_df_s
|
||||
, replace = new_colnames
|
||||
, warn_missing = T
|
||||
, warn_duplicated = T)
|
||||
, "duet_stability_change" , duet_dn , "duet_outcome"
|
||||
, "ddg_foldx" , foldx_dn , "foldx_outcome"
|
||||
, "deepddg" , deepddg_dn , "deepddg_outcome"
|
||||
, "ddg_dynamut2" , dynamut2_dn , "ddg_dynamut2_outcome"
|
||||
, consurf_dn , "consurf_scaled" , "consurf_outcome" , "consurf_colour_rev"
|
||||
, snap2_dn , "snap2_scaled" , "snap2_outcome"
|
||||
, provean_dn , "provean_scaled" , "provean_outcome"
|
||||
|
||||
# renaming colname using variable i.e ligand_dist_colname: dplyr
|
||||
#comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(ligand_dist_colname))
|
||||
comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(LigDist_colname)) # NEW
|
||||
names(comb_df_sl)
|
||||
, or_dn , pval_dn , maf2_dn
|
||||
, asa_dn , rsa_dn , rd_dn , kd_dn
|
||||
|
||||
#=======================
|
||||
# NEW: Affinity filtered data
|
||||
#========================
|
||||
# mcsm-lig --> LigDist_colname
|
||||
comb_df_sl_lig = comb_df_sl[comb_df_sl[[lig_dn]]<DistCutOff,]
|
||||
, "mmcsm_lig" , mmcsm_lig_dn2 , "mmcsm_lig_outcome"
|
||||
, "ligand_affinity_change", mcsm_lig_dn , "ligand_outcome" , lig_dist_dn
|
||||
)
|
||||
|
||||
# mcsm-ppi2 --> ppi2Dist_colname
|
||||
comb_df_sl_ppi2 = comb_df_sl[comb_df_sl[[ppi2Dist_colname]]<DistCutOff,]
|
||||
if (length(common_colnames) == length(display_common_colnames)){
|
||||
cat("\nLength match: Proceeding to extracting end cols")
|
||||
}else{
|
||||
stop("Abort: Length mismatch: b/w ncols to extract and disply name")
|
||||
}
|
||||
|
||||
# mcsm-na --> naDist_colname
|
||||
comb_df_sl_na = comb_df_sl[comb_df_sl[[naDist_colname]]<DistCutOff,]
|
||||
# ordering is important!
|
||||
# static_cols_end = c(lig_dist_dn
|
||||
# , "ASA"
|
||||
# , "RSA"
|
||||
# , "RD"
|
||||
# , "KD"
|
||||
# , "Log10(MAF)"
|
||||
# #, "Log10(OR)"
|
||||
# #, "-Log(P)"
|
||||
# )
|
||||
static_cols_end_common = c(lig_dist_dn, "Log10(MAF)"); static_cols_end_common
|
||||
|
||||
if (tolower(gene)%in%geneL_normal){
|
||||
colnames_to_extract = c(common_colnames)
|
||||
display_colnames = c(display_common_colnames)
|
||||
comb_df_sl = df[, colnames_to_extract]
|
||||
|
||||
# Rename cols: display names
|
||||
colnames(comb_df_sl) = display_colnames
|
||||
#colnames(comb_df)[colnames(comb_df)%in%colnames_to_extract] <- display_colnames
|
||||
|
||||
static_cols_end = static_cols_end_common
|
||||
cat("\nend colnames for gene:", static_cols_end)
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
colnames_to_extract = c(common_colnames, "mcsm_ppi2_affinity" ,"mcsm_ppi2_scaled" , "mcsm_ppi2_outcome" , ppi2Dist_colname)
|
||||
display_colnames = c(display_common_colnames,"mcsm_ppi2_affinity", mcsm_ppi2_dn , "mcsm_ppi2_outcome" , ppi2_dist_dn )
|
||||
comb_df_sl = df[, colnames_to_extract]
|
||||
|
||||
# Rename cols: display names
|
||||
colnames(comb_df_sl) = display_colnames
|
||||
# Affinity filtered data: mcsm-ppi2 --> ppi2Dist_colname
|
||||
comb_df_sl_ppi2 = comb_df_sl[comb_df_sl[[ppi2_dist_dn]]<DistCutOff,]
|
||||
|
||||
# ordering is important!
|
||||
static_cols_end = c(ppi2_dist_dn, static_cols_end_common)
|
||||
cat("\nend colnames for gene:", static_cols_end)
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
colnames_to_extract = c(common_colnames,"mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome" , naDist_colname)
|
||||
display_colnames = c(display_common_colnames, "mcsm_na_affinity" , mcsm_na_dn , "mcsm_na_outcome" , na_dist_dn)
|
||||
comb_df_sl = df[, colnames_to_extract]
|
||||
|
||||
# Rename cols: display names
|
||||
colnames(comb_df) = display_colnames
|
||||
# Affinity filtered data: mcsm-na --> naDist_colname
|
||||
comb_df_sl_na = comb_df_sl[comb_df_sl[[na_dist_dn]]<DistCutOff,]
|
||||
|
||||
# ordering is important!
|
||||
static_cols_end = c(na_dist_dn, static_cols_end_common)
|
||||
cat("\nend colnames for gene:", static_cols_end)
|
||||
|
||||
}
|
||||
|
||||
# Affinity filtered data: mcsm-lig: COMMON for all genes, mcsm-lig --> LigDist_colname
|
||||
comb_df_sl_lig = comb_df_sl[comb_df_sl[[lig_dist_dn]]<DistCutOff,]
|
||||
|
||||
}
|
||||
|
||||
#####################################################################
|
||||
static_cols1 = mut_info_label_colname
|
||||
#######################################################################
|
||||
#======================
|
||||
# Selecting dfs
|
||||
# with appropriate cols
|
||||
|
@ -209,20 +249,61 @@ static_cols1 = mut_info_label_colname
|
|||
static_cols_start = c(snp_colname
|
||||
, aa_pos_colname
|
||||
, mut_colname
|
||||
, static_cols1)
|
||||
, mut_info_label_colname)
|
||||
|
||||
# ordering is important!
|
||||
static_cols_end = c(lig_dn
|
||||
, "ASA"
|
||||
, "RSA"
|
||||
, "RD"
|
||||
, "KD"
|
||||
, "Log10(MAF)"
|
||||
#, "Log10(OR)"
|
||||
#, "-Log(P)"
|
||||
)
|
||||
# static_cols_end
|
||||
cat("\nEnd colnames for gene:", static_cols_end)
|
||||
|
||||
#########################################################################
|
||||
#==============
|
||||
# Distance and genomics
|
||||
#==============
|
||||
# WF data: dist + genomics
|
||||
cols_to_select_dist_gen = c(static_cols_start, c("duet_outcome", duet_dn), static_cols_end)
|
||||
wf_dist_gen = comb_df_sl[, cols_to_select_dist_gen]; head(wf_dist_gen)
|
||||
|
||||
#pivot_cols_ps = cols_to_select_ps[1:5]; pivot_cols_ps
|
||||
pivot_cols_dist_gen = cols_to_select_dist_gen[1: (length(static_cols_start) + 1)]; pivot_cols_dist_gen
|
||||
expected_rows_lf = nrow(wf_dist_gen) * (length(wf_dist_gen) - length(pivot_cols_dist_gen))
|
||||
expected_rows_lf
|
||||
|
||||
# LF dist and genomics
|
||||
lf_dist_gen = tidyr::gather(wf_dist_gen
|
||||
, key = param_type
|
||||
, value = param_value
|
||||
, all_of(duet_dn):tail(static_cols_end,1)
|
||||
, factor_key = TRUE)
|
||||
|
||||
if (nrow(lf_dist_gen) == expected_rows_lf){
|
||||
cat("\nPASS: long format data created for Distance and Genomics")
|
||||
}else{
|
||||
cat("\nFAIL: long format data could not be created for Distance and Genomics")
|
||||
quit()
|
||||
}
|
||||
|
||||
# DROP duet cols
|
||||
drop_cols = c(duet_dn, "duet_outcome"); drop_cols
|
||||
table(lf_dist_gen$param_type)
|
||||
lf_dist_gen = lf_dist_gen[!lf_dist_gen$param_type%in%drop_cols,]
|
||||
lf_dist_gen$param_type = factor(lf_dist_gen$param_type)
|
||||
table(lf_dist_gen$param_type)
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_dist_gen$outcome_colname = mut_info_colname
|
||||
lf_dist_gen$outcome = lf_dist_gen[[mut_info_label_colname]]
|
||||
head(lf_dist_gen)
|
||||
|
||||
wf_dist_gen = subset(wf_dist_gen, select = !(names(wf_dist_gen) %in% drop_cols))
|
||||
|
||||
colnames(wf_dist_gen)
|
||||
colnames(lf_dist_gen)
|
||||
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_dist_gen']] = wf_dist_gen
|
||||
wf_lf_dataL[['lf_dist_gen']] = lf_dist_gen
|
||||
##########################################################
|
||||
|
||||
#==============
|
||||
# DUET
|
||||
#==============
|
||||
|
@ -249,10 +330,17 @@ if (nrow(lf_duet) == expected_rows_lf){
|
|||
quit()
|
||||
}
|
||||
|
||||
table(lf_duet$param_type)
|
||||
|
||||
# NEW columns [outcome and outcome colname]
|
||||
lf_duet$outcome_colname = "duet_outcome"
|
||||
lf_duet$outcome = lf_duet$duet_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_duet = lf_duet[!lf_duet$param_type%in%c(static_cols_end),]
|
||||
lf_duet$param_type = factor(lf_duet$param_type)
|
||||
table(lf_duet$param_type); colnames(lf_duet)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_duet']] = wf_duet
|
||||
wf_lf_dataL[['lf_duet']] = lf_duet
|
||||
|
@ -287,6 +375,11 @@ if (nrow(lf_foldx) == expected_rows_lf){
|
|||
lf_foldx$outcome_colname = "foldx_outcome"
|
||||
lf_foldx$outcome = lf_foldx$foldx_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_foldx = lf_foldx[!lf_foldx$param_type%in%c(static_cols_end),]
|
||||
lf_foldx$param_type = factor(lf_foldx$param_type)
|
||||
table(lf_foldx$param_type); colnames(lf_foldx)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_foldx']] = wf_foldx
|
||||
wf_lf_dataL[['lf_foldx']] = lf_foldx
|
||||
|
@ -321,6 +414,11 @@ if (nrow(lf_deepddg) == expected_rows_lf){
|
|||
lf_deepddg$outcome_colname = "deepddg_outcome"
|
||||
lf_deepddg$outcome = lf_deepddg$deepddg_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_deepddg = lf_deepddg[!lf_deepddg$param_type%in%c(static_cols_end),]
|
||||
lf_deepddg$param_type = factor(lf_deepddg$param_type)
|
||||
table(lf_deepddg$param_type); colnames(lf_deepddg)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_deepddg']] = wf_deepddg
|
||||
wf_lf_dataL[['lf_deepddg']] = lf_deepddg
|
||||
|
@ -354,11 +452,15 @@ if (nrow(lf_dynamut2) == expected_rows_lf){
|
|||
lf_dynamut2$outcome_colname = "ddg_dynamut2_outcome"
|
||||
lf_dynamut2$outcome = lf_dynamut2$ddg_dynamut2_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_dynamut2 = lf_dynamut2[!lf_dynamut2$param_type%in%c(static_cols_end),]
|
||||
lf_dynamut2$param_type = factor(lf_dynamut2$param_type)
|
||||
table(lf_dynamut2$param_type); colnames(lf_dynamut2)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_dynamut2']] = wf_dynamut2
|
||||
wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2
|
||||
|
||||
|
||||
######################################################################################
|
||||
#==================
|
||||
# Consurf: LF
|
||||
|
@ -405,6 +507,11 @@ if (nrow(lf_consurf) == expected_rows_lf){
|
|||
lf_consurf$outcome_colname = "consurf_outcome"
|
||||
lf_consurf$outcome = lf_consurf$consurf_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_consurf = lf_consurf[!lf_consurf$param_type%in%c(static_cols_end),]
|
||||
lf_consurf$param_type = factor(lf_consurf$param_type)
|
||||
table(lf_consurf$param_type); colnames(lf_consurf)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_consurf']] = wf_consurf
|
||||
wf_lf_dataL[['lf_consurf']] = lf_consurf
|
||||
|
@ -438,6 +545,11 @@ if (nrow(lf_snap2) == expected_rows_lf){
|
|||
lf_snap2$outcome_colname = "snap2_outcome"
|
||||
lf_snap2$outcome = lf_snap2$snap2_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_snap2 = lf_snap2[!lf_snap2$param_type%in%c(static_cols_end),]
|
||||
lf_snap2$param_type = factor(lf_snap2$param_type)
|
||||
table(lf_snap2$param_type); colnames(lf_snap2)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_snap2']] = wf_snap2
|
||||
wf_lf_dataL[['lf_snap2']] = lf_snap2
|
||||
|
@ -471,6 +583,11 @@ if (nrow(lf_provean) == expected_rows_lf){
|
|||
lf_provean$outcome_colname = "provean_outcome"
|
||||
lf_provean$outcome = lf_provean$provean_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_provean = lf_provean[!lf_provean$param_type%in%c(static_cols_end),]
|
||||
lf_provean$param_type = factor(lf_provean$param_type)
|
||||
table(lf_provean$param_type); colnames(lf_provean)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_provean']] = wf_provean
|
||||
wf_lf_dataL[['lf_provean']] = lf_provean
|
||||
|
@ -512,6 +629,11 @@ if (nrow(lf_mcsm_lig) == expected_rows_lf){
|
|||
lf_mcsm_lig$outcome_colname = "ligand_outcome"
|
||||
lf_mcsm_lig$outcome = lf_mcsm_lig$ligand_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_mcsm_lig = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_cols_end),]
|
||||
lf_mcsm_lig$param_type = factor(lf_mcsm_lig$param_type)
|
||||
table(lf_mcsm_lig$param_type); colnames(lf_mcsm_lig)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_mcsm_lig']] = wf_mcsm_lig
|
||||
wf_lf_dataL[['lf_mcsm_lig']] = lf_mcsm_lig
|
||||
|
@ -551,6 +673,11 @@ if (tolower(gene)%in%geneL_na){
|
|||
lf_mcsm_na$outcome_colname = "mcsm_na_outcome"
|
||||
lf_mcsm_na$outcome = lf_mcsm_na$mcsm_na_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_mcsm_na = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_cols_end),]
|
||||
lf_mcsm_na$param_type = factor(lf_mcsm_na$param_type)
|
||||
table(lf_mcsm_na$param_type); colnames(lf_mcsm_na)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_mcsm_na']] = wf_mcsm_na
|
||||
wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na
|
||||
|
@ -592,6 +719,11 @@ if (tolower(gene)%in%geneL_ppi2){
|
|||
lf_mcsm_ppi2$outcome_colname = "mcsm_ppi2_outcome"
|
||||
lf_mcsm_ppi2$outcome = lf_mcsm_ppi2$mcsm_ppi2_outcome
|
||||
|
||||
# DROP static cols
|
||||
lf_mcsm_ppi2 = lf_mcsm_ppi2[!lf_mcsm_ppi2$param_type%in%c(static_cols_end),]
|
||||
lf_mcsm_ppi2$param_type = factor(lf_mcsm_ppi2$param_type)
|
||||
table(lf_mcsm_ppi2$param_type); colnames(lf_mcsm_ppi2)
|
||||
|
||||
# Assign them to the output list
|
||||
wf_lf_dataL[['wf_mcsm_ppi2']] = wf_mcsm_ppi2
|
||||
wf_lf_dataL[['lf_mcsm_ppi2']] = lf_mcsm_ppi2
|
||||
|
|
|
@ -6,9 +6,11 @@
|
|||
# beeswarm
|
||||
#############################
|
||||
|
||||
lf_bp2 <- function(lf_df = lf_duet
|
||||
lf_bp2 <- function(lf_df #lf_duet
|
||||
, p_title = ""
|
||||
, colour_categ = "outcome"
|
||||
#, colour_categ = "outcome"
|
||||
, colour_categ = "mutation_info_labels"
|
||||
, dot_colours = c("red", "blue")
|
||||
, x_grp = "mutation_info_labels"
|
||||
, y_var = "param_value"
|
||||
, facet_var = "param_type"
|
||||
|
@ -16,7 +18,7 @@ lf_bp2 <- function(lf_df = lf_duet
|
|||
, y_scales = "free_y"
|
||||
, colour_bp_strip = "khaki2"
|
||||
, dot_size = 3
|
||||
, dot_transparency = 0.5
|
||||
, dot_transparency = 1 #0.3: lighter
|
||||
, violin_quantiles = c(0.25, 0.5, 0.75) # can be NULL
|
||||
, my_ats = 22 # axis text size
|
||||
, my_als = 20 # axis label size
|
||||
|
@ -38,14 +40,16 @@ lf_bp2 <- function(lf_df = lf_duet
|
|||
|
||||
# Only use the longer colour palette if there are many outcomes
|
||||
if (monochrome) {
|
||||
lf_bp_colours = c(1:length(levels(lf_df$outcome)))
|
||||
lf_bp_colours[c(1:length(levels(lf_df$outcome)))] = rgb(0,0,0)
|
||||
lf_bp_colours = c(1:length(levels(lf_df[[colour_categ]])))
|
||||
lf_bp_colours[c(1:length(levels(lf_df[[colour_categ]])))] = rgb(0,0,0)
|
||||
} else {
|
||||
if (length(levels(lf_df$outcome)) > 2) {
|
||||
if (length(levels(lf_df[[colour_categ]])) > 2) {
|
||||
lf_bp_colours = consurf_bp_colours
|
||||
}
|
||||
else {
|
||||
lf_bp_colours = hue_pal()(2)
|
||||
#lf_bp_colours = hue_pal()(2)
|
||||
lf_bp_colours = dot_colours
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -58,7 +62,6 @@ lf_bp2 <- function(lf_df = lf_duet
|
|||
|
||||
ggplot(lf_df, aes_string(x = x_grp, y = y_var)) +
|
||||
|
||||
|
||||
facet_wrap(fwv
|
||||
, nrow = n_facet_row
|
||||
, scales = y_scales) +
|
||||
|
@ -67,6 +70,7 @@ lf_bp2 <- function(lf_df = lf_duet
|
|||
|
||||
geom_violin(trim = T
|
||||
, scale = "width"
|
||||
, colour = "black"
|
||||
#, position = position_dodge(width = 0.9)
|
||||
, draw_quantiles = violin_quantiles) +
|
||||
|
||||
|
|
|
@ -143,14 +143,18 @@ lf_snap2 = all_dm_om_df[['lf_snap2']]
|
|||
wf_provean = all_dm_om_df[['wf_provean']]
|
||||
lf_provean = all_dm_om_df[['lf_provean']]
|
||||
|
||||
# NEW
|
||||
wf_dist_gen = all_dm_om_df[['wf_dist_gen']]
|
||||
lf_dist_gen = all_dm_om_df[['lf_dist_gen']]
|
||||
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']]
|
||||
lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]
|
||||
wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']]
|
||||
lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]
|
||||
}
|
||||
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']]
|
||||
lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']]
|
||||
wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']]
|
||||
lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']]
|
||||
}
|
||||
|
||||
s2 = c("\nSuccessfully sourced other_plots_data.R")
|
||||
|
|
|
@ -24,7 +24,7 @@ corr_plotdf = corr_data_extract(merged_df3
|
|||
, extract_scaled_cols = F)
|
||||
colnames(corr_plotdf)
|
||||
colnames(corr_df_m3_f)
|
||||
corr_plotdf = corr_df_m3_f # for downstream code
|
||||
corr_plotdf = corr_df_m3_f #for downstream code
|
||||
#================
|
||||
# stability
|
||||
#================
|
||||
|
@ -61,7 +61,7 @@ svg(corr_psP, width = 15, height = 15)
|
|||
|
||||
my_corr_pairs(corr_data_all = corr_df_ps
|
||||
, corr_cols = colnames(corr_df_ps[1:corr_end])
|
||||
, corr_method = "spearman" # other options: "pearson" or "kendall"
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_ps[color_coln]) #"dst_mode"
|
||||
, categ_colour = c("red", "blue")
|
||||
, density_show = F
|
||||
|
@ -113,7 +113,7 @@ svg(corr_ligP, width = 10, height = 10)
|
|||
|
||||
my_corr_pairs(corr_data_all = corr_df_lig
|
||||
, corr_cols = colnames(corr_df_lig[1:corr_end])
|
||||
, corr_method = "spearman" # other options: "pearson" or "kendall"
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_lig[color_coln]) #"dst_mode"
|
||||
, categ_colour = c("red", "blue")
|
||||
, density_show = F
|
||||
|
@ -158,7 +158,7 @@ svg(corr_ppi2P, width = 10, height = 10)
|
|||
|
||||
my_corr_pairs(corr_data_all = corr_df_ppi2
|
||||
, corr_cols = colnames(corr_df_ppi2[1:corr_end])
|
||||
, corr_method = "spearman" # other options: "pearson" or "kendall"
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_ppi2[color_coln]) #"dst_mode"
|
||||
, categ_colour = c("red", "blue")
|
||||
, density_show = F
|
||||
|
@ -181,7 +181,7 @@ corr_na_colnames = c("mCSM-NA"
|
|||
, "MAF"
|
||||
, "Log(OR)"
|
||||
, "-Log(P)"
|
||||
, "NA-Dist" # "interface_dist"
|
||||
, "NA-Dist" # "NA_dist"
|
||||
, "dst_mode"
|
||||
, drug)
|
||||
|
||||
|
@ -254,7 +254,7 @@ svg(corr_consP, width = 10, height = 10)
|
|||
|
||||
my_corr_pairs(corr_data_all = corr_df_cons
|
||||
, corr_cols = colnames(corr_df_cons[1:corr_end])
|
||||
, corr_method = "spearman" # other options: "pearson" or "kendall"
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_cons[color_coln]) #"dst_mode"
|
||||
, categ_colour = c("red", "blue")
|
||||
, density_show = F
|
||||
|
|
|
@ -1,237 +1,230 @@
|
|||
#################
|
||||
# Numbers
|
||||
##################
|
||||
nrow(wf_mcsm_lig)
|
||||
table(wf_mcsm_lig$mutation_info_labels)
|
||||
|
||||
nrow(wf_mcsm_ppi2)
|
||||
table(wf_mcsm_ppi2$mutation_info_labels)
|
||||
all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)
|
||||
#
|
||||
# lf_duet = all_dm_om_df[['lf_duet']]
|
||||
# table(lf_duet$param_type)
|
||||
################################################################
|
||||
|
||||
geneL_normal = c("pnca")
|
||||
geneL_na = c("gid", "rpob")
|
||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||
#======================
|
||||
# Data: Dist+genomics
|
||||
#======================
|
||||
lf_dist_genP = all_dm_om_df[['lf_dist_gen']]
|
||||
wf_dist_genP = all_dm_om_df[['wf_dist_gen']]
|
||||
|
||||
levels(lf_dist_genP$param_type)
|
||||
#lf_dist_genP$param_type <- factor(lf_dist_genP$param_type, levels=c("Log10(MAF)", "Lig Dist(Å)", "PPI Dist(Å)"))
|
||||
table(lf_dist_genP$param_type)
|
||||
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
lf_mcsm_na
|
||||
}
|
||||
genomics_param = c("Log10(MAF)")
|
||||
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
lf_mcsm_ppi2
|
||||
}
|
||||
dist_genP = lf_bp2(lf_dist_genP
|
||||
#, p_title
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
|
||||
colnames(lf_duet)
|
||||
table(lf_duet$param_type)
|
||||
#-------------------
|
||||
# Genomics data plot
|
||||
#-------------------
|
||||
genomics_dataP = lf_dist_genP[lf_dist_genP$param_type%in%genomics_param,]
|
||||
genomics_dataP$param_type = factor(genomics_dataP$param_type)
|
||||
table(genomics_dataP$param_type)
|
||||
|
||||
static_colsP = c("Lig Dist(Å)","ASA", "RSA","RD","KD","Log10(MAF)")
|
||||
genomicsP = lf_bp2(genomics_dataP
|
||||
#, p_title = ""
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
|
||||
stability_suffix <- paste0(delta_symbol, delta_symbol, "G")
|
||||
genomicsP
|
||||
|
||||
lf_commonP = lf_duet[!lf_duet$param_type%in%c("DUET ΔΔG"),]
|
||||
lf_commonP$param_type = levels(droplevels(lf_commonP$param_type))
|
||||
table(lf_commonP$param_type); colnames(lf_commonP)
|
||||
lf_commonP$outcome = lf_commonP$duet_outcome
|
||||
lf_commonP$duet_outcome = NULL
|
||||
#check
|
||||
wilcox.test(wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="R"]
|
||||
, wf_dist_genP$`Log10(MAF)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
|
||||
|
||||
tapply(wf_dist_genP$`Log10(MAF)`, wf_dist_genP$mutation_info_labels, summary)
|
||||
|
||||
lf_duet$outcome = lf_duet$duet_outcome
|
||||
lf_duet$duet_outcome = NULL
|
||||
lf_duetP = lf_duet[!lf_duet$param_type%in%c(static_colsP, "outcome"),]
|
||||
lf_duetP$param_type = levels(droplevels(lf_duetP$param_type))
|
||||
table(lf_duetP$param_type); colnames(lf_duetP)
|
||||
colnames(lf_duetP)
|
||||
#-------------------
|
||||
# Distance data plot: not genomics data
|
||||
#-------------------
|
||||
dist_dataP = lf_dist_genP[!lf_dist_genP$param_type%in%genomics_param,]
|
||||
#dist_dataP$param_type = factor(dist_dataP$param_type)
|
||||
table(dist_dataP$param_type)
|
||||
|
||||
lf_foldx$outcome = lf_foldx$foldx_outcome
|
||||
lf_foldx$foldx_outcome = NULL
|
||||
lf_foldxP = lf_foldx[!lf_foldx$param_type%in%c(static_colsP,"outcome"),]
|
||||
lf_foldxP$param_type = levels(droplevels(lf_foldxP$param_type))
|
||||
distanceP = lf_bp2(dist_dataP
|
||||
#, p_title = ""
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
|
||||
distanceP
|
||||
|
||||
# check
|
||||
wilcox.test(wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
|
||||
, wf_dist_genP$`PPI Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
|
||||
|
||||
wilcox.test(wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="R"]
|
||||
, wf_dist_genP$`Lig Dist(Å)`[wf_dist_genP$mutation_info_labels=="S"], paired = FALSE)
|
||||
|
||||
tapply(wf_dist_genP$`PPI Dist(Å)`, wf_dist_genP$mutation_info_labels, summary)
|
||||
|
||||
tapply(wf_dist_genP$`Lig Dist(Å)`, wf_dist_genP$mutation_info_labels, summary)
|
||||
|
||||
#==============
|
||||
# Plot:DUET
|
||||
#==============
|
||||
lf_duetP = all_dm_om_df[['lf_duet']]
|
||||
#lf_duetP = lf_duet[!lf_duet$param_type%in%c(static_colsP),]
|
||||
table(lf_duetP$param_type)
|
||||
lf_duetP$param_type = factor(lf_duetP$param_type)
|
||||
table(lf_duetP$param_type)
|
||||
|
||||
duetP = lf_bp2(lf_duetP
|
||||
#, p_title = paste0("Stability",stability_suffix)
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
|
||||
#==============
|
||||
# Plot:FoldX
|
||||
#==============
|
||||
lf_foldxP = all_dm_om_df[['lf_foldx']]
|
||||
#lf_foldxP = lf_foldx[!lf_foldx$param_type%in%c(static_colsP),]
|
||||
table(lf_foldxP$param_type)
|
||||
lf_foldxP$param_type = factor(lf_foldxP$param_type)
|
||||
table(lf_foldxP$param_type)
|
||||
colnames(lf_foldxP)
|
||||
|
||||
foldxP = lf_bp2(lf_foldxP
|
||||
#, p_title = paste0("Stability",stability_suffix)
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
|
||||
lf_deepddg$outcome = lf_deepddg$deepddg_outcome
|
||||
lf_deepddg$deepddg_outcome = NULL
|
||||
lf_deepddgP = lf_deepddg[!lf_deepddg$param_type%in%c(static_colsP, "outcome"),]
|
||||
lf_deepddgP$param_type = levels(droplevels(lf_deepddgP$param_type))
|
||||
#==============
|
||||
# Plot:DeepDDG
|
||||
#==============
|
||||
lf_deepddgP = all_dm_om_df[['lf_deepddg']]
|
||||
#lf_deepddgP = lf_deepddg[!lf_deepddg$param_type%in%c(static_colsP),]
|
||||
table(lf_deepddgP$param_type)
|
||||
lf_deepddgP$param_type = factor(lf_deepddgP$param_type)
|
||||
table(lf_deepddgP$param_type)
|
||||
colnames(lf_deepddgP)
|
||||
|
||||
deepddgP = lf_bp2(lf_deepddgP
|
||||
#, p_title = paste0("Stability",stability_suffix)
|
||||
, violin_quantiles = c(0.5), monochrome = F
|
||||
, dot_transparency = 0.3)
|
||||
deepddgP
|
||||
|
||||
lf_dynamut2$outcome = lf_dynamut2$ddg_dynamut2_outcome
|
||||
lf_dynamut2$ddg_dynamut2_outcome = NULL
|
||||
lf_dynamut2P = lf_dynamut2[!lf_dynamut2$param_type%in%c(static_colsP, "outcome"),]
|
||||
lf_dynamut2P$param_type = levels(droplevels(lf_dynamut2P$param_type))
|
||||
#==============
|
||||
# Plot: Dynamut2
|
||||
#==============
|
||||
lf_dynamut2P = all_dm_om_df[['lf_dynamut2']]
|
||||
#lf_dynamut2P = lf_dynamut2[!lf_dynamut2$param_type%in%c(static_colsP),]
|
||||
table(lf_dynamut2P$param_type)
|
||||
lf_dynamut2P$param_type = factor(lf_dynamut2P$param_type)
|
||||
table(lf_dynamut2P$param_type)
|
||||
colnames(lf_dynamut2P)
|
||||
|
||||
dynamut2P = lf_bp2(lf_dynamut2P
|
||||
#, p_title = paste0("Stability",stability_suffix)
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
|
||||
lf_consurf$outcome = lf_consurf$consurf_outcome
|
||||
lf_consurf$consurf_outcome = NULL
|
||||
lf_consurfP = lf_consurf[!lf_consurf$param_type%in%c(static_colsP),]
|
||||
lf_consurfP$param_type = levels(droplevels(lf_consurfP$param_type))
|
||||
#==============
|
||||
# Plot:ConSurf
|
||||
#==============
|
||||
lf_consurfP = all_dm_om_df[['lf_consurf']]
|
||||
#lf_consurfP = lf_consurf[!lf_consurf$param_type%in%c(static_colsP),]
|
||||
table(lf_consurfP$param_type)
|
||||
lf_consurfP$param_type = factor(lf_consurfP$param_type)
|
||||
table(lf_consurfP$param_type)
|
||||
colnames(lf_consurfP)
|
||||
|
||||
consurfP = lf_bp2(lf_consurfP
|
||||
#, p_title = paste0("Stability",stability_suffix)
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
|
||||
lf_snap2$outcome = lf_snap2$snap2_outcome
|
||||
lf_snap2$snap2_outcome = NULL
|
||||
lf_snap2P = lf_snap2[!lf_snap2$param_type%in%c(static_colsP),]
|
||||
lf_snap2P$param_type = levels(droplevels(lf_snap2P$param_type))
|
||||
#==============
|
||||
# Plot:SNAP2
|
||||
#==============
|
||||
lf_snap2P = all_dm_om_df[['lf_snap2']]
|
||||
#lf_snap2P = lf_snap2[!lf_snap2$param_type%in%c(static_colsP),]
|
||||
table(lf_snap2P$param_type)
|
||||
lf_snap2P$param_type = factor(lf_snap2P$param_type)
|
||||
table(lf_snap2P$param_type)
|
||||
colnames(lf_snap2P)
|
||||
|
||||
snap2P = lf_bp2(lf_snap2P
|
||||
#, p_title = paste0("Stability",stability_suffix)
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
|
||||
lf_provean$outcome = lf_provean$provean_outcome
|
||||
lf_provean$provean_outcome = NULL
|
||||
lf_proveanP = lf_provean[!lf_provean$param_type%in%c(static_colsP),]
|
||||
lf_proveanP$param_type = levels(droplevels(lf_proveanP$param_type))
|
||||
#==============
|
||||
# Plot:PROVEAN
|
||||
#==============
|
||||
lf_proveanP = all_dm_om_df[['lf_provean']]
|
||||
#lf_proveanP = lf_provean[!lf_provean$param_type%in%c(static_colsP),]
|
||||
table(lf_proveanP$param_type)
|
||||
lf_proveanP$param_type = factor(lf_proveanP$param_type)
|
||||
table(lf_proveanP$param_type)
|
||||
colnames(lf_proveanP)
|
||||
|
||||
bar = rbind(colnames(lf_duetP)
|
||||
, colnames(lf_foldxP)
|
||||
, colnames(lf_deepddgP)
|
||||
, colnames(lf_dynamut2P)
|
||||
, colnames(lf_consurfP)
|
||||
, colnames(lf_snap2P)
|
||||
, colnames(lf_proveanP)
|
||||
)
|
||||
bar
|
||||
|
||||
lf_df_stabP = rbind((lf_duetP)
|
||||
, (lf_foldxP)
|
||||
, (lf_deepddgP)
|
||||
, (lf_dynamut2P))
|
||||
|
||||
lf_df_consP = rbind((lf_consurfP)
|
||||
, (lf_snap2P)
|
||||
, (lf_proveanP))
|
||||
|
||||
table(lf_df_stabP$param_type)
|
||||
# VERY USEFUL for seeing numbers for param types
|
||||
table(lf_df_stabP$param_type,lf_df_stabP$outcome)
|
||||
table(lf_df_consP$param_type,lf_df_consP$outcome)
|
||||
|
||||
#==============
|
||||
# Plot:BP
|
||||
#==============
|
||||
stability_suffix <- paste0(delta_symbol, delta_symbol, "G")
|
||||
|
||||
# lf_bp(lf_df_stabP, p_title = paste0("Stability",stability_suffix)
|
||||
# , violin_quantiles = c(0.5))
|
||||
# lf_bp(lf_df_consP, p_title = "Evolutionary Conservation"
|
||||
# , violin_quantiles = c(0.5))
|
||||
|
||||
lf_bp2(lf_df_stabP, p_title = paste0("Stability",stability_suffix)
|
||||
proveanP = lf_bp2(lf_proveanP
|
||||
#, p_title = paste0("Stability",stability_suffix)
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
|
||||
lf_bp2(lf_duet, p_title = paste0("Stability",stability_suffix)
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
|
||||
|
||||
|
||||
lf_bp2(lf_df_consP, p_title = "Evolutionary Conservation"
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
|
||||
#HMMM: Bollocks!
|
||||
lf_bp2(lf_commonP, p_title = paste0("Residue level properties")
|
||||
, violin_quantiles = c(0.5)
|
||||
, monochrome = T) # doesn't plot stat bars
|
||||
|
||||
lf_bp(lf_commonP, p_title = paste0("Residue level properties")
|
||||
, violin_quantiles = c(0.5)) #plots stat bars but incorrect result
|
||||
|
||||
lf_unpaired_stats(lf_duet)
|
||||
wilcox.test(wf_duet$`Lig Dist(Å)`[wf_duet$mutation_info_labels=="R"]
|
||||
, wf_duet$`Lig Dist(Å)`[wf_duet$mutation_info_labels=="S"])
|
||||
|
||||
wilcox.test(wf_duet$ASA[wf_duet$mutation_info_labels=="R"]
|
||||
, wf_duet$ASA[wf_duet$mutation_info_labels=="S"])
|
||||
|
||||
|
||||
# 1: variable
|
||||
# 9: conserved
|
||||
|
||||
# CHECK THESE
|
||||
foo = merged_df3[c("dst_mode", "mutation_info_labels", "consurf_colour_rev"
|
||||
, "consurf_scaled"
|
||||
, "consurf_score"
|
||||
, "consurf_outcome"
|
||||
, "snap2_score"
|
||||
, "snap2_scaled"
|
||||
, "snap2_outcome"
|
||||
, "provean_score"
|
||||
, "provean_scaled"
|
||||
, "provean_outcome")]
|
||||
|
||||
################
|
||||
# Affinity
|
||||
################
|
||||
# ligand
|
||||
lf_mcsm_lig$outcome = lf_mcsm_lig$ligand_outcome
|
||||
lf_mcsm_lig$ligand_outcome = NULL
|
||||
colnames(lf_mcsm_lig)
|
||||
table(lf_mcsm_lig$param_type)
|
||||
|
||||
|
||||
lf_mcsm_lig$outcome = lf_mcsm_lig$ligand_outcome
|
||||
lf_mcsm_ligP = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_colsP, "outcome"),]
|
||||
#lf_mcsm_ligP$param_type = levels(droplevels(lf_mcsm_ligP))
|
||||
#==============
|
||||
# Plot: mCSM-lig
|
||||
#==============
|
||||
lf_mcsm_ligP = all_dm_om_df[['lf_mcsm_lig']]
|
||||
#lf_mcsm_ligP = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_colsP),]
|
||||
table(lf_mcsm_ligP$param_type)
|
||||
lf_mcsm_ligP$param_type = factor(lf_mcsm_ligP$param_type)
|
||||
table(lf_mcsm_ligP$param_type)
|
||||
lf_mcsm_ligP$ligand_outcome = NULL
|
||||
colnames(lf_mcsm_ligP)
|
||||
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
lf_mcsm_na$outcome = lf_mcsm_na$mcsm_na_outcome
|
||||
#lf_mcsm_na$mcsm_na_outcome = NULL
|
||||
lf_mcsm_naP = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_colsP, "outcome"),]
|
||||
#lf_mcsm_naP$param_type = levels(droplevels(lf_mcsm_naP))
|
||||
table(lf_mcsm_naP$param_type)
|
||||
lf_mcsm_naP$mcsm_na_outcome = NULL
|
||||
colnames(lf_mcsm_naP)
|
||||
}
|
||||
mcsmligP = lf_bp2(lf_mcsm_ligP
|
||||
#, p_title = paste0("Stability",stability_suffix)
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
|
||||
#==============
|
||||
# Plot: mCSM-ppi2
|
||||
#==============
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
|
||||
lf_mcsm_ppi2$outcome = lf_mcsm_ppi2$mcsm_ppi2_outcome
|
||||
colnames(lf_mcsm_ppi2)
|
||||
#lf_mcsm_ppi2$mcsm_ppi2_outcome = NULL
|
||||
lf_mcsm_ppi2P = lf_mcsm_ppi2[!lf_mcsm_ppi2$param_type%in%c(static_colsP, "outcome"),]
|
||||
#lf_mcsm_ppi2P$param_type = levels(droplevels(lf_mcsm_ppi2P))
|
||||
lf_mcsm_ppi2P = all_dm_om_df[['lf_mcsm_ppi2']]
|
||||
#lf_mcsm_ppi2P = lf_mcsm_ppi2[!lf_mcsm_ppi2$param_type%in%c(static_colsP),]
|
||||
table(lf_mcsm_ppi2P$param_type)
|
||||
lf_mcsm_ppi2P$param_type = factor(lf_mcsm_ppi2P$param_type)
|
||||
table(lf_mcsm_ppi2P$param_type)
|
||||
lf_mcsm_ppi2P$mcsm_ppi2_outcome = NULL
|
||||
colnames(lf_mcsm_ppi2P)
|
||||
|
||||
mcsmppi2P = lf_bp2(lf_mcsm_ppi2P
|
||||
#, p_title = paste0("Stability",stability_suffix)
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
}
|
||||
|
||||
#==============
|
||||
# Plot: mCSM-NA
|
||||
#==============
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
lf_mcsm_naP = all_dm_om_df[['lf_mcsm_na']]
|
||||
#lf_mcsm_naP = lf_mcsm_na[!lf_mcsm_na$param_type%in%c(static_colsP),]
|
||||
table(lf_mcsm_naP$param_type)
|
||||
lf_mcsm_naP$param_type = factor(lf_mcsm_naP$param_type)
|
||||
table(lf_mcsm_naP$param_type)
|
||||
|
||||
bar = rbind(colnames(lf_mcsm_ligP)
|
||||
#, colnames(lf_mcsm_naP)
|
||||
, colnames(lf_mcsm_ppi2P))
|
||||
bar
|
||||
mcsmnaP = lf_bp2(lf_mcsm_naP
|
||||
#, p_title = paste0("Stability",stability_suffix)
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
}
|
||||
|
||||
lf_df_affP = rbind((lf_mcsm_ligP)
|
||||
, (lf_mcsm_ppi2P))
|
||||
######################################
|
||||
# Outplot with stats
|
||||
######################################
|
||||
|
||||
lf_bp(lf_df_affP, p_title = paste0("Affinity changes")
|
||||
, violin_quantiles = c(0.5))
|
||||
#, monochrome = T) # doesn't plot stat bars
|
||||
cowplot::plot_grid(
|
||||
cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P, genomicsP, distanceP
|
||||
, nrow=1),
|
||||
# cowplot::plot_grid(genomicsP, distanceP
|
||||
# , nrow = 1),
|
||||
cowplot::plot_grid(consurfP, snap2P, proveanP
|
||||
, mcsmligP
|
||||
, mcsmppi2P
|
||||
#, mcsmnaP
|
||||
, nrow=1),
|
||||
nrow=2)
|
||||
|
||||
wilcox.test(wf_mcsm_lig$ASA[wf_mcsm_lig$mutation_info_labels=="R"]
|
||||
, wf_mcsm_lig$ASA[wf_mcsm_lig$mutation_info_labels=="S"])
|
||||
|
||||
wilcox.test(wf_mcsm_ppi2$ASA[wf_mcsm_ppi2$mutation_info_labels=="R"]
|
||||
, wf_mcsm_ppi2$ASA[wf_mcsm_ppi2$mutation_info_labels=="S"])
|
||||
foo = lf_consurfP
|
||||
|
||||
#===============================
|
||||
p1 = lf_bp2(lf_df_stabP, p_title = paste0("Stability",stability_suffix)
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
# proveanP = lf_bp2(lf_proveanP, colour_categ = "mutation_info_labels"
|
||||
# , p_title = paste0("Evolutionary conservation")
|
||||
# , dot_transparency = 1
|
||||
# , violin_quantiles = c(0.5), monochrome = F)
|
||||
#
|
||||
# proveanP
|
||||
|
||||
p2 = lf_bp2(lf_df_consP, p_title = "Evolutionary Conservation"
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
|
||||
p3 = lf_bp2(lf_df_affP, p_title = paste0("Affinity changes")
|
||||
, violin_quantiles = c(0.5), monochrome = F)
|
||||
|
||||
cowplot::plot_grid(p1,cowplot::plot_grid(p2, p3), nrow=2)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue