added function to get wf and lf data and corresponding test
This commit is contained in:
parent
a287b801f7
commit
5779b3fe87
2 changed files with 763 additions and 0 deletions
710
scripts/functions/dm_om_data.R
Normal file
710
scripts/functions/dm_om_data.R
Normal file
|
@ -0,0 +1,710 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: Script to format data for dm om plots:
|
||||
# generating WF and LF data for each of the parameters
|
||||
# duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc
|
||||
# Called by get_plotting_dfs.R
|
||||
|
||||
# dm_om_wf_lf_data()
|
||||
# Input: data with all parameters (merged_df3, my_use case)
|
||||
# gene: [conditional generation of dfs like mcsm-NA, mcsm-ppi2 as not all genes have all these values]
|
||||
# colnames_to_extract = c("mutationinformation"
|
||||
# , "duet_affinity_change...")
|
||||
# ligand_dist_colname = LigDist_colname # from globals
|
||||
# dr_muts = dr_muts_col # from globals ...dr_mutations_<drug>
|
||||
# other_muts = other_muts_col # from globals ...other_mutations_<drug>
|
||||
# snp_colname = "mutationinformation"
|
||||
# aa_pos_colname = "position" # to sort df by
|
||||
# mut_colname = "mutation"
|
||||
# mut_info_colname = "mutation_info"
|
||||
# mut_info_label_colname = "mutation_info_labels" # if empty, below used
|
||||
# dr_other_muts_labels = c("DM", "OM") # only used if ^^ = ""
|
||||
# categ_cols_to_factor: cols to convert to factor; defaults to the cols whose names match '_outcome' or '_info'
|
||||
|
||||
# TO DO: SHINY
|
||||
#1)
|
||||
#2)
|
||||
##################################################################
|
||||
# Internal helper for dm_om_wf_lf_data(): builds the wide-format (WF) and
# long-format (LF) data frames for ONE parameter.
#   df            : data frame holding all (already renamed) columns
#   id_cols       : columns kept as identifiers in both the WF and LF data
#   value_col     : display name of the parameter column
#   trailing_cols : shared columns appended after value_col; gathered with it
# Returns list(wf = <data.frame>, lf = <data.frame>).
# Stops (instead of quitting the R session) when a requested column is absent
# or when the LF data does not have nrow(wf) * <number of gathered cols> rows.
.dm_om_make_wf_lf <- function(df, id_cols, value_col, trailing_cols) {
  value_cols <- c(value_col, trailing_cols)
  wanted_cols <- c(id_cols, value_cols)

  absent_cols <- setdiff(wanted_cols, colnames(df))
  if (length(absent_cols) > 0) {
    stop("FAIL: columns missing for ", value_col, ": "
         , paste(absent_cols, collapse = ", ")
         , call. = FALSE)
  }

  # WF data: identifiers first, then the parameter, then the shared columns
  wf <- df[, wanted_cols, drop = FALSE]
  expected_rows_lf <- nrow(wf) * length(value_cols)

  # LF data: one row per (mutation, gathered column); column order of the
  # param_type factor follows value_cols
  lf <- tidyr::gather(wf
                      , key = "param_type"
                      , value = "param_value"
                      , tidyselect::all_of(value_cols)
                      , factor_key = TRUE)

  if (nrow(lf) != expected_rows_lf) {
    stop("FAIL: long format data could not be created for ", value_col
         , call. = FALSE)
  }
  cat("\nPASS: long format data created for ", value_col)

  list(wf = wf, lf = lf)
}

# dm_om_wf_lf_data(): generate WF and LF data frames for each parameter
# (duet, mcsm-lig, foldx, deepddg, dynamut2, consurf, snap2 for every gene;
# mcsm-na, mcsm-ppi2, dynamut, encom, sdm, mcsm only for the genes listed below).
#
# Arguments:
#   df                     : data with all parameters (e.g. merged_df3)
#   gene_name              : gene; decides which gene-specific dfs are generated
#   colnames_to_extract    : extra columns to keep; a default set is used if missing
#   ligand_dist_colname    : name of the ligand distance column
#   dr_muts, other_muts    : values of `mut_info_colname` identifying the two
#                            mutation classes
#   snp_colname, aa_pos_colname, mut_colname, mut_info_colname,
#   mut_info_label_colname : names of the identifier columns; rows are sorted
#                            by `aa_pos_colname`
#   dr_other_muts_labels   : labels used only when mut_info_label_colname == ""
#   categ_cols_to_factor   : columns (indices or names) to convert to factor;
#                            defaults to those matching "_outcome|_info"
#
# Returns a named list with elements wf_<param> and lf_<param>.
# Relies on the globals `delta_symbol` and `angstroms_symbol` for display names.
dm_om_wf_lf_data <- function(df
                             , gene_name = gene # from globals
                             , colnames_to_extract
                             , ligand_dist_colname = LigDist_colname # from globals
                             , dr_muts = dr_muts_col # from globals
                             , other_muts = other_muts_col # from globals
                             , snp_colname = "mutationinformation"
                             , aa_pos_colname = "position" # to sort df by
                             , mut_colname = "mutation"
                             , mut_info_colname = "mutation_info"
                             , mut_info_label_colname = "mutation_info_labels" # if empty, below used
                             , dr_other_muts_labels = c("DM", "OM") # only used if ^^ = ""
                             , categ_cols_to_factor){

  # Genes for which the additional (gene-specific) dfs are generated
  geneL_na   <- c("gid", "rpob")
  geneL_dy   <- c("gid")
  geneL_ppi2 <- c("alr", "embb", "katg", "rpob")
  gene_lc    <- tolower(gene_name)

  #=======================================================================
  # Columns to extract
  #=======================================================================
  id_colnames <- c(snp_colname
                   , mut_colname, mut_info_colname, mut_info_label_colname
                   , aa_pos_colname
                   , ligand_dist_colname)

  if (missing(colnames_to_extract)) {
    colnames_to_extract <- c(id_colnames
      , "duet_stability_change" , "duet_scaled"        , "duet_outcome"
      , "ligand_affinity_change", "affinity_scaled"    , "ligand_outcome"
      , "ddg_foldx"             , "foldx_scaled"       , "foldx_outcome"
      , "deepddg"               , "deepddg_scaled"     , "deepddg_outcome"
      , "asa"                   , "rsa"
      , "rd_values"             , "kd_values"
      , "log10_or_mychisq"      , "neglog_pval_fisher" , "af"
      , "ddg_dynamut2"          , "ddg_dynamut2_scaled", "ddg_dynamut2_outcome"
      , "mcsm_ppi2_affinity"    , "mcsm_ppi2_scaled"   , "mcsm_ppi2_outcome"
      , "consurf_score"         , "consurf_scaled"     #, "consurf_outcome"
      , "snap2_score"           , "snap2_scaled"       , "snap2_outcome"
      , "mcsm_na_affinity"      , "mcsm_na_scaled"     , "mcsm_na_outcome"
      , "ddg_dynamut"           , "ddg_dynamut_scaled" , "ddg_dynamut_outcome"
      , "ddg_encom"             , "ddg_encom_scaled"   , "ddg_encom_outcome"
      , "dds_encom"             , "dds_encom_scaled"   , "dds_encom_outcome"
      , "ddg_mcsm"              , "ddg_mcsm_scaled"    , "ddg_mcsm_outcome"
      , "ddg_sdm"               , "ddg_sdm_scaled"     , "ddg_sdm_outcome"
      , "ddg_duet"              , "ddg_duet_scaled"    , "ddg_duet_outcome")
  } else {
    # the identifier columns are always needed further down
    colnames_to_extract <- c(id_colnames, colnames_to_extract)
  }

  comb_df <- df[, colnames(df) %in% colnames_to_extract, drop = FALSE]

  if (!aa_pos_colname %in% colnames(comb_df)) {
    stop("FAIL: position column not found in df: ", aa_pos_colname
         , call. = FALSE)
  }
  # sort by the VALUES of the position column (ties keep their input order)
  comb_df_s <- comb_df[order(comb_df[[aa_pos_colname]]), , drop = FALSE]

  #=======================================================================
  # Categorical columns --> factor
  #=======================================================================
  if (missing(categ_cols_to_factor)) {
    categ_cols_to_factor <- grep("_outcome|_info", colnames(comb_df_s))
  }
  if (is.character(categ_cols_to_factor)) {
    fact_cols <- intersect(categ_cols_to_factor, colnames(comb_df_s))
  } else {
    fact_cols <- colnames(comb_df_s)[categ_cols_to_factor]
  }

  # list-style indexing so that a single column is not collapsed to a vector
  if (any(vapply(comb_df_s[fact_cols], is.character, logical(1)))) {
    cat("\nChanging", length(categ_cols_to_factor), "cols to factor")
    comb_df_s[fact_cols] <- lapply(comb_df_s[fact_cols], as.factor)
    if (all(vapply(comb_df_s[fact_cols], is.factor, logical(1)))) {
      cat("\nSuccessful: cols changed to factor")
    }
  } else {
    cat("\nRequested cols aready factors")
  }

  #=======================================================================
  # Check that dr and other muts do not share any mutation
  #=======================================================================
  # separate local names: `dr_muts`/`other_muts` are still needed below
  mut_class <- as.character(comb_df_s[[mut_info_colname]])
  dr_muts_names    <- unique(as.character(
    comb_df_s[[mut_colname]][which(mut_class == dr_muts)]))
  other_muts_names <- unique(as.character(
    comb_df_s[[mut_colname]][which(mut_class == other_muts)]))

  if (length(intersect(dr_muts_names, other_muts_names)) == 0) {
    cat("PASS: dr and other muts are indeed unique")
  } else {
    stop("FAIL: dr and others muts are NOT unique!", call. = FALSE)
  }

  #=======================================================================
  # Pretty display names i.e. labels
  #=======================================================================
  stability_suffix   <- paste0(delta_symbol, delta_symbol, "G")
  flexibility_suffix <- paste0(delta_symbol, delta_symbol, "S")

  lig_dn       <- paste0("Ligand distance (", angstroms_symbol, ")")
  mcsm_lig_dn  <- paste0("Ligand affinity (log fold change)")

  duet_dn      <- paste0("DUET ", stability_suffix)
  foldx_dn     <- paste0("FoldX ", stability_suffix)
  deepddg_dn   <- paste0("Deepddg ", stability_suffix)
  dynamut2_dn  <- paste0("Dynamut2 ", stability_suffix)

  mcsm_na_dn   <- paste0("mCSM-NA affinity ", stability_suffix)
  mcsm_ppi2_dn <- paste0("mCSM-PPI2 affinity ", stability_suffix)
  consurf_dn   <- paste0("Consurf")
  snap2_dn     <- paste0("SNAP2")

  dynamut_dn   <- paste0("Dynamut ", stability_suffix)
  encom_ddg_dn <- paste0("EnCOM ", stability_suffix)
  encom_dds_dn <- paste0("EnCOM ", flexibility_suffix)
  sdm_dn       <- paste0("SDM ", stability_suffix)
  mcsm_dn      <- paste0("mCSM ", stability_suffix)

  # NOTE(review): mcsm_ppi2, consurf, snap2, sdm and mcsm map the raw column
  # rather than the corresponding '_scaled' one, unlike the other parameters.
  # Kept as in the original -- confirm this is intended.
  new_colnames <- c(asa                   = "ASA"
                    , rsa                 = "RSA"
                    , rd_values           = "RD"
                    , kd_values           = "KD"
                    , log10_or_mychisq    = "Log10 (OR)"
                    , neglog_pval_fisher  = "-Log (P)"
                    , af                  = "MAF"
                    , affinity_scaled     = mcsm_lig_dn
                    , duet_scaled         = duet_dn
                    , foldx_scaled        = foldx_dn
                    , deepddg_scaled      = deepddg_dn
                    , ddg_dynamut2_scaled = dynamut2_dn
                    , mcsm_na_scaled      = mcsm_na_dn
                    , mcsm_ppi2_affinity  = mcsm_ppi2_dn
                    , consurf_score       = consurf_dn
                    , snap2_score         = snap2_dn
                    , ddg_dynamut_scaled  = dynamut_dn
                    , ddg_encom_scaled    = encom_ddg_dn
                    , dds_encom_scaled    = encom_dds_dn
                    , ddg_sdm             = sdm_dn
                    , ddg_mcsm            = mcsm_dn)

  comb_df_sl <- plyr::rename(comb_df_s
                             , replace = new_colnames
                             , warn_missing = TRUE
                             , warn_duplicated = TRUE)

  # the ligand distance column name is held in a variable, so rename by value
  names(comb_df_sl)[names(comb_df_sl) == ligand_dist_colname] <- lig_dn

  #=======================================================================
  # Mutation class labels
  #=======================================================================
  if (mut_info_label_colname == "") {
    cat("\nAssigning labels:", dr_other_muts_labels, "--> to column:", mut_info_colname)

    comb_df_sl[[mut_info_colname]] <- as.factor(comb_df_sl[[mut_info_colname]])
    info_levels <- levels(comb_df_sl[[mut_info_colname]])
    info_levels[info_levels == dr_muts]    <- dr_other_muts_labels[[1]]
    info_levels[info_levels == other_muts] <- dr_other_muts_labels[[2]]
    levels(comb_df_sl[[mut_info_colname]]) <- info_levels

    static_cols1 <- mut_info_colname
  } else {
    static_cols1 <- mut_info_label_colname
  }

  #=======================================================================
  # Columns shared by every WF/LF df
  #=======================================================================
  static_cols_start <- c(snp_colname
                         , aa_pos_colname
                         , mut_colname
                         , static_cols1)

  # ordering is important!
  static_cols_end <- c(lig_dn
                       , "ASA"
                       , "RSA"
                       , "RD"
                       , "KD"
                       , "MAF"
                       , "Log10 (OR)"
                       , "-Log (P)")

  #=======================================================================
  # Parameter table: output suffix --> (outcome column, display name)
  # consurf has no outcome (category) column yet, hence character(0)
  #=======================================================================
  param_specs <- list(
    duet       = list(outcome = "duet_outcome"        , label = duet_dn)
    , mcsm_lig = list(outcome = "ligand_outcome"      , label = mcsm_lig_dn)
    , foldx    = list(outcome = "foldx_outcome"       , label = foldx_dn)
    , deepddg  = list(outcome = "deepddg_outcome"     , label = deepddg_dn)
    , dynamut2 = list(outcome = "ddg_dynamut2_outcome", label = dynamut2_dn)
    , consurf  = list(outcome = character(0)          , label = consurf_dn)
    , snap2    = list(outcome = "snap2_outcome"       , label = snap2_dn)
  )

  if (gene_lc %in% geneL_na) {
    param_specs$mcsm_na <- list(outcome = "mcsm_na_outcome", label = mcsm_na_dn)
  }
  if (gene_lc %in% geneL_ppi2) {
    param_specs$mcsm_ppi2 <- list(outcome = "mcsm_ppi2_outcome", label = mcsm_ppi2_dn)
  }
  if (gene_lc %in% geneL_dy) {
    param_specs$dynamut  <- list(outcome = "ddg_dynamut_outcome", label = dynamut_dn)
    param_specs$encomddg <- list(outcome = "ddg_encom_outcome"  , label = encom_ddg_dn)
    param_specs$encomdds <- list(outcome = "dds_encom_outcome"  , label = encom_dds_dn)
    param_specs$sdm      <- list(outcome = "ddg_sdm_outcome"    , label = sdm_dn)
    param_specs$mcsm     <- list(outcome = "ddg_mcsm_outcome"   , label = mcsm_dn)
  }

  # Initialise the output list: a wf_ and an lf_ entry per parameter.
  # Built from param_specs so that it exists for ANY gene name.
  out_names <- as.vector(rbind(paste0("wf_", names(param_specs))
                               , paste0("lf_", names(param_specs))))
  wf_lf_dataL <- stats::setNames(
    replicate(length(out_names), data.frame(), simplify = FALSE)
    , out_names)
  cat("\nInitializing an empty list of length:"
      , length(wf_lf_dataL))

  #=======================================================================
  # WF and LF data for each parameter
  #=======================================================================
  for (param in names(param_specs)) {
    spec <- param_specs[[param]]
    wf_lf <- .dm_om_make_wf_lf(df = comb_df_sl
                               , id_cols = c(static_cols_start, spec$outcome)
                               , value_col = spec$label
                               , trailing_cols = static_cols_end)
    wf_lf_dataL[[paste0("wf_", param)]] <- wf_lf$wf
    wf_lf_dataL[[paste0("lf_", param)]] <- wf_lf$lf
  }

  return(wf_lf_dataL)
}
|
||||
############################################################################
|
53
scripts/functions/tests/test_dm_om_data.R
Normal file
53
scripts/functions/tests/test_dm_om_data.R
Normal file
|
@ -0,0 +1,53 @@
|
|||
#!/usr/bin/env Rscript
# Manual check for dm_om_wf_lf_data(): source ONE gene config, build the
# plotting dfs, then pull every wf_/lf_ data frame out of the returned list.
#source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/embb.R")
source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R")

source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")

all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene_name = gene)

# Create wf_<param> and lf_<param> in the global environment for every
# parameter; entries absent from the list (gene-specific ones) become NULL,
# exactly as a direct `[[` extraction would give.
invisible(lapply(
  c("duet", "mcsm_lig", "foldx", "deepddg", "dynamut2", "consurf", "snap2"
    , "mcsm_na", "mcsm_ppi2"
    , "dynamut", "encomddg", "encomdds", "sdm", "mcsm")
  , function(param) {
    for (prefix in c("wf_", "lf_")) {
      df_name <- paste0(prefix, param)
      assign(df_name, all_dm_om_df[[df_name]], envir = globalenv())
    }
  }
))
|
Loading…
Add table
Add a link
Reference in a new issue