generated corr plots with MAF and provean

This commit is contained in:
Tanushree Tunstall 2022-08-06 13:08:42 +01:00
parent 1a513913ce
commit 569e372476
6 changed files with 115 additions and 104 deletions

View file

@ -22,9 +22,12 @@
#1) Corr type?
#2)
##################################################################
# LigDist_colname #from globals: plotting_globals.R
# ppi2Dist_colname #from globals: plotting_globals.R
# naDist_colname #from globals: plotting_globals.R
corr_data_extract <- function(df
#, gene_name = gene
, drug_name = drug
, gene
, drug
#, ligand_dist_colname = LigDist_colname
, colnames_to_extract
, colnames_display_key
@ -38,34 +41,48 @@ corr_data_extract <- function(df
, "\n=========================================")
cat("\nExtracting default columns for"
#, "\nGene name:", gene
, "\nGene name:", gene
, "\nDrug name:", drug)
colnames_to_extract = c(drug
#, "mutationinformation"
#, "mutation_info_labels"
, "dst_mode"
, "duet_stability_change"
, "ligand_affinity_change"
, "ligand_distance"
#, ligand_dist_colname
, "interface_dist"
, "ddg_foldx"
, "deepddg"
, "asa"
, "rsa"
, "kd_values"
, "rd_values"
, "af"
, "log10_or_mychisq"
, "neglog_pval_fisher"
, "ddg_dynamut2"
, "consurf_score"
, "snap2_score"
, "ddg_dynamut", "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet"
, "mcsm_na_affinity"
, "mcsm_ppi2_affinity"
)
geneL_normal = c("pnca")
geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
common_colnames = c(drug, "dst_mode"
, "duet_stability_change" , "ddg_foldx" , "deepddg" , "ddg_dynamut2"
, "asa" , "rsa" , "kd_values" , "rd_values"
, "maf" , "log10_or_mychisq" , "neglog_pval_fisher"
, LigDist_colname
, "consurf_score" , "snap2_score" , "provean_score"
, "ligand_affinity_change"
#, "ddg_dynamut", "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet"
)
display_common_colnames = c( drug, "dst_mode"
, "DUET" , "FoldX" , "DeepDDG", "Dynamut2"
, "ASA" , "RSA" , "KD" , "RD"
, "MAF" , "Log(OR)" , "-Log(P)"
, "Lig-Dist"
, "ConSurf" , "SNAP2" , "PROVEAN"
, "mCSM-lig"
# , "Dynamut" , "ENCoM-DDG" , "mCSM" , "SDM" , "DUET-d" , "ENCoM-DDS"
)
if (tolower(gene)%in%geneL_normal){
colnames_to_extract = c(common_colnames)
display_colnames = c(display_common_colnames)
}
if (tolower(gene)%in%geneL_ppi2){
colnames_to_extract = c(common_colnames ,"mcsm_ppi2_affinity", ppi2Dist_colname)
display_colnames = c(display_common_colnames,"mCSM-PPI2" , "PPI-Dist")
}
if (tolower(gene)%in%geneL_na){
colnames_to_extract = c(common_colnames,"mcsm_na_affinity", naDist_colname)
display_colnames = c(display_common_colnames, "mCSM-NA", "NA-Dist")
}
# [optional] arg: extract_scaled_cols
if (extract_scaled_cols){
@ -77,46 +94,15 @@ corr_data_extract <- function(df
colnames_to_extract = colnames_to_extract
}
corr_df = df[, colnames(df)%in%colnames_to_extract]
# extract df based on gene
corr_df = df[,colnames_to_extract]
colnames(corr_df)
display_colnames
# arg: colnames_display_key
colnames_display_key = c(duet_stability_change = "DUET"
, ligand_affinity_change = "mCSM-lig"
, ligand_distance = "ligand_distance"
#, ligand_dist_colname = "ligand_distance"
, interface_dist = "interface_dist"
, ddg_foldx = "FoldX"
, deepddg = "DeepDDG"
, asa = "ASA"
, rsa = "RSA"
, kd_values = "KD"
, rd_values = "RD"
, af = "MAF"
, log10_or_mychisq = "Log (OR)"
, neglog_pval_fisher = "-Log (P)"
, ddg_dynamut2 = "Dynamut2"
, consurf_score = "Consurf"
, snap2_score = "SNAP2"
, ddg_dynamut = "Dynamut"
, ddg_encom = "ENCoM-DDG"
, ddg_mcsm = "mCSM"
, ddg_sdm = "SDM"
, ddg_duet = "DUET-d"
, dds_encom = "ENCoM-DDS"
, mcsm_na_affinity = "mCSM-NA"
, mcsm_ppi2_affinity = "mCSM-PPI2")
# COMMENT: This only works when all the columns are in the namekey vector.
# If one is missing, there is no error, but that column is silently renamed to "NA".
#names(corr_df) <- colnames_display_key[names(corr_df)]
# Solution: to use plyr::rename()
# Consider using requireNamespace() instead of library() so that plyr's function names don't collide with dplyr's.
corr_df = plyr::rename(corr_df
, replace = colnames_display_key
, warn_missing = T
, warn_duplicated = T)
colnames(corr_df)[colnames(corr_df)%in%colnames_to_extract] <- display_colnames
colnames(corr_df)
cat("\nExtracted ncols:", ncol(corr_df)
,"\nRenaming successful")