121 lines
4.7 KiB
R
121 lines
4.7 KiB
R
#!/usr/bin/env Rscript
|
|
#########################################################
|
|
# TASK: Script to format data for Correlation plots:
|
|
|
|
# corr_data_extract()
|
|
# INPUT:
|
|
# df: data with all parameters (my_use case)
|
|
# merged_df3 or merged_df2!?
|
|
# gene: [sanity check]
|
|
# drug: name of the drug column in df; this column is extracted along with the other parameters
|
|
# ligand_dist_colname = LigDist_colname (variable from plotting_globals())
|
|
|
|
# colnames_to_extract = c("mutationinformation", "duet_affinity_change")
|
|
# colnames_display_key = c(mutationinformation = "MUT" , duet_affinity_change = "DUET")
|
|
# extract_scaled_cols = T or F, so that parameters with the _scaled suffix can be extracted.
|
|
# NOTE*: No formatting applied to these cols i.e display name
|
|
|
|
# RETURNS: DF
|
|
# containing all the columns required for generating downstream correlation plots
|
|
|
|
# TO DO: SHINY
|
|
#1) Corr type?
|
|
#2)
|
|
##################################################################
|
|
# LigDist_colname #from globals: plotting_globals.R
|
|
# ppi2Dist_colname #from globals: plotting_globals.R
|
|
# naDist_colname #from globals: plotting_globals.R
|
|
# corr_data_extract(): build the data frame used for correlation plots.
#
# Arguments:
#   df                   data frame holding all parameters; columns are
#                        selected from it by name.
#   gene                 gene name (case-insensitive). Only used to choose the
#                        default column set.
#   drug                 name of the drug column in df (extracted as-is).
#   colnames_to_extract  [optional] character vector of df columns to extract.
#   colnames_display_key [optional] display names for colnames_to_extract:
#                        either a named vector (names = df column names,
#                        values = display names) or an unnamed vector of the
#                        same length as colnames_to_extract (positional).
#   extract_scaled_cols  if TRUE, every df column whose name matches
#                        ".*scaled" is appended; these keep their own names.
#
# If EITHER colnames_to_extract or colnames_display_key is missing, the
# gene-specific default columns and display names are used instead.
#
# Relies on globals from plotting_globals.R: LigDist_colname,
# ppi2Dist_colname, naDist_colname (only read on the default path).
#
# Returns: a data frame with the selected columns, renamed to display names.
# Side effects: progress messages via cat() and a preview via print(head()).
corr_data_extract <- function(df,
                              gene,
                              drug,
                              colnames_to_extract,
                              colnames_display_key,
                              extract_scaled_cols = FALSE) {

  use_defaults <- missing(colnames_to_extract) || missing(colnames_display_key)

  if (use_defaults) {

    cat("\n=========================================="
        , "\nCORR PLOTS data: ALL params"
        , "\n=========================================")

    cat("\nExtracting default columns for"
        , "\nGene name:", gene
        , "\nDrug name:", drug)

    geneL_normal <- c("pnca")
    geneL_na     <- c("gid", "rpob")
    geneL_ppi2   <- c("alr", "embb", "katg", "rpob")

    gene_lc <- tolower(gene)
    if (!(gene_lc %in% c(geneL_normal, geneL_na, geneL_ppi2))) {
      stop("No default correlation columns defined for gene '", gene,
           "'; supply colnames_to_extract and colnames_display_key",
           call. = FALSE)
    }

    # Column names in df and, in the same order, their display names.
    common_colnames <- c(drug, "dst_mode"
                         , "duet_stability_change", "ddg_foldx", "deepddg", "ddg_dynamut2"
                         , "asa", "rsa", "kd_values", "rd_values"
                         , "maf", "log10_or_mychisq", "neglog_pval_fisher"
                         , LigDist_colname
                         , "consurf_score", "snap2_score", "provean_score"
                         , "ligand_affinity_change"
                         #, "ddg_dynamut", "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet"
    )

    display_common_colnames <- c(drug, "dst_mode"
                                 , "DUET", "FoldX", "DeepDDG", "Dynamut2"
                                 , "ASA", "RSA", "KD", "RD"
                                 , "MAF", "Log(OR)", "-Log(P)"
                                 , "Lig-Dist"
                                 , "ConSurf", "SNAP2", "PROVEAN"
                                 , "mCSM-lig"
                                 # , "Dynamut", "ENCoM-DDG", "mCSM", "SDM", "DUET-d", "ENCoM-DDS"
    )

    colnames_to_extract <- common_colnames
    display_colnames    <- display_common_colnames

    if (gene_lc %in% geneL_ppi2) {
      colnames_to_extract <- c(common_colnames, "mcsm_ppi2_affinity", ppi2Dist_colname)
      display_colnames    <- c(display_common_colnames, "mCSM-PPI2", "PPI-Dist")
    }

    # NOTE(review): "rpob" is in both the ppi2 and na lists; as before, the
    # NA columns replace (not add to) the PPI2 columns. Confirm this is intended.
    if (gene_lc %in% geneL_na) {
      colnames_to_extract <- c(common_colnames, "mcsm_na_affinity", naDist_colname)
      display_colnames    <- c(display_common_colnames, "mCSM-NA", "NA-Dist")
    }

  } else {

    # Caller-supplied columns: previously this path silently returned NULL.
    colnames_to_extract <- as.character(colnames_to_extract)
    key_names <- names(colnames_display_key)

    if (is.null(key_names)) {
      if (length(colnames_display_key) != length(colnames_to_extract)) {
        stop("An unnamed colnames_display_key must have the same length as ",
             "colnames_to_extract", call. = FALSE)
      }
      display_colnames <- as.character(unname(colnames_display_key))
    } else {
      # Columns without an entry in the key keep their original name.
      display_colnames <- colnames_to_extract
      has_label <- colnames_to_extract %in% key_names
      display_colnames[has_label] <-
        as.character(unname(colnames_display_key[colnames_to_extract[has_label]]))
    }
  }

  # [optional] arg: extract_scaled_cols
  if (extract_scaled_cols) {
    cat("\nExtracting scaled columns as well...\n")
    # Look in the df that was passed in, not in a global data frame.
    all_scaled_cols <- grep(".*scaled", colnames(df), value = TRUE)
    all_scaled_cols <- setdiff(all_scaled_cols, colnames_to_extract)
    colnames_to_extract <- c(colnames_to_extract, all_scaled_cols)
    # Scaled columns get no display name: keep names and labels aligned.
    display_colnames <- c(display_colnames, all_scaled_cols)
  }

  absent_cols <- setdiff(colnames_to_extract, colnames(df))
  if (length(absent_cols) > 0) {
    stop("Columns not found in df: ", paste(absent_cols, collapse = ", "),
         call. = FALSE)
  }

  # Columns come back in the order requested, so names can be set positionally.
  corr_df <- df[, colnames_to_extract, drop = FALSE]
  colnames(corr_df) <- display_colnames

  cat("\nExtracted ncols:", ncol(corr_df)
      , "\nRenaming successful")

  cat("\nSneak peak...")
  print(head(corr_df))

  # TODO: optionally move the drug column to the end, e.g.
  # corr_df %>% dplyr::relocate(all_of(drug), .after = last_col())

  corr_df
}
|