updated docs for dm_om_data.R
This commit is contained in:
parent
e795c00831
commit
3d45780c1a
4 changed files with 54 additions and 179 deletions
|
@ -1,128 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
#########################################################
|
|
||||||
# TASK: Script to format data for Correlation plots:
|
|
||||||
# corr_data_extract()
|
|
||||||
# Input:
|
|
||||||
# corr_plot_df: data with all parameters (my_use case)
|
|
||||||
# merged_df3 or merged_df2!?
|
|
||||||
# gene: [sanity check]
|
|
||||||
# drug: relates to a column name that will need to extracted
|
|
||||||
# ligand_dist_colname = LigDist_colname (variable from plotting_globals())
|
|
||||||
|
|
||||||
#colnames_to_extract = c("mutationinformation"
|
|
||||||
# , "duet_affinity_change")
|
|
||||||
#display_colnames_key = c(mutationinformation = "MUT"
|
|
||||||
# , duet_affinity_change = "DUET")
|
|
||||||
# extract_scaled_cols = T or F, so that parameters with the _scaled suffix can be extracted.
|
|
||||||
# No formatting applied to these cols i.e display name
|
|
||||||
|
|
||||||
# TO DO: SHINY
|
|
||||||
#1) Corr type?
|
|
||||||
#2)
|
|
||||||
##################################################################
|
|
||||||
# corr_data_extract()
# Build the data frame used for correlation plots: keep the requested columns
# of `corr_plot_df`, give them short display names, and move the drug column
# to the last position.
#
# Args:
#   corr_plot_df         : data frame holding all parameters.
#   drug_name            : name of the drug column (defaults to the global `drug`).
#   ligand_dist_colname  : name of the ligand distance column (defaults to the
#                          global `LigDist_colname`).
#   colnames_to_extract  : optional character vector of columns to keep.
#                          When missing, the built-in list below is used.
#   colnames_display_key : optional named character vector, c(old_name = "label").
#                          When missing, the built-in key below is used.
#   extract_scaled_cols  : if TRUE, every column of `corr_plot_df` whose name
#                          contains "scaled" is kept as well (not renamed).
#
# Returns:
#   A data frame with the selected columns, renamed where the key has an entry,
#   and with the drug column (if selected) placed last.
corr_data_extract <- function(corr_plot_df
                              , drug_name = drug
                              , ligand_dist_colname = LigDist_colname
                              , colnames_to_extract
                              , colnames_display_key
                              , extract_scaled_cols = FALSE) {

  use_default_cols <- missing(colnames_to_extract)
  use_default_key <- missing(colnames_display_key)

  if (use_default_cols || use_default_key) {
    cat("\n=========================================="
        , "\nCORR PLOTS data: ALL params"
        , "\n=========================================")

    cat("\nExtracting default columns for"
        , "\nDrug name:", drug_name)
  }

  # Defaults are applied per argument, so a user-supplied value is never
  # silently discarded.
  if (use_default_cols) {
    colnames_to_extract <- c(drug_name
                             , "mutation_info_labels"
                             , "duet_stability_change"
                             , "ligand_affinity_change"
                             , ligand_dist_colname
                             , "ddg_foldx"
                             , "deepddg"
                             , "asa"
                             , "rsa"
                             , "kd_values"
                             , "rd_values"
                             , "af"
                             , "log10_or_mychisq"
                             , "neglog_pval_fisher"
                             , "ddg_dynamut2"
                             , "consurf_score"
                             , "snap2_score"
                             , "ddg_dynamut", "ddg_encom", "dds_encom"
                             , "ddg_mcsm", "ddg_sdm", "ddg_duet"
                             , "mcsm_na_affinity"
                             , "mcsm_ppi2_affinity")
  }

  if (use_default_key) {
    # The ligand distance column has no entry here, so it keeps its own name.
    colnames_display_key <- c(duet_stability_change = "DUET"
                              , ligand_affinity_change = "mCSM-lig"
                              , ddg_foldx = "FoldX"
                              , deepddg = "DeepDDG"
                              , asa = "ASA"
                              , rsa = "RSA"
                              , kd_values = "KD"
                              , rd_values = "RD"
                              , af = "MAF"
                              , log10_or_mychisq = "Log (OR)"
                              , neglog_pval_fisher = "-Log (P)"
                              , ddg_dynamut2 = "Dynamut2"
                              , consurf_score = "Consurf"
                              , snap2_score = "SNAP2"
                              , ddg_dynamut = "Dynamut"
                              , ddg_encom = "ENCoM-DDG"
                              , ddg_mcsm = "mCSM"
                              , ddg_sdm = "SDM"
                              , ddg_duet = "DUET-d"
                              , dds_encom = "ENCoM-DDS"
                              , mcsm_na_affinity = "mCSM-NA"
                              , mcsm_ppi2_affinity = "mCSM-PPI2")
  }

  # [optional] arg: extract_scaled_cols
  # Scaled columns are looked up in the data passed in, not in a global df.
  if (extract_scaled_cols) {
    cat("\nExtracting scaled columns as well...\n")
    all_scaled_cols <- grep(".*scaled", colnames(corr_plot_df), value = TRUE)
    colnames_to_extract <- c(colnames_to_extract, all_scaled_cols)
  }

  # drop = FALSE keeps a data frame even when only one column matches.
  corr_df <- corr_plot_df[, colnames(corr_plot_df) %in% colnames_to_extract
                          , drop = FALSE]

  # Rename only the columns that have an entry in the key. Indexing the key
  # directly by names(corr_df) would turn every unlisted column name into NA.
  absent_keys <- setdiff(names(colnames_display_key), names(corr_df))
  if (length(absent_keys) > 0) {
    message("The following display-key names were not present in the data: "
            , paste(absent_keys, collapse = ", "))
  }
  key_idx <- match(names(corr_df), names(colnames_display_key))
  has_key <- !is.na(key_idx)
  names(corr_df)[has_key] <- unname(colnames_display_key[key_idx[has_key]])
  if (anyDuplicated(names(corr_df)) > 0) {
    warning("Renaming produced duplicated column names", call. = FALSE)
  }

  cat("\nExtracted ncols:", ncol(corr_df)
      , "\nRenaming successful")

  cat("\nSneak peak...")
  print(head(corr_df))

  # Move drug column to the end (no-op when the drug column was not selected)
  is_drug <- names(corr_df) %in% drug_name
  corr_df_f <- corr_df[, c(which(!is_drug), which(is_drug)), drop = FALSE]

  corr_df_f
}
|
|
|
@ -1,28 +1,40 @@
|
||||||
#!/usr/bin/env Rscript
|
#!/usr/bin/env Rscript
|
||||||
#########################################################
|
#########################################################
|
||||||
# TASK: Script to format data for dm om plots:
|
# TASK: Script to format data for dm om plots:
|
||||||
# generating WF and LF data for each of the parameters
|
# generating WF and LF data for each of the parameters:
|
||||||
# duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc
|
# duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc
|
||||||
# Called by get_plotting_dfs.R
|
# Called by get_plotting_dfs.R
|
||||||
|
|
||||||
# dm_om_wf_lf_data()
|
# dm_om_wf_lf_data()
|
||||||
# Input: data with all parameters (merged_df3, my_use case)
|
# INPUT:
|
||||||
|
# df: merged_df3 (data with all parameters)
|
||||||
|
# NOTE*: merged_df2 will not be appropriate as it brings up most params as significant (at least for gid)
|
||||||
# gene: [conditional generation of dfs like mcsm-NA, mcsm-ppi2 as not all genes have all these values]
|
# gene: [conditional generation of dfs like mcsm-NA, mcsm-ppi2 as not all genes have all these values]
|
||||||
# colnames_to_extract = c("mutationinformation"
|
# colnames_to_extract : columns to extract; may be user-specified.
|
||||||
# , "duet_affinity_change...")
|
# By default it is c("mutationinformation", "duet_affinity_change", ...)
|
||||||
# ligand_dist_colname = LigDist_colname # from globals
|
# ligand_dist_colname : column name containing ligand distance. By default, it is LigDist_colname (imported from globals)
|
||||||
# dr_muts = dr_muts_col # from globals ...dr_mutations_<drug>
|
# dr_muts : dr_muts_col (imported from globals; dr_mutations_<drug>)
|
||||||
# other_muts = other_muts_col # from globals ...other_mutations_<drug>
|
# other_muts : other_muts_col (imported from globals ...other_mutations_<drug>)
|
||||||
# snp_colname = "mutationinformation"
|
# snp_colname : SNP column name. By default it is "mutationinformation"
|
||||||
# aa_pos_colname = "position" # to sort df by
|
# aa_pos_colname : Column name containing the aa position. This is used to sort the df by.
|
||||||
# mut_colname = "mutation"
|
# mut_colname : Column name containing snp info in format "<abc_pXXdef>". By default, it is "mutation"
|
||||||
# mut_info_colname = "mutation_info"
|
# mut_info_colname : Column name containing mutation info whether it is DM or OM. By default, it is "mutation_info"
|
||||||
# mut_info_label_colname = "mutation_info_labels" # if empty, below used
|
# mut_info_label_colname : Column containing pre-formatted labels for mutation info.
|
||||||
# dr_other_muts_labels = c("DM", "OM") # only used if ^^ = ""
|
# For my use case, this is called "mutation_info_labels"
|
||||||
# categ_cols_to_factor: converts the cols with '_outcome'and 'info' to factor
|
# This column has short labels like DM and OM corresponding to dr_muts and other_muts.
|
||||||
|
# NOTE*: if this is left empty, then the arg ('dr_other_muts_labels') will be used
|
||||||
|
# dr_other_muts_labels : User specified labels, must correspond to dr_muts and other_muts.
|
||||||
|
# NOTE*: Only used if the arg (mut_info_label_colname) is empty!
|
||||||
|
# categ_cols_to_factor : Column names to convert to factors. These mainly correspond to the outcome columns associated with the
|
||||||
|
# arg ('colnames_to_extract'). These have the suffix "_outcome" in their colnames. Additionally column 'mutation_info' is also
|
||||||
|
# converted to factor. By default, it converts the cols with '_outcome' and 'info' to factor.
|
||||||
|
# Users are able to provide a vector of their corresponding column names
|
||||||
|
|
||||||
|
# RETURNS: List
|
||||||
|
# WF and LF data grouped by mutation_info i.e DM (drug mutations) and OM (other mutations)
|
||||||
|
|
||||||
# TO DO: SHINY
|
# TO DO: SHINY
|
||||||
#1)
|
#1) df to choose (merged_df3 or merged_df2)
|
||||||
#2)
|
#2)
|
||||||
##################################################################
|
##################################################################
|
||||||
dm_om_wf_lf_data <- function(df
|
dm_om_wf_lf_data <- function(df
|
||||||
|
|
|
@ -1,7 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
source("~/git/LSHTM_analysis/config/gid.R")
|
|
||||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
|
||||||
|
|
||||||
m3 = corr_data_extract(merged_df3); head(m3)
|
|
||||||
# Default column extraction on merged_df2
m2 = corr_data_extract(merged_df2); head(m2)
|
|
||||||
m3S = corr_data_extract(merged_df3, extract_scaled_cols = T); head(m3S)
|
|
|
@ -88,13 +88,6 @@ merged_df3 = all_plot_dfs[[2]]
|
||||||
merged_df2_comp = all_plot_dfs[[3]]
|
merged_df2_comp = all_plot_dfs[[3]]
|
||||||
merged_df3_comp = all_plot_dfs[[4]]
|
merged_df3_comp = all_plot_dfs[[4]]
|
||||||
#======================================================================
|
#======================================================================
|
||||||
####################################################################
|
|
||||||
# Data for combining other dfs
|
|
||||||
####################################################################
|
|
||||||
|
|
||||||
#source("other_dfs_data.R")
|
|
||||||
# Fixed this at source i.e python script
|
|
||||||
# Moved: "other_dfs_data.R" to redundant/
|
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
# Data for subcols barplot (~heatmap)
|
# Data for subcols barplot (~heatmap)
|
||||||
|
@ -109,35 +102,39 @@ merged_df3_comp = all_plot_dfs[[4]]
|
||||||
# Data for logoplots
|
# Data for logoplots
|
||||||
####################################################################
|
####################################################################
|
||||||
|
|
||||||
#source(paste0(plot_script_path, "logo_data.R"))
|
#source(paste0(plot_script_path, "logo_data_msa.R"))
|
||||||
#s1 = c("\nSuccessfully sourced logo_data.R")
|
#s1 = c("\nSuccessfully sourced logo_data_msa.R")
|
||||||
#cat(s1)
|
#cat(s1)
|
||||||
# input data is merged_df3
|
|
||||||
# so repurposed it into a function so params can be passed instead to generate
|
|
||||||
# data required for plotting.
|
|
||||||
# Moved "logo_data.R" to redundant/
|
|
||||||
|
|
||||||
source(paste0(plot_script_path, "logo_data_msa.R"))
|
|
||||||
s1 = c("\nSuccessfully sourced logo_data_msa.R")
|
|
||||||
cat(s1)
|
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
# Data for DM OM Plots: WF and LF dfs
|
# Data for DM OM Plots: WF and LF dfs
|
||||||
# My function: dm_om_wf_lf_data()
|
# My function: dm_om_wf_lf_data()
|
||||||
####################################################################
|
# location: scripts/functions/dm_om_data.R
|
||||||
#source("other_plots_data.R")
|
#source("other_plots_data.R")
|
||||||
# converted to a function
|
####################################################################
|
||||||
# moved old one to redundant.
|
|
||||||
source(paste0(plot_script_path, "dm_om_data.R"))
|
|
||||||
|
|
||||||
s2 = c("\nSuccessfully sourced other_plots_data.R")
|
#source(paste0(plot_script_path, "dm_om_data.R"))
|
||||||
cat(s2)
|
#s2 = c("\nSuccessfully sourced other_plots_data.R")
|
||||||
|
#cat(s2)
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
# Data for Lineage barplots: WF and LF dfs
|
# Data for Lineage barplots: WF and LF dfs
|
||||||
|
# My function: lineage_plot_data()
|
||||||
|
# location: scripts/functions/lineage_plot_data.R
|
||||||
####################################################################
|
####################################################################
|
||||||
|
|
||||||
source(paste0(plot_script_path, "lineage_data.R"))
|
#source(paste0(plot_script_path, "lineage_data.R"))
|
||||||
|
# converted to a function. Moved lineage_data.R to redundant/
|
||||||
|
lineage_dfL = lineage_plot_data(df = merged_df2
|
||||||
|
, lineage_column_name = "lineage"
|
||||||
|
, remove_empty_lineage = F
|
||||||
|
, lineage_label_col_name = "lineage_labels"
|
||||||
|
, id_colname = "id"
|
||||||
|
, snp_colname = "mutationinformation"
|
||||||
|
)
|
||||||
|
|
||||||
|
lin_wf = lineage_dfL[['lin_wf']]
|
||||||
|
lin_lf = lineage_dfL[['lin_lf']]
|
||||||
|
|
||||||
s3 = c("\nSuccessfully sourced lineage_data.R")
|
s3 = c("\nSuccessfully sourced lineage_data.R")
|
||||||
cat(s3)
|
cat(s3)
|
||||||
|
@ -145,6 +142,7 @@ cat(s3)
|
||||||
####################################################################
|
####################################################################
|
||||||
# Data for corr plots:
|
# Data for corr plots:
|
||||||
# My function: corr_data_extract()
|
# My function: corr_data_extract()
|
||||||
|
# location: scripts/functions/corr_plot_data.R
|
||||||
####################################################################
|
####################################################################
|
||||||
# make sure the above script works because merged_df2_combined is needed
|
# make sure the above script works because merged_df2_combined is needed
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue