updated docs for dm_om_data.R

This commit is contained in:
Tanushree Tunstall 2022-02-01 16:23:03 +00:00
parent e795c00831
commit 3d45780c1a
4 changed files with 54 additions and 179 deletions

View file

@ -1,28 +1,40 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Script to format data for dm om plots:
# generating WF and LF data for each of the parameters
# duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc
# generating WF and LF data for each of the parameters:
# duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc
# Called by get_plotting_dfs.R
# dm_om_wf_lf_data()
# Input: data with all parameters (merged_df3, my_use case)
# gene: [conditional generation of dfs like mcsm-NA, mcsm-ppi2 as not all genes have all these values]
# colnames_to_extract = c("mutationinformation"
# , "duet_affinity_change...")
# ligand_dist_colname = LigDist_colname # from globals
# dr_muts = dr_muts_col # from globals ...dr_mutations_<drug>
# other_muts = other_muts_col # from globals ...other_mutations_<drug>
# snp_colname = "mutationinformation"
# aa_pos_colname = "position" # to sort df by
# mut_colname = "mutation"
# mut_info_colname = "mutation_info"
# mut_info_label_colname = "mutation_info_labels" # if empty, below used
# dr_other_muts_labels = c("DM", "OM") # only used if ^^ = ""
# categ_cols_to_factor: converts the cols with '_outcome'and 'info' to factor
# INPUT:
# df: merged_df3 (data with all parameters)
# NOTE*: merged_df2 will not be appropriate as it brings up most params as significant (!?), at least for gid
# gene: [conditional generation of dfs like mcsm-NA, mcsm-ppi2 as not all genes have all these values]
# colnames_to_extract : columns to extract; may be user-specified.
# By default, it is c("mutationinformation" , "duet_affinity_change...")
# ligand_dist_colname : column name containing ligand distance. By default, it is LigDist_colname (imported from globals)
# dr_muts : dr_muts_col (imported from globals; dr_mutations_<drug>)
# other_muts : other_muts_col (imported from globals ...other_mutations_<drug>)
# snp_colname : SNP column name. By default it is "mutationinformation"
# aa_pos_colname : Column name containing the aa position. This is used to sort the df by.
# mut_colname : Column name containing snp info in format "<abc_pXXdef>". By default, it is "mutation"
# mut_info_colname : Column name containing mutation info whether it is DM or OM. By default, it is "mutation_info"
# mut_info_label_colname : Column containing pre-formatted labels for mutation info.
# For my use case, this is called "mutation_info_labels"
# This column has short labels like DM and OM corresponding to dr_muts and other_muts.
# NOTE*: if this is left empty, then the arg ('dr_other_muts_labels') will be used
# dr_other_muts_labels : User specified labels, must correspond to dr_muts and other_muts.
# NOTE*: Only used if the arg (mut_info_label_colname) is empty!
# categ_cols_to_factor : Column names to convert to factors. These mainly correspond to the outcome columns associated with the
# arg ('colnames_to_extract'). These have the suffix "_outcome" in their colnames. Additionally column 'mutation_info' is also
# converted to factor. By default, it converts the cols with '_outcome' and 'info' to factor.
# Users are able to provide a vector of their corresponding column names
# RETURNS: List
# WF and LF data grouped by mutation_info i.e DM (drug mutations) and OM (other mutations)
# TO DO: SHINY
#1)
#1) df to choose (merged_df3 or merged_df2)
#2)
##################################################################
dm_om_wf_lf_data <- function(df
@ -48,7 +60,7 @@ dm_om_wf_lf_data <- function(df
# common_dfs
common_dfsL = list(
wf_duet = data.frame()
wf_duet = data.frame()
, lf_duet = data.frame()
, wf_mcsm_lig = data.frame()
, lf_mcsm_lig = data.frame()