updated docs for dm_om_data.R

2022-02-01 16:23:03 +00:00 · 2022-02-01 16:23:03 +00:00 · 3d45780c1a
commit 3d45780c1a
parent e795c00831
4 changed files with 54 additions and 179 deletions
--- a/scripts/functions/dm_om_data.R
+++ b/scripts/functions/dm_om_data.R
@ -1,28 +1,40 @@
 #!/usr/bin/env Rscript  
 #########################################################
 # TASK: Script to format data for dm om plots: 
-  # generating WF and LF data for each of the parameters
-   # duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc
+  # generating WF and LF data for each of the parameters:
+    # duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc
  # Called by get_plotting_dfs.R

 # dm_om_wf_lf_data()
-# Input: data with all parameters (merged_df3, my_use case)
-# gene: [conditional generation of dfs like mcsm-NA, mcsm-ppi2 as not all genes have all these values]
-# colnames_to_extract = c("mutationinformation"
-#                         , "duet_affinity_change...")
-# ligand_dist_colname     = LigDist_colname # from globals
-# dr_muts                 = dr_muts_col # from globals ...dr_mutations_<drug>
-# other_muts              = other_muts_col # from globals ...other_mutations_<drug>
-# snp_colname             = "mutationinformation"
-# aa_pos_colname          = "position" # to sort df by
-# mut_colname             = "mutation"
-# mut_info_colname        = "mutation_info"
-# mut_info_label_colname  = "mutation_info_labels" # if empty, below used
-# dr_other_muts_labels    = c("DM", "OM") # only used if ^^ = ""
-# categ_cols_to_factor: converts the cols with '_outcome'and 'info' to factor
+# INPUT: 
+    # df: merged_df3 (data with all parameters)
+      # NOTE*: merged_df2 will not be appropriate as it brings up most params as significant (!?), at least for gid
+    # gene: [conditional generation of dfs like mcsm-NA, mcsm-ppi2 as not all genes have all these values]
+    # colnames_to_extract     : columns to extract; can be user-specified.
+      # By default it is c("mutationinformation" , "duet_affinity_change...")
+    # ligand_dist_colname     : column name containing ligand distance. By default, it is LigDist_colname (imported from globals)
+    # dr_muts                 : dr_muts_col (imported from globals; dr_mutations_<drug>)
+    # other_muts              : other_muts_col (imported from globals ...other_mutations_<drug>)
+    # snp_colname             : SNP column name. By default it is "mutationinformation"
+    # aa_pos_colname          : Column name containing the aa position. This is used to sort the df by.
+    # mut_colname             : Column name containing snp info in format "<abc_pXXdef>". By default, it is "mutation"
+    # mut_info_colname        : Column name containing mutation info whether it is DM or OM. By default, it is "mutation_info"
+    # mut_info_label_colname  : Column containing pre-formatted labels for mutation info. 
+      # For my use case, this is called "mutation_info_labels"
+      # This column has short labels like DM and OM corresponding to dr_muts and other_muts.
+      # NOTE*: if this is left empty, then the arg ('dr_other_muts_labels') will be used
+    # dr_other_muts_labels    : User specified labels, must correspond to dr_muts and other_muts. 
+      # NOTE*: Only used if the arg (mut_info_label_colname) is empty!
+    # categ_cols_to_factor    : Column names to convert to factors. These mainly correspond to the outcome columns associated with the
+      # arg ('colnames_to_extract'). These have the suffix "_outcome" in their colnames. Additionally column 'mutation_info' is also 
+      # converted to factor. By default, it converts the cols with '_outcome' and 'info' to factor.
+      # Users are able to provide a vector of their corresponding column names

+# RETURNS: List
+    # WF and LF data grouped by mutation_info, i.e. DM (drug mutations) and OM (other mutations)
+    
 # TO DO: SHINY
-#1) 
+#1) df to choose (merged_df3 or merged_df2)
 #2)
 ##################################################################
 dm_om_wf_lf_data <- function(df
@ -48,7 +60,7 @@ dm_om_wf_lf_data <- function(df
  
  # common_dfs
  common_dfsL     = list(
-    wf_duet       = data.frame()
+      wf_duet       = data.frame()
    , lf_duet     = data.frame()
    , wf_mcsm_lig = data.frame()
    , lf_mcsm_lig = data.frame()