From 78ffc970e9f78c3db96c59874e8c36b19ab467e5 Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall <tanu@tunstall.in>
Date: Sun, 4 Sep 2022 16:07:01 +0100
Subject: [PATCH] file load antics

---
 ml/global.R | 139 ++++++++++++++++++----------------------------------
 1 file changed, 47 insertions(+), 92 deletions(-)

diff --git a/ml/global.R b/ml/global.R
index 49d8d35..7a9d312 100644
--- a/ml/global.R
+++ b/ml/global.R
@@ -16,52 +16,43 @@ library(shinycssloaders)
 #options(DT.options = list(scrollX = TRUE))
 
 # FIXME: get rid of this hardcoded thing which i'm only reading in to have resampling types ahead of loading the real files
-thing  = read.csv("/srv/shiny-server/git/Data/ml_combined/genes/pnca_70_30_actual.csv")
+# Root of the git checkouts: server path by default, home directory when interactive
+data_dir = "/srv/shiny-server/git/"
+if (interactive()){
+  print("Interactive Session, using home directories")
+  data_dir = "~/git/"
+}
+
+thing  = read.csv(paste0(data_dir, "Data/ml_combined/genes/pnca_70_30_actual.csv"))
 
 # list of splits
 split_type = c(
-  "7030", 
-  "8020", 
-  "sl", 
   "cd_7030", 
   "cd_8020", 
   "cd_sl",
-  "cd_none_bts",
-  "cd_rt"
-  ) 
+  "none"
+)
 
 split_file = c(
-  "_70_30_actual", 
   "_70_30_complete", 
-  "_80_20_actual", 
   "_80_20_complete", 
-  "_sl_actual", 
   "_sl_complete", 
-  "_none_bts_complete",
-  "_rt_complete"
-  )
+  "_none_complete"
+)
 
 # necessary because the names will be wrong otherwise
 split_map = data.frame(
   files=c(
-    "_70_30_actual",
     "_70_30_complete",
-    "_80_20_actual",
     "_80_20_complete",
-    "_sl_actual",
     "_sl_complete",
-    "_none_bts_complete",
-    "_rt_complete"
-    ),
+    "_none_complete" # must equal the split_file entry exactly; names are looked up by exact match
+  ),
   splits=c(
-    "7030",
     "cd_7030",
-    "8020",
     "cd_8020",
-    "sl",
     "cd_sl",
-    "cd_none_bts",
-    "cd_rt"
+    "none"
   )
 )
 
@@ -79,72 +70,36 @@ for (x in gene) {
   #x=tolower(x)
   for (split in split_file){
     filedata = paste0(x, split)
-    filename = paste0('/srv/shiny-server/git/LSHTM_ML/output/genes/',x,split,'.csv')
+    filename = paste0(data_dir,'LSHTM_ML/output/genes/',x,split,'.csv')
     
     #print(c(filename))
     #load_name=paste0(combo[gene==x,"drug"],'_',split_map['splits'][split_map['files']==split])
     load_name=paste0(x,'_baselineC_',split_map['splits'][split_map['files']==split])
     #print(load_name)
-    #try({loaded_files[[filedata]] = read.csv(filename)})
+    # try() on its own is fine here because we don't need to do anything if it fails
     try({loaded_files[[load_name]] = read.csv(filename)})
   }
 }
 # Funky loader for combined data
 for (x in gene) {
-  for (ac in c('_actual','_complete')){
-    for (gene_count in c(6,5)){
+  for (ac in c('_actual','_complete', '_FS')){
+    for (gene_count in c(1:6)){
       load_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac)
-      filename = paste0('/srv/shiny-server/git/LSHTM_ML/output/combined/',load_name, ".csv")
-      print(filename)
-      
-      # if (ac=='') {
-      #   ac2 <- '_complete'
-      # } else {
-      #   ac2 = ac
-      # }
+      filename = paste0(data_dir,'LSHTM_ML/output/combined/',load_name, ".csv")
       store_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac)
-      print(store_name)
-      try({temp_df = read.csv(filename)})
-      
-      temp_df=temp_df[, 2:ncol(temp_df)] # throw away first column
-      loaded_files[[store_name]] = temp_df
-      
+      # tryCatch is necessary here rather than try() because we need to do more
+      # manipulation afterwards (throwing away the column after loading);
+      # a failed read yields NULL, so no success flag set via <<- is needed
+      temp_df = tryCatch(read.csv(filename), error=function(e) NULL)
+      if (!is.null(temp_df)){
+        temp_df=temp_df[, 2:ncol(temp_df)] # throw away first column
+        loaded_files[[store_name]] = temp_df
+        print(paste0("loaded file: ", filename, " into var: ", store_name))
+      }
     }
   }
 }
 
-# 
-# loaded_files_old=list()
-# for (x in drug) {
-#   for (split in split_type){
-#     filename = paste0('/home/sethp/git/Data/',
-#                       x,
-#                       '/output/ml/tts_',
-#                       split,
-#                       '/',
-#                       combo[drug==x,"gene"],
-#                       '_baselineC_',
-#                       split,
-#                       '.csv')
-#     filedata = paste0(combo[drug==x,"gene"],
-#                       '_baselineC_',
-#                       split
-#                       )
-#     print(c(filename, filedata))
-# 
-#     try({loaded_files_old[[filedata]] = read.csv(filename)})
-#   }
-# }
-
-#plot_data    = thing[thing$resampling=='none',]
-# FIXME commented out for the moment because we need to use
-# this before the data is actually loaded :-(
-# scores = colnames(thing %>% dplyr::select(-c("Model_name",
-#                                              "source_data",
-#                                              "resampling"
-#                                              )
-#                                           )
-#                   )
 scores=c("F1", "ROC_AUC", "JCC", "MCC", "Accuracy", "Recall", "Precision")
 
 resample_types <<- unique(thing$resampling)
@@ -157,7 +112,7 @@ makeplot = function(x, # the DataFrame to plot
                     gene                    = 'NOT SET', # used only for the info box
                     drug                    = 'NOT SET', # used only for the info box
                     combined_training_genes = '999' # used only for the info box
-                    ){
+){
   plot_data    = x[x$resampling==resampler,]
   y_coord_min = min(plot_data[selection])
   
@@ -174,24 +129,24 @@ makeplot = function(x, # the DataFrame to plot
                       "\nFeatures: ", metadata[6],
                       "\nGenes Trained: ", combined_training_genes,
                       "\nTest Gene: ", gene
-                      )
-      } else {
-        metatext=paste0("Train/Test: ",
-                        metadata[1], "/", metadata[2],
-                        "\nTrain/Test Target Ratio: ", metadata[3], "/", metadata[4],
-                        "\nResampling: ", metadata[5],
-                        "\nFeatures: ", metadata[6],
-                        "\nTest Gene: ", gene
-        )
-      }
+      )
+    } else {
+      metatext=paste0("Train/Test: ",
+                      metadata[1], "/", metadata[2],
+                      "\nTrain/Test Target Ratio: ", metadata[3], "/", metadata[4],
+                      "\nResampling: ", metadata[5],
+                      "\nFeatures: ", metadata[6],
+                      "\nTest Gene: ", gene
+      )
+    }
     
-  #print(metatext)
-  
-  grob <- grobTree(textGrob(metatext,
-    x=0.01,
-    y=0.90,
-    hjust=0,
-    gp=gpar(col="black")
+    #print(metatext)
+    
+    grob <- grobTree(textGrob(metatext,
+                              x=0.01,
+                              y=0.90,
+                              hjust=0,
+                              gp=gpar(col="black")
     )
     )
   }