file load antics

2022-09-04 16:07:01 +01:00 · 2022-09-04 16:07:01 +01:00 · 78ffc970e9
commit 78ffc970e9
parent 1d07c2d5ce
1 changed files with 47 additions and 92 deletions
--- a/ml/global.R
+++ b/ml/global.R
@@ -16,52 +16,43 @@ library(shinycssloaders)
 #options(DT.options = list(scrollX = TRUE))

 # FIXME: get rid of this hardcoded thing which i'm only reading in to have resampling types ahead of loading the real files
-thing  = read.csv("/srv/shiny-server/git/Data/ml_combined/genes/pnca_70_30_actual.csv")
+# data_dir is the root of the git checkouts: server path unless run interactively
+if (!interactive()){
+  data_dir = "/srv/shiny-server/git/"
+} else {
+  print("Interactive Session, using home directories")
+  data_dir = "~/git/"
+}
+thing  = read.csv(paste0(data_dir, "Data/ml_combined/genes/pnca_70_30_actual.csv"))

 # list of splits
 split_type = c(
-  "7030", 
-  "8020", 
-  "sl", 
  "cd_7030", 
  "cd_8020", 
  "cd_sl",
-  "cd_none_bts",
-  "cd_rt"
-  ) 
+  "none"
+)

 split_file = c(
-  "_70_30_actual", 
  "_70_30_complete", 
-  "_80_20_actual", 
  "_80_20_complete", 
-  "_sl_actual", 
  "_sl_complete", 
-  "_none_bts_complete",
-  "_rt_complete"
-  )
+  "_none_complete"
+)

 # necessary because the names will be wrong otherwise
 split_map = data.frame(
  files=c(
-    "_70_30_actual",
    "_70_30_complete",
-    "_80_20_actual",
    "_80_20_complete",
-    "_sl_actual",
    "_sl_complete",
-    "_none_bts_complete",
-    "_rt_complete"
-    ),
+    "_none_complete" # must equal the split_file entry exactly; the lookup uses ==
+  ),
  splits=c(
-    "7030",
    "cd_7030",
-    "8020",
    "cd_8020",
-    "sl",
    "cd_sl",
-    "cd_none_bts",
-    "cd_rt"
+    "none"
  )
 )

@@ -79,72 +70,36 @@ for (x in gene) {
  #x=tolower(x)
  for (split in split_file){
    filedata = paste0(x, split)
-    filename = paste0('/srv/shiny-server/git/LSHTM_ML/output/genes/',x,split,'.csv')
+    filename = paste0(data_dir,'LSHTM_ML/output/genes/',x,split,'.csv')
    
    #print(c(filename))
    #load_name=paste0(combo[gene==x,"drug"],'_',split_map['splits'][split_map['files']==split])
    load_name=paste0(x,'_baselineC_',split_map['splits'][split_map['files']==split])
    #print(load_name)
-    #try({loaded_files[[filedata]] = read.csv(filename)})
+    # try() on its own is fine here because we don't need to do anything if it fails
    try({loaded_files[[load_name]] = read.csv(filename)})
  }
 }
 # Funky loader for combined data
 for (x in gene) {
-  for (ac in c('_actual','_complete')){
-    for (gene_count in c(6,5)){
+  for (ac in c('_actual','_complete', '_FS')){
+    for (gene_count in 1:6){
      load_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac)
-      filename = paste0('/srv/shiny-server/git/LSHTM_ML/output/combined/',load_name, ".csv")
-      print(filename)
-      
-      # if (ac=='') {
-      #   ac2 <- '_complete'
-      # } else {
-      #   ac2 = ac
-      # }
+      filename = paste0(data_dir,'LSHTM_ML/output/combined/',load_name, ".csv")
      store_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac)
-      print(store_name)
-      try({temp_df = read.csv(filename)})
-      
-      temp_df=temp_df[, 2:ncol(temp_df)] # throw away first column
-      loaded_files[[store_name]] = temp_df
-      
+      # tryCatch returns NULL when the read fails, so a missing file is skipped
+      # and no failure flag has to be pushed into the global env with <<-
+      temp_df = tryCatch(read.csv(filename),
+                         error=function(e) NULL)
+      if (!is.null(temp_df)){
+        temp_df=temp_df[, -1, drop=FALSE] # throw away first column, stay a data.frame
+        loaded_files[[store_name]] = temp_df
+        print(paste0("loaded file: ", filename, " into var: ", store_name))
+      }
    }
  }
 }

-# 
-# loaded_files_old=list()
-# for (x in drug) {
-#   for (split in split_type){
-#     filename = paste0('/home/sethp/git/Data/',
-#                       x,
-#                       '/output/ml/tts_',
-#                       split,
-#                       '/',
-#                       combo[drug==x,"gene"],
-#                       '_baselineC_',
-#                       split,
-#                       '.csv')
-#     filedata = paste0(combo[drug==x,"gene"],
-#                       '_baselineC_',
-#                       split
-#                       )
-#     print(c(filename, filedata))
-# 
-#     try({loaded_files_old[[filedata]] = read.csv(filename)})
-#   }
-# }
-
-#plot_data    = thing[thing$resampling=='none',]
-# FIXME commented out for the moment because we need to use
-# this before the data is actually loaded :-(
-# scores = colnames(thing %>% dplyr::select(-c("Model_name",
-#                                              "source_data",
-#                                              "resampling"
-#                                              )
-#                                           )
-#                   )
 scores=c("F1", "ROC_AUC", "JCC", "MCC", "Accuracy", "Recall", "Precision")

 resample_types <<- unique(thing$resampling)
@@ -157,7 +112,7 @@ makeplot = function(x, # the DataFrame to plot
                    gene                    = 'NOT SET', # used only for the info box
                    drug                    = 'NOT SET', # used only for the info box
                    combined_training_genes = '999' # used only for the info box
-                    ){
+){
  plot_data    = x[x$resampling==resampler,]
  y_coord_min = min(plot_data[selection])
  
@@ -174,24 +129,24 @@ makeplot = function(x, # the DataFrame to plot
                      "\nFeatures: ", metadata[6],
                      "\nGenes Trained: ", combined_training_genes,
                      "\nTest Gene: ", gene
-                      )
-      } else {
-        metatext=paste0("Train/Test: ",
-                        metadata[1], "/", metadata[2],
-                        "\nTrain/Test Target Ratio: ", metadata[3], "/", metadata[4],
-                        "\nResampling: ", metadata[5],
-                        "\nFeatures: ", metadata[6],
-                        "\nTest Gene: ", gene
-        )
-      }
+      )
+    } else {
+      metatext=paste0("Train/Test: ",
+                      metadata[1], "/", metadata[2],
+                      "\nTrain/Test Target Ratio: ", metadata[3], "/", metadata[4],
+                      "\nResampling: ", metadata[5],
+                      "\nFeatures: ", metadata[6],
+                      "\nTest Gene: ", gene
+      )
+    }
    
-  #print(metatext)
+    #print(metatext)
    
-  grob <- grobTree(textGrob(metatext,
-    x=0.01,
-    y=0.90,
-    hjust=0,
-    gp=gpar(col="black")
+    grob <- grobTree(textGrob(metatext,
+                              x=0.01,
+                              y=0.90,
+                              hjust=0,
+                              gp=gpar(col="black")
    )
    )
  }