file load antics

2022-09-04 16:07:01 +01:00 · 2022-09-04 16:07:01 +01:00 · 78ffc970e9
commit 78ffc970e9
parent 1d07c2d5ce
1 changed files with 47 additions and 92 deletions
--- a/ml/global.R
+++ b/ml/global.R
@@ -16,52 +16,43 @@ library(shinycssloaders)
 #options(DT.options = list(scrollX = TRUE))
 # FIXME: get rid of this hardcoded thing which i'm only reading in to have resampling types ahead of loading the real files
-thing  = read.csv("/srv/shiny-server/git/Data/ml_combined/genes/pnca_70_30_actual.csv")
+if (interactive()){
  print("Interactive Session, using home directories")
  data_dir = "~/git/"
 } else {
  data_dir = "/srv/shiny-server/git/"
 }
 thing  = read.csv(paste0(data_dir, "Data/ml_combined/genes/pnca_70_30_actual.csv"))
 # list of splits
 split_type = c(
  "7030", 
  "8020", 
  "sl", 
  "cd_7030", 
  "cd_8020", 
  "cd_sl",
-  "cd_none_bts",
+  "none"
-  "cd_rt"
+)
  ) 
 split_file = c(
  "_70_30_actual", 
  "_70_30_complete", 
  "_80_20_actual", 
  "_80_20_complete", 
  "_sl_actual", 
  "_sl_complete", 
-  "_none_bts_complete",
+  "_none_complete"
-  "_rt_complete"
+)
  )
 # necessary because the names will be wrong otherwise
 split_map = data.frame(
  files=c(
    "_70_30_actual",
    "_70_30_complete",
    "_80_20_actual",
    "_80_20_complete",
    "_sl_actual",
    "_sl_complete",
-    "_none_bts_complete",
+    "_none"
-    "_rt_complete"
+  ),
    ),
  splits=c(
    "7030",
    "cd_7030",
    "8020",
    "cd_8020",
    "sl",
    "cd_sl",
-    "cd_none_bts",
+    "none"
    "cd_rt"
  )
 )
@@ -79,72 +70,36 @@ for (x in gene) {
  #x=tolower(x)
  for (split in split_file){
    filedata = paste0(x, split)
-    filename = paste0('/srv/shiny-server/git/LSHTM_ML/output/genes/',x,split,'.csv')
+    filename = paste0(data_dir,'LSHTM_ML/output/genes/',x,split,'.csv')
    #print(c(filename))
    #load_name=paste0(combo[gene==x,"drug"],'_',split_map['splits'][split_map['files']==split])
    load_name=paste0(x,'_baselineC_',split_map['splits'][split_map['files']==split])
    #print(load_name)
-    #try({loaded_files[[filedata]] = read.csv(filename)})
+    # try() on its own is fine here because we don't need to do anything if it fails
    try({loaded_files[[load_name]] = read.csv(filename)})
  }
 }
 # Funky loader for combined data
 for (x in gene) {
-  for (ac in c('_actual','_complete')){
+  for (ac in c('_actual','_complete', '_FS')){
-    for (gene_count in c(6,5)){
+    for (gene_count in c(1:6)){
      load_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac)
-      filename = paste0('/srv/shiny-server/git/LSHTM_ML/output/combined/',load_name, ".csv")
+      filename = paste0(data_dir,'LSHTM_ML/output/combined/',load_name, ".csv")
      print(filename)
      # if (ac=='') {
      #   ac2 <- '_complete'
      # } else {
      #   ac2 = ac
      # }
      store_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac)
-      print(store_name)
+      # tryCatch is necessary here rather than try() because we need to do more
-      try({temp_df = read.csv(filename)})
+      # manipulation afterwards (throwing away the column after loading)
-      
+      load_successful=TRUE
-      temp_df=temp_df[, 2:ncol(temp_df)] # throw away first column
+      tryCatch({temp_df = read.csv(filename)},error=function(e){load_successful<<-FALSE})
-      loaded_files[[store_name]] = temp_df
+      if (load_successful){
-      
+        temp_df=temp_df[, 2:ncol(temp_df)] # throw away first column
        loaded_files[[store_name]] = temp_df
        print(paste0("loaded file: ", filename, "into var: ", store_name))
      }
    }
  }
 }
 # 
 # loaded_files_old=list()
 # for (x in drug) {
 #   for (split in split_type){
 #     filename = paste0('/home/sethp/git/Data/',
 #                       x,
 #                       '/output/ml/tts_',
 #                       split,
 #                       '/',
 #                       combo[drug==x,"gene"],
 #                       '_baselineC_',
 #                       split,
 #                       '.csv')
 #     filedata = paste0(combo[drug==x,"gene"],
 #                       '_baselineC_',
 #                       split
 #                       )
 #     print(c(filename, filedata))
 # 
 #     try({loaded_files_old[[filedata]] = read.csv(filename)})
 #   }
 # }
 #plot_data    = thing[thing$resampling=='none',]
 # FIXME commented out for the moment because we need to use
 # this before the data is actually loaded :-(
 # scores = colnames(thing %>% dplyr::select(-c("Model_name",
 #                                              "source_data",
 #                                              "resampling"
 #                                              )
 #                                           )
 #                   )
 scores=c("F1", "ROC_AUC", "JCC", "MCC", "Accuracy", "Recall", "Precision")
 resample_types <<- unique(thing$resampling)
@@ -157,7 +112,7 @@ makeplot = function(x, # the DataFrame to plot
                    gene                    = 'NOT SET', # used only for the info box
                    drug                    = 'NOT SET', # used only for the info box
                    combined_training_genes = '999' # used only for the info box
-                    ){
+){
  plot_data    = x[x$resampling==resampler,]
  y_coord_min = min(plot_data[selection])
@@ -174,24 +129,24 @@ makeplot = function(x, # the DataFrame to plot
                      "\nFeatures: ", metadata[6],
                      "\nGenes Trained: ", combined_training_genes,
                      "\nTest Gene: ", gene
-                      )
+      )
-      } else {
+    } else {
-        metatext=paste0("Train/Test: ",
+      metatext=paste0("Train/Test: ",
-                        metadata[1], "/", metadata[2],
+                      metadata[1], "/", metadata[2],
-                        "\nTrain/Test Target Ratio: ", metadata[3], "/", metadata[4],
+                      "\nTrain/Test Target Ratio: ", metadata[3], "/", metadata[4],
-                        "\nResampling: ", metadata[5],
+                      "\nResampling: ", metadata[5],
-                        "\nFeatures: ", metadata[6],
+                      "\nFeatures: ", metadata[6],
-                        "\nTest Gene: ", gene
+                      "\nTest Gene: ", gene
-        )
+      )
-      }
+    }
-  #print(metatext)
+    #print(metatext)
-  
+    
-  grob <- grobTree(textGrob(metatext,
+    grob <- grobTree(textGrob(metatext,
-    x=0.01,
+                              x=0.01,
-    y=0.90,
+                              y=0.90,
-    hjust=0,
+                              hjust=0,
-    gp=gpar(col="black")
+                              gp=gpar(col="black")
    )
    )
  }