From 78ffc970e9f78c3db96c59874e8c36b19ab467e5 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Sun, 4 Sep 2022 16:07:01 +0100 Subject: [PATCH] file load antics --- ml/global.R | 139 ++++++++++++++++++---------------------------------- 1 file changed, 47 insertions(+), 92 deletions(-) diff --git a/ml/global.R b/ml/global.R index 49d8d35..7a9d312 100644 --- a/ml/global.R +++ b/ml/global.R @@ -16,52 +16,43 @@ library(shinycssloaders) #options(DT.options = list(scrollX = TRUE)) # FIXME: get rid of this hardcoded thing which i'm only reading in to have resampling types ahead of loading the real files -thing = read.csv("/srv/shiny-server/git/Data/ml_combined/genes/pnca_70_30_actual.csv") +if (interactive()){ + print("Interactive Session, using home directories") + data_dir = "~/git/" +} else { + data_dir = "/srv/shiny-server/git/" +} + +thing = read.csv(paste0(data_dir, "Data/ml_combined/genes/pnca_70_30_actual.csv")) # list of splits split_type = c( - "7030", - "8020", - "sl", "cd_7030", "cd_8020", "cd_sl", - "cd_none_bts", - "cd_rt" - ) + "none" +) split_file = c( - "_70_30_actual", "_70_30_complete", - "_80_20_actual", "_80_20_complete", - "_sl_actual", "_sl_complete", - "_none_bts_complete", - "_rt_complete" - ) + "_none_complete" +) # necessary because the names will be wrong otherwise split_map = data.frame( files=c( - "_70_30_actual", "_70_30_complete", - "_80_20_actual", "_80_20_complete", - "_sl_actual", "_sl_complete", - "_none_bts_complete", - "_rt_complete" - ), + "_none" + ), splits=c( - "7030", "cd_7030", - "8020", "cd_8020", - "sl", "cd_sl", - "cd_none_bts", - "cd_rt" + "none" ) ) @@ -79,72 +70,36 @@ for (x in gene) { #x=tolower(x) for (split in split_file){ filedata = paste0(x, split) - filename = paste0('/srv/shiny-server/git/LSHTM_ML/output/genes/',x,split,'.csv') + filename = paste0(data_dir,'LSHTM_ML/output/genes/',x,split,'.csv') #print(c(filename)) #load_name=paste0(combo[gene==x,"drug"],'_',split_map['splits'][split_map['files']==split]) load_name=paste0(x,'_baselineC_',split_map['splits'][split_map['files']==split]) #print(load_name) - #try({loaded_files[[filedata]] = read.csv(filename)}) + # try() on its own is fine here because we don't need to do anything if it fails try({loaded_files[[load_name]] = read.csv(filename)}) } } # Funky loader for combined data for (x in gene) { - for (ac in c('_actual','_complete')){ - for (gene_count in c(6,5)){ + for (ac in c('_actual','_complete', '_FS')){ + for (gene_count in c(1:6)){ load_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac) - filename = paste0('/srv/shiny-server/git/LSHTM_ML/output/combined/',load_name, ".csv") - print(filename) - - # if (ac=='') { - # ac2 <- '_complete' - # } else { - # ac2 = ac - # } + filename = paste0(data_dir,'LSHTM_ML/output/combined/',load_name, ".csv") store_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac) - print(store_name) - try({temp_df = read.csv(filename)}) - - temp_df=temp_df[, 2:ncol(temp_df)] # throw away first column - loaded_files[[store_name]] = temp_df - + # tryCatch is necessary here rather than try() because we need to do more + # manipulation afterwards (throwing away the column after loading) + load_successful=TRUE + tryCatch({temp_df = read.csv(filename)},error=function(e){load_successful<<-FALSE}) + if (load_successful){ + temp_df=temp_df[, 2:ncol(temp_df)] # throw away first column + loaded_files[[store_name]] = temp_df + print(paste0("loaded file: ", filename, "into var: ", store_name)) + } } } } -# -# loaded_files_old=list() -# for (x in drug) { -# for (split in split_type){ -# filename = paste0('/home/sethp/git/Data/', -# x, -# '/output/ml/tts_', -# split, -# '/', -# combo[drug==x,"gene"], -# '_baselineC_', -# split, -# '.csv') -# filedata = paste0(combo[drug==x,"gene"], -# '_baselineC_', -# split -# ) -# print(c(filename, filedata)) -# -# try({loaded_files_old[[filedata]] = read.csv(filename)}) -# } -# } - -#plot_data = thing[thing$resampling=='none',] -# FIXME commented out for the moment because we need to use -# this before the data is actually loaded :-( -# scores = colnames(thing %>% dplyr::select(-c("Model_name", -# "source_data", -# "resampling" -# ) -# ) -# ) scores=c("F1", "ROC_AUC", "JCC", "MCC", "Accuracy", "Recall", "Precision") resample_types <<- unique(thing$resampling) @@ -157,7 +112,7 @@ makeplot = function(x, # the DataFrame to plot gene = 'NOT SET', # used only for the info box drug = 'NOT SET', # used only for the info box combined_training_genes = '999' # used only for the info box - ){ +){ plot_data = x[x$resampling==resampler,] y_coord_min = min(plot_data[selection]) @@ -174,24 +129,24 @@ makeplot = function(x, # the DataFrame to plot "\nFeatures: ", metadata[6], "\nGenes Trained: ", combined_training_genes, "\nTest Gene: ", gene - ) - } else { - metatext=paste0("Train/Test: ", - metadata[1], "/", metadata[2], - "\nTrain/Test Target Ratio: ", metadata[3], "/", metadata[4], - "\nResampling: ", metadata[5], - "\nFeatures: ", metadata[6], - "\nTest Gene: ", gene - ) - } + ) + } else { + metatext=paste0("Train/Test: ", + metadata[1], "/", metadata[2], + "\nTrain/Test Target Ratio: ", metadata[3], "/", metadata[4], + "\nResampling: ", metadata[5], + "\nFeatures: ", metadata[6], + "\nTest Gene: ", gene + ) + } - #print(metatext) - - grob <- grobTree(textGrob(metatext, - x=0.01, - y=0.90, - hjust=0, - gp=gpar(col="black") + #print(metatext) + + grob <- grobTree(textGrob(metatext, + x=0.01, + y=0.90, + hjust=0, + gp=gpar(col="black") ) ) }