file load antics

This commit is contained in:
Tanushree Tunstall 2022-09-04 16:07:01 +01:00
parent 1d07c2d5ce
commit 78ffc970e9

View file

@ -16,52 +16,43 @@ library(shinycssloaders)
#options(DT.options = list(scrollX = TRUE)) #options(DT.options = list(scrollX = TRUE))
# FIXME: remove this hardcoded file, which is read only to obtain the resampling types before the real files are loaded # FIXME: remove this hardcoded file, which is read only to obtain the resampling types before the real files are loaded
thing = read.csv("/srv/shiny-server/git/Data/ml_combined/genes/pnca_70_30_actual.csv") if (interactive()){
print("Interactive Session, using home directories")
data_dir = "~/git/"
} else {
data_dir = "/srv/shiny-server/git/"
}
thing = read.csv(paste0(data_dir, "Data/ml_combined/genes/pnca_70_30_actual.csv"))
# list of splits # list of splits
split_type = c( split_type = c(
"7030",
"8020",
"sl",
"cd_7030", "cd_7030",
"cd_8020", "cd_8020",
"cd_sl", "cd_sl",
"cd_none_bts", "none"
"cd_rt" )
)
split_file = c( split_file = c(
"_70_30_actual",
"_70_30_complete", "_70_30_complete",
"_80_20_actual",
"_80_20_complete", "_80_20_complete",
"_sl_actual",
"_sl_complete", "_sl_complete",
"_none_bts_complete", "_none_complete"
"_rt_complete" )
)
# necessary because the names will be wrong otherwise # necessary because the names will be wrong otherwise
split_map = data.frame( split_map = data.frame(
files=c( files=c(
"_70_30_actual",
"_70_30_complete", "_70_30_complete",
"_80_20_actual",
"_80_20_complete", "_80_20_complete",
"_sl_actual",
"_sl_complete", "_sl_complete",
"_none_bts_complete", "_none"
"_rt_complete" ),
),
splits=c( splits=c(
"7030",
"cd_7030", "cd_7030",
"8020",
"cd_8020", "cd_8020",
"sl",
"cd_sl", "cd_sl",
"cd_none_bts", "none"
"cd_rt"
) )
) )
@ -79,72 +70,36 @@ for (x in gene) {
#x=tolower(x) #x=tolower(x)
for (split in split_file){ for (split in split_file){
filedata = paste0(x, split) filedata = paste0(x, split)
filename = paste0('/srv/shiny-server/git/LSHTM_ML/output/genes/',x,split,'.csv') filename = paste0(data_dir,'LSHTM_ML/output/genes/',x,split,'.csv')
#print(c(filename)) #print(c(filename))
#load_name=paste0(combo[gene==x,"drug"],'_',split_map['splits'][split_map['files']==split]) #load_name=paste0(combo[gene==x,"drug"],'_',split_map['splits'][split_map['files']==split])
load_name=paste0(x,'_baselineC_',split_map['splits'][split_map['files']==split]) load_name=paste0(x,'_baselineC_',split_map['splits'][split_map['files']==split])
#print(load_name) #print(load_name)
#try({loaded_files[[filedata]] = read.csv(filename)}) # try() on its own is fine here because we don't need to do anything if it fails
try({loaded_files[[load_name]] = read.csv(filename)}) try({loaded_files[[load_name]] = read.csv(filename)})
} }
} }
# Funky loader for combined data # Funky loader for combined data
for (x in gene) { for (x in gene) {
for (ac in c('_actual','_complete')){ for (ac in c('_actual','_complete', '_FS')){
for (gene_count in c(6,5)){ for (gene_count in c(1:6)){
load_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac) load_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac)
filename = paste0('/srv/shiny-server/git/LSHTM_ML/output/combined/',load_name, ".csv") filename = paste0(data_dir,'LSHTM_ML/output/combined/',load_name, ".csv")
print(filename)
# if (ac=='') {
# ac2 <- '_complete'
# } else {
# ac2 = ac
# }
store_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac) store_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac)
print(store_name) # tryCatch is necessary here rather than try() because we need to do more
try({temp_df = read.csv(filename)}) # manipulation afterwards (throwing away the column after loading)
load_successful=TRUE
temp_df=temp_df[, 2:ncol(temp_df)] # throw away first column tryCatch({temp_df = read.csv(filename)},error=function(e){load_successful<<-FALSE})
loaded_files[[store_name]] = temp_df if (load_successful){
temp_df=temp_df[, 2:ncol(temp_df)] # throw away first column
loaded_files[[store_name]] = temp_df
print(paste0("loaded file: ", filename, "into var: ", store_name))
}
} }
} }
} }
#
# loaded_files_old=list()
# for (x in drug) {
# for (split in split_type){
# filename = paste0('/home/sethp/git/Data/',
# x,
# '/output/ml/tts_',
# split,
# '/',
# combo[drug==x,"gene"],
# '_baselineC_',
# split,
# '.csv')
# filedata = paste0(combo[drug==x,"gene"],
# '_baselineC_',
# split
# )
# print(c(filename, filedata))
#
# try({loaded_files_old[[filedata]] = read.csv(filename)})
# }
# }
#plot_data = thing[thing$resampling=='none',]
# FIXME commented out for the moment because we need to use
# this before the data is actually loaded :-(
# scores = colnames(thing %>% dplyr::select(-c("Model_name",
# "source_data",
# "resampling"
# )
# )
# )
scores=c("F1", "ROC_AUC", "JCC", "MCC", "Accuracy", "Recall", "Precision") scores=c("F1", "ROC_AUC", "JCC", "MCC", "Accuracy", "Recall", "Precision")
resample_types <<- unique(thing$resampling) resample_types <<- unique(thing$resampling)
@ -157,7 +112,7 @@ makeplot = function(x, # the DataFrame to plot
gene = 'NOT SET', # used only for the info box gene = 'NOT SET', # used only for the info box
drug = 'NOT SET', # used only for the info box drug = 'NOT SET', # used only for the info box
combined_training_genes = '999' # used only for the info box combined_training_genes = '999' # used only for the info box
){ ){
plot_data = x[x$resampling==resampler,] plot_data = x[x$resampling==resampler,]
y_coord_min = min(plot_data[selection]) y_coord_min = min(plot_data[selection])
@ -174,24 +129,24 @@ makeplot = function(x, # the DataFrame to plot
"\nFeatures: ", metadata[6], "\nFeatures: ", metadata[6],
"\nGenes Trained: ", combined_training_genes, "\nGenes Trained: ", combined_training_genes,
"\nTest Gene: ", gene "\nTest Gene: ", gene
) )
} else { } else {
metatext=paste0("Train/Test: ", metatext=paste0("Train/Test: ",
metadata[1], "/", metadata[2], metadata[1], "/", metadata[2],
"\nTrain/Test Target Ratio: ", metadata[3], "/", metadata[4], "\nTrain/Test Target Ratio: ", metadata[3], "/", metadata[4],
"\nResampling: ", metadata[5], "\nResampling: ", metadata[5],
"\nFeatures: ", metadata[6], "\nFeatures: ", metadata[6],
"\nTest Gene: ", gene "\nTest Gene: ", gene
) )
} }
#print(metatext) #print(metatext)
grob <- grobTree(textGrob(metatext, grob <- grobTree(textGrob(metatext,
x=0.01, x=0.01,
y=0.90, y=0.90,
hjust=0, hjust=0,
gp=gpar(col="black") gp=gpar(col="black")
) )
) )
} }