file load antics
This commit is contained in:
parent
1d07c2d5ce
commit
78ffc970e9
1 changed files with 47 additions and 92 deletions
139
ml/global.R
139
ml/global.R
|
@ -16,52 +16,43 @@ library(shinycssloaders)
|
||||||
#options(DT.options = list(scrollX = TRUE))
|
#options(DT.options = list(scrollX = TRUE))
|
||||||
|
|
||||||
# FIXME: get rid of this hardcoded thing which i'm only reading in to have resampling types ahead of loading the real files
|
# FIXME: get rid of this hardcoded thing which i'm only reading in to have resampling types ahead of loading the real files
|
||||||
thing = read.csv("/srv/shiny-server/git/Data/ml_combined/genes/pnca_70_30_actual.csv")
|
if (interactive()){
|
||||||
|
print("Interactive Session, using home directories")
|
||||||
|
data_dir = "~/git/"
|
||||||
|
} else {
|
||||||
|
data_dir = "/srv/shiny-server/git/"
|
||||||
|
}
|
||||||
|
|
||||||
|
thing = read.csv(paste0(data_dir, "Data/ml_combined/genes/pnca_70_30_actual.csv"))
|
||||||
|
|
||||||
# list of splits
|
# list of splits
|
||||||
split_type = c(
|
split_type = c(
|
||||||
"7030",
|
|
||||||
"8020",
|
|
||||||
"sl",
|
|
||||||
"cd_7030",
|
"cd_7030",
|
||||||
"cd_8020",
|
"cd_8020",
|
||||||
"cd_sl",
|
"cd_sl",
|
||||||
"cd_none_bts",
|
"none"
|
||||||
"cd_rt"
|
)
|
||||||
)
|
|
||||||
|
|
||||||
split_file = c(
|
split_file = c(
|
||||||
"_70_30_actual",
|
|
||||||
"_70_30_complete",
|
"_70_30_complete",
|
||||||
"_80_20_actual",
|
|
||||||
"_80_20_complete",
|
"_80_20_complete",
|
||||||
"_sl_actual",
|
|
||||||
"_sl_complete",
|
"_sl_complete",
|
||||||
"_none_bts_complete",
|
"_none_complete"
|
||||||
"_rt_complete"
|
)
|
||||||
)
|
|
||||||
|
|
||||||
# necessary because the names will be wrong otherwise
|
# necessary because the names will be wrong otherwise
|
||||||
split_map = data.frame(
|
split_map = data.frame(
|
||||||
files=c(
|
files=c(
|
||||||
"_70_30_actual",
|
|
||||||
"_70_30_complete",
|
"_70_30_complete",
|
||||||
"_80_20_actual",
|
|
||||||
"_80_20_complete",
|
"_80_20_complete",
|
||||||
"_sl_actual",
|
|
||||||
"_sl_complete",
|
"_sl_complete",
|
||||||
"_none_bts_complete",
|
"_none"
|
||||||
"_rt_complete"
|
),
|
||||||
),
|
|
||||||
splits=c(
|
splits=c(
|
||||||
"7030",
|
|
||||||
"cd_7030",
|
"cd_7030",
|
||||||
"8020",
|
|
||||||
"cd_8020",
|
"cd_8020",
|
||||||
"sl",
|
|
||||||
"cd_sl",
|
"cd_sl",
|
||||||
"cd_none_bts",
|
"none"
|
||||||
"cd_rt"
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -79,72 +70,36 @@ for (x in gene) {
|
||||||
#x=tolower(x)
|
#x=tolower(x)
|
||||||
for (split in split_file){
|
for (split in split_file){
|
||||||
filedata = paste0(x, split)
|
filedata = paste0(x, split)
|
||||||
filename = paste0('/srv/shiny-server/git/LSHTM_ML/output/genes/',x,split,'.csv')
|
filename = paste0(data_dir,'LSHTM_ML/output/genes/',x,split,'.csv')
|
||||||
|
|
||||||
#print(c(filename))
|
#print(c(filename))
|
||||||
#load_name=paste0(combo[gene==x,"drug"],'_',split_map['splits'][split_map['files']==split])
|
#load_name=paste0(combo[gene==x,"drug"],'_',split_map['splits'][split_map['files']==split])
|
||||||
load_name=paste0(x,'_baselineC_',split_map['splits'][split_map['files']==split])
|
load_name=paste0(x,'_baselineC_',split_map['splits'][split_map['files']==split])
|
||||||
#print(load_name)
|
#print(load_name)
|
||||||
#try({loaded_files[[filedata]] = read.csv(filename)})
|
# try() on its own is fine here because we don't need to do anything if it fails
|
||||||
try({loaded_files[[load_name]] = read.csv(filename)})
|
try({loaded_files[[load_name]] = read.csv(filename)})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
# Funky loader for combined data
|
# Funky loader for combined data
|
||||||
for (x in gene) {
|
for (x in gene) {
|
||||||
for (ac in c('_actual','_complete')){
|
for (ac in c('_actual','_complete', '_FS')){
|
||||||
for (gene_count in c(6,5)){
|
for (gene_count in c(1:6)){
|
||||||
load_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac)
|
load_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac)
|
||||||
filename = paste0('/srv/shiny-server/git/LSHTM_ML/output/combined/',load_name, ".csv")
|
filename = paste0(data_dir,'LSHTM_ML/output/combined/',load_name, ".csv")
|
||||||
print(filename)
|
|
||||||
|
|
||||||
# if (ac=='') {
|
|
||||||
# ac2 <- '_complete'
|
|
||||||
# } else {
|
|
||||||
# ac2 = ac
|
|
||||||
# }
|
|
||||||
store_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac)
|
store_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac)
|
||||||
print(store_name)
|
# tryCatch is necessary here rather than try() because we need to do more
|
||||||
try({temp_df = read.csv(filename)})
|
# manipulation afterwards (throwing away the column after loading)
|
||||||
|
load_successful=TRUE
|
||||||
temp_df=temp_df[, 2:ncol(temp_df)] # throw away first column
|
tryCatch({temp_df = read.csv(filename)},error=function(e){load_successful<<-FALSE})
|
||||||
loaded_files[[store_name]] = temp_df
|
if (load_successful){
|
||||||
|
temp_df=temp_df[, 2:ncol(temp_df)] # throw away first column
|
||||||
|
loaded_files[[store_name]] = temp_df
|
||||||
|
print(paste0("loaded file: ", filename, "into var: ", store_name))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#
|
|
||||||
# loaded_files_old=list()
|
|
||||||
# for (x in drug) {
|
|
||||||
# for (split in split_type){
|
|
||||||
# filename = paste0('/home/sethp/git/Data/',
|
|
||||||
# x,
|
|
||||||
# '/output/ml/tts_',
|
|
||||||
# split,
|
|
||||||
# '/',
|
|
||||||
# combo[drug==x,"gene"],
|
|
||||||
# '_baselineC_',
|
|
||||||
# split,
|
|
||||||
# '.csv')
|
|
||||||
# filedata = paste0(combo[drug==x,"gene"],
|
|
||||||
# '_baselineC_',
|
|
||||||
# split
|
|
||||||
# )
|
|
||||||
# print(c(filename, filedata))
|
|
||||||
#
|
|
||||||
# try({loaded_files_old[[filedata]] = read.csv(filename)})
|
|
||||||
# }
|
|
||||||
# }
|
|
||||||
|
|
||||||
#plot_data = thing[thing$resampling=='none',]
|
|
||||||
# FIXME commented out for the moment because we need to use
|
|
||||||
# this before the data is actually loaded :-(
|
|
||||||
# scores = colnames(thing %>% dplyr::select(-c("Model_name",
|
|
||||||
# "source_data",
|
|
||||||
# "resampling"
|
|
||||||
# )
|
|
||||||
# )
|
|
||||||
# )
|
|
||||||
scores=c("F1", "ROC_AUC", "JCC", "MCC", "Accuracy", "Recall", "Precision")
|
scores=c("F1", "ROC_AUC", "JCC", "MCC", "Accuracy", "Recall", "Precision")
|
||||||
|
|
||||||
resample_types <<- unique(thing$resampling)
|
resample_types <<- unique(thing$resampling)
|
||||||
|
@ -157,7 +112,7 @@ makeplot = function(x, # the DataFrame to plot
|
||||||
gene = 'NOT SET', # used only for the info box
|
gene = 'NOT SET', # used only for the info box
|
||||||
drug = 'NOT SET', # used only for the info box
|
drug = 'NOT SET', # used only for the info box
|
||||||
combined_training_genes = '999' # used only for the info box
|
combined_training_genes = '999' # used only for the info box
|
||||||
){
|
){
|
||||||
plot_data = x[x$resampling==resampler,]
|
plot_data = x[x$resampling==resampler,]
|
||||||
y_coord_min = min(plot_data[selection])
|
y_coord_min = min(plot_data[selection])
|
||||||
|
|
||||||
|
@ -174,24 +129,24 @@ makeplot = function(x, # the DataFrame to plot
|
||||||
"\nFeatures: ", metadata[6],
|
"\nFeatures: ", metadata[6],
|
||||||
"\nGenes Trained: ", combined_training_genes,
|
"\nGenes Trained: ", combined_training_genes,
|
||||||
"\nTest Gene: ", gene
|
"\nTest Gene: ", gene
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
metatext=paste0("Train/Test: ",
|
metatext=paste0("Train/Test: ",
|
||||||
metadata[1], "/", metadata[2],
|
metadata[1], "/", metadata[2],
|
||||||
"\nTrain/Test Target Ratio: ", metadata[3], "/", metadata[4],
|
"\nTrain/Test Target Ratio: ", metadata[3], "/", metadata[4],
|
||||||
"\nResampling: ", metadata[5],
|
"\nResampling: ", metadata[5],
|
||||||
"\nFeatures: ", metadata[6],
|
"\nFeatures: ", metadata[6],
|
||||||
"\nTest Gene: ", gene
|
"\nTest Gene: ", gene
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#print(metatext)
|
#print(metatext)
|
||||||
|
|
||||||
grob <- grobTree(textGrob(metatext,
|
grob <- grobTree(textGrob(metatext,
|
||||||
x=0.01,
|
x=0.01,
|
||||||
y=0.90,
|
y=0.90,
|
||||||
hjust=0,
|
hjust=0,
|
||||||
gp=gpar(col="black")
|
gp=gpar(col="black")
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue