ML dashboard/Score Selector initial commit
This commit is contained in:
parent
8a8b36d725
commit
5a4535f747
3 changed files with 382 additions and 0 deletions
235
ml/global.R
235
ml/global.R
|
@ -0,0 +1,235 @@
|
|||
library(shiny)
|
||||
library(shinyjs)
|
||||
library(shinydashboard)
|
||||
#library("wesanderson") # optional colour palettes; currently disabled
|
||||
library(dplyr)
|
||||
library(ggplot2)
|
||||
library(grid) # for the info box
|
||||
library(plotly)
|
||||
library(shinycssloaders)
|
||||
|
||||
# Optional Shiny runtime settings (all currently commented out)
|
||||
#options(shiny.launch.browser = FALSE) # do not open a browser automatically on launch
|
||||
#options(shiny.port = 8000) # don't change the port every time
|
||||
#options(shiny.host = '0.0.0.0') # This means "listen to all addresses on all interfaces"
|
||||
#options(width=120)
|
||||
#options(DT.options = list(scrollX = TRUE))
|
||||
|
||||
# FIXME: drop this hardcoded read. The file is loaded only so that the
# resampling types are known before the real per-gene files are loaded.
thing <- read.csv(
  "/srv/shiny-server/git/Data/ml_combined/genes/pnca_70_30_actual.csv"
)
|
||||
|
||||
# Split identifiers, in the order they are offered as choices in the UI.
split_type <- c(
  "7030", "8020", "sl",
  "cd_7030", "cd_8020", "cd_sl",
  "cd_none_bts", "cd_rt"
)
|
||||
|
||||
# Lookup table pairing each per-gene CSV filename suffix (`files`) with the
# split identifier (`splits`) used when naming entries of `loaded_files`.
# It is necessary because the names would be wrong otherwise: the suffix
# order does not line up with `split_type`, so the pairs are spelled out
# explicitly, one per position.
# `stringsAsFactors = FALSE` keeps both columns as plain character vectors
# on R versions older than 4.0 as well.
split_map <- data.frame(
  files = c(
    "_70_30_actual",
    "_70_30_complete",
    "_80_20_actual",
    "_80_20_complete",
    "_sl_actual",
    "_sl_complete",
    "_none_bts_complete",
    "_rt_complete"
  ),
  splits = c(
    "7030",
    "cd_7030",
    "8020",
    "cd_8020",
    "sl",
    "cd_sl",
    "cd_none_bts",
    "cd_rt"
  ),
  stringsAsFactors = FALSE
)

# Filename suffixes to load. Derived from the lookup table so that the two
# lists cannot drift apart.
split_file <- split_map$files
|
||||
|
||||
# Columns of a results file that are shown in the plot's info box.
metadata_cols <- c(
  "n_training_size", "n_test_size",
  "n_trainingY_ratio", "n_testY_ratio",
  "resampling", "n_features"
)

# Hardcoded list of drugs and, position by position, the gene paired with
# each one; `combo` holds the same pairing as a two-column data frame.
drug <- c("ethambutol", "isoniazid", "pyrazinamide", "rifampicin", "streptomycin")
gene <- c("embb", "katg", "pnca", "rpob", "gid")
combo <- data.frame(drug = drug, gene = gene)
|
||||
|
||||
# Loader for per-gene CSVs.
# Reads one file per (gene, filename suffix) pair, e.g.
#   /srv/shiny-server/git/LSHTM_ML/output/genes/pnca_70_30_complete.csv
# and stores it in `loaded_files` under "<gene>_baselineC_<split id>", where
# the split id is looked up from the filename suffix in `split_map`.
# A file that cannot be read is skipped with a warning naming the file, so
# the remaining files still load.
loaded_files <- list()
for (x in gene) {
  for (suffix in split_file) {
    filename <- paste0(
      "/srv/shiny-server/git/LSHTM_ML/output/genes/", x, suffix, ".csv"
    )
    split_id <- split_map$splits[split_map$files == suffix]
    load_name <- paste0(x, "_baselineC_", split_id)

    per_gene_df <- tryCatch(
      read.csv(filename),
      error = function(e) {
        warning(
          "Could not read ", filename, ": ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )
    if (!is.null(per_gene_df)) {
      loaded_files[[load_name]] <- per_gene_df
    }
  }
}
|
||||
# Loader for the combined-model CSVs.
# For every gene, data type ("_actual" / "_complete") and training gene count
# (6, 5) this reads
#   /srv/shiny-server/git/LSHTM_ML/output/combined/<name>.csv
# with <name> = "<count>genes_logo_skf_BT_<gene><type>", drops the first
# column and stores the rest in `loaded_files` under <name>.
# A file that cannot be read is skipped: nothing is stored for it, so a
# failed read can never leave the previous iteration's data under this name.
for (x in gene) {
  for (ac in c("_actual", "_complete")) {
    for (gene_count in c(6, 5)) {
      store_name <- paste0(gene_count, "genes_logo_skf_BT_", x, ac)
      filename <- paste0(
        "/srv/shiny-server/git/LSHTM_ML/output/combined/", store_name, ".csv"
      )
      print(filename)
      print(store_name)

      combined_df <- tryCatch(
        read.csv(filename),
        error = function(e) {
          warning(
            "Could not read ", filename, ": ", conditionMessage(e),
            call. = FALSE
          )
          NULL
        }
      )
      if (is.null(combined_df)) {
        next
      }

      # Throw away the first column; drop = FALSE keeps a data frame even if
      # only one column remains.
      loaded_files[[store_name]] <- combined_df[, -1, drop = FALSE]
    }
  }
}
|
||||
|
||||
#
|
||||
# loaded_files_old=list()
|
||||
# for (x in drug) {
|
||||
# for (split in split_type){
|
||||
# filename = paste0('/home/sethp/git/Data/',
|
||||
# x,
|
||||
# '/output/ml/tts_',
|
||||
# split,
|
||||
# '/',
|
||||
# combo[drug==x,"gene"],
|
||||
# '_baselineC_',
|
||||
# split,
|
||||
# '.csv')
|
||||
# filedata = paste0(combo[drug==x,"gene"],
|
||||
# '_baselineC_',
|
||||
# split
|
||||
# )
|
||||
# print(c(filename, filedata))
|
||||
#
|
||||
# try({loaded_files_old[[filedata]] = read.csv(filename)})
|
||||
# }
|
||||
# }
|
||||
|
||||
#plot_data = thing[thing$resampling=='none',]
|
||||
# FIXME commented out for the moment because we need to use
|
||||
# this before the data is actually loaded :-(
|
||||
# scores = colnames(thing %>% dplyr::select(-c("Model_name",
|
||||
# "source_data",
|
||||
# "resampling"
|
||||
# )
|
||||
# )
|
||||
# )
|
||||
# Score columns that can be selected for plotting (hardcoded for now).
scores <- c("F1", "ROC_AUC", "JCC", "MCC", "Accuracy", "Recall", "Precision")

# Resampling types found in the bootstrap file `thing`. This is a top-level
# statement, so a plain `<-` creates the variable just like the other
# assignments in this file; `<<-` is not needed here.
resample_types <- unique(thing$resampling)
|
||||
|
||||
# Build a bar plot of one score per model for a single resampling type.
#
# Arguments:
#   x                       data frame to plot; the code reads its columns
#                           `resampling`, `Model_name`, `source_data`, the
#                           column named by `selection` and, when the info box
#                           is shown, the columns listed in `metadata_cols`
#   selection               scoring method (column name), e.g. 'MCC'
#   resampler               resampling type to keep, e.g. 'none'
#   display_infobox         draw the info box on top of the plot
#   display_combined        add the "Genes Trained" line that only applies to
#                           "combined model" plots
#   gene                    used only for the info box
#   drug                    accepted for the info box but not used in the body
#   combined_training_genes used only for the info box
#
# Returns the ggplot object. Stops with an error when `x` is NULL.
makeplot <- function(x,
                     selection,
                     resampler,
                     display_infobox = TRUE,
                     display_combined = TRUE,
                     gene = 'NOT SET',
                     drug = 'NOT SET',
                     combined_training_genes = '999') {
  if (is.null(x)) {
    stop("No data available to plot for this selection", call. = FALSE)
  }

  plot_data <- x[x$resampling == resampler, ]

  # Lower y limit: 0, unless some score is negative, in which case the lowest
  # score. NA scores and an empty selection both fall back to 0.
  y_coord_min <- min(0, plot_data[[selection]], na.rm = TRUE)

  p <- ggplot(
    data = plot_data,
    aes_string(
      x = "Model_name",
      y = selection,
      fill = "source_data",
      group = selection
    )
  ) +
    geom_bar(
      stat = "identity",
      width = 0.75,
      position = position_dodge2(padding = 0.1, preserve = 'total', reverse = TRUE)
    ) +
    coord_cartesian(ylim = c(y_coord_min, 1)) +
    scale_fill_manual(values = c("BT" = "#605ca8", "CV" = "#bebddb"))

  # The info box is only built, and only added, when requested. Adding it
  # unconditionally would pick up grid's `grob` function when no local
  # `grob` exists and break the plot.
  if (display_infobox) {
    metadata <- t(plot_data[1, metadata_cols])
    info_lines <- c(
      paste0("Train/Test: ", metadata[1], "/", metadata[2]),
      paste0("Train/Test Target Ratio: ", metadata[3], "/", metadata[4]),
      paste0("Resampling: ", metadata[5]),
      paste0("Features: ", metadata[6]),
      # NULL (and therefore dropped by c()) for per-gene plots
      if (display_combined) paste0("Genes Trained: ", combined_training_genes),
      paste0("Test Gene: ", gene)
    )
    metatext <- paste(info_lines, collapse = "\n")

    info_grob <- grobTree(
      textGrob(
        metatext,
        x = 0.01,
        y = 0.90,
        hjust = 0,
        gp = gpar(col = "black")
      )
    )
    p <- p + annotation_custom(info_grob)
  }

  p +
    # geom_label doesn't work with plotly but looks nice; geom_text works
    # with plotly but has no background box.
    geom_label(
      aes_string(label = selection),
      position = position_dodge(width = -0.75),
      vjust = 1.5,
      alpha = 0.75,
      fill = "white"
    ) +
    labs(x = "", y = paste(selection, "Score")) +
    theme(axis.text.x = element_text(angle = 90))
}
|
78
ml/server.R
78
ml/server.R
|
@ -0,0 +1,78 @@
|
|||
library(shiny)
|
||||
|
||||
# Server logic for the Score Selector dashboard.
#
# Two independent pieces:
#   * an observer that shows/hides the sidebar controls depending on whether
#     the "combined" or the per-gene model is selected;
#   * a single `output$plot` definition that picks the matching entry of
#     `loaded_files` and hands it to `makeplot()`.
#
# Key formats in `loaded_files`:
#   combined: "<5|6>genes_logo_skf_BT_<gene>_<actual|complete>",
#             e.g. "6genes_logo_skf_BT_embb_actual"
#   per gene: "<gene>_baselineC_<split>"
shinyServer(function(input, output) {

  # Hide the controls that do not apply to the chosen model type.
  observeEvent(input$combined_model, {
    if (input$combined_model == "combined") {
      shinyjs::hide("split_dropdown")
      shinyjs::hide("resample_dropdown")
      shinyjs::show("combined_data")
      shinyjs::show("combined_training_genes")
    } else {
      shinyjs::show("split_dropdown")
      shinyjs::show("resample_dropdown")
      shinyjs::hide("combined_data")
      shinyjs::hide("combined_training_genes")
    }
  })

  # Defined once; Shiny re-renders it whenever an input read below changes.
  output$plot <- renderPlot({
    is_combined <- input$combined_model == "combined"
    selected_gene <- as.character(
      combo[combo$drug == input$drug_dropdown, "gene"]
    )

    if (is_combined) {
      filedata <- paste0(
        input$combined_training_genes,
        'genes_logo_skf_BT_',
        selected_gene,
        '_',
        input$combined_data
      )
      resampler <- "none" # always 'none' for combined plot
      print(filedata)
      print('doing COMBINED plot')
    } else {
      filedata <- paste0(
        selected_gene,
        '_baselineC_',
        input$split_dropdown
      )
      resampler <- input$resample_dropdown
      print(filedata)
      print("doing GENE plot")
    }

    # A file that failed to load has no entry; show a message instead of
    # letting the plot code fail on NULL.
    plot_df <- loaded_files[[filedata]]
    validate(
      need(!is.null(plot_df), paste0("No data loaded for '", filedata, "'"))
    )

    makeplot(
      plot_df,
      input$score_dropdown,
      resampler,
      gene = selected_gene,
      combined_training_genes = input$combined_training_genes,
      display_combined = is_combined
    )
  })
})
|
69
ml/ui.R
69
ml/ui.R
|
@ -0,0 +1,69 @@
|
|||
library(shiny)
|
||||
library(shinyjs)
|
||||
library(shinydashboard)
|
||||
#library("wesanderson") # optional colour palettes; currently disabled
|
||||
library(dplyr)
|
||||
library(ggplot2)
|
||||
library(grid) # for the info box
|
||||
library(plotly)
|
||||
library(shinycssloaders)
|
||||
|
||||
# Dashboard layout: a sidebar of radio-button selectors and a body holding
# the plot. The choice vectors `drug`, `split_type`, `scores` and
# `resample_types` are not defined in this file.
dashboardPage(
  dashboardHeader(title = "Score Selector"),
  dashboardSidebar(
    # Model type; a checkboxInput("combined_model", ...) was tried here before.
    radioButtons(
      inputId = "combined_model",
      label = "Graph Model",
      selected = "gene",
      choiceNames = c("Combined", "Gene"),
      choiceValues = c("combined", "gene")
    ),
    radioButtons(
      inputId = "combined_data",
      label = "Data Type",
      selected = "complete",
      choiceNames = c("Complete", "Actual"),
      choiceValues = c("complete", "actual")
    ),
    radioButtons(
      inputId = "combined_training_genes",
      label = "Training Genes",
      selected = "5",
      choiceNames = c("Five", "Six"),
      choiceValues = c("5", "6")
    ),
    radioButtons(
      inputId = "drug_dropdown",
      label = "Drug",
      selected = "pyrazinamide",
      choices = drug
    ),
    radioButtons(
      inputId = "split_dropdown",
      label = "Split",
      selected = "7030",
      choices = split_type
    ),
    radioButtons(
      inputId = "score_dropdown",
      label = "Score",
      selected = "MCC",
      choices = scores
    ),
    radioButtons(
      inputId = "resample_dropdown",
      label = "Resampling",
      selected = "none", # "none" is a value
      choices = resample_types
    )
  ),
  dashboardBody(
    useShinyjs(),
    # Disabled alternatives: plotlyOutput("plot", height = 800),
    # a withSpinner(color="#0dc5c1") wrapper around the plot,
    # downloadButton("save", "Download Plot") and a DT table "plotdata".
    plotOutput(outputId = "plot", height = 800),
    verbatimTextOutput(outputId = "debug")
  ),
  skin = "purple"
)
|
Loading…
Add table
Add a link
Reference in a new issue