418 lines
14 KiB
R
418 lines
14 KiB
R
library(shiny)
|
|
library(shinyjs)
|
|
library(shinydashboard)
|
|
#library("wesanderson") # ayyyy lmao hipster af
|
|
library(dplyr)
|
|
library(ggplot2)
|
|
library(grid) # for the info box
|
|
library(plotly)
|
|
library(shinycssloaders)
|
|
|
|
# make shiny non-stupid
|
|
#options(shiny.launch.browser = FALSE) # i am a big girl and can tie my own laces
|
|
#options(shiny.port = 8000) # don't change the port every time
|
|
#options(shiny.host = '0.0.0.0') # This means "listen to all addresses on all interfaces"
|
|
#options(width=120)
|
|
#options(DT.options = list(scrollX = TRUE))
|
|
|
|
# FIXME: get rid of this hardcoded read; `thing` is only loaded so that the
# resampling types are available ahead of loading the real files.

# Root of the data checkout. Interactive sessions use the copy under the home
# directory; anything else uses the path under /srv/shiny-server.
# The trailing slash matters: paths below are assembled with paste0().
data_dir <- if (interactive()) {
  print("Interactive Session, using home directories")
  "~/git/"
} else {
  "/srv/shiny-server/git/"
}

thing <- read.csv(
  paste0(data_dir, "Data/ml_combined/genes/pnca_70_30_actual.csv")
)
|
|
|
|
# Split identifiers: used as the values of the "Split" radio buttons and as
# the split part of the keys in `loaded_files`.
split_type <- c("cd_7030", "cd_8020", "cd_sl", "none")

# Display labels, parallel to split_type.
split_choicenames <- c("70:30", "80:20", "Scaling law", "CV thresholds")

# File-name infixes of the per-gene result files, parallel to split_type.
split_file <- paste0("_", c("70_30", "80_20", "sl", "none"), "_complete")

# Feature-selection files use the same infixes, minus the "none" split.
split_file_FS <- split_file[1:3]

# necessary because the names will be wrong otherwise:
# maps a file-name infix to its split identifier
split_map <- data.frame(
  files = split_file,
  splits = split_type
)

# Fill colours used by the plots.
colour_range <- c("#605ca8", "#bebddb", "#221e70")

# Columns of a result table that feed the info box on the plot.
metadata_cols <- c(
  "n_training_size",
  "n_test_size",
  "n_trainingY_ratio",
  "n_testY_ratio",
  "resampling",
  "n_features"
)

# hardcoded list of drugs
drug <- c(
  "ethambutol", "isoniazid", "pyrazinamide", "rifampicin", "streptomycin"
)

# Display labels, parallel to drug.
drug_choicenames <- c(
  "EmbB-ethambutol",
  "KatG-isoniazid",
  "PncA-pyrazinamide",
  "RpoB-rifampicin",
  "GidB-streptomycin"
)

# Gene for each drug, parallel to drug.
gene <- c("embb", "katg", "pnca", "rpob", "gid")

# Lookup table: one row per drug/gene pair.
combo <- data.frame(drug = drug, gene = gene)
|
|
|
|
# Loaders for per-gene CSVs ----

# Every result table that could be loaded, keyed by
# "<gene>_baselineC_<split>" (with a trailing "_FS" for feature selection).
loaded_files <- list()

# Load one result CSV per gene/split combination into `store` and return the
# updated list.
#
#   store        named list to add the loaded data frames to
#   genes        gene names used as the file-name prefix
#   split_files  file-name infixes; each must appear in split_map$files
#   suffix       extra file-name/key suffix ("" or "_FS")
#
# A missing or unreadable file is reported and skipped: nothing needs to
# happen when a combination has no results, so startup must not fail on it.
load_gene_results <- function(store, genes, split_files, suffix = "") {
  for (gene_name in genes) {
    for (split_infix in split_files) {
      filename <- paste0(
        data_dir, "LSHTM_ML/output/genes/",
        gene_name, split_infix, suffix, ".csv"
      )
      # Translate the file-name infix into the split id used in the keys.
      split_id <- as.character(
        split_map$splits[split_map$files == split_infix]
      )
      load_name <- paste0(gene_name, "_baselineC_", split_id, suffix)
      print(load_name)

      if (!file.exists(filename)) {
        message("skipping missing file: ", filename)
        next
      }
      loaded <- tryCatch(
        read.csv(filename),
        error = function(e) {
          message("could not read ", filename, ": ", conditionMessage(e))
          NULL
        }
      )
      if (!is.null(loaded)) {
        store[[load_name]] <- loaded
      }
    }
  }
  store
}

# Per-gene results for every split.
loaded_files <- load_gene_results(loaded_files, gene, split_file)

# Per-gene Feature Selection results (no "none" split for these).
loaded_files <- load_gene_results(
  loaded_files, gene, split_file_FS,
  suffix = "_FS"
)
|
|
|
|
|
|
# Loader for combined-model data ----
#
# Tries every "<n>genes_logo_skf_BT_<gene><suffix>.csv" for n = 1..6 and the
# three suffixes below. Each file that loads is stored in `loaded_files` under
# its file name without the ".csv" extension, minus its first column.
for (gene_name in gene) {
  for (suffix in c("_actual", "_complete", "_FS")) {
    for (gene_count in 1:6) {
      store_name <- paste0(gene_count, "genes_logo_skf_BT_", gene_name, suffix)
      filename <- paste0(
        data_dir, "LSHTM_ML/output/combined/", store_name, ".csv"
      )

      # Not every combination exists; skip the absent ones quietly.
      if (!file.exists(filename)) {
        next
      }

      # tryCatch rather than try(): a failed read becomes NULL so the
      # post-processing below can be skipped, and the reason is reported
      # instead of being swallowed.
      temp_df <- tryCatch(
        read.csv(filename),
        error = function(e) {
          message("could not read ", filename, ": ", conditionMessage(e))
          NULL
        }
      )
      if (is.null(temp_df)) {
        next
      }

      # Throw away the first column. Negative indexing with drop = FALSE keeps
      # a data frame even for one- or two-column files, where
      # `2:ncol(temp_df)` would error or collapse to a vector.
      loaded_files[[store_name]] <- temp_df[, -1, drop = FALSE]
      print(paste0("loaded file: ", filename, " into var: ", store_name))
    }
  }
}
|
|
|
|
# Score columns offered by the "Score" radio buttons.
scores <- c(
  "F1",
  "ROC_AUC",
  "JCC",
  "MCC",
  "Accuracy",
  "Recall",
  "Precision"
)

#resample_types <<- unique(thing$resampling)
# Resampling types offered by the "Resampling" radio buttons.
resample_types <- c(
  "none",
  "Random Oversampling",
  "Over+Under",
  "Random Undersampling",
  "SMOTE"
)
|
|
|
|
|
|
# Draw a dodged bar chart of one score per model for one resampling type.
#
# Arguments:
#   x                        data frame to plot. The code reads its columns
#                            `resampling`, `Model_name`, `source_data`, the
#                            column named by `selection`, and whichever of
#                            `metadata_cols` are present.
#   selection                scoring method (column name), e.g. "MCC"
#   resampler                resampling type to keep, e.g. "none"
#   display_infobox          display the info box on top of the plot
#   display_combined         TRUE: the info box ends with "Genes Trained";
#                            FALSE: it ends with "Test Gene"
#   gene                     used only for the info box
#   drug                     not used by the body; kept so existing calls
#                            keep working
#   combined_training_genes  used only for the info box
#
# Returns a ggplot object. Stops with an error when `x` is NULL (the data set
# was never loaded) or has no column named `selection`.
makeplot <- function(x,
                     selection,
                     resampler,
                     display_infobox = TRUE,
                     display_combined = TRUE,
                     gene = "NOT SET",
                     drug = "NOT SET",
                     combined_training_genes = "999") {
  if (is.null(x)) {
    stop("makeplot: no data available for this selection", call. = FALSE)
  }
  if (!selection %in% colnames(x)) {
    stop("makeplot: no column named '", selection, "' in the data",
         call. = FALSE)
  }

  # %in% rather than == so rows with a missing resampling value are dropped
  # instead of turning into all-NA rows.
  plot_data <- x[x$resampling %in% resampler, ]

  # The y axis starts at 0 unless some score is negative (e.g. MCC).
  y_coord_min <- min(0, plot_data[[selection]], na.rm = TRUE)

  p <- ggplot(
    data = plot_data,
    aes(
      x = .data[["Model_name"]],
      y = .data[[selection]],
      fill = .data[["source_data"]]
    )
  ) +
    geom_bar(
      stat = "identity",
      width = 0.75,
      position = position_dodge2(padding = 0.1, preserve = "total",
                                 reverse = TRUE)
    ) +
    coord_cartesian(ylim = c(y_coord_min, 1))

  # The info box is only built, and only added, when requested; adding it
  # unconditionally would pass an undefined `grob` to annotation_custom().
  if (display_infobox) {
    # drop = FALSE keeps a data frame even if only one metadata column exists.
    metadata <- plot_data[
      1,
      intersect(colnames(plot_data), metadata_cols),
      drop = FALSE
    ]
    last_line <- if (display_combined) {
      paste0("\nGenes Trained: ", combined_training_genes)
    } else {
      paste0("\nTest Gene: ", gene)
    }
    metatext <- paste0(
      "Train/Test: ",
      metadata$n_training_size, "/", metadata$n_test_size,
      "\nTrain/Test Target Ratio: ",
      metadata$n_trainingY_ratio, "/", metadata$n_testY_ratio,
      "\nResampling: ", metadata$resampling,
      "\nFeatures: ", metadata$n_features,
      last_line
    )
    infobox <- grobTree(
      textGrob(metatext, x = 0.01, y = 0.80, hjust = 0,
               gp = gpar(col = "black"))
    )
    p <- p + annotation_custom(infobox)
  }

  p +
    # little score numbers above the bars; the negative dodge width mirrors
    # the reversed dodge order of the bars
    geom_text(
      aes(label = .data[[selection]]),
      position = position_dodge(width = -0.75),
      vjust = -0.5,
      alpha = 0.75
    ) +
    scale_color_manual(values = colour_range) +
    scale_fill_manual(values = colour_range) +
    # fill label set explicitly so the legend title stays "source_data"
    labs(x = "", y = paste(selection, "Score"), fill = "source_data") +
    theme(axis.text.x = element_text(angle = 90))
}
|
|
|
|
# Build and launch the dashboard.
# NOTE(review): the app is only defined and started in interactive sessions,
# although `data_dir` above also has a non-interactive (shiny-server) branch;
# confirm how the deployed copy is launched.
if (interactive()) {
  ui <- dashboardPage(
    skin = "purple",
    dashboardHeader(title = "Score Selector"),
    dashboardSidebar(
      radioButtons(
        "combined_model",
        label = "Graph Model",
        choiceNames = c("Combined", "Gene"),
        choiceValues = c("combined", "gene"),
        selected = "gene"
      ),
      radioButtons(
        "combined_training_genes",
        label = "Training Genes",
        choiceNames = c("Five", "Six"),
        choiceValues = c("5", "6"),
        selected = "5"
      ),
      # choiceNames must be paired with choiceValues: when `choices` is given
      # as well, Shiny ignores the display names (with a warning).
      radioButtons(
        "drug_dropdown",
        label = "Drug",
        choiceNames = drug_choicenames,
        choiceValues = drug,
        selected = "pyrazinamide"
      ),
      radioButtons(
        "split_dropdown",
        label = "Split",
        choiceNames = split_choicenames,
        choiceValues = split_type,
        selected = "cd_7030"
      ),
      radioButtons(
        "score_dropdown",
        label = "Score",
        choices = scores,
        selected = "MCC"
      ),
      radioButtons(
        "resample_dropdown",
        label = "Resampling",
        choices = resample_types,
        selected = "none" # "none" is a value
      )
    ),
    dashboardBody(
      useShinyjs(),
      box(plotOutput("plot"), width = "100%"),
      box(plotOutput("feature_plot"), width = "100%",
          title = "Feature Selection"),
      # append `%>% withSpinner(color="#0dc5c1")` to a plotOutput if you want
      # the spinner
      verbatimTextOutput("debug")
    )
  )

  server <- shinyServer(function(input, output, session) {
    # TRUE while the "Combined" graph model is selected.
    is_combined <- reactive(input$combined_model == "combined")

    # Gene that belongs to the selected drug.
    selected_gene <- reactive({
      as.character(combo[combo$drug == input$drug_dropdown, "gene"])
    })

    # Keys into `loaded_files` for the main plot and its feature-selection
    # counterpart.
    #   combined: e.g. "6genes_logo_skf_BT_gid_complete" and
    #             "2genes_logo_skf_BT_gid_FS"
    #   gene:     e.g. "pnca_baselineC_cd_7030" and "pnca_baselineC_cd_7030_FS"
    plot_keys <- reactive({
      gene_name <- selected_gene()
      if (is_combined()) {
        n_genes <- input$combined_training_genes
        list(
          main = paste0(n_genes, "genes_logo_skf_BT_", gene_name, "_complete"),
          # feature-selection key for the 5/6-gene models: the gene count
          # minus 4, i.e. "1genes..."/"2genes..."
          features = paste0(
            as.character(as.numeric(n_genes) - 4),
            "genes_logo_skf_BT_", gene_name, "_FS"
          )
        )
      } else {
        main_key <- paste0(gene_name, "_baselineC_", input$split_dropdown)
        list(main = main_key, features = paste0(main_key, "_FS"))
      }
    })

    # Only show the controls that apply to the selected graph model.
    observeEvent(input$combined_model, {
      if (is_combined()) {
        shinyjs::hide("split_dropdown")
        shinyjs::show("combined_training_genes")
      } else {
        shinyjs::show("split_dropdown")
        shinyjs::hide("combined_training_genes")
      }
    })

    # Plot the data set stored under `key`, or show a readable message when
    # that data set was never loaded (e.g. no feature-selection file exists
    # for the "none" split).
    draw_plot <- function(key, resampler) {
      print(key)
      shiny::validate(
        shiny::need(
          !is.null(loaded_files[[key]]),
          paste0("No data loaded for '", key, "'")
        )
      )
      makeplot(
        loaded_files[[key]],
        input$score_dropdown,
        resampler,
        gene = selected_gene(),
        combined_training_genes = input$combined_training_genes,
        display_combined = is_combined()
      )
    }

    output$plot <- renderPlot(
      draw_plot(plot_keys()$main, input$resample_dropdown),
      height = 450
    )

    output$feature_plot <- renderPlot(
      draw_plot(plot_keys()$features, "none"), # always 'none' for Feature Selection
      height = 450
    )
  })

  app <- shinyApp(ui, server)
  runApp(app)
}
|