From 5a4535f74748662a6f01b4cc5d51a4657449a04b Mon Sep 17 00:00:00 2001
From: Tanushree Tunstall
Date: Fri, 2 Sep 2022 16:09:33 +0000
Subject: [PATCH] ML dashboard/Score Selector initial commit

---
 ml/global.R | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 ml/server.R |  73 +++++++++++++++++
 ml/ui.R     |  69 +++++++++++++++
 3 files changed, 344 insertions(+)

diff --git a/ml/global.R b/ml/global.R
index e69de29..49d8d35 100644
--- a/ml/global.R
+++ b/ml/global.R
@@ -0,0 +1,202 @@
+# global.R -- objects shared by ui.R and server.R of the "Score Selector"
+# dashboard: package loading, the lookup tables behind the sidebar controls,
+# the CSV loaders that fill loaded_files, and the plotting helper makeplot().
+
+library(shiny)
+library(shinyjs)
+library(shinydashboard)
+#library("wesanderson") # optional colour palettes, currently unused
+library(dplyr)
+library(ggplot2)
+library(grid) # for the info box
+library(plotly)
+library(shinycssloaders)
+
+# Optional development settings; uncomment as needed.
+#options(shiny.launch.browser = FALSE) # do not open a browser on launch
+#options(shiny.port = 8000) # don't change the port every time
+#options(shiny.host = '0.0.0.0') # This means "listen to all addresses on all interfaces"
+#options(width=120)
+#options(DT.options = list(scrollX = TRUE))
+
+# FIXME: get rid of this hardcoded file, which is only read so that the
+# resampling types are known before the real files are loaded
+thing = read.csv("/srv/shiny-server/git/Data/ml_combined/genes/pnca_70_30_actual.csv")
+
+# Split names offered by the "Split" control in ui.R
+split_type = c(
+  "7030",
+  "8020",
+  "sl",
+  "cd_7030",
+  "cd_8020",
+  "cd_sl",
+  "cd_none_bts",
+  "cd_rt"
+)
+
+# File-name suffixes of the per-gene CSVs
+split_file = c(
+  "_70_30_actual",
+  "_70_30_complete",
+  "_80_20_actual",
+  "_80_20_complete",
+  "_sl_actual",
+  "_sl_complete",
+  "_none_bts_complete",
+  "_rt_complete"
+)
+
+# Maps each file suffix to its split name. Needed because split_type and
+# split_file are listed in different orders and cannot be paired by position.
+split_map = data.frame(
+  files=split_file,
+  splits=c(
+    "7030",
+    "cd_7030",
+    "8020",
+    "cd_8020",
+    "sl",
+    "cd_sl",
+    "cd_none_bts",
+    "cd_rt"
+  )
+)
+
+# Columns read for the plot info box, in the order makeplot() prints them
+metadata_cols = c("n_training_size", "n_test_size", "n_trainingY_ratio", "n_testY_ratio", "resampling", "n_features")
+
+# hardcoded list of drugs and the gene paired with each (by position)
+drug = c("ethambutol", "isoniazid", "pyrazinamide", "rifampicin", "streptomycin")
+gene = c("embb", "katg", "pnca", "rpob", "gid")
+combo = data.frame(drug, gene)
+
+# Read one CSV, returning NULL (with a warning) instead of stopping when the
+# file cannot be read, so that one absent file does not abort start-up.
+read_csv_or_null = function(path) {
+  tryCatch(
+    read.csv(path),
+    error = function(e) {
+      warning("Could not read ", path, ": ", conditionMessage(e), call. = FALSE)
+      NULL
+    }
+  )
+}
+
+# Loader for per-gene CSVs; entries are named <gene>_baselineC_<split>
+loaded_files=list()
+for (x in gene) {
+  for (split in split_file){
+    filename = paste0('/srv/shiny-server/git/LSHTM_ML/output/genes/',x,split,'.csv')
+    load_name=paste0(x,'_baselineC_',split_map$splits[split_map$files==split])
+    gene_df = read_csv_or_null(filename)
+    if (!is.null(gene_df)) {
+      loaded_files[[load_name]] = gene_df
+    }
+  }
+}
+
+# Loader for combined-model CSVs; entries are named
+# <gene_count>genes_logo_skf_BT_<gene>_<actual|complete>
+for (x in gene) {
+  for (ac in c('_actual','_complete')){
+    for (gene_count in c(6,5)){
+      store_name=paste0(gene_count, "genes_logo_skf_BT_", x, ac)
+      filename = paste0('/srv/shiny-server/git/LSHTM_ML/output/combined/',store_name, ".csv")
+      print(filename)
+      print(store_name)
+
+      temp_df = read_csv_or_null(filename)
+      if (is.null(temp_df)) {
+        # Skip unreadable files so that a data frame left over from an
+        # earlier iteration is never stored under this name.
+        next
+      }
+      loaded_files[[store_name]] = temp_df[, -1, drop = FALSE] # throw away first column
+    }
+  }
+}
+
+# FIXME: hardcoded for the moment because the list is needed before the data
+# is actually loaded; it should be derived from the score columns instead.
+scores=c("F1", "ROC_AUC", "JCC", "MCC", "Accuracy", "Recall", "Precision")
+
+resample_types = unique(thing$resampling)
+
+# Bar chart of one score for every model in a results table.
+#
+# x must be a data frame with the columns Model_name, source_data and
+# resampling, the column named by `selection` and, when the info box is
+# shown, the columns listed in metadata_cols. Returns a ggplot object.
+makeplot = function(x, # the DataFrame to plot
+                    selection, # scoring method e.g. 'MCC'
+                    resampler, # resampling type e.g. 'none'
+                    display_infobox = TRUE, # display the infobox on top of the plot
+                    display_combined = TRUE, # add the "Genes Trained" line to the info box
+                    gene = 'NOT SET', # used only for the info box
+                    drug = 'NOT SET', # currently unused
+                    combined_training_genes = '999' # used only for the info box
+                    ){
+  if (!is.data.frame(x)) {
+    stop("makeplot(): no data frame to plot (was the CSV loaded?)", call. = FALSE)
+  }
+  plot_data = x[x$resampling==resampler,]
+  y_coord_min = min(plot_data[[selection]])
+
+  # Start the y axis at 0 unless a score is negative
+  if (y_coord_min > 0) {
+    y_coord_min = 0
+  }
+
+  p = ggplot(data=plot_data, aes_string(x="Model_name",
+                                        y=selection,
+                                        fill="source_data",
+                                        group=selection) ) +
+    geom_bar(stat="identity"
+             , width = 0.75
+             , position=position_dodge2(padding=0.1, preserve='total', reverse=TRUE)
+    ) +
+    coord_cartesian(ylim = c(y_coord_min, 1)) +
+    scale_fill_manual(values = c("BT" = "#605ca8",
+                                 "CV" = "#bebddb") )
+
+  # The info box is only built, and only added, when requested; adding it
+  # unconditionally fails with "object 'grob' not found".
+  if (display_infobox) {
+    metadata=t(plot_data[1,metadata_cols])
+    genes_trained = ""
+    if (display_combined){
+      genes_trained = paste0("\nGenes Trained: ", combined_training_genes)
+    }
+    metatext=paste0("Train/Test: ",
+                    metadata[1], "/", metadata[2],
+                    "\nTrain/Test Target Ratio: ", metadata[3], "/", metadata[4],
+                    "\nResampling: ", metadata[5],
+                    "\nFeatures: ", metadata[6],
+                    genes_trained,
+                    "\nTest Gene: ", gene
+    )
+    grob <- grobTree(textGrob(metatext,
+                              x=0.01,
+                              y=0.90,
+                              hjust=0,
+                              gp=gpar(col="black")
+    )
+    )
+    p = p + annotation_custom(grob)
+  }
+
+  # geom_label doesn't work with plotly but looks nice; geom_text works with
+  # plotly but has no label background.
+  p +
+    geom_label(aes_string(label=selection),
+               position=position_dodge(width = -0.75),
+               vjust = 1.5,
+               alpha=0.75,
+               fill="white"
+    ) +
+    labs(x="",y=paste(selection,"Score")) +
+    theme(
+      axis.text.x = element_text(angle = 90)
+    )
+}
diff --git a/ml/server.R b/ml/server.R
index e69de29..d7e7864 100644
--- a/ml/server.R
+++ b/ml/server.R
@@ -0,0 +1,73 @@
+# server.R -- redraws the score plot whenever a sidebar control changes and
+# shows only the controls that apply to the selected graph model.
+library(shiny)
+
+shinyServer(function(input, output) {
+  observeEvent({
+    input$combined_model
+    input$combined_data
+    input$combined_training_genes
+    input$score_dropdown
+    input$resample_dropdown
+    input$drug_dropdown
+    input$split_dropdown
+  },{
+    combined_model = input$combined_model
+    selection = input$score_dropdown
+    resampler = input$resample_dropdown
+    selected_drug = input$drug_dropdown
+    selected_split = input$split_dropdown
+    combined_data = input$combined_data
+    combined_training_genes = input$combined_training_genes
+
+    # Look the gene up once through combo's own columns; every use below
+    # goes through this value rather than the free global `drug` vector.
+    selected_gene = combo[combo$drug == selected_drug,'gene']
+
+    if(combined_model == "combined") {
+      # Combined model: the split and resampling controls are hidden
+      shinyjs::hide("split_dropdown")
+      shinyjs::hide("resample_dropdown")
+      shinyjs::show("combined_data")
+      shinyjs::show("combined_training_genes")
+      # e.g. 6genes_logo_skf_BT_embb_actual
+      filedata = paste0(combined_training_genes,
+                        'genes_logo_skf_BT_',
+                        selected_gene,
+                        '_',
+                        combined_data
+      )
+      print(filedata)
+
+      print('doing COMBINED plot')
+      output$plot <- renderPlot(makeplot(loaded_files[[filedata]],
+                                         selection,
+                                         "none", # always 'none' for combined plot
+                                         gene = selected_gene,
+                                         combined_training_genes = combined_training_genes,
+                                         display_combined = TRUE
+      )
+      )
+    } else {
+      # Per-gene model: the data type and training-gene controls are hidden
+      shinyjs::show("split_dropdown")
+      shinyjs::show("resample_dropdown")
+      shinyjs::hide("combined_data")
+      shinyjs::hide("combined_training_genes")
+      # e.g. pnca_baselineC_7030
+      filedata = paste0(selected_gene,
+                        '_baselineC_',
+                        selected_split
+      )
+      print(filedata)
+      print("doing GENE plot")
+      output$plot <- renderPlot(makeplot(loaded_files[[filedata]],
+                                         selection,
+                                         resampler,
+                                         gene = selected_gene,
+                                         display_combined = FALSE
+      )
+      )
+    }
+  })
+})
diff --git a/ml/ui.R b/ml/ui.R
index e69de29..1831a07 100644
--- a/ml/ui.R
+++ b/ml/ui.R
@@ -0,0 +1,69 @@
+# ui.R -- layout of the "Score Selector" dashboard: a sidebar of radio-button
+# controls and a body holding the score plot. The choice vectors (drug,
+# split_type, scores, resample_types) are defined in global.R.
+library(shiny)
+library(shinyjs)
+library(shinydashboard)
+#library("wesanderson") # optional colour palettes, currently unused
+library(dplyr)
+library(ggplot2)
+library(grid) # for the info box
+library(plotly)
+library(shinycssloaders)
+
+dashboardPage(skin="purple",
+  dashboardHeader(title="Score Selector"),
+  dashboardSidebar(
+    # Chooses between the combined-model plot and the per-gene plot
+    radioButtons("combined_model",
+                 label="Graph Model",
+                 choiceNames = c("Combined", "Gene"),
+                 choiceValues = c("combined", "gene"),
+                 selected="gene"
+    ),
+    # The next two controls are shown by server.R only for the combined model
+    radioButtons("combined_data",
+                 label="Data Type",
+                 choiceNames = c("Complete", "Actual"),
+                 choiceValues = c("complete", "actual"),
+                 selected="complete"
+    ),
+    radioButtons("combined_training_genes",
+                 label="Training Genes",
+                 choiceNames = c("Five", "Six"),
+                 choiceValues = c("5","6"),
+                 selected = "5"
+    ),
+    radioButtons("drug_dropdown",
+                 label="Drug",
+                 choices = drug,
+                 selected="pyrazinamide"
+    ),
+    # Shown by server.R only for the per-gene model
+    radioButtons("split_dropdown",
+                 label="Split",
+                 choices = split_type,
+                 selected="7030"
+    ),
+    radioButtons("score_dropdown",
+                 label="Score",
+                 choices = scores,
+                 selected="MCC"
+    ),
+    # Shown by server.R only for the per-gene model
+    radioButtons("resample_dropdown",
+                 label="Resampling",
+                 choices = resample_types,
+                 selected="none" # "none" is a value
+    )
+  ),
+  dashboardBody(
+    useShinyjs(),
+    #plotlyOutput("plot", height = 800),
+    plotOutput("plot", height = 800),
+    # %>% withSpinner(color="#0dc5c1"), # uncomment if you want the spinner
+    #downloadButton("save", "Download Plot"),
+    #DT::dataTableOut("plotdata"),
+    verbatimTextOutput("debug")
+  )
+)