Dashboards/ml/global.R

library(shiny)
library(shinyjs)
library(shinydashboard)
#library("wesanderson") # ayyyy lmao hipster af
library(dplyr)
library(ggplot2)
library(grid) # for the info box
library(plotly)
library(shinycssloaders)

# make shiny non-stupid
#options(shiny.launch.browser = FALSE) # i am a big girl and can tie my own laces
#options(shiny.port = 8000) # don't change the port every time
#options(shiny.host = '0.0.0.0') # This means "listen to all addresses on all interfaces"
#options(width=120)
#options(DT.options = list(scrollX = TRUE))

# FIXME: get rid of this hardcoded thing which i'm only reading in to have resampling types ahead of loading the real files
# Root directory of the data checkouts: the home directory when run
# interactively, the shiny-server tree otherwise.
data_dir <- if (interactive()) {
  print("Interactive Session, using home directories")
  "~/git/"
} else {
  "/srv/shiny-server/git/"
}

# Not referenced again in this file (see the FIXME above).
thing <- read.csv(paste0(data_dir, "Data/ml_combined/genes/pnca_70_30_actual.csv"))

# Splits. The three vectors below are index-aligned:
#   split_type        - keys used for the split radio buttons and in the
#                       names of the loaded per-gene data
#   split_choicenames - labels intended for the split radio buttons
#   split_file        - filename stems of the per-split CSVs
split_type <- c("cd_7030", "cd_8020", "cd_sl", "none")
split_choicenames <- c("70:30", "80:20", "Scaling law", "CV thresholds")
split_file <- c(
  "_70_30_complete", "_80_20_complete", "_sl_complete", "_none_complete"
)

# Stems that also have a Feature Selection CSV: every split except "none"
split_file_FS <- split_file[split_file != "_none_complete"]

# Lookup table from filename stem to split key
# (necessary because the names will be wrong otherwise)
split_map <- data.frame(files = split_file, splits = split_type)

# Fill colours used by the bar charts
colour_range <- c("#605ca8", "#bebddb", "#221e70")

# Columns summarised in the info box drawn on top of a plot
metadata_cols <- c(
  "n_training_size", "n_test_size",
  "n_trainingY_ratio", "n_testY_ratio",
  "resampling", "n_features"
)

# hardcoded list of drugs; `drug_choicenames` and `gene` are index-aligned with it
drug <- c(
  "ethambutol", "isoniazid", "pyrazinamide", "rifampicin", "streptomycin"
)
drug_choicenames <- c(
  "EmbB-ethambutol", "KatG-isoniazid", "PncA-pyrazinamide",
  "RpoB-rifampicin", "GidB-streptomycin"
)
gene <- c("embb", "katg", "pnca", "rpob", "gid")

# One row per drug with the gene it is paired with
combo <- data.frame(drug = drug, gene = gene)

# Loader for per-gene CSVs and per-gene Feature Selection CSVs.
#
# Fills `loaded_files` (a named list of data frames) with one entry per
# gene/split combination:
#   <gene>_baselineC_<split key>      from <gene><stem>.csv
#   <gene>_baselineC_<split key>_FS   from <gene><stem>_FS.csv
# The plain CSVs are read for every stem in `split_file`, the Feature
# Selection CSVs only for the stems in `split_file_FS`. All plain files are
# attempted before any Feature Selection file. Files that cannot be read are
# skipped, so the corresponding entry is simply absent from `loaded_files`.
loaded_files <- list()
for (fs_suffix in c("", "_FS")) {
  stems <- if (fs_suffix == "") split_file else split_file_FS
  for (x in gene) {
    for (split in stems) {
      filename <- paste0(
        data_dir, 'LSHTM_ML/output/genes/', x, split, fs_suffix, '.csv'
      )
      # translate the filename stem into the split key used in the entry name
      split_key <- split_map$splits[split_map$files == split]
      load_name <- paste0(x, '_baselineC_', split_key, fs_suffix)
      print(load_name)
      # try() on its own is fine here because we don't need to do anything if it fails
      try({loaded_files[[load_name]] <- read.csv(filename)})
    }
  }
}


# Loader for combined-model data.
#
# For every gene, every suffix in ('_actual', '_complete', '_FS') and every
# gene count 1..6, tries to read
#   <data_dir>LSHTM_ML/output/combined/<n>genes_logo_skf_BT_<gene><suffix>.csv
# and stores it in `loaded_files` under the file's base name with the first
# column thrown away. Files that cannot be read are skipped silently.
for (x in gene) {
  for (ac in c('_actual', '_complete', '_FS')) {
    for (gene_count in 1:6) {
      store_name <- paste0(gene_count, "genes_logo_skf_BT_", x, ac)
      filename <- paste0(
        data_dir, 'LSHTM_ML/output/combined/', store_name, ".csv"
      )
      # tryCatch is necessary here rather than try() because we need to do
      # more manipulation afterwards; a failed read yields NULL
      temp_df <- tryCatch(read.csv(filename), error = function(e) NULL)
      if (is.null(temp_df)) {
        next
      }
      # throw away first column; negative indexing with drop = FALSE keeps a
      # data frame even when the file has only one or two columns
      loaded_files[[store_name]] <- temp_df[, -1, drop = FALSE]
      print(paste0("loaded file: ", filename, "into var: ", store_name))
    }
  }
}

# Score columns selectable in the UI
scores <- c(
  "F1", "ROC_AUC", "JCC", "MCC",
  "Accuracy", "Recall", "Precision"
)

#resample_types <<- unique(thing$resampling)
# Resampling types selectable in the UI ("none" is a real value, not a missing one)
resample_types <- c(
  "none",
  "Random Oversampling",
  "Over+Under",
  "Random Undersampling",
  "SMOTE"
)


# Draw a dodged bar chart of one score per model for one resampling type.
#
# Arguments:
#   x                        data frame to plot; must contain the columns
#                            `resampling`, `Model_name`, `source_data` and the
#                            score column named by `selection`. Any columns
#                            listed in the global `metadata_cols` feed the
#                            info box.
#   selection                name of the score column, e.g. 'MCC'
#   resampler                value of `resampling` to keep, e.g. 'none'
#   display_infobox          draw the metadata text box on top of the plot?
#   display_combined         TRUE: info box ends with "Genes Trained",
#                            FALSE: info box ends with "Test Gene"
#   gene                     used only for the info box
#   drug                     currently unused; kept for interface compatibility
#   combined_training_genes  used only for the info box
#
# Returns a ggplot object. Bars are filled from the global `colour_range`.
# Stops with an explanatory error if `x` is NULL or lacks the score column.
makeplot <- function(x, # the DataFrame to plot
                     selection, # scoring method e.g. 'MCC'
                     resampler, # resampling type e.g. 'none'
                     display_infobox         = TRUE, # display the infobox on top of the plot
                     display_combined        = TRUE, # show stuff that only applies to "combined model" plots
                     gene                    = 'NOT SET', # used only for the info box
                     drug                    = 'NOT SET', # used only for the info box
                     combined_training_genes = '999' # used only for the info box
){
  if (is.null(x)) {
    stop("makeplot(): no data to plot (the requested dataset was not loaded)",
         call. = FALSE)
  }
  if (!selection %in% colnames(x)) {
    stop("makeplot(): score column '", selection, "' not found in the data",
         call. = FALSE)
  }

  # which() drops rows whose resampling is NA instead of turning them into
  # all-NA rows
  plot_data <- x[which(x$resampling == resampler), , drop = FALSE]

  # Lower y limit: 0 unless the data contains negative scores. Including 0 in
  # min() also covers an empty selection without a warning.
  y_coord_min <- min(0, plot_data[[selection]], na.rm = TRUE)

  plot <- ggplot(
    data = plot_data,
    aes(
      x    = .data[["Model_name"]],
      y    = .data[[selection]],
      fill = .data[["source_data"]]
    )
  ) +
    geom_bar(
      stat = "identity",
      width = 0.75,
      position = position_dodge2(padding = 0.1, preserve = 'total', reverse = TRUE)
    ) +
    coord_cartesian(ylim = c(y_coord_min, 1))

  if (display_infobox) {
    # metadata is taken from the first plotted row; drop = FALSE keeps a data
    # frame even if only one metadata column is present
    metadata <- plot_data[1, intersect(colnames(plot_data), metadata_cols), drop = FALSE]
    last_line <- if (display_combined) {
      paste0("\nGenes Trained: ", combined_training_genes)
    } else {
      paste0("\nTest Gene: ", gene)
    }
    metatext <- paste0(
      "Train/Test: ",
      metadata$n_training_size, "/", metadata$n_test_size,
      "\nTrain/Test Target Ratio: ", metadata$n_trainingY_ratio, "/", metadata$n_testY_ratio,
      "\nResampling: ", metadata$resampling,
      "\nFeatures: ", metadata$n_features,
      last_line
    )
    infobox <- grobTree(
      textGrob(metatext, x = 0.01, y = 0.80, hjust = 0, gp = gpar(col = "black"))
    )
    # added before geom_text() so the value labels are drawn over the box
    plot <- plot + annotation_custom(infobox)
  }

  plot +
    # value label above each bar
    geom_text(
      aes(label = .data[[selection]]),
      position = position_dodge(width = -0.75),
      vjust = -0.5,
      alpha = 0.75
    ) +
    scale_color_manual(values = colour_range) +
    scale_fill_manual(values = colour_range) +
    labs(x = "", y = paste(selection, "Score"), fill = "source_data") +
    theme(
      axis.text.x = element_text(angle = 90)
    )
}

if (interactive()){
  # Dashboard layout: a sidebar of radio buttons selecting model type, number
  # of training genes, drug, split, score and resampling, and a body with the
  # main plot, the feature selection plot and a debug text output.
  #
  # Radio buttons with separate labels use choiceNames + choiceValues; passing
  # choiceNames together with `choices` makes shiny ignore the labels.
  ui <- dashboardPage(
    skin = "purple",
    dashboardHeader(title = "Score Selector"),
    dashboardSidebar(
      radioButtons("combined_model",
                   label = "Graph Model",
                   choiceNames = c("Combined", "Gene"),
                   choiceValues = c("combined", "gene"),
                   selected = "gene"
      ),

      # checkboxInput("combined_model",
      #               "Combined Model",
      #               value=FALSE
      #               ),
      #),

      # radioButtons("combined_data",
      #              label="Data Type",
      #              choiceNames = c("Complete", "Actual"),
      #              choiceValues = c("complete", "actual"),
      #              selected="complete"
      # ),
      radioButtons("combined_training_genes",
                   label = "Training Genes",
                   choiceNames = c("Five", "Six"),
                   choiceValues = c("5", "6"),
                   selected = "5"
      ),
      radioButtons("drug_dropdown",
                   label = "Drug",
                   choiceNames = drug_choicenames,
                   choiceValues = drug,
                   selected = "pyrazinamide"
      ),
      radioButtons("split_dropdown",
                   label = "Split",
                   choiceNames = split_choicenames,
                   choiceValues = split_type,
                   selected = "cd_7030"
      ),
      radioButtons("score_dropdown",
                   label = "Score",
                   choices = scores,
                   selected = "MCC"
      ),
      radioButtons("resample_dropdown",
                   label = "Resampling",
                   choices = resample_types,
                   selected = "none" # "none" is a value
      )
    ),
    dashboardBody(
      useShinyjs(), # needed for the hide()/show() calls in the server
      #plotlyOutput("plot", height = 800),
      box(plotOutput("plot"), width = "100%"),
      box(plotOutput("feature_plot"), width = "100%", title = "Feature Selection"),
      #  %>% withSpinner(color="#0dc5c1"), # uncomment if you want the spinner
      #downloadButton("save", "Download Plot"),
      #DT::dataTableOut("plotdata"),
      verbatimTextOutput("debug")
    )
  )


  # Server logic: whenever any sidebar input changes, work out which entries
  # of `loaded_files` to show, toggle the inputs that only apply to one model
  # type, and (re)assign the two plot outputs.
  server <- shinyServer(function(input, output, session) {

    # Build a 450px-high plot output for one entry of `loaded_files`.
    # If the entry does not exist (its CSV was not loaded) the output shows a
    # message instead of failing inside makeplot().
    render_scores <- function(data_key, score, resampling, gene,
                              training_genes, is_combined) {
      # evaluate the arguments now so the plot keeps the values it was built with
      force(data_key)
      force(score)
      force(resampling)
      force(gene)
      force(training_genes)
      force(is_combined)
      renderPlot({
        plot_df <- loaded_files[[data_key]]
        validate(need(
          !is.null(plot_df),
          paste0("No data loaded for '", data_key, "'")
        ))
        makeplot(
          plot_df,
          score,
          resampling,
          gene = gene,
          combined_training_genes = training_genes,
          display_combined = is_combined
        )
      }, height = 450)
    }

    observeEvent({
      input$combined_model
      #input$combined_data
      input$combined_training_genes
      input$score_dropdown
      input$resample_dropdown
      input$drug_dropdown
      input$split_dropdown

    },{
      combined_model          <- input$combined_model
      selection               <- input$score_dropdown
      resampler               <- input$resample_dropdown
      selected_drug           <- input$drug_dropdown
      selected_split          <- input$split_dropdown
      #combined_data           = input$combined_data
      combined_training_genes <- input$combined_training_genes

      # gene paired with the selected drug
      selected_gene           <- combo[combo$drug == selected_drug, 'gene']
      # if (combined_data == "FS"){
      #   updateRadioButtons(
      #     inputId="combined_training_genes",
      #     choiceNames = c("One", "Two"),
      #     choiceValues = c("1", "2"),
      #     selected = "2"
      #   )
      # } else{
      #   updateRadioButtons(
      #     inputId="combined_training_genes",
      #     choiceNames = c("Five", "Six"),
      #     choiceValues = c("5","6"),
      #     selected = "5"
      #   )
      # }

      is_combined <- combined_model == "combined"

      if (is_combined) {
        # the split does not apply to combined models
        hide("split_dropdown")
        show("combined_training_genes")

        # e.g. 6genes_logo_skf_BT_gid_complete
        filedata <- paste0(combined_training_genes,
                           'genes_logo_skf_BT_',
                           selected_gene,
                           '_',
                           "complete"
                           #combined_data
        )
        # the feature selection entry for the 5/6 training-gene model is
        # looked up under the prefix 1/2
        feature_data <- paste0(as.character(as.numeric(combined_training_genes) - 4),
                               'genes_logo_skf_BT_',
                               selected_gene,
                               '_FS'
        )
        plot_kind <- "COMBINED"
      } else {
        show("split_dropdown")
        hide("combined_training_genes")

        # e.g. pnca_baselineC_cd_7030
        filedata <- paste0(selected_gene, '_baselineC_', selected_split)
        feature_data <- paste0(filedata, "_FS")
        plot_kind <- "GENE"
      }

      print(filedata)
      print(paste0("doing ", plot_kind, " plot"))
      output$plot <- render_scores(
        filedata, selection, resampler,
        gene = selected_gene,
        training_genes = combined_training_genes,
        is_combined = is_combined
      )

      print(paste0("doing FEATURE SELECTION plot corresponding to ", plot_kind, " plot"))
      output$feature_plot <- render_scores(
        feature_data, selection,
        "none", # always 'none' for Feature Selection
        gene = selected_gene,
        training_genes = combined_training_genes,
        is_combined = is_combined
      )

      # e.g.
      # makeplot(loaded_files$`5genes_logo_skf_BT_pnca_actual`, "MCC", "none" , gene = 'foo', combined_training_genes = '1234', display_combined = TRUE)
    })
  })
  # Combine the ui and server objects into a Shiny app and start it.
  app <- shinyApp(ui, server)
  runApp(app)
}