ML scripts: {'n_jobs': os.cpu_count() }

2022-07-02 10:20:40 +01:00 · 2022-07-02 10:20:40 +01:00 · b8653c6afe
commit b8653c6afe
parent 11af00f1db
4 changed files with 16 additions and 12 deletions
--- a/scripts/ml/ml_functions/FS.py
+++ b/scripts/ml/ml_functions/FS.py
@@ -76,8 +76,7 @@ import argparse
 import re
 #####################################
 rs = {'random_state': 42}
-njobs = {'n_jobs': 10}
+njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number of CPU cores
 scoring_fn =  ({ 'mcc'        : make_scorer(matthews_corrcoef)
                , 'fscore'    : make_scorer(f1_score)
--- a/scripts/ml/ml_functions/MultClfs.py
+++ b/scripts/ml/ml_functions/MultClfs.py
@@ -76,7 +76,7 @@ import argparse
 import re
 #%% GLOBALS
 rs = {'random_state': 42}
-njobs = {'n_jobs': 10}
+njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number of CPU cores
 scoring_fn =  ({ 'mcc'        : make_scorer(matthews_corrcoef)
                , 'fscore'    : make_scorer(f1_score)
--- a/scripts/ml/ml_functions/SplitTTS.py
+++ b/scripts/ml/ml_functions/SplitTTS.py
@@ -41,7 +41,7 @@ import re
 homedir = os.path.expanduser("~")
 #%% GLOBALS
 rs = {'random_state': 42}
-njobs = {'n_jobs': 10}
+njobs = {'n_jobs': os.cpu_count() } # the number of jobs should equal the number of CPU cores
 #%% Define split_tts function #################################################
 def split_tts(ml_input_data
--- a/scripts/plotting/lineage_plots_multipage.R
+++ b/scripts/plotting/lineage_plots_multipage.R
@@ -5,8 +5,8 @@ library(ggpubr)
 library(svglite)
 # for testing only
-gene="pncA"
+#gene="pncA"
-drug="pyrazinamide"
+#drug="pyrazinamide"
 lineage_plot=function(gene,drug){
  lineage_filename=paste0(tolower(gene),"_merged_df2.csv")
@@ -84,7 +84,7 @@ lineage_plot=function(gene,drug){
    #print (i)
    s_mut = plot_df[plot_df$mutationinformation == i,]
    s_tab = table(s_mut$lineage, s_mut$sensitivity)
-    ft_pvalue_i = fisher.test(s_tab)$p.value
+    ft_pvalue_i = fisher.test(s_tab, workspace=2000000)$p.value
    plot_df$pval[plot_df$mutationinformation == i] <- ft_pvalue_i
  }
  plot_df$pvalR = round(plot_df$pval, 3)
@@ -131,12 +131,14 @@ lineage_plot=function(gene,drug){
  # Do plots
  plot_pages = round(length(lin_muts)/25)
  if (plot_pages<1){plot_pages=1}
  p_title = gene
  res = 144 # SVG dots-per-inch
  print(paste0('About to plot ', plot_pages, ' page(s).'))
  sapply(1:plot_pages, function(page){
    print(paste0("Plotting page:", page))
-    svglite(paste0("/tmp/",drug,"-",page,".svg"), width=2048/res, height=1534/res) # old-school square 4:3 CRT shape 1.3:1
+    svglite(paste0("/tmp/",drug,"-",page,".svg"), width=2048/res, height=1534/res) # old-school square 4:3 CRT shape 1.33:1
    print(
      ggplot(plot_df2, aes(x = lineage
                           , fill = sensitivity)) + 
@@ -169,17 +171,19 @@ lineage_plot=function(gene,drug){
 # hardcoded list of drugs
 drugs = c(#"ethambutol",
-          #"isoniazid",
+          "isoniazid",
          "pyrazinamide",
          "rifampicin",
          "streptomycin",
-          "cycloserine")
+          #"cycloserine"
          )
 genes = c(#"embB",
-          #"katG",
+          "katG",
          "pncA",
          "rpoB",
          "gid",
-          "alr")
+          #"alr"
          )
 combo = data.frame(drugs, genes)
 #sapply(combo$drugs, function(x){print(c(x,combo[drugs==x,"genes"]))})
@@ -188,4 +192,5 @@ combo = data.frame(drugs, genes)
 sapply(combo$drugs, function(drug){
  gene=combo[drugs==drug,"genes"]
  lineage_plot(gene,drug)
  print(c(gene,drug))
 })