diff --git a/scripts/count_vars_ML.R b/scripts/count_vars_ML.R index 6f87c92..07c535f 100644 --- a/scripts/count_vars_ML.R +++ b/scripts/count_vars_ML.R @@ -228,9 +228,11 @@ length(unique((df3$mutationinformation))) # # #dm _om table(df3$mutation_info) -table(df3$mutation_info_labels) table(df3$mutation_info_orig) table(df3$mutation_info_labels_orig) + +table(df3$mutation_info_labels) # different, and matches dst_mode +table(df3$dst_mode) # test_set na_count <-sapply(df3, function(y) sum(length(which(is.na(y))))) @@ -246,8 +248,10 @@ table(df3[drug]) df3_complete = df3 table(df3_complete$dst_mode) - +comp_lin_all = df3_complete[df3_complete$lineage_labels%in%c("L1", "L2", "L3", "L4"),] +table(comp_lin_all$lineage); sum(table(comp_lin_all$lineage)) df3_actual = df3[!is.na(df3$dst), ] table(df3_actual$dst_mode) - +comp_lin_actual = df3_actual[df3_actual$lineage_labels%in%c("L1", "L2", "L3", "L4"),] +table(comp_lin_actual$lineage); sum(table(comp_lin_actual$lineage)) \ No newline at end of file diff --git a/scripts/ml/ml_iterator.py b/scripts/ml/ml_iterator.py index 6daf527..805126f 100755 --- a/scripts/ml/ml_iterator.py +++ b/scripts/ml/ml_iterator.py @@ -41,9 +41,9 @@ gene_model_paramD = {'data_combined_model' : False ############################################################################### #ml_genes = ["pncA", "embB", "katG", "rpoB", "gid"] -ml_gene_drugD = {'pncA' : 'pyrazinamide' - , 'embB' : 'ethambutol' - , 'katG' : 'isoniazid' +ml_gene_drugD = {#'pncA' : 'pyrazinamide' + # 'embB' : 'ethambutol' + 'katG' : 'isoniazid' , 'rpoB' : 'rifampicin' , 'gid' : 'streptomycin' }