various bugs

This commit is contained in:
Tanushree Tunstall 2022-07-10 20:00:35 +01:00
parent 6950c4b057
commit 33e3b5a0a6
3 changed files with 39 additions and 24 deletions

View file

@ -213,32 +213,41 @@ str(a)
################################################### ###################################################
################################################### ###################################################
# #
# source("~/git/LSHTM_analysis/config/alr.R") source("~/git/LSHTM_analysis/config/alr.R")
# source("~/git/LSHTM_analysis/config/embb.R") source("~/git/LSHTM_analysis/config/embb.R")
# source("~/git/LSHTM_analysis/config/gid.R") source("~/git/LSHTM_analysis/config/gid.R")
# source("~/git/LSHTM_analysis/config/katg.R") source("~/git/LSHTM_analysis/config/katg.R")
# source("~/git/LSHTM_analysis/config/pnca.R") source("~/git/LSHTM_analysis/config/pnca.R")
# source("~/git/LSHTM_analysis/config/rpob.R") source("~/git/LSHTM_analysis/config/rpob.R")
# #
# df3_filename = paste0("/home/tanu/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv") df3_filename = paste0("/home/tanu/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv")
# df3 = read.csv(df3_filename) df3 = read.csv(df3_filename)
# #
# # mutationinformation # mutationinformation
# length(unique((df3$mutationinformation))) length(unique((df3$mutationinformation)))
# #
# #dm _om # #dm _om
# table(df3$mutation_info) table(df3$mutation_info)
# table(df3$mutation_info_labels) table(df3$mutation_info_labels)
# table(df3$mutation_info_orig) table(df3$mutation_info_orig)
# table(df3$mutation_info_labels_orig) table(df3$mutation_info_labels_orig)
#
# # test_set # test_set
# na_count <-sapply(df3, function(y) sum(length(which(is.na(y))))) na_count <-sapply(df3, function(y) sum(length(which(is.na(y)))))
# na_count[drug] na_count[drug]
# #
# # training set # # training set
# table(df3[drug]) table(df3[drug])
# #
# # drtype: MDR and XDR # # drtype: MDR and XDR
# #table(df3$drtype) orig i.e. incorrect ones! # #table(df3$drtype) orig i.e. incorrect ones!
# table(df3$drtype_mode_labels) # table(df3$drtype_mode_labels)
df3_complete = df3
table(df3_complete$dst_mode)
df3_actual = df3[!is.na(df3$dst), ]
table(df3_actual$dst_mode)

View file

@ -354,9 +354,10 @@ def MultModelsCl(input_df, target
y_pred = cross_val_predict(model_pipeline y_pred = cross_val_predict(model_pipeline
, input_df , input_df
, target , target
, cv = sel_cv #, commented out thing,
#, groups = group , cv=sel_cv
, **njobs) , **njobs
)
#_tn, _fp, _fn, _tp = confusion_matrix(y_pred, y).ravel() # internally #_tn, _fp, _fn, _tp = confusion_matrix(y_pred, y).ravel() # internally
tn, fp, fn, tp = confusion_matrix(y_pred, target).ravel() tn, fp, fn, tp = confusion_matrix(y_pred, target).ravel()

View file

@ -48,8 +48,13 @@ ml_gene_drugD = {'pncA' : 'pyrazinamide'
, 'gid' : 'streptomycin' , 'gid' : 'streptomycin'
} }
gene_dataD={} gene_dataD={}
split_types = ['70_30', '80_20', 'sl'] split_types = ['70_30',
split_data_types = ['actual', 'complete'] '80_20',
'sl'
]
split_data_types = ['actual',
'complete'
]
for gene, drug in ml_gene_drugD.items(): for gene, drug in ml_gene_drugD.items():
print ('\nGene:', gene print ('\nGene:', gene