various bugs

This commit is contained in:
Tanushree Tunstall 2022-07-10 20:00:35 +01:00
parent 6950c4b057
commit 33e3b5a0a6
3 changed files with 39 additions and 24 deletions

View file

@ -213,32 +213,41 @@ str(a)
###################################################
###################################################
#
# source("~/git/LSHTM_analysis/config/alr.R")
# source("~/git/LSHTM_analysis/config/embb.R")
# source("~/git/LSHTM_analysis/config/gid.R")
# source("~/git/LSHTM_analysis/config/katg.R")
# source("~/git/LSHTM_analysis/config/pnca.R")
# source("~/git/LSHTM_analysis/config/rpob.R")
source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/config/katg.R")
source("~/git/LSHTM_analysis/config/pnca.R")
source("~/git/LSHTM_analysis/config/rpob.R")
#
# df3_filename = paste0("/home/tanu/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv")
# df3 = read.csv(df3_filename)
df3_filename = paste0("/home/tanu/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv")
df3 = read.csv(df3_filename)
#
# # mutationinformation
# length(unique((df3$mutationinformation)))
# mutationinformation
length(unique((df3$mutationinformation)))
#
# #dm _om
# table(df3$mutation_info)
# table(df3$mutation_info_labels)
# table(df3$mutation_info_orig)
# table(df3$mutation_info_labels_orig)
#
# # test_set
# na_count <-sapply(df3, function(y) sum(length(which(is.na(y)))))
# na_count[drug]
table(df3$mutation_info)
table(df3$mutation_info_labels)
table(df3$mutation_info_orig)
table(df3$mutation_info_labels_orig)
# test_set
na_count <-sapply(df3, function(y) sum(length(which(is.na(y)))))
na_count[drug]
#
# # training set
# table(df3[drug])
table(df3[drug])
#
# # drtype: MDR and XDR
# #table(df3$drtype)  # original drtype column, i.e. the incorrect values!
# table(df3$drtype_mode_labels)
df3_complete = df3
table(df3_complete$dst_mode)
df3_actual = df3[!is.na(df3$dst), ]
table(df3_actual$dst_mode)

View file

@ -354,9 +354,10 @@ def MultModelsCl(input_df, target
y_pred = cross_val_predict(model_pipeline
, input_df
, target
#, commented out thing,
, cv=sel_cv
#, groups = group
, **njobs)
, **njobs
)
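#_tn, _fp, _fn, _tp = confusion_matrix(y_pred, y).ravel() # internally
# NOTE(review): sklearn's confusion_matrix signature is (y_true, y_pred). The call
# below passes the cross-validated predictions first and the labels second, which
# transposes the matrix and therefore swaps fp and fn in the unpacking. Confirm
# whether confusion_matrix(target, y_pred) was intended.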
tn, fp, fn, tp = confusion_matrix(y_pred, target).ravel()

View file

@ -48,8 +48,13 @@ ml_gene_drugD = {'pncA' : 'pyrazinamide'
, 'gid' : 'streptomycin'
}
gene_dataD={}
split_types = ['70_30', '80_20', 'sl']
split_data_types = ['actual', 'complete']
split_types = ['70_30',
'80_20',
'sl'
]
split_data_types = ['actual',
'complete'
]
for gene, drug in ml_gene_drugD.items():
print ('\nGene:', gene