Added mmcsm_lig and provean dfs merges in combining_df.py

This commit is contained in:
Tanushree Tunstall 2022-05-25 08:50:33 +01:00
parent d8041fb494
commit a2bcc3a732
2 changed files with 220 additions and 11 deletions

View file

@ -1,13 +1,17 @@
# count numbers for ML
# Sources the active gene config file(s) and get_plotting_dfs.R, then prints
# gene, gene_match and the number of rows in merged_df3.
# NOTE(review): both embb.R and pnca.R are active below. Presumably every
# config file assigns the same variables, in which case the later source()
# call wins -- confirm that only one gene config is meant to be enabled.
#source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/katg.R")
source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
# presumably defines merged_df3 (and merged_df2) used below -- verify in
# get_plotting_dfs.R
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# print the values for a quick visual check
gene
gene_match
nrow(merged_df3)
##############################################
#=============
@ -15,7 +19,7 @@ nrow(merged_df3)
#==============
# Frequency table of mutation_info, the total across its categories, and the
# frequency table of mutation_info_orig.
table(merged_df3$mutation_info)
# NOTE(review): the total is computed twice on consecutive lines; this looks
# like a leftover duplicate -- confirm and drop one if so.
sum(table(merged_df3$mutation_info))
sum(table(merged_df3$mutation_info))
table(merged_df3$mutation_info_orig)
##############################################
#=============
@ -64,3 +68,47 @@ sum(table(merged_df3$drtype_mode_labels))
# lineage: frequency table, and total count over the lineage labels
table(merged_df3$lineage)
sum(table(merged_df3$lineage_labels))

# Write both merged data frames to CSV under outdir; each file name is the
# lower-cased gene name followed by the data frame suffix. The path is
# printed before writing so it can be checked.
outfile_merged_df3 <- paste0(outdir, '/', tolower(gene), '_merged_df3.csv')
outfile_merged_df3
write.csv(x = merged_df3, file = outfile_merged_df3)

outfile_merged_df2 <- paste0(outdir, '/', tolower(gene), '_merged_df2.csv')
outfile_merged_df2
write.csv(x = merged_df2, file = outfile_merged_df2)
###################################################
###################################################
###################################################
# Source the gene config files, then read the merged_df3 CSV for the
# configured drug/gene into df3.
# NOTE(review): all six configs are sourced back to back. If they assign the
# same variables (e.g. gene, drug), only the last one (rpob.R) takes effect
# when this section is run as a whole -- presumably one line is meant to be
# run at a time interactively; confirm.
source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/config/katg.R")
source("~/git/LSHTM_analysis/config/pnca.R")
source("~/git/LSHTM_analysis/config/rpob.R")
# NOTE(review): absolute, user-specific path; the export earlier in this
# script builds its path from outdir instead -- confirm both point to the
# same directory.
df3_filename = paste0("/home/tanu/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv")
# NOTE(review): if this file was produced by write.csv() with default
# arguments, it contains a row-name column that read.csv() will load as an
# extra first column -- verify that this is acceptable for the counts below.
df3 = read.csv(df3_filename)
# mutationinformation: number of distinct values
length(unique(df3$mutationinformation))

# dm / om: frequency tables for the current and the original mutation_info
# columns and their label columns
table(df3$mutation_info)
table(df3$mutation_info_labels)
table(df3$mutation_info_orig)
table(df3$mutation_info_labels_orig)

# test_set: count the NA entries in every column, then show the count for
# the column named by drug
na_count <- sapply(df3, function(column) sum(is.na(column)))
na_count[drug]

# training set: frequency table of the column named by drug
table(df3[drug])

# drtype: MDR and XDR
#table(df3$drtype) orig i.e. incorrect ones!
table(df3$drtype_mode_labels)