reran to output merged_df3 and merged_df2 csvs from count_vars.ML

This commit is contained in:
Tanushree Tunstall 2022-05-29 03:10:51 +01:00
parent f41cd0082e
commit 650d357afc

View file

@ -4,7 +4,7 @@
#source("~/git/LSHTM_analysis/config/embb.R") #source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/gid.R") #source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/katg.R") #source("~/git/LSHTM_analysis/config/katg.R")
source("~/git/LSHTM_analysis/config/pnca.R") #source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R") #source("~/git/LSHTM_analysis/config/rpob.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
@ -80,35 +80,35 @@ write.csv(merged_df2, outfile_merged_df2)
################################################### ###################################################
################################################### ###################################################
################################################### ###################################################
#
source("~/git/LSHTM_analysis/config/alr.R") # source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R") # source("~/git/LSHTM_analysis/config/embb.R")
source("~/git/LSHTM_analysis/config/gid.R") # source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/config/katg.R") # source("~/git/LSHTM_analysis/config/katg.R")
source("~/git/LSHTM_analysis/config/pnca.R") # source("~/git/LSHTM_analysis/config/pnca.R")
source("~/git/LSHTM_analysis/config/rpob.R") # source("~/git/LSHTM_analysis/config/rpob.R")
#
df3_filename = paste0("/home/tanu/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv") # df3_filename = paste0("/home/tanu/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv")
df3 = read.csv(df3_filename) # df3 = read.csv(df3_filename)
#
# mutationinformation # # mutationinformation
length(unique((df3$mutationinformation))) # length(unique((df3$mutationinformation)))
#
#dm _om # #dm _om
table(df3$mutation_info) # table(df3$mutation_info)
table(df3$mutation_info_labels) # table(df3$mutation_info_labels)
table(df3$mutation_info_orig) # table(df3$mutation_info_orig)
table(df3$mutation_info_labels_orig) # table(df3$mutation_info_labels_orig)
#
# test_set # # test_set
na_count <-sapply(df3, function(y) sum(length(which(is.na(y))))) # na_count <-sapply(df3, function(y) sum(length(which(is.na(y)))))
na_count[drug] # na_count[drug]
#
# training set # # training set
table(df3[drug]) # table(df3[drug])
#
# drtype: MDR and XDR # # drtype: MDR and XDR
#table(df3$drtype) orig i.e. incorrect ones! # #table(df3$drtype) orig i.e. incorrect ones!
table(df3$drtype_mode_labels) # table(df3$drtype_mode_labels)
#
#