ML_AI_training/ml_data/.Rhistory

335 lines
15 KiB
Text

# ---- alr: export merged_df3 and merged_df2 as CSV ----
# The config script and get_plotting_dfs.R are not visible from this file;
# they are presumed to provide `gene`, `outdir`, `merged_df2`, `merged_df3`
# (and possibly `colnames_order`) -- TODO confirm.
source("~/git/LSHTM_analysis/config/alr.R")
# source("~/git/LSHTM_analysis/config/embb.R")
# source("~/git/LSHTM_analysis/config/gid.R")
# source("~/git/LSHTM_analysis/config/katg.R")
# source("~/git/LSHTM_analysis/config/pnca.R")
# source("~/git/LSHTM_analysis/config/rpob.R")
##################################################
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
######################################################
mdf3_outName <- paste0(outdir, "/", tolower(gene), "_merged_df3.csv")
mdf3_outName

# Reorder the columns only when `colnames_order` has as many entries as
# merged_df3 has columns and every entry is an existing column name.
if ((length(colnames_order) == ncol(merged_df3)) &&
    all(colnames_order %in% colnames(merged_df3))) {
  cat("\nProceeding with rearranging columns in merged_df3")
  merged_df3_o <- merged_df3[, colnames_order]
  cat("\nWriting output file:", mdf3_outName)
  # FALSE rather than F: `F` is an ordinary variable that can be rebound.
  write.csv(merged_df3_o, mdf3_outName, row.names = FALSE)
  cat("\nnrows:", nrow(merged_df3_o),
      "\nncols:", ncol(merged_df3_o))
} else {
  # Report the merged_df3 columns that colnames_order does not mention.
  cat("length mismatch:",
      colnames(merged_df3)[!colnames(merged_df3) %in% colnames_order])
}

# NOTE(review): this unconditional write targets the same path as the
# reordered write above, so it replaces that file with the un-reordered
# merged_df3 -- confirm this is intended.
mdf3_outName <- paste0(outdir, "/", tolower(gene), "_merged_df3.csv")
mdf3_outName
cat("\nWriting output file:", mdf3_outName)
write.csv(merged_df3, mdf3_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df3),
    "\nncols:", ncol(merged_df3))
#=========================================================
mdf2_outName <- paste0(outdir, "/", tolower(gene), "_merged_df2.csv")
mdf2_outName
cat("\nWriting output file:", mdf2_outName)
write.csv(merged_df2, mdf2_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df2),
    "\nncols:", ncol(merged_df2))
###################################################
#config_gene = c("alr", "embb", "gid", "katg", "pnca", "rpob")
#config_gene = c("alr", "embb")
#sapply(config_gene, function(x) source(paste0("~/git/LSHTM_analysis/config/", x, ".R")), USE.NAMES = F)
#----------------------------------------------------
# source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
# source("~/git/LSHTM_analysis/config/gid.R")
# source("~/git/LSHTM_analysis/config/katg.R")
# source("~/git/LSHTM_analysis/config/pnca.R")
# source("~/git/LSHTM_analysis/config/rpob.R")
#----------------------------------------------------
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")

# ---- Exploration: flag rows of merged_df3 whose position is in active_aa_pos ----
# `active_aa_pos` and `merged_df3` are presumed to come from the sourced
# scripts above -- TODO confirm, they are not defined in this file.
active_aa_pos
length(active_aa_pos)

# One logical per row of merged_df3. `%in%` is vectorised and never yields NA,
# so it must feed ifelse()/indexing rather than a scalar `if` condition.
is_active <- merged_df3$position %in% active_aa_pos
is_active

# Row indices inside / outside the active set, and the matching positions.
which(is_active)
which(!is_active)
merged_df3$position[is_active]

# Spot-check a single row: `$position` is a vector (one index), whereas
# selecting a row of the data frame needs the `[row, ]` form.
merged_df3$position[209]
merged_df3[209, ]

# Positions in the data that are not in active_aa_pos ...
merged_df3$position[!is_active]
# ... and entries of active_aa_pos that never occur in the data.
active_aa_pos[!active_aa_pos %in% merged_df3$position]

# Store the membership as a numeric 1/0 column and inspect it.
merged_df3$active_aa_pos <- ifelse(is_active, 1, 0)
str(merged_df3$active_aa_pos)
table(merged_df3$active_aa_pos)
###################################################
#config_gene = c("alr", "embb", "gid", "katg", "pnca", "rpob")
#config_gene = c("alr", "embb")
#sapply(config_gene, function(x) source(paste0("~/git/LSHTM_analysis/config/", x, ".R")), USE.NAMES = F)
#----------------------------------------------------
# source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
# source("~/git/LSHTM_analysis/config/gid.R")
# source("~/git/LSHTM_analysis/config/katg.R")
# source("~/git/LSHTM_analysis/config/pnca.R")
# source("~/git/LSHTM_analysis/config/rpob.R")
#----------------------------------------------------
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")

# Numeric 1/0 flag: is the row's position listed in active_aa_pos?
merged_df3$active_aa_pos <- ifelse(merged_df3$position %in% active_aa_pos, 1, 0)
#str(merged_df3$active_aa_pos)
table(merged_df3$active_aa_pos)

# Rebuild the output path from the freshly sourced config. Without this the
# write below reuses whatever `mdf3_outName` was left in the session by an
# earlier gene run (unless the sourced scripts set it -- not visible here),
# which would send this gene's data to another gene's file.
mdf3_outName <- paste0(outdir, "/", tolower(gene), "_merged_df3.csv")
cat("\nWriting output file:", mdf3_outName)
# FALSE rather than F: `F` is an ordinary variable that can be rebound.
write.csv(merged_df3, mdf3_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df3),
    "\nncols:", ncol(merged_df3))

# Same flag on merged_df2 (only tabulated here, not written out).
merged_df2$active_aa_pos <- ifelse(merged_df2$position %in% active_aa_pos, 1, 0)
table(merged_df3$active_aa_pos)
table(merged_df2$active_aa_pos)
###################################################
#config_gene = c("alr", "embb", "gid", "katg", "pnca", "rpob")
#config_gene = c("alr", "embb")
#sapply(config_gene, function(x) source(paste0("~/git/LSHTM_analysis/config/", x, ".R")), USE.NAMES = F)
#----------------------------------------------------
# ---- embb: add active-site flag and export merged_df3 / merged_df2 ----
# The sourced scripts are presumed to provide `gene`, `outdir`,
# `active_aa_pos`, `merged_df2` and `merged_df3` -- TODO confirm.
# source("~/git/LSHTM_analysis/config/alr.R")
source("~/git/LSHTM_analysis/config/embb.R")
# source("~/git/LSHTM_analysis/config/gid.R")
# source("~/git/LSHTM_analysis/config/katg.R")
# source("~/git/LSHTM_analysis/config/pnca.R")
# source("~/git/LSHTM_analysis/config/rpob.R")
#----------------------------------------------------
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
######################################################
# Numeric 1/0 flag: is the row's position listed in active_aa_pos?
merged_df3$active_aa_pos <- ifelse(merged_df3$position %in% active_aa_pos, 1, 0)
table(merged_df3$active_aa_pos)
mdf3_outName <- paste0(outdir, "/", tolower(gene), "_merged_df3.csv")
mdf3_outName
cat("\nWriting output file:", mdf3_outName)
# FALSE rather than F: `F` is an ordinary variable that can be rebound.
write.csv(merged_df3, mdf3_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df3),
    "\nncols:", ncol(merged_df3))
#=========================================================
merged_df2$active_aa_pos <- ifelse(merged_df2$position %in% active_aa_pos, 1, 0)
table(merged_df2$active_aa_pos)
mdf2_outName <- paste0(outdir, "/", tolower(gene), "_merged_df2.csv")
mdf2_outName
cat("\nWriting output file:", mdf2_outName)
write.csv(merged_df2, mdf2_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df2),
    "\nncols:", ncol(merged_df2))
###################################################
#config_gene = c("alr", "embb", "gid", "katg", "pnca", "rpob")
#config_gene = c("alr", "embb")
#sapply(config_gene, function(x) source(paste0("~/git/LSHTM_analysis/config/", x, ".R")), USE.NAMES = F)
#----------------------------------------------------
# ---- alr: add active-site flag and export merged_df3 / merged_df2 ----
# The sourced scripts are presumed to provide `gene`, `outdir`,
# `active_aa_pos`, `merged_df2` and `merged_df3` -- TODO confirm.
source("~/git/LSHTM_analysis/config/alr.R")
# source("~/git/LSHTM_analysis/config/embb.R")
# source("~/git/LSHTM_analysis/config/gid.R")
# source("~/git/LSHTM_analysis/config/katg.R")
# source("~/git/LSHTM_analysis/config/pnca.R")
# source("~/git/LSHTM_analysis/config/rpob.R")
#----------------------------------------------------
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
######################################################
# Numeric 1/0 flag: is the row's position listed in active_aa_pos?
merged_df3$active_aa_pos <- ifelse(merged_df3$position %in% active_aa_pos, 1, 0)
table(merged_df3$active_aa_pos)
mdf3_outName <- paste0(outdir, "/", tolower(gene), "_merged_df3.csv")
mdf3_outName
cat("\nWriting output file:", mdf3_outName)
# FALSE rather than F: `F` is an ordinary variable that can be rebound.
write.csv(merged_df3, mdf3_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df3),
    "\nncols:", ncol(merged_df3))
#=========================================================
merged_df2$active_aa_pos <- ifelse(merged_df2$position %in% active_aa_pos, 1, 0)
table(merged_df2$active_aa_pos)
mdf2_outName <- paste0(outdir, "/", tolower(gene), "_merged_df2.csv")
mdf2_outName
cat("\nWriting output file:", mdf2_outName)
write.csv(merged_df2, mdf2_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df2),
    "\nncols:", ncol(merged_df2))
###################################################
#config_gene = c("alr", "embb", "gid", "katg", "pnca", "rpob")
#config_gene = c("alr", "embb")
#sapply(config_gene, function(x) source(paste0("~/git/LSHTM_analysis/config/", x, ".R")), USE.NAMES = F)
#----------------------------------------------------
# ---- gid: add active-site flag and export merged_df3 / merged_df2 ----
# The sourced scripts are presumed to provide `gene`, `drug`, `outdir`,
# `active_aa_pos`, `merged_df2` and `merged_df3` -- TODO confirm.
#source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/embb.R")
source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
#----------------------------------------------------
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# Echo which gene/drug the session is currently configured for.
gene
drug
######################################################
# Numeric 1/0 flag: is the row's position listed in active_aa_pos?
merged_df3$active_aa_pos <- ifelse(merged_df3$position %in% active_aa_pos, 1, 0)
table(merged_df3$active_aa_pos)
mdf3_outName <- paste0(outdir, "/", tolower(gene), "_merged_df3.csv")
mdf3_outName
cat("\nWriting output file:", mdf3_outName)
# FALSE rather than F: `F` is an ordinary variable that can be rebound.
write.csv(merged_df3, mdf3_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df3),
    "\nncols:", ncol(merged_df3))
#=========================================================
merged_df2$active_aa_pos <- ifelse(merged_df2$position %in% active_aa_pos, 1, 0)
table(merged_df2$active_aa_pos)
mdf2_outName <- paste0(outdir, "/", tolower(gene), "_merged_df2.csv")
mdf2_outName
cat("\nWriting output file:", mdf2_outName)
write.csv(merged_df2, mdf2_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df2),
    "\nncols:", ncol(merged_df2))
###################################################
#config_gene = c("alr", "embb", "gid", "katg", "pnca", "rpob")
#config_gene = c("alr", "embb")
#sapply(config_gene, function(x) source(paste0("~/git/LSHTM_analysis/config/", x, ".R")), USE.NAMES = F)
#----------------------------------------------------
# ---- katg: add active-site flag and export merged_df3 / merged_df2 ----
# The sourced scripts are presumed to provide `gene`, `drug`, `outdir`,
# `active_aa_pos`, `merged_df2` and `merged_df3` -- TODO confirm.
#source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
#----------------------------------------------------
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
######################################################
# Echo which gene/drug the session is currently configured for.
gene; drug
# Numeric 1/0 flag: is the row's position listed in active_aa_pos?
merged_df3$active_aa_pos <- ifelse(merged_df3$position %in% active_aa_pos, 1, 0)
table(merged_df3$active_aa_pos)
mdf3_outName <- paste0(outdir, "/", tolower(gene), "_merged_df3.csv")
mdf3_outName
cat("\nWriting output file:", mdf3_outName)
# FALSE rather than F: `F` is an ordinary variable that can be rebound.
write.csv(merged_df3, mdf3_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df3),
    "\nncols:", ncol(merged_df3))
#=========================================================
merged_df2$active_aa_pos <- ifelse(merged_df2$position %in% active_aa_pos, 1, 0)
table(merged_df2$active_aa_pos)
mdf2_outName <- paste0(outdir, "/", tolower(gene), "_merged_df2.csv")
mdf2_outName
cat("\nWriting output file:", mdf2_outName)
write.csv(merged_df2, mdf2_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df2),
    "\nncols:", ncol(merged_df2))
###################################################
#config_gene = c("alr", "embb", "gid", "katg", "pnca", "rpob")
#config_gene = c("alr", "embb")
#sapply(config_gene, function(x) source(paste0("~/git/LSHTM_analysis/config/", x, ".R")), USE.NAMES = F)
#----------------------------------------------------
# ---- pnca: add active-site flag and export merged_df3 / merged_df2 ----
# The sourced scripts are presumed to provide `gene`, `drug`, `outdir`,
# `active_aa_pos`, `merged_df2` and `merged_df3` -- TODO confirm.
#source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/katg.R")
source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/rpob.R")
#----------------------------------------------------
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
######################################################
# Echo which gene/drug the session is currently configured for.
gene; drug
# Numeric 1/0 flag: is the row's position listed in active_aa_pos?
merged_df3$active_aa_pos <- ifelse(merged_df3$position %in% active_aa_pos, 1, 0)
table(merged_df3$active_aa_pos)
mdf3_outName <- paste0(outdir, "/", tolower(gene), "_merged_df3.csv")
mdf3_outName
cat("\nWriting output file:", mdf3_outName)
# FALSE rather than F: `F` is an ordinary variable that can be rebound.
write.csv(merged_df3, mdf3_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df3),
    "\nncols:", ncol(merged_df3))
#=========================================================
merged_df2$active_aa_pos <- ifelse(merged_df2$position %in% active_aa_pos, 1, 0)
table(merged_df2$active_aa_pos)
mdf2_outName <- paste0(outdir, "/", tolower(gene), "_merged_df2.csv")
mdf2_outName
cat("\nWriting output file:", mdf2_outName)
write.csv(merged_df2, mdf2_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df2),
    "\nncols:", ncol(merged_df2))
###################################################
#config_gene = c("alr", "embb", "gid", "katg", "pnca", "rpob")
#config_gene = c("alr", "embb")
#sapply(config_gene, function(x) source(paste0("~/git/LSHTM_analysis/config/", x, ".R")), USE.NAMES = F)
#----------------------------------------------------
# ---- rpob: add active-site flag and export merged_df3 / merged_df2 ----
# The sourced scripts are presumed to provide `gene`, `drug`, `outdir`,
# `active_aa_pos`, `merged_df2` and `merged_df3` -- TODO confirm.
#source("~/git/LSHTM_analysis/config/alr.R")
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/katg.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
source("~/git/LSHTM_analysis/config/rpob.R")
#----------------------------------------------------
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
######################################################
# Echo which gene/drug the session is currently configured for.
gene; drug
# Numeric 1/0 flag: is the row's position listed in active_aa_pos?
merged_df3$active_aa_pos <- ifelse(merged_df3$position %in% active_aa_pos, 1, 0)
table(merged_df3$active_aa_pos)
mdf3_outName <- paste0(outdir, "/", tolower(gene), "_merged_df3.csv")
mdf3_outName
cat("\nWriting output file:", mdf3_outName)
# FALSE rather than F: `F` is an ordinary variable that can be rebound.
write.csv(merged_df3, mdf3_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df3),
    "\nncols:", ncol(merged_df3))
#=========================================================
merged_df2$active_aa_pos <- ifelse(merged_df2$position %in% active_aa_pos, 1, 0)
table(merged_df2$active_aa_pos)
mdf2_outName <- paste0(outdir, "/", tolower(gene), "_merged_df2.csv")
mdf2_outName
cat("\nWriting output file:", mdf2_outName)
write.csv(merged_df2, mdf2_outName, row.names = FALSE)
cat("\nnrows:", nrow(merged_df2),
    "\nncols:", ncol(merged_df2))