Output merged_df3 and merged_df2 files for all gene targets, with active-site residues annotated
This commit is contained in:
parent
bff16fc219
commit
51069fdb76
1 changed files with 25 additions and 44 deletions
|
@ -5,61 +5,42 @@
|
||||||
# pyrazinamide: 0 and 1, loss of data
|
# pyrazinamide: 0 and 1, loss of data
|
||||||
# mutation_info_labels: DM and OM, full data
|
# mutation_info_labels: DM and OM, full data
|
||||||
##################################################
|
##################################################
|
||||||
# ONLY ONCE
|
|
||||||
#source("~/git/LSHTM_analysis/config/pnca.R")
|
|
||||||
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
|
||||||
#write.csv(colnames(merged_df3), "data_colnames.csv")
|
|
||||||
#---------------------------------------------------
|
|
||||||
colnames_order_pnca = read.csv("~/git/ML_AI_training/ml_data/colnames_order.csv"
|
|
||||||
, header = F)
|
|
||||||
# reorder columns by name
|
|
||||||
colnames_order_pnca <- colnames_order_pnca$V1
|
|
||||||
###################################################
|
###################################################
|
||||||
#config_gene = c("alr", "embb", "gid", "katg", "pnca", "rpob")
|
#config_gene = c("alr", "embb", "gid", "katg", "pnca", "rpob")
|
||||||
#config_gene = c("alr", "embb")
|
#config_gene = c("alr", "embb")
|
||||||
#sapply(config_gene, function(x) source(paste0("~/git/LSHTM_analysis/config/", x, ".R")), USE.NAMES = F)
|
#sapply(config_gene, function(x) source(paste0("~/git/LSHTM_analysis/config/", x, ".R")), USE.NAMES = F)
|
||||||
|
#----------------------------------------------------
|
||||||
#source("~/git/LSHTM_analysis/config/alr.R")
|
#source("~/git/LSHTM_analysis/config/alr.R")
|
||||||
# FIXME: "cycloserine" "mcsm_ppi2_affinity" "mcsm_ppi2_scaled" "mcsm_ppi2_outcome" "interface_dist"
|
#source("~/git/LSHTM_analysis/config/embb.R")
|
||||||
# source("~/git/LSHTM_analysis/config/embb.R")
|
#source("~/git/LSHTM_analysis/config/gid.R")
|
||||||
# source("~/git/LSHTM_analysis/config/gid.R")
|
#source("~/git/LSHTM_analysis/config/katg.R")
|
||||||
# source("~/git/LSHTM_analysis/config/katg.R")
|
#source("~/git/LSHTM_analysis/config/pnca.R")
|
||||||
source("~/git/LSHTM_analysis/config/pnca.R")
|
source("~/git/LSHTM_analysis/config/rpob.R")
|
||||||
# source("~/git/LSHTM_analysis/config/rpob.R")
|
#----------------------------------------------------
|
||||||
##################################################
|
|
||||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||||
|
|
||||||
######################################################
|
######################################################
|
||||||
|
gene; drug
|
||||||
|
|
||||||
|
merged_df3$active_aa_pos = ifelse(merged_df3$position %in% active_aa_pos , 1, 0)
|
||||||
|
table(merged_df3$active_aa_pos)
|
||||||
|
|
||||||
mdf3_outName = paste0(outdir, "/", tolower(gene), "_merged_df3.csv")
|
mdf3_outName = paste0(outdir, "/", tolower(gene), "_merged_df3.csv")
|
||||||
mdf3_outName
|
mdf3_outName
|
||||||
|
|
||||||
if( (length(colnames_order) == ncol(merged_df3)) && (all(colnames_order %in%colnames(merged_df3))) ){
|
cat("\nWriting output file:", mdf3_outName)
|
||||||
cat("\nProceeding with rearranging columns in merged_df3")
|
write.csv(merged_df3, mdf3_outName, row.names = F)
|
||||||
merged_df3_o = merged_df3[ , colnames_order]
|
cat("\nnrows:" , nrow(merged_df3)
|
||||||
cat("\nWriting output file:", mdf3_outName)
|
, "\nncols:" , ncol(merged_df3))
|
||||||
write.csv(merged_df3_o, mdf3_outName, row.names = F)
|
|
||||||
cat("\nnrows:" , nrow(merged_df3_o)
|
#=========================================================
|
||||||
, "\nncols:" , ncol(merged_df3_o))
|
merged_df2$active_aa_pos = ifelse(merged_df2$position %in% active_aa_pos , 1, 0)
|
||||||
|
table(merged_df2$active_aa_pos)
|
||||||
}else
|
|
||||||
cat("length mismatch:"
|
|
||||||
, colnames(merged_df3)[!colnames(merged_df3)%in%(colnames_order )]
|
|
||||||
)
|
|
||||||
|
|
||||||
mdf2_outName = paste0(outdir, "/", tolower(gene), "_merged_df2.csv")
|
mdf2_outName = paste0(outdir, "/", tolower(gene), "_merged_df2.csv")
|
||||||
mdf2_outName
|
mdf2_outName
|
||||||
|
|
||||||
if( (length(colnames_order) == ncol(merged_df2)) && (all(colnames_order %in%colnames(merged_df2))) ){
|
cat("\nWriting output file:", mdf2_outName)
|
||||||
cat("\nProceeding with rearranging columns in merged_df3")
|
write.csv(merged_df2, mdf2_outName, row.names = F)
|
||||||
merged_df2_o = merged_df2[ , colnames_order]
|
cat("\nnrows:" , nrow(merged_df2)
|
||||||
cat("\nWriting output file:", mdf2_outName)
|
, "\nncols:" , ncol(merged_df2))
|
||||||
write.csv(merged_df2_o, mdf2_outName, row.names = F)
|
|
||||||
cat("\nnrows:" , nrow(merged_df2_o)
|
|
||||||
, "\nncols:" , ncol(merged_df2_o))
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue