Reran to generate merged_df3 with the correct dst for dst mutations. Modified combining_dfs_plotting.R.

This commit is contained in:
Tanushree Tunstall 2022-07-08 21:33:57 +01:00
parent 289c8913d0
commit 8079dd7b6c
6 changed files with 148 additions and 211 deletions

View file

@ -4,7 +4,7 @@
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/config/gid.R")
#source("~/git/LSHTM_analysis/config/katg.R")
source("~/git/LSHTM_analysis/config/pnca.R")
#source("~/git/LSHTM_analysis/config/pnca.R")
source("~/git/LSHTM_analysis/config/rpob.R")
#############################
@ -16,19 +16,19 @@ source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# Output files: merged data
#############################
outfile_merged_df3 = paste0(outdir, '/', tolower(gene), '_merged_df3.csv')
outfile_merged_df2 = paste0(outdir, '/', tolower(gene), '_merged_df2.csv')
#outfile_merged_df2 = paste0(outdir, '/', tolower(gene), '_merged_df2.csv')
################################################
# Add active site indication
###############################################
merged_df2$active_site = as.integer(merged_df2$position %in% active_aa_pos)
merged_df2_comp$active_site = as.integer(merged_df2_comp$position %in% active_aa_pos)
#merged_df2_comp$active_site = as.integer(merged_df2_comp$position %in% active_aa_pos)
merged_df3$active_site = as.integer(merged_df3$position %in% active_aa_pos)
merged_df3_comp$active_site = as.integer(merged_df3_comp$position %in% active_aa_pos)
#merged_df3_comp$active_site = as.integer(merged_df3_comp$position %in% active_aa_pos)
# check
cols_sel = c('mutationinformation', 'dst', 'mutation_info_labels', 'dm_om_numeric', 'dst_mode')
cols_sel = c('mutationinformation', 'mutation_info_labels', 'dm_om_numeric', 'dst', 'dst_mode')
check_mdf2 = merged_df2[, cols_sel]
check_mdf2T = table(check_mdf2$mutationinformation, check_mdf2$dst_mode)
@ -57,6 +57,8 @@ if (check12) {
stop('FAIL: Something is wrong with the dst_mode column. Quitting!')
}
table(is.na(merged_df3$dst))
#==========================
# CHECK: active site labels
#==========================
@ -137,7 +139,6 @@ if (all(a2 && b2)){
stop("FAIL: could not add drtype mode labels to merged_df3")
##quit()
}
##############################################
#===============
# CHECK: lineage
#===============
@ -179,10 +180,11 @@ if ( all( check12 && aa_check1 && aa_check2 && a1 && b1 && a2 && b2 && l1 && l2
, "\nGene:", gene)
write.csv(merged_df3, outfile_merged_df3)
write.csv(merged_df2, outfile_merged_df2)
#write.csv(merged_df2, outfile_merged_df2)
cat(paste("\nmerged df3 filename:", outfile_merged_df3
, "\nmerged df2 filename:", outfile_merged_df2))
#, "\nmerged df2 filename:", outfile_merged_df2)
))
} else{
stop("FAIL: Not able to write merged dfs. Please check numbers!")
@ -207,15 +209,6 @@ a = merged_df3[, sel]
str(a)
# write file
# outfile_merged_df3 = paste0(outdir, '/', tolower(gene), '_merged_df3.csv')
# outfile_merged_df3
# write.csv(merged_df3, outfile_merged_df3)
#
# outfile_merged_df2 = paste0(outdir, '/', tolower(gene), '_merged_df2.csv')
# outfile_merged_df2
# write.csv(merged_df2, outfile_merged_df2)
###################################################
###################################################
###################################################