fixed source to contain plotting cols and pos_count correctly

This commit is contained in:
Tanushree Tunstall 2022-08-22 14:33:06 +01:00
parent 4147a6b90f
commit 13999a477d
6 changed files with 66 additions and 39 deletions

View file

@ -41,7 +41,7 @@ geneL_ppi2 = c("alr", "embb", "katg", "rpob")
combining_dfs_plotting <- function( my_df_u combining_dfs_plotting <- function( my_df_u
, gene_metadata , gene_metadata
, gene # ADDED #, gene # ADDED
, lig_dist_colname = '' , lig_dist_colname = ''
, lig_dist_cutoff = ''){ , lig_dist_cutoff = ''){
@ -686,8 +686,11 @@ combining_dfs_plotting <- function( my_df_u
min( merged_df3['avg_lig_affinity_scaled']); max( merged_df3['avg_lig_affinity_scaled']) min( merged_df3['avg_lig_affinity_scaled']); max( merged_df3['avg_lig_affinity_scaled'])
################################################################### ###################################################################
# Rectify pos_count column in merged_df3 #--------------------------------------------
# The one in merged_df2 is correct # merged_df3: Rectify pos_count column
# Rename existing pos_count column to reflect
# that it is correct according to merged_df2
#--------------------------------------------
nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count"); nc_pc_CHANGE nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count"); nc_pc_CHANGE
colnames(merged_df3)[nc_pc_CHANGE] = "df2_pos_count_all" colnames(merged_df3)[nc_pc_CHANGE] = "df2_pos_count_all"
@ -707,16 +710,25 @@ combining_dfs_plotting <- function( my_df_u
nc_change = which(colnames(merged_df3) == "n") nc_change = which(colnames(merged_df3) == "n")
colnames(merged_df3)[nc_change] <- "pos_count" colnames(merged_df3)[nc_change] <- "pos_count"
class(merged_df3) class(merged_df3)
####################################################################
#-------------------------------------------------
# merged_df2: Rename existing pos_count
# column to df2_pos_count_all like in above df
#-------------------------------------------------
nc_pc_CHANGE_df2 = which(colnames(merged_df2)== "pos_count"); nc_pc_CHANGE_df2
colnames(merged_df2)[nc_pc_CHANGE_df2] = "df2_pos_count_all"
head(merged_df2$pos_count)
head(merged_df2$df2_pos_count_all)
#################################################################### ####################################################################
# ADD: distance to Nucleic acid column for na genes # ADD: distance to Nucleic acid column for na genes
# already done in plotting_data
#################################################################### ####################################################################
#TODO
# Choose few columns to return as plot_df # Choose few columns to return as plot_df
merged_df3 = merged_df3[, colnames(merged_df3)%in%c(plotting_cols, "pos_count", "df2_pos_count_all")]
merged_df2 = merged_df2[, colnames(merged_df2)%in%c(plotting_cols, "df2_pos_count_all")]
#################################################################### ####################################################################
return(list( merged_df2 return(list( merged_df2

View file

@ -121,7 +121,7 @@ dm_om_wf_lf_data <- function(df
mmcsm_lig_dn2 = paste0("mmCSM-lig"); mmcsm_lig_dn2 mmcsm_lig_dn2 = paste0("mmCSM-lig"); mmcsm_lig_dn2
na_dist_dn = paste0("NA Dist(", angstroms_symbol, ")"); na_dist_dn na_dist_dn = paste0("Dist to NA (", angstroms_symbol, ")"); na_dist_dn
mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn
ppi2_dist_dn = paste0("PPI Dist(", angstroms_symbol, ")"); ppi2_dist_dn ppi2_dist_dn = paste0("PPI Dist(", angstroms_symbol, ")"); ppi2_dist_dn
@ -174,7 +174,8 @@ dm_om_wf_lf_data <- function(df
) )
display_common_colnames = c(snp_colname display_common_colnames = c(snp_colname
, mut_colname , "dst_mode" , mut_info_label_colname , mut_colname
, "dst_mode" , mut_info_label_colname
, aa_pos_colname , aa_pos_colname
, "duet_stability_change" , duet_dn , "duet_outcome" , "duet_stability_change" , duet_dn , "duet_outcome"

View file

@ -7,14 +7,10 @@ library(data.table)
library(dplyr) library(dplyr)
# ADDED: New # ADDED: New
geneL_normal = c("pnca") # geneL_normal = c("pnca")
geneL_na = c("gid", "rpob") # geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob") # geneL_ppi2 = c("alr", "embb", "katg", "rpob")
if (tolower(gene)%in%geneL_na){
infilename_nca = paste0("/home/tanu/git/Misc/mcsm_na_dist/"
, tolower(gene), "_nca_distances.csv")
}
#======================================================== #========================================================
# plotting_data(): formatting data for plots # plotting_data(): formatting data for plots
# input args: # input args:
@ -31,8 +27,9 @@ if (tolower(gene)%in%geneL_na){
plotting_data <- function(df plotting_data <- function(df
, gene # ADDED , gene # ADDED
, lig_dist_colname , lig_dist_colname = 'ligand_distance'
, lig_dist_cutoff) { , lig_dist_cutoff = 10
) {
my_df = data.frame() my_df = data.frame()
my_df_u = data.frame() my_df_u = data.frame()
my_df_u_lig = data.frame() my_df_u_lig = data.frame()
@ -89,11 +86,15 @@ plotting_data <- function(df
# all = T) # all = T)
# #
# } # }
geneL_na=c("gid","rpob")
if (tolower(gene)%in%geneL_na){ if (tolower(gene)%in%geneL_na){
infilename_nca = paste0("/home/tanu/git/Misc/mcsm_na_dist/"
, tolower(gene), "_nca_distances.csv")
distcol_nca_name = read.csv(infilename_nca, header = F) distcol_nca_name = read.csv(infilename_nca, header = F)
if (tolower(gene)=='rpob'){ if (tolower(gene)=='rpob'){
print('WARNING: running special-case handler for rpoB') print('WARNING: running special-case handler for rpoB')
# create 5uhc equivalent column for mutationinformation # create 5uhc equivalent column for mutationinformation

View file

@ -88,7 +88,7 @@ cat("\nDim of meta data file: ", dim(gene_metadata))
all_plot_dfs = combining_dfs_plotting(my_df_u all_plot_dfs = combining_dfs_plotting(my_df_u
, gene_metadata , gene_metadata
, gene = gene # ADDED #, gene = gene # ADDED
, lig_dist_colname = LigDist_colname , lig_dist_colname = LigDist_colname
, lig_dist_cutoff = LigDist_cutoff) , lig_dist_cutoff = LigDist_cutoff)

View file

@ -1,6 +1,4 @@
geneL_normal = c("pnca") # Initialise the required dfs based on gene name
geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
# LigDist_colname # from globals used # LigDist_colname # from globals used
# ppi2Dist_colname #from globals used # ppi2Dist_colname #from globals used
@ -11,7 +9,7 @@ common_cols = c("mutationinformation"
, drug, "drug_name" , drug, "drug_name"
, "mutation", "mutation_info" , "mutation", "mutation_info"
, "wild_type", "mutant_type", "position" , "wild_type", "mutant_type", "position"
, "pos_count" #, "pos_count", "df2_pos_count_all"
, "snp_frequency" , "snp_frequency"
, "total_id_ucount" , "total_id_ucount"
, "drtype", "drtype_mode", "drtype_max" , "drtype", "drtype_mode", "drtype_max"
@ -63,7 +61,7 @@ common_outcome_affinity_cols = c( "ligand_outcome"
#====================================================== #======================================================
# Plotting cols + affinity cols: conditional on gene # Plotting cols + affinity cols: conditional on gene
#====================================================== #======================================================
if (tolower(gene)%in%geneL_normal){ if (tolower(gene)%in%c("pnca")){
plotting_cols = common_cols plotting_cols = common_cols
raw_affinity_cols = common_raw_affinity_cols raw_affinity_cols = common_raw_affinity_cols
@ -73,35 +71,50 @@ if (tolower(gene)%in%geneL_normal){
} }
# ppi2 genes # ppi2 genes
if (tolower(gene)%in%geneL_ppi2){ if (tolower(gene)%in%c("alr", "embb", "katg")){
plotting_cols = c(common_cols, plotting_cols = c(common_cols,
ppi2Dist_colname, ppi2Dist_colname,
"mcsm_ppi2_affinity", "mcsm_ppi2_scaled", "mcsm_ppi2_outcome") "mcsm_ppi2_affinity", "mcsm_ppi2_scaled", "mcsm_ppi2_outcome")
raw_affinity_cols = c(common_raw_affinity_cols , "mcsm_ppi2_affinity") raw_affinity_cols = c(common_raw_affinity_cols , "mcsm_ppi2_affinity")
scaled_affinity_cols = c(common_scaled_affinity_cols , "mcsm_ppi2_scaled" ) scaled_affinity_cols = c(common_scaled_affinity_cols , "mcsm_ppi2_scaled" )
outcome_affinity_cols = c(common_outcome_affinity_cols , "mcsm_ppi2_outcome") outcome_affinity_cols = c(common_outcome_affinity_cols , "mcsm_ppi2_outcome")
affinity_dist_colnames = c(LigDist_colname, ppi2Dist_colname) affinity_dist_colnames = c(LigDist_colname, ppi2Dist_colname)
} }
#na_genes #na_genes
if (tolower(gene)%in%geneL_na){ if (tolower(gene)%in%c("gid")){
plotting_cols = c(common_cols, plotting_cols = c(common_cols,
naDist_colname, naDist_colname,
"mcsm_na_affinity", "mcsm_na_scaled", "mcsm_na_outcome") "mcsm_na_affinity", "mcsm_na_scaled", "mcsm_na_outcome")
raw_affinity_cols = c(common_raw_affinity_cols , "mcsm_na_affinity") raw_affinity_cols = c(common_raw_affinity_cols , "mcsm_na_affinity")
scaled_affinity_cols = c(common_scaled_affinity_cols , "mcsm_na_scaled") scaled_affinity_cols = c(common_scaled_affinity_cols , "mcsm_na_scaled")
outcome_affinity_cols = c(common_outcome_affinity_cols , "mcsm_na_outcome") outcome_affinity_cols = c(common_outcome_affinity_cols , "mcsm_na_outcome")
affinity_dist_colnames = c(LigDist_colname, ppi2Dist_colname, naDist_colname) affinity_dist_colnames = c(LigDist_colname, naDist_colname)
} }
if (tolower(gene)%in%c("rpob")){ if (tolower(gene)%in%c("rpob")){
plotting_cols = c(plotting_cols, "X5uhc_position","X5uhc_offset") #plotting_cols = c(plotting_cols, "X5uhc_position","X5uhc_offset")
plotting_cols = c(common_cols,
ppi2Dist_colname,
"mcsm_ppi2_affinity", "mcsm_ppi2_scaled", "mcsm_ppi2_outcome",
naDist_colname,
"mcsm_na_affinity", "mcsm_na_scaled", "mcsm_na_outcome",
"X5uhc_position","X5uhc_offset")
raw_affinity_cols = c(common_raw_affinity_cols , "mcsm_ppi2_affinity", "mcsm_na_affinity")
scaled_affinity_cols = c(common_scaled_affinity_cols , "mcsm_ppi2_scaled" , "mcsm_na_scaled")
outcome_affinity_cols = c(common_outcome_affinity_cols , "mcsm_ppi2_outcome", "mcsm_na_outcome")
outcome_affinity_cols = c(common_outcome_affinity_cols , "mcsm_na_outcome")
affinity_dist_colnames = c(LigDist_colname, ppi2Dist_colname, naDist_colname)
} }
#======================================= #=======================================
# All: affinity cols: based on above condition # All: affinity cols: based on above condition
#======================================== #========================================

View file

@ -57,7 +57,7 @@ merged_df3 = merged_df3[, !colnames(merged_df3)%in%c("pos_count")]
head(merged_df3$pos_count) head(merged_df3$pos_count)
df3 = merged_df3[, colnames(merged_df3)%in%plotting_cols] df3 = merged_df3[, colnames(merged_df3)%in%plotting_cols]
#"nca_distance"%in%colnames(df3) "nca_distance"%in%colnames(df3)
#======= #=======
# output # output