fixed source to contain plotting cols and pos_count correctly
This commit is contained in:
parent
4147a6b90f
commit
13999a477d
6 changed files with 66 additions and 39 deletions
|
@ -41,7 +41,7 @@ geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||||
|
|
||||||
combining_dfs_plotting <- function( my_df_u
|
combining_dfs_plotting <- function( my_df_u
|
||||||
, gene_metadata
|
, gene_metadata
|
||||||
, gene # ADDED
|
#, gene # ADDED
|
||||||
, lig_dist_colname = ''
|
, lig_dist_colname = ''
|
||||||
, lig_dist_cutoff = ''){
|
, lig_dist_cutoff = ''){
|
||||||
|
|
||||||
|
@ -686,8 +686,11 @@ combining_dfs_plotting <- function( my_df_u
|
||||||
min( merged_df3['avg_lig_affinity_scaled']); max( merged_df3['avg_lig_affinity_scaled'])
|
min( merged_df3['avg_lig_affinity_scaled']); max( merged_df3['avg_lig_affinity_scaled'])
|
||||||
|
|
||||||
###################################################################
|
###################################################################
|
||||||
# Rectify pos_count column in merged_df3
|
#--------------------------------------------
|
||||||
# The one in merged_df2 is correct
|
# merged_df3: Rectify pos_count column
|
||||||
|
# Rename existing pos_count column to reflect
|
||||||
|
# that it is correct according to merged_df2
|
||||||
|
#--------------------------------------------
|
||||||
|
|
||||||
nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count"); nc_pc_CHANGE
|
nc_pc_CHANGE = which(colnames(merged_df3)== "pos_count"); nc_pc_CHANGE
|
||||||
colnames(merged_df3)[nc_pc_CHANGE] = "df2_pos_count_all"
|
colnames(merged_df3)[nc_pc_CHANGE] = "df2_pos_count_all"
|
||||||
|
@ -707,16 +710,25 @@ combining_dfs_plotting <- function( my_df_u
|
||||||
nc_change = which(colnames(merged_df3) == "n")
|
nc_change = which(colnames(merged_df3) == "n")
|
||||||
colnames(merged_df3)[nc_change] <- "pos_count"
|
colnames(merged_df3)[nc_change] <- "pos_count"
|
||||||
class(merged_df3)
|
class(merged_df3)
|
||||||
|
|
||||||
|
####################################################################
|
||||||
|
#-------------------------------------------------
|
||||||
|
# merged_df2: Rename existing pos_count
|
||||||
|
# column to df2_pos_count_all like in above df
|
||||||
|
#-------------------------------------------------
|
||||||
|
nc_pc_CHANGE_df2 = which(colnames(merged_df2)== "pos_count"); nc_pc_CHANGE_df2
|
||||||
|
colnames(merged_df2)[nc_pc_CHANGE_df2] = "df2_pos_count_all"
|
||||||
|
head(merged_df2$pos_count)
|
||||||
|
head(merged_df2$df2_pos_count_all)
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
# ADD: distance to Nucleic acid column for na genes
|
# ADD: distance to Nucleic acid column for na genes
|
||||||
|
# already done in plotting_data
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
#TODO
|
|
||||||
# Choose few columns to return as plot_df
|
# Choose few columns to return as plot_df
|
||||||
|
|
||||||
|
merged_df3 = merged_df3[, colnames(merged_df3)%in%c(plotting_cols, "pos_count", "df2_pos_count_all")]
|
||||||
|
merged_df2 = merged_df2[, colnames(merged_df2)%in%c(plotting_cols, "df2_pos_count_all")]
|
||||||
|
|
||||||
####################################################################
|
####################################################################
|
||||||
return(list( merged_df2
|
return(list( merged_df2
|
||||||
|
|
|
@ -121,7 +121,7 @@ dm_om_wf_lf_data <- function(df
|
||||||
mmcsm_lig_dn2 = paste0("mmCSM-lig"); mmcsm_lig_dn2
|
mmcsm_lig_dn2 = paste0("mmCSM-lig"); mmcsm_lig_dn2
|
||||||
|
|
||||||
|
|
||||||
na_dist_dn = paste0("NA Dist(", angstroms_symbol, ")"); na_dist_dn
|
na_dist_dn = paste0("Dist to NA (", angstroms_symbol, ")"); na_dist_dn
|
||||||
mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn
|
mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn
|
||||||
|
|
||||||
ppi2_dist_dn = paste0("PPI Dist(", angstroms_symbol, ")"); ppi2_dist_dn
|
ppi2_dist_dn = paste0("PPI Dist(", angstroms_symbol, ")"); ppi2_dist_dn
|
||||||
|
@ -174,7 +174,8 @@ dm_om_wf_lf_data <- function(df
|
||||||
)
|
)
|
||||||
|
|
||||||
display_common_colnames = c(snp_colname
|
display_common_colnames = c(snp_colname
|
||||||
, mut_colname , "dst_mode" , mut_info_label_colname
|
, mut_colname
|
||||||
|
, "dst_mode" , mut_info_label_colname
|
||||||
, aa_pos_colname
|
, aa_pos_colname
|
||||||
|
|
||||||
, "duet_stability_change" , duet_dn , "duet_outcome"
|
, "duet_stability_change" , duet_dn , "duet_outcome"
|
||||||
|
|
|
@ -7,14 +7,10 @@ library(data.table)
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
|
|
||||||
# ADDED: New
|
# ADDED: New
|
||||||
geneL_normal = c("pnca")
|
# geneL_normal = c("pnca")
|
||||||
geneL_na = c("gid", "rpob")
|
# geneL_na = c("gid", "rpob")
|
||||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
# geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||||
|
|
||||||
if (tolower(gene)%in%geneL_na){
|
|
||||||
infilename_nca = paste0("/home/tanu/git/Misc/mcsm_na_dist/"
|
|
||||||
, tolower(gene), "_nca_distances.csv")
|
|
||||||
}
|
|
||||||
#========================================================
|
#========================================================
|
||||||
# plotting_data(): formatting data for plots
|
# plotting_data(): formatting data for plots
|
||||||
# input args:
|
# input args:
|
||||||
|
@ -31,8 +27,9 @@ if (tolower(gene)%in%geneL_na){
|
||||||
|
|
||||||
plotting_data <- function(df
|
plotting_data <- function(df
|
||||||
, gene # ADDED
|
, gene # ADDED
|
||||||
, lig_dist_colname
|
, lig_dist_colname = 'ligand_distance'
|
||||||
, lig_dist_cutoff) {
|
, lig_dist_cutoff = 10
|
||||||
|
) {
|
||||||
my_df = data.frame()
|
my_df = data.frame()
|
||||||
my_df_u = data.frame()
|
my_df_u = data.frame()
|
||||||
my_df_u_lig = data.frame()
|
my_df_u_lig = data.frame()
|
||||||
|
@ -89,11 +86,15 @@ plotting_data <- function(df
|
||||||
# all = T)
|
# all = T)
|
||||||
#
|
#
|
||||||
# }
|
# }
|
||||||
|
geneL_na=c("gid","rpob")
|
||||||
|
|
||||||
if (tolower(gene)%in%geneL_na){
|
if (tolower(gene)%in%geneL_na){
|
||||||
|
infilename_nca = paste0("/home/tanu/git/Misc/mcsm_na_dist/"
|
||||||
|
, tolower(gene), "_nca_distances.csv")
|
||||||
distcol_nca_name = read.csv(infilename_nca, header = F)
|
distcol_nca_name = read.csv(infilename_nca, header = F)
|
||||||
|
|
||||||
if (tolower(gene)=='rpob'){
|
if (tolower(gene)=='rpob'){
|
||||||
|
|
||||||
print('WARNING: running special-case handler for rpoB')
|
print('WARNING: running special-case handler for rpoB')
|
||||||
|
|
||||||
# create 5uhc equivalent column for mutationinformation
|
# create 5uhc equivalent column for mutationinformation
|
||||||
|
|
|
@ -88,7 +88,7 @@ cat("\nDim of meta data file: ", dim(gene_metadata))
|
||||||
|
|
||||||
all_plot_dfs = combining_dfs_plotting(my_df_u
|
all_plot_dfs = combining_dfs_plotting(my_df_u
|
||||||
, gene_metadata
|
, gene_metadata
|
||||||
, gene = gene # ADDED
|
#, gene = gene # ADDED
|
||||||
, lig_dist_colname = LigDist_colname
|
, lig_dist_colname = LigDist_colname
|
||||||
, lig_dist_cutoff = LigDist_cutoff)
|
, lig_dist_cutoff = LigDist_cutoff)
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,4 @@
|
||||||
geneL_normal = c("pnca")
|
# Initialise the required dfs based on gene name
|
||||||
geneL_na = c("gid", "rpob")
|
|
||||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
|
||||||
|
|
||||||
# LigDist_colname # from globals used
|
# LigDist_colname # from globals used
|
||||||
# ppi2Dist_colname #from globals used
|
# ppi2Dist_colname #from globals used
|
||||||
|
@ -11,7 +9,7 @@ common_cols = c("mutationinformation"
|
||||||
, drug, "drug_name"
|
, drug, "drug_name"
|
||||||
, "mutation", "mutation_info"
|
, "mutation", "mutation_info"
|
||||||
, "wild_type", "mutant_type", "position"
|
, "wild_type", "mutant_type", "position"
|
||||||
, "pos_count"
|
#, "pos_count", "df2_pos_count_all"
|
||||||
, "snp_frequency"
|
, "snp_frequency"
|
||||||
, "total_id_ucount"
|
, "total_id_ucount"
|
||||||
, "drtype", "drtype_mode", "drtype_max"
|
, "drtype", "drtype_mode", "drtype_max"
|
||||||
|
@ -63,7 +61,7 @@ common_outcome_affinity_cols = c( "ligand_outcome"
|
||||||
#======================================================
|
#======================================================
|
||||||
# Plotting cols + affinity cols: conditional on gene
|
# Plotting cols + affinity cols: conditional on gene
|
||||||
#======================================================
|
#======================================================
|
||||||
if (tolower(gene)%in%geneL_normal){
|
if (tolower(gene)%in%c("pnca")){
|
||||||
plotting_cols = common_cols
|
plotting_cols = common_cols
|
||||||
|
|
||||||
raw_affinity_cols = common_raw_affinity_cols
|
raw_affinity_cols = common_raw_affinity_cols
|
||||||
|
@ -73,7 +71,7 @@ if (tolower(gene)%in%geneL_normal){
|
||||||
|
|
||||||
}
|
}
|
||||||
# ppi2 genes
|
# ppi2 genes
|
||||||
if (tolower(gene)%in%geneL_ppi2){
|
if (tolower(gene)%in%c("alr", "embb", "katg")){
|
||||||
plotting_cols = c(common_cols,
|
plotting_cols = c(common_cols,
|
||||||
ppi2Dist_colname,
|
ppi2Dist_colname,
|
||||||
"mcsm_ppi2_affinity", "mcsm_ppi2_scaled", "mcsm_ppi2_outcome")
|
"mcsm_ppi2_affinity", "mcsm_ppi2_scaled", "mcsm_ppi2_outcome")
|
||||||
|
@ -87,7 +85,7 @@ if (tolower(gene)%in%geneL_ppi2){
|
||||||
}
|
}
|
||||||
|
|
||||||
#na_genes
|
#na_genes
|
||||||
if (tolower(gene)%in%geneL_na){
|
if (tolower(gene)%in%c("gid")){
|
||||||
plotting_cols = c(common_cols,
|
plotting_cols = c(common_cols,
|
||||||
naDist_colname,
|
naDist_colname,
|
||||||
"mcsm_na_affinity", "mcsm_na_scaled", "mcsm_na_outcome")
|
"mcsm_na_affinity", "mcsm_na_scaled", "mcsm_na_outcome")
|
||||||
|
@ -95,13 +93,28 @@ if (tolower(gene)%in%geneL_na){
|
||||||
raw_affinity_cols = c(common_raw_affinity_cols , "mcsm_na_affinity")
|
raw_affinity_cols = c(common_raw_affinity_cols , "mcsm_na_affinity")
|
||||||
scaled_affinity_cols = c(common_scaled_affinity_cols , "mcsm_na_scaled")
|
scaled_affinity_cols = c(common_scaled_affinity_cols , "mcsm_na_scaled")
|
||||||
outcome_affinity_cols = c(common_outcome_affinity_cols , "mcsm_na_outcome")
|
outcome_affinity_cols = c(common_outcome_affinity_cols , "mcsm_na_outcome")
|
||||||
affinity_dist_colnames = c(LigDist_colname, ppi2Dist_colname, naDist_colname)
|
affinity_dist_colnames = c(LigDist_colname, naDist_colname)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tolower(gene)%in%c("rpob")){
|
if (tolower(gene)%in%c("rpob")){
|
||||||
plotting_cols = c(plotting_cols, "X5uhc_position","X5uhc_offset")
|
#plotting_cols = c(plotting_cols, "X5uhc_position","X5uhc_offset")
|
||||||
|
plotting_cols = c(common_cols,
|
||||||
|
ppi2Dist_colname,
|
||||||
|
"mcsm_ppi2_affinity", "mcsm_ppi2_scaled", "mcsm_ppi2_outcome",
|
||||||
|
naDist_colname,
|
||||||
|
"mcsm_na_affinity", "mcsm_na_scaled", "mcsm_na_outcome",
|
||||||
|
"X5uhc_position","X5uhc_offset")
|
||||||
|
|
||||||
|
|
||||||
|
raw_affinity_cols = c(common_raw_affinity_cols , "mcsm_ppi2_affinity", "mcsm_na_affinity")
|
||||||
|
scaled_affinity_cols = c(common_scaled_affinity_cols , "mcsm_ppi2_scaled" , "mcsm_na_scaled")
|
||||||
|
outcome_affinity_cols = c(common_outcome_affinity_cols , "mcsm_ppi2_outcome", "mcsm_na_outcome")
|
||||||
|
outcome_affinity_cols = c(common_outcome_affinity_cols , "mcsm_na_outcome")
|
||||||
|
affinity_dist_colnames = c(LigDist_colname, ppi2Dist_colname, naDist_colname)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#=======================================
|
#=======================================
|
||||||
# All: affinity cols: based on above condition
|
# All: affinity cols: based on above condition
|
||||||
#========================================
|
#========================================
|
||||||
|
|
|
@ -57,7 +57,7 @@ merged_df3 = merged_df3[, !colnames(merged_df3)%in%c("pos_count")]
|
||||||
head(merged_df3$pos_count)
|
head(merged_df3$pos_count)
|
||||||
|
|
||||||
df3 = merged_df3[, colnames(merged_df3)%in%plotting_cols]
|
df3 = merged_df3[, colnames(merged_df3)%in%plotting_cols]
|
||||||
#"nca_distance"%in%colnames(df3)
|
"nca_distance"%in%colnames(df3)
|
||||||
|
|
||||||
#=======
|
#=======
|
||||||
# output
|
# output
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue