renamed 2 to _v2
This commit is contained in:
parent
802d6f8495
commit
8d6c148fff
7 changed files with 74 additions and 588 deletions
|
@ -33,9 +33,15 @@
|
|||
#==========================================================
|
||||
#lig_dist_colname = 'ligand_distance' or global var LigDist_colname
|
||||
#lig_dist_cutoff = 10 or global var LigDist_cutoff
|
||||
# Gene groupings (lower-case gene names) used to select gene-specific handling.
# NOTE(review): presumably "na" = genes with nucleic-acid affinity data and
# "ppi2" = genes with protein-protein interface data -- confirm with the author.
# "rpob" appears in both the na and ppi2 lists.
geneL_normal <- c("pnca")
geneL_na     <- c("gid", "rpob")
geneL_ppi2   <- c("alr", "embb", "katg", "rpob")
|
||||
|
||||
|
||||
|
||||
combining_dfs_plotting <- function( my_df_u
|
||||
, gene_metadata
|
||||
, gene # ADDED
|
||||
, lig_dist_colname = ''
|
||||
, lig_dist_cutoff = ''){
|
||||
|
||||
|
@ -679,6 +685,31 @@ combining_dfs_plotting <- function( my_df_u
|
|||
|
||||
min( merged_df3['avg_lig_affinity_scaled']); max( merged_df3['avg_lig_affinity_scaled'])
|
||||
|
||||
###################################################################
|
||||
# Rectify pos_count column in merged_df3
|
||||
# The one in merged_df2 is correct
|
||||
|
||||
# Keep the incoming per-position count under a new name, then recompute
# pos_count from the rows actually present in merged_df3.
names(merged_df3)[names(merged_df3) == "pos_count"] <- "df2_pos_count_all"

# After the rename no column is called "pos_count", so no explicit drop is
# required before recounting.
# name = "pos_count" writes the count straight into the target column; relying
# on the default "n" and renaming it afterwards would rename the wrong column
# if merged_df3 already carried a column called "n".
merged_df3 <- dplyr::add_count(merged_df3, position, name = "pos_count")

# Hand a plain data.frame to the code that follows, whatever class the
# counting step returned.
merged_df3 <- as.data.frame(merged_df3)
|
||||
####################################################################
|
||||
# ADD: distance to Nucleic acid column for na genes
|
||||
|
||||
|
||||
####################################################################
|
||||
#TODO
|
||||
|
|
|
@ -7,6 +7,10 @@
|
|||
# LigDist_colname #from globals: plotting_globals.R
|
||||
# ppi2Dist_colname #from globals: plotting_globals.R
|
||||
# naDist_colname #from globals: plotting_globals.R
|
||||
# Gene groupings (lower-case gene names) used to select gene-specific handling.
# NOTE(review): presumably "na" = genes with nucleic-acid affinity data and
# "ppi2" = genes with protein-protein interface data -- confirm with the author.
# "rpob" appears in both the na and ppi2 lists.
geneL_normal <- c("pnca")
geneL_na     <- c("gid", "rpob")
geneL_ppi2   <- c("alr", "embb", "katg", "rpob")
|
||||
|
||||
corr_data_extract <- function(df
|
||||
, gene
|
||||
, drug
|
||||
|
|
|
@ -5,6 +5,17 @@
|
|||
# load libraries and functions
|
||||
library(data.table)
|
||||
library(dplyr)
|
||||
|
||||
# ADDED: New
|
||||
# Gene groupings (lower-case gene names) used to select gene-specific handling.
# geneL_na decides which genes get a nucleic-acid distance file and column.
# NOTE(review): "ppi2" presumably marks genes with protein-protein interface
# data -- confirm with the author. "rpob" appears in both the na and ppi2 lists.
geneL_normal <- c("pnca")
geneL_na     <- c("gid", "rpob")
geneL_ppi2   <- c("alr", "embb", "katg", "rpob")
|
||||
|
||||
# Build the path of the per-gene nucleic-acid distance CSV; it is only set for
# genes listed in geneL_na.
# NOTE(review): this reads an object called `gene` at the point where this
# statement runs -- confirm that callers define it before sourcing the file.
if (is.element(tolower(gene), geneL_na)) {
  infilename_nca <- paste0(
    "/home/tanu/git/Misc/mcsm_na_dist/",
    tolower(gene),
    "_nca_distances.csv"
  )
}
|
||||
#========================================================
|
||||
# plotting_data(): formatting data for plots
|
||||
# input args:
|
||||
|
@ -20,6 +31,7 @@ library(dplyr)
|
|||
#lig_dist_cutoff = 10 or global var LigDist_cutoff
|
||||
|
||||
plotting_data <- function(df
|
||||
, gene # ADDED
|
||||
, lig_dist_colname
|
||||
, lig_dist_cutoff) {
|
||||
my_df = data.frame()
|
||||
|
@ -57,7 +69,28 @@ if ( length(unique(df$mutationinformation)) != length(df$mutationinformation)){
|
|||
upos = unique(my_df_u$position)
|
||||
cat("\nDim of clean df:"); cat(dim(my_df_u), "\n")
|
||||
cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n")
|
||||
#===============================================
|
||||
# ADD : na distance column for genes with nucleic acid affinity
|
||||
#===============================================
|
||||
#gid_na_distcol
|
||||
# For genes listed in geneL_na, attach the nucleic-acid distance of each
# mutation to my_df_u as the column "nca_distance".
# Input: the CSV at infilename_nca, read without a header row and labelled here
# as (mutationinformation, nca_distance).
if (tolower(gene) %in% geneL_na) {

  # TRUE/FALSE are spelled out: T and F are ordinary variables that can be
  # reassigned, which would silently change how the file is read and merged.
  distcol_nca_name <- read.csv(infilename_nca, header = FALSE)
  colnames(distcol_nca_name) <- c("mutationinformation", "nca_distance")

  # Column names shared by both tables (kept for inspection while debugging).
  mcol <- colnames(distcol_nca_name)[colnames(distcol_nca_name) %in% colnames(my_df_u)]

  # NOTE(review): all = TRUE is a full outer join, so mutations present only in
  # the distance file are appended as rows with NA in every other column.
  # Confirm this is intended rather than all.x = TRUE.
  my_df_u <- merge(my_df_u, distcol_nca_name,
                   by = "mutationinformation",
                   all = TRUE)

}
|
||||
#===============================================
|
||||
# extract mutations <10 Angstroms and symbol
|
||||
#===============================================
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue