renamed 2 to _v2

This commit is contained in:
Tanushree Tunstall 2022-08-22 10:53:25 +01:00
parent 802d6f8495
commit 8d6c148fff
7 changed files with 74 additions and 588 deletions

View file

@ -33,9 +33,15 @@
#==========================================================
#lig_dist_colname = 'ligand_distance' or global var LigDist_colname
#lig_dist_cutoff = 10 or global var LigDist_cutoff
# Gene groupings (lower-case gene names) used to pick gene-specific handling.
#   geneL_normal : genes with no extra distance column handled here
#   geneL_na     : genes that get a nucleic-acid distance column
#   geneL_ppi2   : presumably genes with a second protein-protein
#                  interface -- TODO confirm against plotting_globals.R
# Note: "rpob" appears in both geneL_na and geneL_ppi2.
geneL_normal <- c("pnca")
geneL_na     <- c("gid", "rpob")
geneL_ppi2   <- c("alr", "embb", "katg", "rpob")
combining_dfs_plotting <- function( my_df_u
, gene_metadata
, gene # ADDED
, lig_dist_colname = ''
, lig_dist_cutoff = ''){
@ -679,6 +685,31 @@ combining_dfs_plotting <- function( my_df_u
min( merged_df3['avg_lig_affinity_scaled']); max( merged_df3['avg_lig_affinity_scaled'])
###################################################################
# Rectify pos_count column in merged_df3
# The one in merged_df2 is correct
#
# Step 1: keep the pos_count value that came in with the merge, but under a
#         different name, so it cannot be confused with the recomputed one.
#         If no "pos_count" column exists, which() returns integer(0) and the
#         rename is a no-op.
nc_pc_CHANGE <- which(colnames(merged_df3) == "pos_count")
colnames(merged_df3)[nc_pc_CHANGE] <- "df2_pos_count_all"

# Step 2: recompute pos_count as the number of rows in merged_df3 sharing
#         each position.
#         The output column is named directly via `name =` rather than
#         creating the default "n" and renaming it afterwards: if merged_df3
#         already had a column called "n", dplyr would call the new column
#         "nn" and the subsequent rename-by-name would hit the wrong column.
#         After step 1 no column named "pos_count" remains, so the former
#         explicit "drop pos_count" step was dead code and is not needed.
merged_df3 <- dplyr::add_count(merged_df3, position, name = "pos_count")

# Step 3: make sure downstream code receives a plain data.frame.
merged_df3 <- as.data.frame(merged_df3)
####################################################################
# ADD: distance to Nucleic acid column for na genes
####################################################################
#TODO

View file

@ -7,6 +7,10 @@
# LigDist_colname #from globals: plotting_globals.R
# ppi2Dist_colname #from globals: plotting_globals.R
# naDist_colname #from globals: plotting_globals.R
# Lower-case gene names grouped by the kind of extra handling they receive.
# "rpob" is listed under both geneL_na and geneL_ppi2.
# NOTE(review): the na/ppi2 groups look tied to naDist_colname and
# ppi2Dist_colname from plotting_globals.R -- confirm.
geneL_normal <- c("pnca")
geneL_na     <- c("gid", "rpob")
geneL_ppi2   <- c("alr", "embb", "katg", "rpob")
corr_data_extract <- function(df
, gene
, drug

View file

@ -5,6 +5,17 @@
# load libraries and functions
library(data.table)
library(dplyr)
# ADDED: New
# Gene groupings (lower-case names); "rpob" belongs to two groups.
geneL_normal <- c("pnca")
geneL_na     <- c("gid", "rpob")
geneL_ppi2   <- c("alr", "embb", "katg", "rpob")

# For genes in geneL_na, build the path of the per-gene nucleic-acid distance
# file. This runs when the file is sourced and reads `gene` from the calling
# environment, so `gene` must already be defined at that point.
# infilename_nca is left undefined for every other gene.
if (is.element(tolower(gene), geneL_na)) {
  infilename_nca <- paste0(
    "/home/tanu/git/Misc/mcsm_na_dist/",
    tolower(gene),
    "_nca_distances.csv"
  )
}
#========================================================
# plotting_data(): formatting data for plots
# input args:
@ -20,6 +31,7 @@ library(dplyr)
#lig_dist_cutoff = 10 or global var LigDist_cutoff
plotting_data <- function(df
, gene # ADDED
, lig_dist_colname
, lig_dist_cutoff) {
my_df = data.frame()
@ -57,7 +69,28 @@ if ( length(unique(df$mutationinformation)) != length(df$mutationinformation)){
upos = unique(my_df_u$position)
cat("\nDim of clean df:"); cat(dim(my_df_u), "\n")
cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n")
#===============================================
# ADD : na distance column for genes with nucleic acid affinity
#===============================================
# For genes listed in geneL_na, read the per-gene distance file (two columns,
# no header row: mutation, distance) and attach it to my_df_u as the column
# "nca_distance", matched on "mutationinformation".
if (tolower(gene) %in% geneL_na) {

  # Build the path from THIS call's `gene` rather than relying on a value
  # computed when the file was sourced: that value depends on a global `gene`
  # which may be absent or may name a different gene than the one requested.
  infilename_nca <- paste0("/home/tanu/git/Misc/mcsm_na_dist/"
                           , tolower(gene), "_nca_distances.csv")

  if (!file.exists(infilename_nca)) {
    stop("Nucleic acid distance file not found: ", infilename_nca
         , call. = FALSE)
  }

  # TRUE/FALSE spelled out: T and F are ordinary variables and can be reassigned
  distcol_nca_name <- read.csv(infilename_nca, header = FALSE)

  # The two names assigned below only make sense for a two-column file
  if (ncol(distcol_nca_name) != 2) {
    stop("Expected 2 columns in ", infilename_nca
         , " but found ", ncol(distcol_nca_name), call. = FALSE)
  }
  colnames(distcol_nca_name) <- c("mutationinformation", "nca_distance")

  # Columns shared by both frames (kept for inspection; the merge key is
  # given explicitly below)
  mcol <- colnames(distcol_nca_name)[colnames(distcol_nca_name) %in% colnames(my_df_u)]

  # all = TRUE is a full outer join: mutations present in only one of the two
  # frames are kept, with NA in the columns coming from the other frame.
  # NOTE(review): this can add rows to my_df_u for mutations that exist only
  # in the distance file -- confirm that is intended.
  my_df_u <- merge(my_df_u, distcol_nca_name,
                   by = "mutationinformation",
                   all = TRUE)
}
#===============================================
# extract mutations <10 Angstroms and symbol
#===============================================