a massive waste of time
This commit is contained in:
parent
8d6c148fff
commit
4147a6b90f
3 changed files with 726 additions and 620 deletions
|
@ -8,7 +8,7 @@
|
||||||
##################################################################
|
##################################################################
|
||||||
# from plotting_globals.R
|
# from plotting_globals.R
|
||||||
# DistCutOff, LigDist_colname, ppi2Dist_colname, naDist_colname
|
# DistCutOff, LigDist_colname, ppi2Dist_colname, naDist_colname
|
||||||
gene
|
#gene
|
||||||
|
|
||||||
dm_om_wf_lf_data <- function(df
|
dm_om_wf_lf_data <- function(df
|
||||||
, gene # from globals
|
, gene # from globals
|
||||||
|
@ -28,9 +28,15 @@ dm_om_wf_lf_data <- function(df
|
||||||
sum(is.na(df$maf2))
|
sum(is.na(df$maf2))
|
||||||
|
|
||||||
# Initialise the required dfs based on gene name
|
# Initialise the required dfs based on gene name
|
||||||
|
#geneL_normal = c("pnca")
|
||||||
|
#geneL_na = c("gid", "rpob")
|
||||||
|
#geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||||
|
|
||||||
|
# ADDED: IMPORTANT for rpob to be in both (ppi2 + na) to make sure all data is returned
|
||||||
geneL_normal = c("pnca")
|
geneL_normal = c("pnca")
|
||||||
geneL_na = c("gid", "rpob")
|
geneL_both = c("rpob")
|
||||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
geneL_ppi2 = c("alr", "embb", "katg")
|
||||||
|
geneL_na = c("gid")
|
||||||
|
|
||||||
# common_dfs
|
# common_dfs
|
||||||
common_dfsL = list(
|
common_dfsL = list(
|
||||||
|
@ -59,6 +65,14 @@ dm_om_wf_lf_data <- function(df
|
||||||
wf_lf_dataL = common_dfsL
|
wf_lf_dataL = common_dfsL
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (tolower(gene)%in%geneL_ppi2){
|
||||||
|
additional_dfL = list(
|
||||||
|
wf_mcsm_ppi2 = data.frame()
|
||||||
|
, lf_mcsm_ppi2 = data.frame()
|
||||||
|
)
|
||||||
|
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
||||||
|
}
|
||||||
|
|
||||||
if (tolower(gene)%in%geneL_na){
|
if (tolower(gene)%in%geneL_na){
|
||||||
additional_dfL = list(
|
additional_dfL = list(
|
||||||
wf_mcsm_na = data.frame()
|
wf_mcsm_na = data.frame()
|
||||||
|
@ -67,13 +81,16 @@ dm_om_wf_lf_data <- function(df
|
||||||
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tolower(gene)%in%geneL_ppi2){
|
if (tolower(gene)%in%geneL_both){
|
||||||
additional_dfL = list(
|
additional_dfL = list(
|
||||||
wf_mcsm_ppi2 = data.frame()
|
wf_mcsm_ppi2 = data.frame(),
|
||||||
, lf_mcsm_ppi2 = data.frame()
|
lf_mcsm_ppi2 = data.frame(),
|
||||||
|
wf_mcsm_na = data.frame(),
|
||||||
|
lf_mcsm_na = data.frame()
|
||||||
)
|
)
|
||||||
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
||||||
}
|
}
|
||||||
|
|
||||||
cat("\nInitializing an empty list of length:"
|
cat("\nInitializing an empty list of length:"
|
||||||
, length(wf_lf_dataL))
|
, length(wf_lf_dataL))
|
||||||
|
|
||||||
|
@ -237,6 +254,38 @@ dm_om_wf_lf_data <- function(df
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (tolower(gene)%in%geneL_both){
|
||||||
|
colnames_to_extract = c(
|
||||||
|
common_colnames,
|
||||||
|
"mcsm_ppi2_affinity" ,
|
||||||
|
"mcsm_ppi2_scaled" ,
|
||||||
|
"mcsm_ppi2_outcome" ,
|
||||||
|
ppi2Dist_colname,
|
||||||
|
"mcsm_na_affinity" ,
|
||||||
|
"mcsm_na_scaled" ,
|
||||||
|
"mcsm_na_outcome" ,
|
||||||
|
naDist_colname
|
||||||
|
)
|
||||||
|
display_colnames = c(
|
||||||
|
display_common_colnames,
|
||||||
|
"mcsm_ppi2_affinity",
|
||||||
|
mcsm_ppi2_dn,
|
||||||
|
"mcsm_ppi2_outcome",
|
||||||
|
ppi2_dist_dn,
|
||||||
|
"mcsm_na_affinity",
|
||||||
|
mcsm_na_dn,
|
||||||
|
"mcsm_na_outcome",
|
||||||
|
na_dist_dn
|
||||||
|
)
|
||||||
|
comb_df_sl = df[, colnames_to_extract]
|
||||||
|
colnames(comb_df_sl) = display_colnames
|
||||||
|
comb_df_sl_ppi2 = comb_df_sl[comb_df_sl[[ppi2_dist_dn]]<DistCutOff,]
|
||||||
|
comb_df_sl_na = comb_df_sl[comb_df_sl[[na_dist_dn]]<DistCutOff,]
|
||||||
|
static_cols_end = c(na_dist_dn, static_cols_end_common)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# Affinity filtered data: mcsm-lig: COMMON for all genes, mcsm-lig --> LigDist_colname
|
# Affinity filtered data: mcsm-lig: COMMON for all genes, mcsm-lig --> LigDist_colname
|
||||||
comb_df_sl_lig = comb_df_sl[comb_df_sl[[lig_dist_dn]]<DistCutOff,]
|
comb_df_sl_lig = comb_df_sl[comb_df_sl[[lig_dist_dn]]<DistCutOff,]
|
||||||
|
|
||||||
|
|
|
@ -12,7 +12,6 @@ geneL_na = c("gid", "rpob")
|
||||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||||
|
|
||||||
if (tolower(gene)%in%geneL_na){
|
if (tolower(gene)%in%geneL_na){
|
||||||
|
|
||||||
infilename_nca = paste0("/home/tanu/git/Misc/mcsm_na_dist/"
|
infilename_nca = paste0("/home/tanu/git/Misc/mcsm_na_dist/"
|
||||||
, tolower(gene), "_nca_distances.csv")
|
, tolower(gene), "_nca_distances.csv")
|
||||||
}
|
}
|
||||||
|
@ -72,25 +71,65 @@ cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n")
|
||||||
#===============================================
|
#===============================================
|
||||||
# ADD : na distance column for genes with nucleic acid affinity
|
# ADD : na distance column for genes with nucleic acid affinity
|
||||||
#===============================================
|
#===============================================
|
||||||
#gid_na_distcol
|
# if (tolower(gene)%in%geneL_na){
|
||||||
if (tolower(gene)%in%geneL_na){
|
#
|
||||||
|
# distcol_nca_name = read.csv(infilename_nca, header = F)
|
||||||
|
# head(distcol_nca_name)
|
||||||
|
# colnames(distcol_nca_name) <- c("mutationinformation", "nca_distance")
|
||||||
|
# head(distcol_nca_name)
|
||||||
|
# class(distcol_nca_name)
|
||||||
|
#
|
||||||
|
# mcol = colnames(distcol_nca_name)[colnames(distcol_nca_name)%in%colnames(my_df_u)]
|
||||||
|
# mcol
|
||||||
|
# head(my_df_u$mutationinformation)
|
||||||
|
# head(distcol_nca_name$mutationinformation)
|
||||||
|
#
|
||||||
|
# my_df_u = merge(my_df_u, distcol_nca_name,
|
||||||
|
# by = "mutationinformation",
|
||||||
|
# all = T)
|
||||||
|
#
|
||||||
|
# }
|
||||||
|
|
||||||
|
if (tolower(gene)%in%geneL_na){
|
||||||
distcol_nca_name = read.csv(infilename_nca, header = F)
|
distcol_nca_name = read.csv(infilename_nca, header = F)
|
||||||
|
|
||||||
|
if (tolower(gene)=='rpob'){
|
||||||
|
print('WARNING: running special-case handler for rpoB')
|
||||||
|
|
||||||
|
# create 5uhc equivalent column for mutationinformation
|
||||||
|
my_df_u$X5uhc_mutationinformation = paste0(my_df_u$wild_type,
|
||||||
|
my_df_u$X5uhc_position,
|
||||||
|
my_df_u$mutant_type)
|
||||||
|
|
||||||
|
colnames(distcol_nca_name) <- c("X5uhc_mutationinformation", "nca_distance")
|
||||||
|
|
||||||
|
# find the column name(s) shared with my_df_u, then merge on X5uhc_mutationinformation
|
||||||
|
mcol = colnames(distcol_nca_name)[colnames(distcol_nca_name)%in%colnames(my_df_u)]
|
||||||
|
cat(paste0("\nMerging for gene: ", tolower(gene), "\non column: ", mcol))
|
||||||
|
|
||||||
|
head(my_df_u$mutationinformation)
|
||||||
|
head(distcol_nca_name$X5uhc_mutationinformation)
|
||||||
|
|
||||||
|
my_df_u = merge(my_df_u, distcol_nca_name,
|
||||||
|
by = "X5uhc_mutationinformation",
|
||||||
|
all = T)
|
||||||
|
|
||||||
|
} else {
|
||||||
head(distcol_nca_name)
|
head(distcol_nca_name)
|
||||||
colnames(distcol_nca_name) <- c("mutationinformation", "nca_distance")
|
colnames(distcol_nca_name) <- c("mutationinformation", "nca_distance")
|
||||||
head(distcol_nca_name)
|
head(distcol_nca_name)
|
||||||
class(distcol_nca_name)
|
class(distcol_nca_name)
|
||||||
|
|
||||||
mcol = colnames(distcol_nca_name)[colnames(distcol_nca_name)%in%colnames(my_df_u)]
|
mcol = colnames(distcol_nca_name)[colnames(distcol_nca_name)%in%colnames(my_df_u)]
|
||||||
mcol
|
cat(paste0("\nMerging for gene: ", tolower(gene), "\non column: ", mcol))
|
||||||
head(my_df_u$mutationinformation)
|
head(my_df_u$mutationinformation)
|
||||||
head(distcol_nca_name$mutationinformation)
|
head(distcol_nca_name$mutationinformation)
|
||||||
|
|
||||||
my_df_u = merge(my_df_u, distcol_nca_name,
|
my_df_u = merge(my_df_u, distcol_nca_name,
|
||||||
by = "mutationinformation",
|
by = "mutationinformation",
|
||||||
all = T)
|
all = T)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#===============================================
|
#===============================================
|
||||||
# extract mutations <10 Angstroms and symbol
|
# extract mutations <10 Angstroms and symbol
|
||||||
#===============================================
|
#===============================================
|
||||||
|
@ -111,3 +150,4 @@ return(all_df)
|
||||||
########################################################################
|
########################################################################
|
||||||
# end of data extraction and cleaning for plots #
|
# end of data extraction and cleaning for plots #
|
||||||
########################################################################
|
########################################################################
|
||||||
|
|
||||||
|
|
|
@ -60,8 +60,8 @@ pd_df = plotting_data(mcsm_df
|
||||||
my_df = pd_df[[1]]
|
my_df = pd_df[[1]]
|
||||||
my_df_u = pd_df[[2]] # this forms one of the input for combining_dfs_plotting()
|
my_df_u = pd_df[[2]] # this forms one of the input for combining_dfs_plotting()
|
||||||
|
|
||||||
max_ang <- round(max(my_df_u[LigDist_colname]))
|
max_ang <- round(max(my_df_u[[LigDist_colname]]))
|
||||||
min_ang <- round(min(my_df_u[LigDist_colname]))
|
min_ang <- round(min(my_df_u[[LigDist_colname]]))
|
||||||
|
|
||||||
cat("\nLigand distance colname:", LigDist_colname
|
cat("\nLigand distance colname:", LigDist_colname
|
||||||
, "\nThe max distance", gene, "structure df" , ":", max_ang, "\u212b"
|
, "\nThe max distance", gene, "structure df" , ":", max_ang, "\u212b"
|
||||||
|
@ -128,6 +128,11 @@ geneL_normal = c("pnca")
|
||||||
geneL_na = c("gid", "rpob")
|
geneL_na = c("gid", "rpob")
|
||||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||||
|
|
||||||
|
# geneL_normal = c("pnca")
|
||||||
|
# geneL_both = c("rpob")
|
||||||
|
# geneL_ppi2 = c("alr", "embb", "katg")
|
||||||
|
# geneL_na = c("gid")
|
||||||
|
|
||||||
all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)
|
all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)
|
||||||
|
|
||||||
wf_duet = all_dm_om_df[['wf_duet']]
|
wf_duet = all_dm_om_df[['wf_duet']]
|
||||||
|
@ -158,15 +163,27 @@ lf_provean = all_dm_om_df[['lf_provean']]
|
||||||
wf_dist_gen = all_dm_om_df[['wf_dist_gen']]
|
wf_dist_gen = all_dm_om_df[['wf_dist_gen']]
|
||||||
lf_dist_gen = all_dm_om_df[['lf_dist_gen']]
|
lf_dist_gen = all_dm_om_df[['lf_dist_gen']]
|
||||||
|
|
||||||
|
# ppi2 genes
|
||||||
|
if (tolower(gene)%in%geneL_ppi2){
|
||||||
|
wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']]
|
||||||
|
lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']]
|
||||||
|
}
|
||||||
|
|
||||||
|
# na genes
|
||||||
if (tolower(gene)%in%geneL_na){
|
if (tolower(gene)%in%geneL_na){
|
||||||
wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']]
|
wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']]
|
||||||
lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]
|
lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tolower(gene)%in%geneL_ppi2){
|
# both ppi2+na genes: NOT NEEDED here, as it is handled by the two ifs above
|
||||||
wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']]
|
# if (tolower(gene)%in%geneL_both){
|
||||||
lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']]
|
# wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']]
|
||||||
}
|
# lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']]
|
||||||
|
#
|
||||||
|
# wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']]
|
||||||
|
# lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]
|
||||||
|
# }
|
||||||
|
|
||||||
|
|
||||||
s2 = c("\nSuccessfully sourced other_plots_data.R")
|
s2 = c("\nSuccessfully sourced other_plots_data.R")
|
||||||
cat(s2)
|
cat(s2)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue