a massive waste of time

This commit is contained in:
Tanushree Tunstall 2022-08-22 13:05:53 +01:00
parent 8d6c148fff
commit 4147a6b90f
3 changed files with 726 additions and 620 deletions

View file

@ -1,14 +1,14 @@
#!/usr/bin/env Rscript #!/usr/bin/env Rscript
######################################################### #########################################################
# TASK: Script to format data for dm om plots: # TASK: Script to format data for dm om plots:
# generating WF and LF data for each of the parameters: # generating WF and LF data for each of the parameters:
# duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc # duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc
# Called by get_plotting_dfs.R # Called by get_plotting_dfs.R
################################################################## ##################################################################
# from plotting_globals.R # from plotting_globals.R
# DistCutOff, LigDist_colname, ppi2Dist_colname, naDist_colname # DistCutOff, LigDist_colname, ppi2Dist_colname, naDist_colname
gene #gene
dm_om_wf_lf_data <- function(df dm_om_wf_lf_data <- function(df
, gene # from globals , gene # from globals
@ -28,9 +28,15 @@ dm_om_wf_lf_data <- function(df
sum(is.na(df$maf2)) sum(is.na(df$maf2))
# Initialise the required dfs based on gene name # Initialise the required dfs based on gene name
#geneL_normal = c("pnca")
#geneL_na = c("gid", "rpob")
#geneL_ppi2 = c("alr", "embb", "katg", "rpob")
#ADDED: IMPORTANT for rpob to be in both to make sure all data is returned
geneL_normal = c("pnca") geneL_normal = c("pnca")
geneL_na = c("gid", "rpob") geneL_both = c("rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob") geneL_ppi2 = c("alr", "embb", "katg")
geneL_na = c("gid")
# common_dfs # common_dfs
common_dfsL = list( common_dfsL = list(
@ -59,6 +65,14 @@ dm_om_wf_lf_data <- function(df
wf_lf_dataL = common_dfsL wf_lf_dataL = common_dfsL
} }
if (tolower(gene)%in%geneL_ppi2){
additional_dfL = list(
wf_mcsm_ppi2 = data.frame()
, lf_mcsm_ppi2 = data.frame()
)
wf_lf_dataL = c(common_dfsL, additional_dfL)
}
if (tolower(gene)%in%geneL_na){ if (tolower(gene)%in%geneL_na){
additional_dfL = list( additional_dfL = list(
wf_mcsm_na = data.frame() wf_mcsm_na = data.frame()
@ -67,13 +81,16 @@ dm_om_wf_lf_data <- function(df
wf_lf_dataL = c(common_dfsL, additional_dfL) wf_lf_dataL = c(common_dfsL, additional_dfL)
} }
if (tolower(gene)%in%geneL_ppi2){ if (tolower(gene)%in%geneL_both){
additional_dfL = list( additional_dfL = list(
wf_mcsm_ppi2 = data.frame() wf_mcsm_ppi2 = data.frame(),
, lf_mcsm_ppi2 = data.frame() lf_mcsm_ppi2 = data.frame(),
wf_mcsm_na = data.frame(),
lf_mcsm_na = data.frame()
) )
wf_lf_dataL = c(common_dfsL, additional_dfL) wf_lf_dataL = c(common_dfsL, additional_dfL)
} }
cat("\nInitializing an empty list of length:" cat("\nInitializing an empty list of length:"
, length(wf_lf_dataL)) , length(wf_lf_dataL))
@ -237,454 +254,486 @@ dm_om_wf_lf_data <- function(df
} }
if (tolower(gene)%in%geneL_both){
colnames_to_extract = c(
common_colnames,
"mcsm_ppi2_affinity" ,
"mcsm_ppi2_scaled" ,
"mcsm_ppi2_outcome" ,
ppi2Dist_colname,
"mcsm_na_affinity" ,
"mcsm_na_scaled" ,
"mcsm_na_outcome" ,
naDist_colname
)
display_colnames = c(
display_common_colnames,
"mcsm_ppi2_affinity",
mcsm_ppi2_dn,
"mcsm_ppi2_outcome",
ppi2_dist_dn,
"mcsm_na_affinity",
mcsm_na_dn,
"mcsm_na_outcome",
na_dist_dn
)
comb_df_sl = df[, colnames_to_extract]
colnames(comb_df_sl) = display_colnames
comb_df_sl_ppi2 = comb_df_sl[comb_df_sl[[ppi2_dist_dn]]<DistCutOff,]
comb_df_sl_na = comb_df_sl[comb_df_sl[[na_dist_dn]]<DistCutOff,]
static_cols_end = c(na_dist_dn, static_cols_end_common)
}
# Affinity filtered data: mcsm-lig: COMMON for all genes, mcsm-lig --> LigDist_colname # Affinity filtered data: mcsm-lig: COMMON for all genes, mcsm-lig --> LigDist_colname
comb_df_sl_lig = comb_df_sl[comb_df_sl[[lig_dist_dn]]<DistCutOff,] comb_df_sl_lig = comb_df_sl[comb_df_sl[[lig_dist_dn]]<DistCutOff,]
} }
#====================== #======================
# Selecting dfs # Selecting dfs
# with appropriate cols # with appropriate cols
#======================= #=======================
static_cols_start = c(snp_colname static_cols_start = c(snp_colname
, aa_pos_colname , aa_pos_colname
, mut_colname , mut_colname
, mut_info_label_colname) , mut_info_label_colname)
# static_cols_end # static_cols_end
cat("\nEnd colnames for gene:", static_cols_end) cat("\nEnd colnames for gene:", static_cols_end)
######################################################################### #########################################################################
#============== #==============
# Distance and genomics # Distance and genomics
#============== #==============
# WF data: dist + genomics # WF data: dist + genomics
cols_to_select_dist_gen = c(static_cols_start, c("duet_outcome", duet_dn), static_cols_end) cols_to_select_dist_gen = c(static_cols_start, c("duet_outcome", duet_dn), static_cols_end)
wf_dist_gen = comb_df_sl[, cols_to_select_dist_gen]; head(wf_dist_gen) wf_dist_gen = comb_df_sl[, cols_to_select_dist_gen]; head(wf_dist_gen)
#pivot_cols_ps = cols_to_select_ps[1:5]; pivot_cols_ps #pivot_cols_ps = cols_to_select_ps[1:5]; pivot_cols_ps
pivot_cols_dist_gen = cols_to_select_dist_gen[1: (length(static_cols_start) + 1)]; pivot_cols_dist_gen pivot_cols_dist_gen = cols_to_select_dist_gen[1: (length(static_cols_start) + 1)]; pivot_cols_dist_gen
expected_rows_lf = nrow(wf_dist_gen) * (length(wf_dist_gen) - length(pivot_cols_dist_gen)) expected_rows_lf = nrow(wf_dist_gen) * (length(wf_dist_gen) - length(pivot_cols_dist_gen))
expected_rows_lf expected_rows_lf
# LF dist and genomics # LF dist and genomics
lf_dist_gen = tidyr::gather(wf_dist_gen lf_dist_gen = tidyr::gather(wf_dist_gen
, key = param_type , key = param_type
, value = param_value , value = param_value
, all_of(duet_dn):tail(static_cols_end,1) , all_of(duet_dn):tail(static_cols_end,1)
, factor_key = TRUE) , factor_key = TRUE)
if (nrow(lf_dist_gen) == expected_rows_lf){ if (nrow(lf_dist_gen) == expected_rows_lf){
cat("\nPASS: long format data created for Distance and Genomics") cat("\nPASS: long format data created for Distance and Genomics")
}else{ }else{
cat("\nFAIL: long format data could not be created for Distance and Genomics") cat("\nFAIL: long format data could not be created for Distance and Genomics")
quit() quit()
} }
# DROP duet cols # DROP duet cols
drop_cols = c(duet_dn, "duet_outcome"); drop_cols drop_cols = c(duet_dn, "duet_outcome"); drop_cols
table(lf_dist_gen$param_type) table(lf_dist_gen$param_type)
lf_dist_gen = lf_dist_gen[!lf_dist_gen$param_type%in%drop_cols,] lf_dist_gen = lf_dist_gen[!lf_dist_gen$param_type%in%drop_cols,]
lf_dist_gen$param_type = factor(lf_dist_gen$param_type) lf_dist_gen$param_type = factor(lf_dist_gen$param_type)
table(lf_dist_gen$param_type) table(lf_dist_gen$param_type)
# NEW columns [outcome and outcome colname] # NEW columns [outcome and outcome colname]
lf_dist_gen$outcome_colname = mut_info_colname lf_dist_gen$outcome_colname = mut_info_colname
lf_dist_gen$outcome = lf_dist_gen[[mut_info_label_colname]] lf_dist_gen$outcome = lf_dist_gen[[mut_info_label_colname]]
head(lf_dist_gen) head(lf_dist_gen)
wf_dist_gen = subset(wf_dist_gen, select = !(names(wf_dist_gen) %in% drop_cols)) wf_dist_gen = subset(wf_dist_gen, select = !(names(wf_dist_gen) %in% drop_cols))
colnames(wf_dist_gen) colnames(wf_dist_gen)
colnames(lf_dist_gen) colnames(lf_dist_gen)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_dist_gen']] = wf_dist_gen wf_lf_dataL[['wf_dist_gen']] = wf_dist_gen
wf_lf_dataL[['lf_dist_gen']] = lf_dist_gen wf_lf_dataL[['lf_dist_gen']] = lf_dist_gen
########################################################## ##########################################################
#============== #==============
# DUET # DUET
#============== #==============
# WF data: duet # WF data: duet
cols_to_select_duet = c(static_cols_start, c("duet_outcome", duet_dn), static_cols_end) cols_to_select_duet = c(static_cols_start, c("duet_outcome", duet_dn), static_cols_end)
wf_duet = comb_df_sl[, cols_to_select_duet] wf_duet = comb_df_sl[, cols_to_select_duet]
#pivot_cols_ps = cols_to_select_ps[1:5]; pivot_cols_ps #pivot_cols_ps = cols_to_select_ps[1:5]; pivot_cols_ps
pivot_cols_duet = cols_to_select_duet[1: (length(static_cols_start) + 1)]; pivot_cols_duet pivot_cols_duet = cols_to_select_duet[1: (length(static_cols_start) + 1)]; pivot_cols_duet
expected_rows_lf = nrow(wf_duet) * (length(wf_duet) - length(pivot_cols_duet)) expected_rows_lf = nrow(wf_duet) * (length(wf_duet) - length(pivot_cols_duet))
expected_rows_lf expected_rows_lf
# LF data: duet # LF data: duet
lf_duet = tidyr::gather(wf_duet lf_duet = tidyr::gather(wf_duet
, key = param_type , key = param_type
, value = param_value , value = param_value
, all_of(duet_dn):tail(static_cols_end,1) , all_of(duet_dn):tail(static_cols_end,1)
, factor_key = TRUE) , factor_key = TRUE)
if (nrow(lf_duet) == expected_rows_lf){ if (nrow(lf_duet) == expected_rows_lf){
cat("\nPASS: long format data created for ", duet_dn) cat("\nPASS: long format data created for ", duet_dn)
}else{ }else{
cat("\nFAIL: long format data could not be created for duet") cat("\nFAIL: long format data could not be created for duet")
quit() quit()
} }
table(lf_duet$param_type) table(lf_duet$param_type)
# NEW columns [outcome and outcome colname] # NEW columns [outcome and outcome colname]
lf_duet$outcome_colname = "duet_outcome" lf_duet$outcome_colname = "duet_outcome"
lf_duet$outcome = lf_duet$duet_outcome lf_duet$outcome = lf_duet$duet_outcome
# DROP static cols # DROP static cols
lf_duet = lf_duet[!lf_duet$param_type%in%c(static_cols_end),] lf_duet = lf_duet[!lf_duet$param_type%in%c(static_cols_end),]
lf_duet$param_type = factor(lf_duet$param_type) lf_duet$param_type = factor(lf_duet$param_type)
table(lf_duet$param_type); colnames(lf_duet) table(lf_duet$param_type); colnames(lf_duet)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_duet']] = wf_duet wf_lf_dataL[['wf_duet']] = wf_duet
wf_lf_dataL[['lf_duet']] = lf_duet wf_lf_dataL[['lf_duet']] = lf_duet
############################################################################ ############################################################################
#============== #==============
# FoldX # FoldX
#============== #==============
# WF data: Foldx # WF data: Foldx
cols_to_select_foldx= c(static_cols_start, c("foldx_outcome", foldx_dn), static_cols_end) cols_to_select_foldx= c(static_cols_start, c("foldx_outcome", foldx_dn), static_cols_end)
wf_foldx = comb_df_sl[, cols_to_select_foldx] wf_foldx = comb_df_sl[, cols_to_select_foldx]
pivot_cols_foldx = cols_to_select_foldx[1: (length(static_cols_start) + 1)]; pivot_cols_foldx pivot_cols_foldx = cols_to_select_foldx[1: (length(static_cols_start) + 1)]; pivot_cols_foldx
expected_rows_lf = nrow(wf_foldx) * (length(wf_foldx) - length(pivot_cols_foldx)) expected_rows_lf = nrow(wf_foldx) * (length(wf_foldx) - length(pivot_cols_foldx))
expected_rows_lf expected_rows_lf
# LF data: Foldx # LF data: Foldx
lf_foldx = gather(wf_foldx lf_foldx = gather(wf_foldx
, key = param_type , key = param_type
, value = param_value , value = param_value
, all_of(foldx_dn):tail(static_cols_end,1) , all_of(foldx_dn):tail(static_cols_end,1)
, factor_key = TRUE) , factor_key = TRUE)
if (nrow(lf_foldx) == expected_rows_lf){ if (nrow(lf_foldx) == expected_rows_lf){
cat("\nPASS: long format data created for ", foldx_dn) cat("\nPASS: long format data created for ", foldx_dn)
}else{ }else{
cat("\nFAIL: long format data could not be created for duet") cat("\nFAIL: long format data could not be created for duet")
quit() quit()
} }
# NEW column # NEW column
lf_foldx$outcome_colname = "foldx_outcome" lf_foldx$outcome_colname = "foldx_outcome"
lf_foldx$outcome = lf_foldx$foldx_outcome lf_foldx$outcome = lf_foldx$foldx_outcome
# DROP static cols # DROP static cols
lf_foldx = lf_foldx[!lf_foldx$param_type%in%c(static_cols_end),] lf_foldx = lf_foldx[!lf_foldx$param_type%in%c(static_cols_end),]
lf_foldx$param_type = factor(lf_foldx$param_type) lf_foldx$param_type = factor(lf_foldx$param_type)
table(lf_foldx$param_type); colnames(lf_foldx) table(lf_foldx$param_type); colnames(lf_foldx)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_foldx']] = wf_foldx wf_lf_dataL[['wf_foldx']] = wf_foldx
wf_lf_dataL[['lf_foldx']] = lf_foldx wf_lf_dataL[['lf_foldx']] = lf_foldx
############################################################################ ############################################################################
#============== #==============
# Deepddg # Deepddg
#============== #==============
# WF data: deepddg # WF data: deepddg
cols_to_select_deepddg = c(static_cols_start, c("deepddg_outcome", deepddg_dn), static_cols_end) cols_to_select_deepddg = c(static_cols_start, c("deepddg_outcome", deepddg_dn), static_cols_end)
wf_deepddg = comb_df_sl[, cols_to_select_deepddg] wf_deepddg = comb_df_sl[, cols_to_select_deepddg]
pivot_cols_deepddg = cols_to_select_deepddg[1: (length(static_cols_start) + 1)]; pivot_cols_deepddg pivot_cols_deepddg = cols_to_select_deepddg[1: (length(static_cols_start) + 1)]; pivot_cols_deepddg
expected_rows_lf = nrow(wf_deepddg) * (length(wf_deepddg) - length(pivot_cols_deepddg)) expected_rows_lf = nrow(wf_deepddg) * (length(wf_deepddg) - length(pivot_cols_deepddg))
expected_rows_lf expected_rows_lf
# LF data: Deepddg # LF data: Deepddg
lf_deepddg = gather(wf_deepddg lf_deepddg = gather(wf_deepddg
, key = param_type , key = param_type
, value = param_value , value = param_value
, all_of(deepddg_dn):tail(static_cols_end,1) , all_of(deepddg_dn):tail(static_cols_end,1)
, factor_key = TRUE) , factor_key = TRUE)
if (nrow(lf_deepddg) == expected_rows_lf){ if (nrow(lf_deepddg) == expected_rows_lf){
cat("\nPASS: long format data created for ", deepddg_dn) cat("\nPASS: long format data created for ", deepddg_dn)
}else{ }else{
cat("\nFAIL: long format data could not be created for duet") cat("\nFAIL: long format data could not be created for duet")
quit() quit()
} }
# NEW columns [outcome and outcome colname] # NEW columns [outcome and outcome colname]
lf_deepddg$outcome_colname = "deepddg_outcome" lf_deepddg$outcome_colname = "deepddg_outcome"
lf_deepddg$outcome = lf_deepddg$deepddg_outcome lf_deepddg$outcome = lf_deepddg$deepddg_outcome
# DROP static cols # DROP static cols
lf_deepddg = lf_deepddg[!lf_deepddg$param_type%in%c(static_cols_end),] lf_deepddg = lf_deepddg[!lf_deepddg$param_type%in%c(static_cols_end),]
lf_deepddg$param_type = factor(lf_deepddg$param_type) lf_deepddg$param_type = factor(lf_deepddg$param_type)
table(lf_deepddg$param_type); colnames(lf_deepddg) table(lf_deepddg$param_type); colnames(lf_deepddg)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_deepddg']] = wf_deepddg wf_lf_dataL[['wf_deepddg']] = wf_deepddg
wf_lf_dataL[['lf_deepddg']] = lf_deepddg wf_lf_dataL[['lf_deepddg']] = lf_deepddg
############################################################################ ############################################################################
#============== #==============
# Dynamut2: LF # Dynamut2: LF
#============== #==============
# WF data: dynamut2 # WF data: dynamut2
cols_to_select_dynamut2 = c(static_cols_start, c("ddg_dynamut2_outcome", dynamut2_dn), static_cols_end) cols_to_select_dynamut2 = c(static_cols_start, c("ddg_dynamut2_outcome", dynamut2_dn), static_cols_end)
wf_dynamut2 = comb_df_sl[, cols_to_select_dynamut2] wf_dynamut2 = comb_df_sl[, cols_to_select_dynamut2]
pivot_cols_dynamut2 = cols_to_select_dynamut2[1: (length(static_cols_start) + 1)]; pivot_cols_dynamut2 pivot_cols_dynamut2 = cols_to_select_dynamut2[1: (length(static_cols_start) + 1)]; pivot_cols_dynamut2
expected_rows_lf = nrow(wf_dynamut2) * (length(wf_dynamut2) - length(pivot_cols_dynamut2)) expected_rows_lf = nrow(wf_dynamut2) * (length(wf_dynamut2) - length(pivot_cols_dynamut2))
expected_rows_lf expected_rows_lf
# LF data: dynamut2 # LF data: dynamut2
lf_dynamut2 = gather(wf_dynamut2 lf_dynamut2 = gather(wf_dynamut2
, key = param_type , key = param_type
, value = param_value , value = param_value
, all_of(dynamut2_dn):tail(static_cols_end,1) , all_of(dynamut2_dn):tail(static_cols_end,1)
, factor_key = TRUE) , factor_key = TRUE)
if (nrow(lf_dynamut2) == expected_rows_lf){ if (nrow(lf_dynamut2) == expected_rows_lf){
cat("\nPASS: long format data created for ", dynamut2_dn) cat("\nPASS: long format data created for ", dynamut2_dn)
}else{ }else{
cat("\nFAIL: long format data could not be created for duet") cat("\nFAIL: long format data could not be created for duet")
quit() quit()
} }
# NEW columns [outcome and outcome colname] # NEW columns [outcome and outcome colname]
lf_dynamut2$outcome_colname = "ddg_dynamut2_outcome" lf_dynamut2$outcome_colname = "ddg_dynamut2_outcome"
lf_dynamut2$outcome = lf_dynamut2$ddg_dynamut2_outcome lf_dynamut2$outcome = lf_dynamut2$ddg_dynamut2_outcome
# DROP static cols # DROP static cols
lf_dynamut2 = lf_dynamut2[!lf_dynamut2$param_type%in%c(static_cols_end),] lf_dynamut2 = lf_dynamut2[!lf_dynamut2$param_type%in%c(static_cols_end),]
lf_dynamut2$param_type = factor(lf_dynamut2$param_type) lf_dynamut2$param_type = factor(lf_dynamut2$param_type)
table(lf_dynamut2$param_type); colnames(lf_dynamut2) table(lf_dynamut2$param_type); colnames(lf_dynamut2)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_dynamut2']] = wf_dynamut2 wf_lf_dataL[['wf_dynamut2']] = wf_dynamut2
wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2 wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2
###################################################################################### ######################################################################################
#================== #==================
# Consurf: LF # Consurf: LF
#https://consurf.tau.ac.il/overview.php #https://consurf.tau.ac.il/overview.php
# consurf_score: # consurf_score:
# <0 (below average): slowly evolving i.e CONSERVED # <0 (below average): slowly evolving i.e CONSERVED
# >0 (above average): rapidly evolving, i.e VARIABLE # >0 (above average): rapidly evolving, i.e VARIABLE
#table(df$consurf_colour_rev) #table(df$consurf_colour_rev)
# TODO # TODO
#1--> "most_variable", 2--> "", 3-->"", 4-->"" #1--> "most_variable", 2--> "", 3-->"", 4-->""
#5-->"", 6-->"", 7-->"", 8-->"", 9-->"most_conserved" #5-->"", 6-->"", 7-->"", 8-->"", 9-->"most_conserved"
#==================== #====================
# WF data: consurf # WF data: consurf
cols_to_select_consurf = c(static_cols_start, c("consurf_outcome", consurf_dn), static_cols_end) cols_to_select_consurf = c(static_cols_start, c("consurf_outcome", consurf_dn), static_cols_end)
wf_consurf = comb_df_sl[, cols_to_select_consurf] wf_consurf = comb_df_sl[, cols_to_select_consurf]
pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start) + 1)]; pivot_cols_consurf pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start) + 1)]; pivot_cols_consurf
expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf)) expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf))
expected_rows_lf expected_rows_lf
# when outcome didn't exist # when outcome didn't exist
#cols_to_select_consurf = c(static_cols_start, c(consurf_dn), static_cols_end) #cols_to_select_consurf = c(static_cols_start, c(consurf_dn), static_cols_end)
#wf_consurf = comb_df_sl[, cols_to_select_consurf] #wf_consurf = comb_df_sl[, cols_to_select_consurf]
# #
# pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start))]; pivot_cols_consurf # pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start))]; pivot_cols_consurf
# expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf)) # expected_rows_lf = nrow(wf_consurf) * (length(wf_consurf) - length(pivot_cols_consurf))
# expected_rows_lf # expected_rows_lf
# LF data: consurf # LF data: consurf
lf_consurf = gather(wf_consurf lf_consurf = gather(wf_consurf
, key = param_type , key = param_type
, value = param_value , value = param_value
, all_of(consurf_dn):tail(static_cols_end,1) , all_of(consurf_dn):tail(static_cols_end,1)
, factor_key = TRUE) , factor_key = TRUE)
if (nrow(lf_consurf) == expected_rows_lf){ if (nrow(lf_consurf) == expected_rows_lf){
cat("\nPASS: long format data created for ", consurf_dn) cat("\nPASS: long format data created for ", consurf_dn)
}else{ }else{
cat("\nFAIL: long format data could not be created for duet") cat("\nFAIL: long format data could not be created for duet")
quit() quit()
} }
# NEW columns [outcome and outcome colname] # NEW columns [outcome and outcome colname]
lf_consurf$outcome_colname = "consurf_outcome" lf_consurf$outcome_colname = "consurf_outcome"
lf_consurf$outcome = lf_consurf$consurf_outcome lf_consurf$outcome = lf_consurf$consurf_outcome
# DROP static cols # DROP static cols
lf_consurf = lf_consurf[!lf_consurf$param_type%in%c(static_cols_end),] lf_consurf = lf_consurf[!lf_consurf$param_type%in%c(static_cols_end),]
lf_consurf$param_type = factor(lf_consurf$param_type) lf_consurf$param_type = factor(lf_consurf$param_type)
table(lf_consurf$param_type); colnames(lf_consurf) table(lf_consurf$param_type); colnames(lf_consurf)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_consurf']] = wf_consurf wf_lf_dataL[['wf_consurf']] = wf_consurf
wf_lf_dataL[['lf_consurf']] = lf_consurf wf_lf_dataL[['lf_consurf']] = lf_consurf
########################################################################### ###########################################################################
#============== #==============
# SNAP2: LF # SNAP2: LF
#============== #==============
# WF data: snap2 # WF data: snap2
cols_to_select_snap2 = c(static_cols_start, c("snap2_outcome", snap2_dn), static_cols_end) cols_to_select_snap2 = c(static_cols_start, c("snap2_outcome", snap2_dn), static_cols_end)
wf_snap2 = comb_df_sl[, cols_to_select_snap2] wf_snap2 = comb_df_sl[, cols_to_select_snap2]
pivot_cols_snap2 = cols_to_select_snap2[1: (length(static_cols_start) + 1)]; pivot_cols_snap2 pivot_cols_snap2 = cols_to_select_snap2[1: (length(static_cols_start) + 1)]; pivot_cols_snap2
expected_rows_lf = nrow(wf_snap2) * (length(wf_snap2) - length(pivot_cols_snap2)) expected_rows_lf = nrow(wf_snap2) * (length(wf_snap2) - length(pivot_cols_snap2))
expected_rows_lf expected_rows_lf
# LF data: snap2 # LF data: snap2
lf_snap2 = gather(wf_snap2 lf_snap2 = gather(wf_snap2
, key = param_type , key = param_type
, value = param_value , value = param_value
, all_of(snap2_dn):tail(static_cols_end,1) , all_of(snap2_dn):tail(static_cols_end,1)
, factor_key = TRUE) , factor_key = TRUE)
if (nrow(lf_snap2) == expected_rows_lf){ if (nrow(lf_snap2) == expected_rows_lf){
cat("\nPASS: long format data created for ", snap2_dn) cat("\nPASS: long format data created for ", snap2_dn)
}else{ }else{
cat("\nFAIL: long format data could not be created for duet") cat("\nFAIL: long format data could not be created for duet")
quit() quit()
} }
# NEW columns [outcome and outcome colname] # NEW columns [outcome and outcome colname]
lf_snap2$outcome_colname = "snap2_outcome" lf_snap2$outcome_colname = "snap2_outcome"
lf_snap2$outcome = lf_snap2$snap2_outcome lf_snap2$outcome = lf_snap2$snap2_outcome
# DROP static cols # DROP static cols
lf_snap2 = lf_snap2[!lf_snap2$param_type%in%c(static_cols_end),] lf_snap2 = lf_snap2[!lf_snap2$param_type%in%c(static_cols_end),]
lf_snap2$param_type = factor(lf_snap2$param_type) lf_snap2$param_type = factor(lf_snap2$param_type)
table(lf_snap2$param_type); colnames(lf_snap2) table(lf_snap2$param_type); colnames(lf_snap2)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_snap2']] = wf_snap2 wf_lf_dataL[['wf_snap2']] = wf_snap2
wf_lf_dataL[['lf_snap2']] = lf_snap2 wf_lf_dataL[['lf_snap2']] = lf_snap2
#============== #==============
# Provean2: LF # Provean2: LF
#============== #==============
# WF data: provean # WF data: provean
cols_to_select_provean = c(static_cols_start, c("provean_outcome", provean_dn), static_cols_end) cols_to_select_provean = c(static_cols_start, c("provean_outcome", provean_dn), static_cols_end)
wf_provean = comb_df_sl[, cols_to_select_provean] wf_provean = comb_df_sl[, cols_to_select_provean]
pivot_cols_provean = cols_to_select_provean[1: (length(static_cols_start) + 1)]; pivot_cols_provean pivot_cols_provean = cols_to_select_provean[1: (length(static_cols_start) + 1)]; pivot_cols_provean
expected_rows_lf = nrow(wf_provean) * (length(wf_provean) - length(pivot_cols_provean)) expected_rows_lf = nrow(wf_provean) * (length(wf_provean) - length(pivot_cols_provean))
expected_rows_lf expected_rows_lf
# LF data: provean # LF data: provean
lf_provean = gather(wf_provean lf_provean = gather(wf_provean
, key = param_type , key = param_type
, value = param_value , value = param_value
, all_of(provean_dn):tail(static_cols_end,1) , all_of(provean_dn):tail(static_cols_end,1)
, factor_key = TRUE) , factor_key = TRUE)
if (nrow(lf_provean) == expected_rows_lf){ if (nrow(lf_provean) == expected_rows_lf){
cat("\nPASS: long format data created for ", provean_dn) cat("\nPASS: long format data created for ", provean_dn)
}else{ }else{
cat("\nFAIL: long format data could not be created for duet") cat("\nFAIL: long format data could not be created for duet")
quit() quit()
} }
# NEW columns [outcome and outcome colname] # NEW columns [outcome and outcome colname]
lf_provean$outcome_colname = "provean_outcome" lf_provean$outcome_colname = "provean_outcome"
lf_provean$outcome = lf_provean$provean_outcome lf_provean$outcome = lf_provean$provean_outcome
# DROP static cols # DROP static cols
lf_provean = lf_provean[!lf_provean$param_type%in%c(static_cols_end),] lf_provean = lf_provean[!lf_provean$param_type%in%c(static_cols_end),]
lf_provean$param_type = factor(lf_provean$param_type) lf_provean$param_type = factor(lf_provean$param_type)
table(lf_provean$param_type); colnames(lf_provean) table(lf_provean$param_type); colnames(lf_provean)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_provean']] = wf_provean wf_lf_dataL[['wf_provean']] = wf_provean
wf_lf_dataL[['lf_provean']] = lf_provean wf_lf_dataL[['lf_provean']] = lf_provean
########################################################################### ###########################################################################
# AFFINITY cols # AFFINITY cols
########################################################################### ###########################################################################
#========================= #=========================
# mCSM-lig: # mCSM-lig:
# data filtered by cut off # data filtered by cut off
#========================= #=========================
#--------------------- #---------------------
# mCSM-lig: WF and LF # mCSM-lig: WF and LF
#---------------------- #----------------------
# WF data: mcsm_lig # WF data: mcsm_lig
cols_to_select_mcsm_lig = c(static_cols_start, c("ligand_outcome", mcsm_lig_dn), static_cols_end) cols_to_select_mcsm_lig = c(static_cols_start, c("ligand_outcome", mcsm_lig_dn), static_cols_end)
wf_mcsm_lig = comb_df_sl_lig[, cols_to_select_mcsm_lig] # filtered df wf_mcsm_lig = comb_df_sl_lig[, cols_to_select_mcsm_lig] # filtered df
pivot_cols_mcsm_lig = cols_to_select_mcsm_lig[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_lig pivot_cols_mcsm_lig = cols_to_select_mcsm_lig[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_lig
expected_rows_lf = nrow(wf_mcsm_lig) * (length(wf_mcsm_lig) - length(pivot_cols_mcsm_lig)) expected_rows_lf = nrow(wf_mcsm_lig) * (length(wf_mcsm_lig) - length(pivot_cols_mcsm_lig))
expected_rows_lf expected_rows_lf
# LF data: mcsm_lig # LF data: mcsm_lig
lf_mcsm_lig = gather(wf_mcsm_lig lf_mcsm_lig = gather(wf_mcsm_lig
, key = param_type , key = param_type
, value = param_value , value = param_value
, all_of(mcsm_lig_dn):tail(static_cols_end,1) , all_of(mcsm_lig_dn):tail(static_cols_end,1)
, factor_key = TRUE) , factor_key = TRUE)
if (nrow(lf_mcsm_lig) == expected_rows_lf){ if (nrow(lf_mcsm_lig) == expected_rows_lf){
cat("\nPASS: long format data created for ", mcsm_lig_dn) cat("\nPASS: long format data created for ", mcsm_lig_dn)
}else{ }else{
cat("\nFAIL: long format data could not be created for mcsm_lig") cat("\nFAIL: long format data could not be created for mcsm_lig")
quit() quit()
} }
# NEW columns [outcome and outcome colname] # NEW columns [outcome and outcome colname]
lf_mcsm_lig$outcome_colname = "ligand_outcome" lf_mcsm_lig$outcome_colname = "ligand_outcome"
lf_mcsm_lig$outcome = lf_mcsm_lig$ligand_outcome lf_mcsm_lig$outcome = lf_mcsm_lig$ligand_outcome
# DROP static cols # DROP static cols
lf_mcsm_lig = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_cols_end),] lf_mcsm_lig = lf_mcsm_lig[!lf_mcsm_lig$param_type%in%c(static_cols_end),]
lf_mcsm_lig$param_type = factor(lf_mcsm_lig$param_type) lf_mcsm_lig$param_type = factor(lf_mcsm_lig$param_type)
table(lf_mcsm_lig$param_type); colnames(lf_mcsm_lig) table(lf_mcsm_lig$param_type); colnames(lf_mcsm_lig)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_mcsm_lig']] = wf_mcsm_lig wf_lf_dataL[['wf_mcsm_lig']] = wf_mcsm_lig
wf_lf_dataL[['lf_mcsm_lig']] = lf_mcsm_lig wf_lf_dataL[['lf_mcsm_lig']] = lf_mcsm_lig
#========================= #=========================
# mmCSM-lig2: # mmCSM-lig2:
# data filtered by cut off # data filtered by cut off
#========================= #=========================
#--------------------- #---------------------
# mmCSM-lig2: WF and LF # mmCSM-lig2: WF and LF
#---------------------- #----------------------
# WF data: mmcsm_lig2 # WF data: mmcsm_lig2
cols_to_select_mmcsm_lig2 = c(static_cols_start, c("mmcsm_lig_outcome", mmcsm_lig_dn2), static_cols_end) cols_to_select_mmcsm_lig2 = c(static_cols_start, c("mmcsm_lig_outcome", mmcsm_lig_dn2), static_cols_end)
wf_mmcsm_lig2 = comb_df_sl_lig[, cols_to_select_mmcsm_lig2] # filtered df wf_mmcsm_lig2 = comb_df_sl_lig[, cols_to_select_mmcsm_lig2] # filtered df
pivot_cols_mmcsm_lig2 = cols_to_select_mmcsm_lig2[1: (length(static_cols_start) + 1)]; pivot_cols_mmcsm_lig2 pivot_cols_mmcsm_lig2 = cols_to_select_mmcsm_lig2[1: (length(static_cols_start) + 1)]; pivot_cols_mmcsm_lig2
expected_rows_lf = nrow(wf_mmcsm_lig2) * (length(wf_mmcsm_lig2) - length(pivot_cols_mmcsm_lig2)) expected_rows_lf = nrow(wf_mmcsm_lig2) * (length(wf_mmcsm_lig2) - length(pivot_cols_mmcsm_lig2))
expected_rows_lf expected_rows_lf
# LF data: mmcsm_lig2 # LF data: mmcsm_lig2
lf_mmcsm_lig2 = gather(wf_mmcsm_lig2 lf_mmcsm_lig2 = gather(wf_mmcsm_lig2
, key = param_type , key = param_type
, value = param_value , value = param_value
, all_of(mmcsm_lig_dn2):tail(static_cols_end,1) , all_of(mmcsm_lig_dn2):tail(static_cols_end,1)
, factor_key = TRUE) , factor_key = TRUE)
if (nrow(lf_mmcsm_lig2) == expected_rows_lf){ if (nrow(lf_mmcsm_lig2) == expected_rows_lf){
cat("\nPASS: long format data created for ", mmcsm_lig_dn2) cat("\nPASS: long format data created for ", mmcsm_lig_dn2)
}else{ }else{
cat("\nFAIL: long format data could not be created for mmcsm_lig2") cat("\nFAIL: long format data could not be created for mmcsm_lig2")
quit() quit()
} }
# NEW columns [outcome and outcome colname] # NEW columns [outcome and outcome colname]
lf_mmcsm_lig2$outcome_colname = "mmcsm_lig_outcome" lf_mmcsm_lig2$outcome_colname = "mmcsm_lig_outcome"
lf_mmcsm_lig2$outcome = lf_mmcsm_lig2$mmcsm_lig_outcome lf_mmcsm_lig2$outcome = lf_mmcsm_lig2$mmcsm_lig_outcome
# DROP static cols # DROP static cols
lf_mmcsm_lig2 = lf_mmcsm_lig2[!lf_mmcsm_lig2$param_type%in%c(static_cols_end),] lf_mmcsm_lig2 = lf_mmcsm_lig2[!lf_mmcsm_lig2$param_type%in%c(static_cols_end),]
lf_mmcsm_lig2$param_type = factor(lf_mmcsm_lig2$param_type) lf_mmcsm_lig2$param_type = factor(lf_mmcsm_lig2$param_type)
table(lf_mmcsm_lig2$param_type); colnames(lf_mmcsm_lig2) table(lf_mmcsm_lig2$param_type); colnames(lf_mmcsm_lig2)
# Assign them to the output list # Assign them to the output list
wf_lf_dataL[['wf_mmcsm_lig2']] = wf_mmcsm_lig2 wf_lf_dataL[['wf_mmcsm_lig2']] = wf_mmcsm_lig2
wf_lf_dataL[['lf_mmcsm_lig2']] = lf_mmcsm_lig2 wf_lf_dataL[['lf_mmcsm_lig2']] = lf_mmcsm_lig2
#========================= #=========================
# mcsm-ppi2 affinity # mcsm-ppi2 affinity
# data filtered by cut off # data filtered by cut off
#======================== #========================
if (tolower(gene)%in%geneL_ppi2){ if (tolower(gene)%in%geneL_ppi2){
#----------------- #-----------------
# mCSM-PPI2: WF and LF # mCSM-PPI2: WF and LF
#----------------- #-----------------
@ -724,15 +773,15 @@ if (tolower(gene)%in%geneL_ppi2){
wf_lf_dataL[['wf_mcsm_ppi2']] = wf_mcsm_ppi2 wf_lf_dataL[['wf_mcsm_ppi2']] = wf_mcsm_ppi2
wf_lf_dataL[['lf_mcsm_ppi2']] = lf_mcsm_ppi2 wf_lf_dataL[['lf_mcsm_ppi2']] = lf_mcsm_ppi2
} }
#==================== #====================
# mcsm-NA affinity # mcsm-NA affinity
# data filtered by cut off # data filtered by cut off
#==================== #====================
if (tolower(gene)%in%geneL_na){ if (tolower(gene)%in%geneL_na){
#--------------- #---------------
# mCSM-NA: WF and lF # mCSM-NA: WF and lF
#----------------- #-----------------
@ -772,8 +821,8 @@ if (tolower(gene)%in%geneL_na){
wf_lf_dataL[['wf_mcsm_na']] = wf_mcsm_na wf_lf_dataL[['wf_mcsm_na']] = wf_mcsm_na
wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na
} }
return(wf_lf_dataL) return(wf_lf_dataL)
} }
############################################################################ ############################################################################

View file

@ -12,20 +12,19 @@ geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob") geneL_ppi2 = c("alr", "embb", "katg", "rpob")
if (tolower(gene)%in%geneL_na){ if (tolower(gene)%in%geneL_na){
infilename_nca = paste0("/home/tanu/git/Misc/mcsm_na_dist/" infilename_nca = paste0("/home/tanu/git/Misc/mcsm_na_dist/"
, tolower(gene), "_nca_distances.csv") , tolower(gene), "_nca_distances.csv")
} }
#======================================================== #========================================================
# plotting_data(): formatting data for plots # plotting_data(): formatting data for plots
# input args: # input args:
## input csv file ## input csv file
## lig cut off dist, default = 10 Ang ## lig cut off dist, default = 10 Ang
# output: list of 4 dfs, that need to be decompressed # output: list of 4 dfs, that need to be decompressed
## my_df ## my_df
## my_df_u ## my_df_u
## my_df_u_lig ## my_df_u_lig
## dup_muts ## dup_muts
#======================================================== #========================================================
#lig_dist_colname = 'ligand_distance' or global var LigDist_colname #lig_dist_colname = 'ligand_distance' or global var LigDist_colname
#lig_dist_cutoff = 10 or global var LigDist_cutoff #lig_dist_cutoff = 10 or global var LigDist_cutoff
@ -34,24 +33,24 @@ plotting_data <- function(df
, gene # ADDED , gene # ADDED
, lig_dist_colname , lig_dist_colname
, lig_dist_cutoff) { , lig_dist_cutoff) {
my_df = data.frame() my_df = data.frame()
my_df_u = data.frame() my_df_u = data.frame()
my_df_u_lig = data.frame() my_df_u_lig = data.frame()
dup_muts = data.frame() dup_muts = data.frame()
#=========================== #===========================
# Read file: struct params # Read file: struct params
#=========================== #===========================
#df = read.csv(infile_params, header = T) #df = read.csv(infile_params, header = T)
cat("\nInput dimensions:", dim(df)) cat("\nInput dimensions:", dim(df))
#================================== #==================================
# extract unique mutation entries # extract unique mutation entries
#================================== #==================================
# check for duplicate mutations # check for duplicate mutations
if ( length(unique(df$mutationinformation)) != length(df$mutationinformation)){ if ( length(unique(df$mutationinformation)) != length(df$mutationinformation)){
cat(paste0("\nCAUTION:", " Duplicate mutations identified" cat(paste0("\nCAUTION:", " Duplicate mutations identified"
, "\nExtracting these...\n")) , "\nExtracting these...\n"))
#cat(my_df[duplicated(my_df$mutationinformation),]) #cat(my_df[duplicated(my_df$mutationinformation),])
@ -61,53 +60,94 @@ if ( length(unique(df$mutationinformation)) != length(df$mutationinformation)){
, "\nNo. of unique duplicate mutations:", dup_muts_nu , "\nNo. of unique duplicate mutations:", dup_muts_nu
, "\n\nExtracting df with unique mutations only\n")) , "\n\nExtracting df with unique mutations only\n"))
my_df_u = df[!duplicated(df$mutationinformation),] my_df_u = df[!duplicated(df$mutationinformation),]
}else{ } else {
cat(paste0("\nNo duplicate mutations detected\n")) cat(paste0("\nNo duplicate mutations detected\n"))
my_df_u = df my_df_u = df
} }
upos = unique(my_df_u$position) upos = unique(my_df_u$position)
cat("\nDim of clean df:"); cat(dim(my_df_u), "\n") cat("\nDim of clean df:"); cat(dim(my_df_u), "\n")
cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n") cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n")
#=============================================== #===============================================
# ADD : na distance column for genes with nucleic acid affinity # ADD : na distance column for genes with nucleic acid affinity
#=============================================== #===============================================
#gid_na_distcol # if (tolower(gene)%in%geneL_na){
if (tolower(gene)%in%geneL_na){ #
# distcol_nca_name = read.csv(infilename_nca, header = F)
# head(distcol_nca_name)
# colnames(distcol_nca_name) <- c("mutationinformation", "nca_distance")
# head(distcol_nca_name)
# class(distcol_nca_name)
#
# mcol = colnames(distcol_nca_name)[colnames(distcol_nca_name)%in%colnames(my_df_u)]
# mcol
# head(my_df_u$mutationinformation)
# head(distcol_nca_name$mutationinformation)
#
# my_df_u = merge(my_df_u, distcol_nca_name,
# by = "mutationinformation",
# all = T)
#
# }
if (tolower(gene)%in%geneL_na){
distcol_nca_name = read.csv(infilename_nca, header = F) distcol_nca_name = read.csv(infilename_nca, header = F)
if (tolower(gene)=='rpob'){
print('WARNING: running special-case handler for rpoB')
# create 5uhc equivalent column for mutationinformation
my_df_u$X5uhc_mutationinformation = paste0(my_df_u$wild_type,
my_df_u$X5uhc_position,
my_df_u$mutant_type)
colnames(distcol_nca_name) <- c("X5uhc_mutationinformation", "nca_distance")
# do stuff here
mcol = colnames(distcol_nca_name)[colnames(distcol_nca_name)%in%colnames(my_df_u)]
cat(paste0("\nMerging for gene: ", tolower(gene), "\non column: ", mcol))
head(my_df_u$mutationinformation)
head(distcol_nca_name$X5uhc_mutationinformation)
my_df_u = merge(my_df_u, distcol_nca_name,
by = "X5uhc_mutationinformation",
all = T)
} else {
head(distcol_nca_name) head(distcol_nca_name)
colnames(distcol_nca_name) <- c("mutationinformation", "nca_distance") colnames(distcol_nca_name) <- c("mutationinformation", "nca_distance")
head(distcol_nca_name) head(distcol_nca_name)
class(distcol_nca_name) class(distcol_nca_name)
mcol = colnames(distcol_nca_name)[colnames(distcol_nca_name)%in%colnames(my_df_u)] mcol = colnames(distcol_nca_name)[colnames(distcol_nca_name)%in%colnames(my_df_u)]
mcol cat(paste0("\nMerging for gene: ", tolower(gene), "\non column: ", mcol))
head(my_df_u$mutationinformation) head(my_df_u$mutationinformation)
head(distcol_nca_name$mutationinformation) head(distcol_nca_name$mutationinformation)
my_df_u = merge(my_df_u, distcol_nca_name, my_df_u = merge(my_df_u, distcol_nca_name,
by = "mutationinformation", by = "mutationinformation",
all = T) all = T)
}
}
} #===============================================
#=============================================== # extract mutations <10 Angstroms and symbol
# extract mutations <10 Angstroms and symbol #===============================================
#=============================================== table(my_df_u[[lig_dist_colname]] < lig_dist_cutoff)
table(my_df_u[[lig_dist_colname]] < lig_dist_cutoff)
my_df_u_lig = my_df_u[my_df_u[[lig_dist_colname]] < lig_dist_cutoff,] my_df_u_lig = my_df_u[my_df_u[[lig_dist_colname]] < lig_dist_cutoff,]
cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10\u212b of the ligand\n")) cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10\u212b of the ligand\n"))
# return list of DFs # return list of DFs
my_df = df my_df = df
#df_names = c("my_df", "my_df_u", "my_df_u_lig", "dup_muts") #df_names = c("my_df", "my_df_u", "my_df_u_lig", "dup_muts")
all_df = list(my_df, my_df_u, my_df_u_lig, dup_muts) all_df = list(my_df, my_df_u, my_df_u_lig, dup_muts)
#all_df = Map(setNames, all_df, df_names) #all_df = Map(setNames, all_df, df_names)
return(all_df) return(all_df)
} }
######################################################################## ########################################################################
# end of data extraction and cleaning for plots # # end of data extraction and cleaning for plots #
######################################################################## ########################################################################

View file

@ -60,8 +60,8 @@ pd_df = plotting_data(mcsm_df
my_df = pd_df[[1]] my_df = pd_df[[1]]
my_df_u = pd_df[[2]] # this forms one of the input for combining_dfs_plotting() my_df_u = pd_df[[2]] # this forms one of the input for combining_dfs_plotting()
max_ang <- round(max(my_df_u[LigDist_colname])) max_ang <- round(max(my_df_u[[LigDist_colname]]))
min_ang <- round(min(my_df_u[LigDist_colname])) min_ang <- round(min(my_df_u[[LigDist_colname]]))
cat("\nLigand distance colname:", LigDist_colname cat("\nLigand distance colname:", LigDist_colname
, "\nThe max distance", gene, "structure df" , ":", max_ang, "\u212b" , "\nThe max distance", gene, "structure df" , ":", max_ang, "\u212b"
@ -128,6 +128,11 @@ geneL_normal = c("pnca")
geneL_na = c("gid", "rpob") geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob") geneL_ppi2 = c("alr", "embb", "katg", "rpob")
# geneL_normal = c("pnca")
# geneL_both = c("rpob")
# geneL_ppi2 = c("alr", "embb", "katg")
# geneL_na = c("gid")
all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene) all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)
wf_duet = all_dm_om_df[['wf_duet']] wf_duet = all_dm_om_df[['wf_duet']]
@ -158,15 +163,27 @@ lf_provean = all_dm_om_df[['lf_provean']]
wf_dist_gen = all_dm_om_df[['wf_dist_gen']] wf_dist_gen = all_dm_om_df[['wf_dist_gen']]
lf_dist_gen = all_dm_om_df[['lf_dist_gen']] lf_dist_gen = all_dm_om_df[['lf_dist_gen']]
# ppi2 genes
if (tolower(gene)%in%geneL_ppi2){
wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']]
lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']]
}
# na genes
if (tolower(gene)%in%geneL_na){ if (tolower(gene)%in%geneL_na){
wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']] wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']]
lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']] lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]
} }
if (tolower(gene)%in%geneL_ppi2){ # both ppi2+na genes:: NOT NEEDED Here as its is handled by the two ifs above
wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']] # if (tolower(gene)%in%geneL_both){
lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']] # wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']]
} # lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']]
#
# wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']]
# lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]
# }
s2 = c("\nSuccessfully sourced other_plots_data.R") s2 = c("\nSuccessfully sourced other_plots_data.R")
cat(s2) cat(s2)