git trimmed down the dm_om_data.R
This commit is contained in:
parent
fae846395d
commit
05ab89ec09
5 changed files with 168 additions and 351 deletions
|
@ -343,20 +343,45 @@ combining_dfs_plotting <- function( my_df_u
|
||||||
, "\nNo. of rows merged_df3: ", nrow(merged_df3))
|
, "\nNo. of rows merged_df3: ", nrow(merged_df3))
|
||||||
quit()
|
quit()
|
||||||
}
|
}
|
||||||
|
#=========================================
|
||||||
|
# NEW: add consurf outcome
|
||||||
|
#=========================================
|
||||||
|
consurf_colOld = "consurf_colour_rev"
|
||||||
|
consurf_colNew = "consurf_outcome"
|
||||||
|
merged_df3[[consurf_colNew]] = merged_df3[[consurf_colOld]]
|
||||||
|
merged_df3[[consurf_colNew]] = as.factor(merged_df3[[consurf_colNew]])
|
||||||
|
merged_df3[[consurf_colNew]]
|
||||||
|
#levels(merged_df3$consurf_outcome) = c("nsd", 1, 2, 3, 4, 5, 6, 7, 8, 9)
|
||||||
|
|
||||||
|
merged_df2[[consurf_colNew]] = merged_df2[[consurf_colOld]]
|
||||||
|
merged_df2[[consurf_colNew]] = as.factor(merged_df2[[consurf_colNew]])
|
||||||
|
merged_df2[[consurf_colNew]]
|
||||||
|
|
||||||
|
#=========================================
|
||||||
|
# NEW: fixed case for SNAP2 labels
|
||||||
|
#=========================================
|
||||||
|
snap2_colname = "snap2_outcome"
|
||||||
|
merged_df3[[snap2_colname]] <- str_replace(merged_df3[[snap2_colname]], "effect", "Effect")
|
||||||
|
merged_df3[[snap2_colname]] <- str_replace(merged_df3[[snap2_colname]], "neutral", "Neutral")
|
||||||
|
|
||||||
|
merged_df2[[snap2_colname]] <- str_replace(merged_df2[[snap2_colname]], "effect", "Effect")
|
||||||
|
merged_df2[[snap2_colname]] <- str_replace(merged_df2[[snap2_colname]], "neutral", "Neutral")
|
||||||
|
|
||||||
#---------------------------------------------
|
#---------------------------------------------
|
||||||
# add columns that are needed to generate plots with revised colnames and strings
|
# NEW: add columns that are needed to generate
|
||||||
|
# plots with revised colnames and strings
|
||||||
#----------------------------------------------
|
#----------------------------------------------
|
||||||
merged_df3['sensitivity'] = ifelse(merged_df3['dst_mode'] == 1, "R", "S")
|
merged_df3$sensitivity = ifelse(merged_df3$dst_mode == 1, "R", "S")
|
||||||
merged_df3['mutation_info_labels'] = ifelse(merged_df3['mutation_info_labels'] == "DM", "R", "S")
|
merged_df3$mutation_info_labels = ifelse(merged_df3$mutation_info_labels == "DM", "R", "S")
|
||||||
|
|
||||||
merged_df2['sensitivity'] = ifelse(merged_df2['dst_mode'] == 1, "R", "S")
|
merged_df2$sensitivity = ifelse(merged_df2$dst_mode == 1, "R", "S")
|
||||||
merged_df2['mutation_info_labels'] = ifelse(merged_df2['mutation_info_labels'] == "DM", "R", "S")
|
merged_df2$mutation_info_labels = ifelse(merged_df2$mutation_info_labels == "DM", "R", "S")
|
||||||
|
|
||||||
#check1 = all(table(merged_df3["mutation_info_labels"]) == table(merged_df3['sensitivity']))
|
# for epistasis: fill na where dst: No equivalent in merged_df3
|
||||||
#check2 = all(table(merged_df2["mutation_info_labels"]) == table(merged_df2['sensitivity']))
|
merged_df2$dst2 = ifelse(is.na(merged_df2$dst), merged_df2$dst_mode, merged_df2$dst)
|
||||||
|
|
||||||
check1 = all(merged_df3["mutation_info_labels"] == merged_df3['sensitivity'])
|
check1 = all(merged_df3$mutation_info_labels == merged_df3$sensitivity)
|
||||||
check2 = all(merged_df2["mutation_info_labels"] == merged_df2['sensitivity'])
|
check2 = all(merged_df2$mutation_info_labels == merged_df2$sensitivity)
|
||||||
|
|
||||||
if(check1 && check2){
|
if(check1 && check2){
|
||||||
cat("PASS: merged_df3 and merged_df2 have mutation info labels as R and S"
|
cat("PASS: merged_df3 and merged_df2 have mutation info labels as R and S"
|
||||||
|
|
|
@ -5,47 +5,16 @@
|
||||||
# duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc
|
# duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc
|
||||||
# Called by get_plotting_dfs.R
|
# Called by get_plotting_dfs.R
|
||||||
|
|
||||||
# dm_om_wf_lf_data()
|
|
||||||
# INPUT:
|
|
||||||
# df: merged_df3 (data with all parameters)
|
|
||||||
# NOTE*: merged_df2 will not be appropriate as it brings up most params as significant (!?), at least for gid
|
|
||||||
# gene: [conditional generation of dfs like mcsm-NA, mcsm-ppi2 as not all genes have all these values]
|
|
||||||
# colnames_to_extract : columns to extract, either user-specified.
|
|
||||||
#By default it is c("mutationinformation" , "duet_affinity_change...")
|
|
||||||
# ligand_dist_colname : column name containing ligand distance. By default, it is LigDist_colname (imported from globals)
|
|
||||||
# dr_muts : dr_muts_col (imported from globals; dr_mutations_<drug>)
|
|
||||||
# other_muts : other_muts_col (imported from globals ...other_mutations_<drug>)
|
|
||||||
# snp_colname : SNP column name. By default it is "mutationinformation"
|
|
||||||
# aa_pos_colname : Column name containing the aa position. This is used to sort the df by.
|
|
||||||
# mut_colname : Column name containing snp info in format "<abc_pXXdef>". By default, it is "mutation"
|
|
||||||
# mut_info_colname : Column name containing mutation info whether it is DM or OM. By default, it is "mutation_info"
|
|
||||||
# mut_info_label_colname : Column containing pre-formatted labels for mutation info.
|
|
||||||
# For my use case, this is called "mutation_info_labels"
|
|
||||||
# This column has short labels like DM and OM corresponding to dr_muts and other_muts.
|
|
||||||
# NOTE*: if this is left empty, then the arg ('dr_other_muts_labels') will be used
|
|
||||||
# dr_other_muts_labels : User specified labels, must correspond to dr_muts and other_muts.
|
|
||||||
# NOTE*: Only used if the arg (mut_info_label_colname) is empty!
|
|
||||||
# categ_cols_to_factor : Column names to convert to factors. These mainly correspond to the outcome columns associated with the
|
|
||||||
# arg ('colnames_to_extract'). These have the suffix "_outcome" in their colnames. Additionally column 'mutation_info' is also
|
|
||||||
# converted to factor. By default, it converts the cols with '_outcome' and 'info' to factor.
|
|
||||||
# Users are able to provide a vector of their corresponding column names
|
|
||||||
|
|
||||||
# RETURNS: List
|
|
||||||
# WF and LF data grouped by mutation_info, i.e. DM (drug mutations) and OM (other mutations)
|
|
||||||
|
|
||||||
# TO DO: SHINY
|
|
||||||
#1) df to choose (merged_df3 or merged_df2)
|
|
||||||
#2)
|
|
||||||
##################################################################
|
##################################################################
|
||||||
DistCutOff = 10
|
# from plotting_globals.R
|
||||||
#LigDist_colname # = "ligand_distance" # from globals
|
# DistCutOff, LigDist_colname, ppi2Dist_colname, naDist_colname
|
||||||
ppi2Dist_colname = "interface_dist"
|
|
||||||
naDist_colname = "TBC"
|
|
||||||
|
|
||||||
dm_om_wf_lf_data <- function(df
|
dm_om_wf_lf_data <- function(df
|
||||||
, gene_name = gene # from globals
|
, gene_name = gene # from globals
|
||||||
, colnames_to_extract
|
, colnames_to_extract
|
||||||
, ligand_dist_colname = LigDist_colname # from globals
|
, ligand_dist_colname = LigDist_colname # from globals
|
||||||
|
#, ppi2Dist_colname #from globals used
|
||||||
|
#, naDist_colname #from globals used
|
||||||
, dr_muts = dr_muts_col # from globals
|
, dr_muts = dr_muts_col # from globals
|
||||||
, other_muts = other_muts_col # from globals
|
, other_muts = other_muts_col # from globals
|
||||||
, snp_colname = "mutationinformation"
|
, snp_colname = "mutationinformation"
|
||||||
|
@ -53,28 +22,19 @@ dm_om_wf_lf_data <- function(df
|
||||||
, mut_colname = "mutation"
|
, mut_colname = "mutation"
|
||||||
, mut_info_colname = "mutation_info"
|
, mut_info_colname = "mutation_info"
|
||||||
, mut_info_label_colname = "mutation_info_labels" # if empty, below used
|
, mut_info_label_colname = "mutation_info_labels" # if empty, below used
|
||||||
, dr_other_muts_labels = c("DM", "OM") # only used if ^^ = ""
|
#, dr_other_muts_labels = c("DM", "OM") # only used if ^^ = ""
|
||||||
, categ_cols_to_factor){
|
, categ_cols_to_factor){
|
||||||
|
|
||||||
df = as.data.frame(df)
|
df = as.data.frame(df)
|
||||||
|
|
||||||
df['sensitivity'] = ifelse(df['dst_mode'] == 1, "R", "S")
|
|
||||||
table(df['sensitivity'])
|
|
||||||
|
|
||||||
df[[mut_info_label_colname]] = ifelse(df[[mut_info_label_colname]] == "DM", "R", "S")
|
|
||||||
table(df[[mut_info_label_colname]])
|
|
||||||
|
|
||||||
|
|
||||||
# Initialise the required dfs based on gene name
|
# Initialise the required dfs based on gene name
|
||||||
geneL_normal = c("pnca")
|
geneL_normal = c("pnca")
|
||||||
#geneL_na_dy = c("gid")
|
|
||||||
geneL_na = c("gid", "rpob")
|
geneL_na = c("gid", "rpob")
|
||||||
geneL_dy = c("gid")
|
|
||||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||||
|
|
||||||
# common_dfs
|
# common_dfs
|
||||||
common_dfsL = list(
|
common_dfsL = list(
|
||||||
wf_duet = data.frame()
|
wf_duet = data.frame()
|
||||||
, lf_duet = data.frame()
|
, lf_duet = data.frame()
|
||||||
, wf_mcsm_lig = data.frame()
|
, wf_mcsm_lig = data.frame()
|
||||||
, lf_mcsm_lig = data.frame()
|
, lf_mcsm_lig = data.frame()
|
||||||
|
@ -110,24 +70,6 @@ dm_om_wf_lf_data <- function(df
|
||||||
)
|
)
|
||||||
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tolower(gene_name)%in%geneL_dy){
|
|
||||||
additional_dfL = list(
|
|
||||||
wf_mcsm_na = data.frame()
|
|
||||||
, lf_mcsm_na = data.frame()
|
|
||||||
, wf_dynamut = data.frame()
|
|
||||||
, lf_dynamut = data.frame()
|
|
||||||
, wf_encomddg = data.frame()
|
|
||||||
, lf_encomddg = data.frame()
|
|
||||||
, wf_encomdds = data.frame()
|
|
||||||
, lf_encomdds = data.frame()
|
|
||||||
, wf_sdm = data.frame()
|
|
||||||
, lf_sdm = data.frame()
|
|
||||||
, wf_mcsm = data.frame()
|
|
||||||
, lf_mcsm = data.frame()
|
|
||||||
)
|
|
||||||
wf_lf_dataL = c(common_dfsL, additional_dfL)
|
|
||||||
}
|
|
||||||
cat("\nInitializing an empty list of length:"
|
cat("\nInitializing an empty list of length:"
|
||||||
, length(wf_lf_dataL))
|
, length(wf_lf_dataL))
|
||||||
|
|
||||||
|
@ -137,26 +79,21 @@ dm_om_wf_lf_data <- function(df
|
||||||
colnames_to_extract = c(snp_colname
|
colnames_to_extract = c(snp_colname
|
||||||
, mut_colname, mut_info_colname, mut_info_label_colname
|
, mut_colname, mut_info_colname, mut_info_label_colname
|
||||||
, aa_pos_colname
|
, aa_pos_colname
|
||||||
, LigDist_colname
|
, LigDist_colname # from globals
|
||||||
, ppi2Dist_colname, naDist_colname
|
, ppi2Dist_colname # from globals
|
||||||
|
, naDist_colname # from globals
|
||||||
, "duet_stability_change" , "duet_scaled" , "duet_outcome"
|
, "duet_stability_change" , "duet_scaled" , "duet_outcome"
|
||||||
, "ligand_affinity_change", "affinity_scaled" , "ligand_outcome"
|
, "ligand_affinity_change", "affinity_scaled" , "ligand_outcome"
|
||||||
, "ddg_foldx" , "foldx_scaled" , "foldx_outcome"
|
, "ddg_foldx" , "foldx_scaled" , "foldx_outcome"
|
||||||
, "deepddg" , "deepddg_scaled" , "deepddg_outcome"
|
, "deepddg" , "deepddg_scaled" , "deepddg_outcome"
|
||||||
, "asa" , "rsa"
|
, "asa" , "rsa"
|
||||||
, "rd_values" , "kd_values"
|
, "rd_values" , "kd_values"
|
||||||
, "log10_or_mychisq" , "neglog_pval_fisher" , "af"
|
, "log10_or_mychisq" , "neglog_pval_fisher" , "maf" #"af"
|
||||||
, "ddg_dynamut2" , "ddg_dynamut2_scaled", "ddg_dynamut2_outcome"
|
, "ddg_dynamut2" , "ddg_dynamut2_scaled", "ddg_dynamut2_outcome"
|
||||||
, "mcsm_ppi2_affinity" , "mcsm_ppi2_scaled" , "mcsm_ppi2_outcome"
|
, "mcsm_ppi2_affinity" , "mcsm_ppi2_scaled" , "mcsm_ppi2_outcome"
|
||||||
, "consurf_score" , "consurf_scaled" #, "consurf_outcome"
|
, "consurf_score" , "consurf_scaled" , "consurf_outcome" # exists now
|
||||||
, "snap2_score" , "snap2_scaled" , "snap2_outcome"
|
, "snap2_score" , "snap2_scaled" , "snap2_outcome"
|
||||||
, "mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome"
|
, "mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome")
|
||||||
, "ddg_dynamut" , "ddg_dynamut_scaled" , "ddg_dynamut_outcome"
|
|
||||||
, "ddg_encom" , "ddg_encom_scaled" , "ddg_encom_outcome"
|
|
||||||
, "dds_encom" , "dds_encom_scaled" , "dds_encom_outcome"
|
|
||||||
, "ddg_mcsm" , "ddg_mcsm_scaled" , "ddg_mcsm_outcome"
|
|
||||||
, "ddg_sdm" , "ddg_sdm_scaled" , "ddg_sdm_outcome"
|
|
||||||
, "ddg_duet" , "ddg_duet_scaled" , "ddg_duet_outcome")
|
|
||||||
}else{
|
}else{
|
||||||
colnames_to_extract = c(mut_colname, mut_info_colname, mut_info_label_colname
|
colnames_to_extract = c(mut_colname, mut_info_colname, mut_info_label_colname
|
||||||
, aa_pos_colname, LigDist_colname
|
, aa_pos_colname, LigDist_colname
|
||||||
|
@ -186,47 +123,29 @@ dm_om_wf_lf_data <- function(df
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
table(comb_df_s[[mut_info_colname]])
|
table(comb_df_s[[mut_info_colname]])
|
||||||
|
|
||||||
# further checks to make sure dr and other muts are indeed unique
|
|
||||||
dr_muts = comb_df_s[comb_df_s[[mut_info_colname]] == dr_muts,]
|
|
||||||
dr_muts_names = unique(dr_muts$mutation)
|
|
||||||
|
|
||||||
other_muts = comb_df_s[comb_df_s[[mut_info_colname]] == other_muts,]
|
|
||||||
other_muts_names = unique(other_muts$mutation)
|
|
||||||
|
|
||||||
if ( table(dr_muts_names%in%other_muts_names)[[1]] == length(dr_muts_names) &&
|
|
||||||
table(other_muts_names%in%dr_muts_names)[[1]] == length(other_muts_names) ){
|
|
||||||
cat("PASS: dr and other muts are indeed unique")
|
|
||||||
}else{
|
|
||||||
cat("FAIL: dr and others muts are NOT unique!")
|
|
||||||
quit()
|
|
||||||
}
|
|
||||||
|
|
||||||
# pretty display names i.e. labels to reduce major code duplication later
|
# pretty display names i.e. labels to reduce major code duplication later
|
||||||
foo_cnames = data.frame(colnames(comb_df_s))
|
foo_cnames = data.frame(colnames(comb_df_s))
|
||||||
names(foo_cnames) <- "old_name"
|
names(foo_cnames) <- "old_name"
|
||||||
|
|
||||||
stability_suffix <- paste0(delta_symbol, delta_symbol, "G")
|
stability_suffix <- paste0(delta_symbol, delta_symbol, "G")
|
||||||
flexibility_suffix <- paste0(delta_symbol, delta_symbol, "S")
|
#flexibility_suffix <- paste0(delta_symbol, delta_symbol, "S")
|
||||||
|
|
||||||
lig_dn = paste0("Ligand distance (", angstroms_symbol, ")"); lig_dn
|
#lig_dn = paste0("Ligand distance (", angstroms_symbol, ")"); lig_dn
|
||||||
mcsm_lig_dn = paste0("Ligand affinity (log fold change)"); mcsm_lig_dn
|
#mcsm_lig_dn = paste0("Ligand affinity (log fold change)"); mcsm_lig_dn
|
||||||
|
|
||||||
|
lig_dn = paste0("Lig Dist(", angstroms_symbol, ")"); lig_dn
|
||||||
|
mcsm_lig_dn = paste0("mCSM-lig"); mcsm_lig_dn
|
||||||
|
|
||||||
duet_dn = paste0("DUET ", stability_suffix); duet_dn
|
duet_dn = paste0("DUET ", stability_suffix); duet_dn
|
||||||
foldx_dn = paste0("FoldX ", stability_suffix); foldx_dn
|
foldx_dn = paste0("FoldX ", stability_suffix); foldx_dn
|
||||||
deepddg_dn = paste0("Deepddg " , stability_suffix); deepddg_dn
|
deepddg_dn = paste0("Deepddg " , stability_suffix); deepddg_dn
|
||||||
dynamut2_dn = paste0("Dynamut2 " , stability_suffix); dynamut2_dn
|
dynamut2_dn = paste0("Dynamut2 " , stability_suffix); dynamut2_dn
|
||||||
|
|
||||||
mcsm_na_dn = paste0("mCSM-NA affinity ", stability_suffix); mcsm_na_dn
|
mcsm_na_dn = paste0("mCSM-NA ", stability_suffix); mcsm_na_dn
|
||||||
mcsm_ppi2_dn = paste0("mCSM-PPI2 affinity ", stability_suffix); mcsm_ppi2_dn
|
mcsm_ppi2_dn = paste0("mCSM-PPI2 ", stability_suffix); mcsm_ppi2_dn
|
||||||
consurf_dn = paste0("Consurf"); consurf_dn
|
consurf_dn = paste0("Consurf"); consurf_dn
|
||||||
snap2_dn = paste0("SNAP2"); snap2_dn
|
snap2_dn = paste0("SNAP2"); snap2_dn
|
||||||
|
|
||||||
dynamut_dn = paste0("Dynamut ", stability_suffix); dynamut_dn
|
|
||||||
encom_ddg_dn = paste0("EnCOM " , stability_suffix); encom_ddg_dn
|
|
||||||
encom_dds_dn = paste0("EnCOM " , flexibility_suffix ); encom_dds_dn
|
|
||||||
sdm_dn = paste0("SDM " , stability_suffix); sdm_dn
|
|
||||||
mcsm_dn = paste0("mCSM " , stability_suffix ); mcsm_dn
|
|
||||||
|
|
||||||
|
|
||||||
# change column names: plyr
|
# change column names: plyr
|
||||||
new_colnames = c(asa = "ASA"
|
new_colnames = c(asa = "ASA"
|
||||||
|
@ -235,7 +154,8 @@ new_colnames = c(asa = "ASA"
|
||||||
, kd_values = "KD"
|
, kd_values = "KD"
|
||||||
, log10_or_mychisq = "Log10 (OR)"
|
, log10_or_mychisq = "Log10 (OR)"
|
||||||
, neglog_pval_fisher = "-Log (P)"
|
, neglog_pval_fisher = "-Log (P)"
|
||||||
, af = "MAF"
|
#, af = "MAF"
|
||||||
|
, maf = "MAF"
|
||||||
#, ligand_dist_colname = lig_dn # cannot handle variable name 'ligand_dist_colname'
|
#, ligand_dist_colname = lig_dn # cannot handle variable name 'ligand_dist_colname'
|
||||||
, affinity_scaled = mcsm_lig_dn
|
, affinity_scaled = mcsm_lig_dn
|
||||||
, duet_scaled = duet_dn
|
, duet_scaled = duet_dn
|
||||||
|
@ -245,12 +165,7 @@ new_colnames = c(asa = "ASA"
|
||||||
, mcsm_na_scaled = mcsm_na_dn
|
, mcsm_na_scaled = mcsm_na_dn
|
||||||
, mcsm_ppi2_affinity = mcsm_ppi2_dn
|
, mcsm_ppi2_affinity = mcsm_ppi2_dn
|
||||||
, consurf_score = consurf_dn
|
, consurf_score = consurf_dn
|
||||||
, snap2_score = snap2_dn
|
, snap2_score = snap2_dn)
|
||||||
, ddg_dynamut_scaled = dynamut_dn
|
|
||||||
, ddg_encom_scaled = encom_ddg_dn
|
|
||||||
, dds_encom_scaled = encom_dds_dn
|
|
||||||
, ddg_sdm = sdm_dn
|
|
||||||
, ddg_mcsm = mcsm_dn)
|
|
||||||
|
|
||||||
comb_df_sl1 = plyr::rename(comb_df_s
|
comb_df_sl1 = plyr::rename(comb_df_s
|
||||||
, replace = new_colnames
|
, replace = new_colnames
|
||||||
|
@ -260,29 +175,26 @@ comb_df_sl1 = plyr::rename(comb_df_s
|
||||||
# renaming colname using variable i.e ligand_dist_colname: dplyr
|
# renaming colname using variable i.e ligand_dist_colname: dplyr
|
||||||
comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(ligand_dist_colname))
|
comb_df_sl = comb_df_sl1 %>% dplyr::rename(!!lig_dn := all_of(ligand_dist_colname))
|
||||||
names(comb_df_sl)
|
names(comb_df_sl)
|
||||||
|
|
||||||
|
#=======================
|
||||||
|
# NEW: Affinity filtered data
|
||||||
|
#========================
|
||||||
|
# mcsm-lig --> LigDist_colname
|
||||||
|
comb_df_sl_lig = comb_df_sl[comb_df_sl[[lig_dn]]<DistCutOff,]
|
||||||
|
|
||||||
|
# mcsm-ppi2 --> ppi2Dist_colname
|
||||||
|
comb_df_sl_ppi2 = comb_df_sl[comb_df_sl[[ppi2Dist_colname]]<DistCutOff,]
|
||||||
|
|
||||||
|
# mcsm-na --> naDist_colname
|
||||||
|
comb_df_sl_na = comb_df_sl[comb_df_sl[[naDist_colname]]<DistCutOff,]
|
||||||
|
|
||||||
#####################################################################
|
#####################################################################
|
||||||
if (mut_info_label_colname == "") {
|
static_cols1 = mut_info_label_colname
|
||||||
cat("\nAssigning labels:", dr_other_muts_labels, "--> to column:", mut_info_colname)
|
|
||||||
table(comb_df_sl[[mut_info_colname]])
|
|
||||||
|
|
||||||
# dr_muts
|
|
||||||
levels(comb_df_sl[[mut_info_colname]])[levels(comb_df_sl[[mut_info_colname]])==dr_muts] <- dr_other_muts_labels[[1]]
|
|
||||||
# other_muts
|
|
||||||
levels(comb_df_sl[[mut_info_colname]])[levels(comb_df_sl[[mut_info_colname]])==other_muts] <- dr_other_muts_labels[[2]]
|
|
||||||
table(comb_df_sl[[mut_info_colname]])
|
|
||||||
|
|
||||||
static_cols1 = mut_info_colname
|
|
||||||
}else{
|
|
||||||
table(comb_df_sl[[mut_info_label_colname]])
|
|
||||||
static_cols1 = mut_info_label_colname
|
|
||||||
|
|
||||||
}
|
|
||||||
#######################################################################
|
#######################################################################
|
||||||
#======================
|
#======================
|
||||||
# Selecting dfs
|
# Selecting dfs
|
||||||
# with appropriate cols
|
# with appropriate cols
|
||||||
#=======================
|
#=======================
|
||||||
|
|
||||||
static_cols_start = c(snp_colname
|
static_cols_start = c(snp_colname
|
||||||
, aa_pos_colname
|
, aa_pos_colname
|
||||||
, mut_colname
|
, mut_colname
|
||||||
|
@ -296,7 +208,8 @@ static_cols_end = c(lig_dn
|
||||||
, "KD"
|
, "KD"
|
||||||
, "MAF"
|
, "MAF"
|
||||||
, "Log10 (OR)"
|
, "Log10 (OR)"
|
||||||
, "-Log (P)")
|
#, "-Log (P)"
|
||||||
|
)
|
||||||
|
|
||||||
#########################################################################
|
#########################################################################
|
||||||
#==============
|
#==============
|
||||||
|
@ -312,7 +225,7 @@ expected_rows_lf = nrow(wf_duet) * (length(wf_duet) - length(pivot_cols_duet))
|
||||||
expected_rows_lf
|
expected_rows_lf
|
||||||
|
|
||||||
# LF data: duet
|
# LF data: duet
|
||||||
lf_duet = gather(wf_duet
|
lf_duet = tidyr::gather(wf_duet
|
||||||
, key = param_type
|
, key = param_type
|
||||||
, value = param_value
|
, value = param_value
|
||||||
, all_of(duet_dn):tail(static_cols_end,1)
|
, all_of(duet_dn):tail(static_cols_end,1)
|
||||||
|
@ -329,35 +242,6 @@ if (nrow(lf_duet) == expected_rows_lf){
|
||||||
wf_lf_dataL[['wf_duet']] = wf_duet
|
wf_lf_dataL[['wf_duet']] = wf_duet
|
||||||
wf_lf_dataL[['lf_duet']] = lf_duet
|
wf_lf_dataL[['lf_duet']] = lf_duet
|
||||||
|
|
||||||
############################################################################
|
|
||||||
#==============
|
|
||||||
# mCSM-lig
|
|
||||||
#==============
|
|
||||||
# WF data: mcsm_lig
|
|
||||||
cols_to_select_mcsm_lig = c(static_cols_start, c("ligand_outcome", mcsm_lig_dn), static_cols_end)
|
|
||||||
wf_mcsm_lig = comb_df_sl[, cols_to_select_mcsm_lig]
|
|
||||||
|
|
||||||
pivot_cols_mcsm_lig = cols_to_select_mcsm_lig[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_lig
|
|
||||||
expected_rows_lf = nrow(wf_mcsm_lig) * (length(wf_mcsm_lig) - length(pivot_cols_mcsm_lig))
|
|
||||||
expected_rows_lf
|
|
||||||
|
|
||||||
# LF data: mcsm_lig
|
|
||||||
lf_mcsm_lig = gather(wf_mcsm_lig
|
|
||||||
, key = param_type
|
|
||||||
, value = param_value
|
|
||||||
, all_of(mcsm_lig_dn):tail(static_cols_end,1)
|
|
||||||
, factor_key = TRUE)
|
|
||||||
|
|
||||||
if (nrow(lf_mcsm_lig) == expected_rows_lf){
|
|
||||||
cat("\nPASS: long format data created for ", mcsm_lig_dn)
|
|
||||||
}else{
|
|
||||||
cat("\nFAIL: long format data could not be created for mcsm_lig")
|
|
||||||
quit()
|
|
||||||
}
|
|
||||||
|
|
||||||
# Assign them to the output list
|
|
||||||
wf_lf_dataL[['wf_mcsm_lig']] = wf_mcsm_lig
|
|
||||||
wf_lf_dataL[['lf_mcsm_lig']] = lf_mcsm_lig
|
|
||||||
############################################################################
|
############################################################################
|
||||||
#==============
|
#==============
|
||||||
# FoldX
|
# FoldX
|
||||||
|
@ -446,7 +330,9 @@ if (nrow(lf_dynamut2) == expected_rows_lf){
|
||||||
# Assign them to the output list
|
# Assign them to the output list
|
||||||
wf_lf_dataL[['wf_dynamut2']] = wf_dynamut2
|
wf_lf_dataL[['wf_dynamut2']] = wf_dynamut2
|
||||||
wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2
|
wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2
|
||||||
############################################################################
|
|
||||||
|
|
||||||
|
######################################################################################
|
||||||
#==================
|
#==================
|
||||||
# Consurf: LF
|
# Consurf: LF
|
||||||
#https://consurf.tau.ac.il/overview.php
|
#https://consurf.tau.ac.il/overview.php
|
||||||
|
@ -459,9 +345,9 @@ wf_lf_dataL[['lf_dynamut2']] = lf_dynamut2
|
||||||
#5-->"", 6-->"", 7-->"", 8-->"", 9-->"most_conserved"
|
#5-->"", 6-->"", 7-->"", 8-->"", 9-->"most_conserved"
|
||||||
#====================
|
#====================
|
||||||
# FIXME: if you add category column to consurf
|
# FIXME: if you add category column to consurf
|
||||||
#cols_to_select_consurf = c(static_cols_start, c("consurf_outcome", consurf_dn), static_cols_end)
|
cols_to_select_consurf = c(static_cols_start, c("consurf_outcome", consurf_dn), static_cols_end)
|
||||||
#wf_consurf = comb_df_sl[, cols_to_select_consurf]
|
wf_consurf = comb_df_sl[, cols_to_select_consurf]
|
||||||
#pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start) + 1)]; pivot_cols_consurf
|
pivot_cols_consurf = cols_to_select_consurf[1: (length(static_cols_start) + 1)]; pivot_cols_consurf
|
||||||
|
|
||||||
# WF data: consurf
|
# WF data: consurf
|
||||||
cols_to_select_consurf = c(static_cols_start, c(consurf_dn), static_cols_end)
|
cols_to_select_consurf = c(static_cols_start, c(consurf_dn), static_cols_end)
|
||||||
|
@ -517,15 +403,54 @@ if (nrow(lf_snap2) == expected_rows_lf){
|
||||||
# Assign them to the output list
|
# Assign them to the output list
|
||||||
wf_lf_dataL[['wf_snap2']] = wf_snap2
|
wf_lf_dataL[['wf_snap2']] = wf_snap2
|
||||||
wf_lf_dataL[['lf_snap2']] = lf_snap2
|
wf_lf_dataL[['lf_snap2']] = lf_snap2
|
||||||
|
###########################################################################
|
||||||
|
# AFFINITY cols
|
||||||
|
###########################################################################
|
||||||
|
#=========================
|
||||||
|
# mCSM-lig:
|
||||||
|
# data filtered by cut off
|
||||||
|
#=========================
|
||||||
|
#---------------------
|
||||||
|
# mCSM-lig: WF and LF
|
||||||
|
#----------------------
|
||||||
|
# WF data: mcsm_lig
|
||||||
|
cols_to_select_mcsm_lig = c(static_cols_start, c("ligand_outcome", mcsm_lig_dn), static_cols_end)
|
||||||
|
wf_mcsm_lig = comb_df_sl_lig[, cols_to_select_mcsm_lig] # filtered df
|
||||||
|
|
||||||
############################################################################
|
pivot_cols_mcsm_lig = cols_to_select_mcsm_lig[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_lig
|
||||||
|
expected_rows_lf = nrow(wf_mcsm_lig) * (length(wf_mcsm_lig) - length(pivot_cols_mcsm_lig))
|
||||||
|
expected_rows_lf
|
||||||
|
|
||||||
|
# LF data: mcsm_lig
|
||||||
|
lf_mcsm_lig = gather(wf_mcsm_lig
|
||||||
|
, key = param_type
|
||||||
|
, value = param_value
|
||||||
|
, all_of(mcsm_lig_dn):tail(static_cols_end,1)
|
||||||
|
, factor_key = TRUE)
|
||||||
|
|
||||||
|
if (nrow(lf_mcsm_lig) == expected_rows_lf){
|
||||||
|
cat("\nPASS: long format data created for ", mcsm_lig_dn)
|
||||||
|
}else{
|
||||||
|
cat("\nFAIL: long format data could not be created for mcsm_lig")
|
||||||
|
quit()
|
||||||
|
}
|
||||||
|
|
||||||
|
# Assign them to the output list
|
||||||
|
wf_lf_dataL[['wf_mcsm_lig']] = wf_mcsm_lig
|
||||||
|
wf_lf_dataL[['lf_mcsm_lig']] = lf_mcsm_lig
|
||||||
|
|
||||||
|
#====================
|
||||||
|
# mcsm-NA affinity
|
||||||
|
# data filtered by cut off
|
||||||
|
#====================
|
||||||
if (tolower(gene_name)%in%geneL_na){
|
if (tolower(gene_name)%in%geneL_na){
|
||||||
#==============
|
#---------------
|
||||||
# mCSM-NA: LF
|
# mCSM-NA: WF and LF
|
||||||
#==============
|
#-----------------
|
||||||
# WF data: mcsm-na
|
# WF data: mcsm-na
|
||||||
cols_to_select_mcsm_na = c(static_cols_start, c("mcsm_na_outcome", mcsm_na_dn), static_cols_end)
|
cols_to_select_mcsm_na = c(static_cols_start, c("mcsm_na_outcome", mcsm_na_dn), static_cols_end)
|
||||||
wf_mcsm_na = comb_df_sl[, cols_to_select_mcsm_na]
|
#wf_mcsm_na = comb_df_sl[, cols_to_select_mcsm_na]
|
||||||
|
wf_mcsm_na = comb_df_sl_na[, cols_to_select_mcsm_na]
|
||||||
|
|
||||||
pivot_cols_mcsm_na = cols_to_select_mcsm_na[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_na
|
pivot_cols_mcsm_na = cols_to_select_mcsm_na[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_na
|
||||||
expected_rows_lf = nrow(wf_mcsm_na) * (length(wf_mcsm_na) - length(pivot_cols_mcsm_na))
|
expected_rows_lf = nrow(wf_mcsm_na) * (length(wf_mcsm_na) - length(pivot_cols_mcsm_na))
|
||||||
|
@ -550,14 +475,19 @@ if (tolower(gene_name)%in%geneL_na){
|
||||||
wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na
|
wf_lf_dataL[['lf_mcsm_na']] = lf_mcsm_na
|
||||||
|
|
||||||
}
|
}
|
||||||
#-------------------------------------------------------------------
|
|
||||||
|
#=========================
|
||||||
|
# mcsm-ppi2 affinity
|
||||||
|
# data filtered by cut off
|
||||||
|
#========================
|
||||||
if (tolower(gene_name)%in%geneL_ppi2){
|
if (tolower(gene_name)%in%geneL_ppi2){
|
||||||
#==============
|
#-----------------
|
||||||
# mCSM-PPI2: LF
|
# mCSM-PPI2: WF and LF
|
||||||
#==============
|
#-----------------
|
||||||
# WF data: mcsm-ppi2
|
# WF data: mcsm-ppi2
|
||||||
cols_to_select_mcsm_ppi2 = c(static_cols_start, c("mcsm_ppi2_outcome", mcsm_ppi2_dn), static_cols_end)
|
cols_to_select_mcsm_ppi2 = c(static_cols_start, c("mcsm_ppi2_outcome", mcsm_ppi2_dn), static_cols_end)
|
||||||
wf_mcsm_ppi2 = comb_df_sl[, cols_to_select_mcsm_ppi2]
|
#wf_mcsm_ppi2 = comb_df_sl[, cols_to_select_mcsm_ppi2]
|
||||||
|
wf_mcsm_ppi2 = comb_df_sl_ppi2[, cols_to_select_mcsm_ppi2]
|
||||||
|
|
||||||
pivot_cols_mcsm_ppi2 = cols_to_select_mcsm_ppi2[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_ppi2
|
pivot_cols_mcsm_ppi2 = cols_to_select_mcsm_ppi2[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm_ppi2
|
||||||
expected_rows_lf = nrow(wf_mcsm_ppi2) * (length(wf_mcsm_ppi2) - length(pivot_cols_mcsm_ppi2))
|
expected_rows_lf = nrow(wf_mcsm_ppi2) * (length(wf_mcsm_ppi2) - length(pivot_cols_mcsm_ppi2))
|
||||||
|
@ -582,156 +512,7 @@ if (tolower(gene_name)%in%geneL_ppi2){
|
||||||
wf_lf_dataL[['lf_mcsm_ppi2']] = lf_mcsm_ppi2
|
wf_lf_dataL[['lf_mcsm_ppi2']] = lf_mcsm_ppi2
|
||||||
|
|
||||||
}
|
}
|
||||||
#-------------------------------------------------------------------
|
|
||||||
if (tolower(gene_name)%in%geneL_dy){
|
|
||||||
#==============
|
|
||||||
# Dynamut: LF
|
|
||||||
#==============
|
|
||||||
# WF data: dynamut
|
|
||||||
cols_to_select_dynamut = c(static_cols_start, c("ddg_dynamut_outcome", dynamut_dn), static_cols_end)
|
|
||||||
wf_dynamut = comb_df_sl[, cols_to_select_dynamut]
|
|
||||||
|
|
||||||
pivot_cols_dynamut = cols_to_select_dynamut[1: (length(static_cols_start) + 1)]; pivot_cols_dynamut
|
|
||||||
expected_rows_lf = nrow(wf_dynamut) * (length(wf_dynamut) - length(pivot_cols_dynamut))
|
|
||||||
expected_rows_lf
|
|
||||||
|
|
||||||
# LF data: dynamut
|
|
||||||
lf_dynamut = gather(wf_dynamut
|
|
||||||
, key = param_type
|
|
||||||
, value = param_value
|
|
||||||
, all_of(dynamut_dn):tail(static_cols_end,1)
|
|
||||||
, factor_key = TRUE)
|
|
||||||
|
|
||||||
if (nrow(lf_dynamut) == expected_rows_lf){
|
|
||||||
cat("\nPASS: long format data created for ", dynamut_dn)
|
|
||||||
}else{
|
|
||||||
cat("\nFAIL: long format data could not be created for duet")
|
|
||||||
quit()
|
|
||||||
}
|
|
||||||
|
|
||||||
# Assign them to the output list
|
|
||||||
wf_lf_dataL[['wf_dynamut']] = wf_dynamut
|
|
||||||
wf_lf_dataL[['lf_dynamut']] = lf_dynamut
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------
|
|
||||||
#==============
|
|
||||||
# EnCOM ddg: LF
|
|
||||||
#==============
|
|
||||||
# WF data: encomddg
|
|
||||||
cols_to_select_encomddg = c(static_cols_start, c("ddg_encom_outcome", encom_ddg_dn), static_cols_end)
|
|
||||||
wf_encomddg = comb_df_sl[, cols_to_select_encomddg]
|
|
||||||
|
|
||||||
pivot_cols_encomddg = cols_to_select_encomddg[1: (length(static_cols_start) + 1)]; pivot_cols_encomddg
|
|
||||||
expected_rows_lf = nrow(wf_encomddg ) * (length(wf_encomddg ) - length(pivot_cols_encomddg))
|
|
||||||
expected_rows_lf
|
|
||||||
|
|
||||||
# LF data: encomddg
|
|
||||||
lf_encomddg = gather(wf_encomddg
|
|
||||||
, key = param_type
|
|
||||||
, value = param_value
|
|
||||||
, all_of(encom_ddg_dn):tail(static_cols_end,1)
|
|
||||||
, factor_key = TRUE)
|
|
||||||
|
|
||||||
if (nrow(lf_encomddg) == expected_rows_lf){
|
|
||||||
cat("\nPASS: long format data created for ", encom_ddg_dn)
|
|
||||||
}else{
|
|
||||||
cat("\nFAIL: long format data could not be created for duet")
|
|
||||||
quit()
|
|
||||||
}
|
|
||||||
|
|
||||||
# Assign them to the output list
|
|
||||||
wf_lf_dataL[['wf_encomddg']] = wf_encomddg
|
|
||||||
wf_lf_dataL[['lf_encomddg']] = lf_encomddg
|
|
||||||
#-------------------------------------------------------------------------
|
|
||||||
#==============
|
|
||||||
# EnCOM dds: LF
|
|
||||||
#==============
|
|
||||||
# WF data: encomdds
|
|
||||||
cols_to_select_encomdds = c(static_cols_start, c("dds_encom_outcome", encom_dds_dn), static_cols_end)
|
|
||||||
wf_encomdds = comb_df_sl[, cols_to_select_encomdds]
|
|
||||||
|
|
||||||
pivot_cols_encomdds = cols_to_select_encomdds[1: (length(static_cols_start) + 1)]; pivot_cols_encomdds
|
|
||||||
expected_rows_lf = nrow(wf_encomdds) * (length(wf_encomdds) - length(pivot_cols_encomdds))
|
|
||||||
expected_rows_lf
|
|
||||||
|
|
||||||
# LF data: encomdds
|
|
||||||
lf_encomdds = gather(wf_encomdds
|
|
||||||
, key = param_type
|
|
||||||
, value = param_value
|
|
||||||
, all_of(encom_dds_dn):tail(static_cols_end,1)
|
|
||||||
, factor_key = TRUE)
|
|
||||||
|
|
||||||
if (nrow(lf_encomdds) == expected_rows_lf){
|
|
||||||
cat("\nPASS: long format data created for", encom_dds_dn)
|
|
||||||
}else{
|
|
||||||
cat("\nFAIL: long format data could not be created for duet")
|
|
||||||
quit()
|
|
||||||
}
|
|
||||||
|
|
||||||
# Assign them to the output list
|
|
||||||
wf_lf_dataL[['wf_encomdds']] = wf_encomdds
|
|
||||||
wf_lf_dataL[['lf_encomdds']] = lf_encomdds
|
|
||||||
#-------------------------------------------------------------------------
|
|
||||||
#==============
|
|
||||||
# SDM: LF
|
|
||||||
#==============
|
|
||||||
# WF data: sdm
|
|
||||||
cols_to_select_sdm = c(static_cols_start, c("ddg_sdm_outcome", sdm_dn), static_cols_end)
|
|
||||||
wf_sdm = comb_df_sl[, cols_to_select_sdm]
|
|
||||||
|
|
||||||
pivot_cols_sdm = cols_to_select_sdm[1: (length(static_cols_start) + 1)]; pivot_cols_sdm
|
|
||||||
expected_rows_lf = nrow(wf_sdm) * (length(wf_sdm) - length(pivot_cols_sdm))
|
|
||||||
expected_rows_lf
|
|
||||||
|
|
||||||
# LF data: sdm
|
|
||||||
lf_sdm = gather(wf_sdm
|
|
||||||
, key = param_type
|
|
||||||
, value = param_value
|
|
||||||
, all_of(sdm_dn):tail(static_cols_end,1)
|
|
||||||
, factor_key = TRUE)
|
|
||||||
|
|
||||||
if (nrow(lf_sdm) == expected_rows_lf){
|
|
||||||
cat("\nPASS: long format data created for", sdm_dn)
|
|
||||||
}else{
|
|
||||||
cat("\nFAIL: long format data could not be created for duet")
|
|
||||||
quit()
|
|
||||||
}
|
|
||||||
|
|
||||||
# Assign them to the output list
|
|
||||||
wf_lf_dataL[['wf_sdm']] = wf_sdm
|
|
||||||
wf_lf_dataL[['lf_sdm']] = lf_sdm
|
|
||||||
#-------------------------------------------------------------------------
|
|
||||||
#==============
|
|
||||||
# mCSM: LF
|
|
||||||
#==============
|
|
||||||
# WF data: mcsm
|
|
||||||
cols_to_select_mcsm = c(static_cols_start, c("ddg_mcsm_outcome", mcsm_dn), static_cols_end)
|
|
||||||
wf_mcsm = comb_df_sl[, cols_to_select_mcsm]
|
|
||||||
|
|
||||||
pivot_cols_mcsm = cols_to_select_mcsm[1: (length(static_cols_start) + 1)]; pivot_cols_mcsm
|
|
||||||
expected_rows_lf = nrow(wf_mcsm) * (length(wf_mcsm) - length(pivot_cols_mcsm))
|
|
||||||
expected_rows_lf
|
|
||||||
|
|
||||||
# LF data: mcsm
|
|
||||||
lf_mcsm = gather(wf_mcsm
|
|
||||||
, key = param_type
|
|
||||||
, value = param_value
|
|
||||||
, all_of(mcsm_dn):tail(static_cols_end,1)
|
|
||||||
, factor_key = TRUE)
|
|
||||||
|
|
||||||
if (nrow(lf_mcsm) == expected_rows_lf){
|
|
||||||
cat("\nPASS: long format data created for", mcsm_dn)
|
|
||||||
}else{
|
|
||||||
cat("\nFAIL: long format data could not be created for duet")
|
|
||||||
quit()
|
|
||||||
}
|
|
||||||
|
|
||||||
# Assign them to the output list
|
|
||||||
wf_lf_dataL[['wf_mcsm']] = wf_mcsm
|
|
||||||
wf_lf_dataL[['lf_mcsm']] = lf_mcsm
|
|
||||||
|
|
||||||
}
|
|
||||||
#-------------------------------------------------------------------------
|
|
||||||
return(wf_lf_dataL)
|
return(wf_lf_dataL)
|
||||||
}
|
}
|
||||||
############################################################################
|
############################################################################
|
||||||
|
|
|
@ -39,6 +39,10 @@ resistance_col <<- "drtype"
|
||||||
LigDist_colname <<- "ligand_distance"
|
LigDist_colname <<- "ligand_distance"
|
||||||
LigDist_cutoff <<- 10
|
LigDist_cutoff <<- 10
|
||||||
|
|
||||||
|
DistCutOff = 10
|
||||||
|
ppi2Dist_colname = "interface_dist"
|
||||||
|
naDist_colname = "TBC"
|
||||||
|
|
||||||
#==================
|
#==================
|
||||||
# Angstroms symbol
|
# Angstroms symbol
|
||||||
#==================
|
#==================
|
||||||
|
|
|
@ -112,7 +112,7 @@ cat(s1)
|
||||||
#source("other_plots_data.R")
|
#source("other_plots_data.R")
|
||||||
####################################################################
|
####################################################################
|
||||||
|
|
||||||
source(paste0(plot_script_path, "dm_om_data.R"))
|
#source(paste0(plot_script_path, "dm_om_data.R"))
|
||||||
s2 = c("\nSuccessfully sourced other_plots_data.R")
|
s2 = c("\nSuccessfully sourced other_plots_data.R")
|
||||||
cat(s2)
|
cat(s2)
|
||||||
|
|
||||||
|
|
|
@ -10,8 +10,10 @@ source("~/git/LSHTM_analysis/config/embb.R")
|
||||||
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
||||||
###################################################################
|
###################################################################
|
||||||
# FIXME: ADD distance to NA when SP replies
|
# FIXME: ADD distance to NA when SP replies
|
||||||
|
# DONE: plotting_globals.R
|
||||||
dist_columns = c("ligand_distance", "interface_dist")
|
dist_columns = c("ligand_distance", "interface_dist")
|
||||||
DistCutOff = 10
|
DistCutOff = 10
|
||||||
|
|
||||||
common_cols = c("mutationinformation"
|
common_cols = c("mutationinformation"
|
||||||
, "X5uhc_position"
|
, "X5uhc_position"
|
||||||
, "X5uhc_offset"
|
, "X5uhc_offset"
|
||||||
|
@ -98,22 +100,24 @@ df3 = merged_df3
|
||||||
#=================
|
#=================
|
||||||
# PREFORMATTING: for consistency
|
# PREFORMATTING: for consistency
|
||||||
#=================
|
#=================
|
||||||
df3$sensitivity = ifelse(df3$dst_mode == 1, "R", "S")
|
# DONE: combining_dfs.R
|
||||||
table(df3$sensitivity)
|
# df3$sensitivity = ifelse(df3$dst_mode == 1, "R", "S")
|
||||||
|
# table(df3$sensitivity)
|
||||||
|
|
||||||
# ConSurf labels
|
# ConSurf labels
|
||||||
consurf_colOld = "consurf_colour_rev"
|
#consurf_colOld = "consurf_colour_rev"
|
||||||
consurf_colNew = "consurf_outcome"
|
#consurf_colNew = "consurf_outcome"
|
||||||
df3[[consurf_colNew]] = df3[[consurf_colOld]]
|
#df3[[consurf_colNew]] = df3[[consurf_colOld]]
|
||||||
df3[[consurf_colNew]] = as.factor(df3[[consurf_colNew]])
|
#df3[[consurf_colNew]] = as.factor(df3[[consurf_colNew]])
|
||||||
df3[[consurf_colNew]]
|
#df3[[consurf_colNew]]
|
||||||
|
# not this bit
|
||||||
levels(df3$consurf_outcome) = c( "nsd", 1, 2, 3, 4, 5, 6, 7, 8, 9)
|
levels(df3$consurf_outcome) = c( "nsd", 1, 2, 3, 4, 5, 6, 7, 8, 9)
|
||||||
levels(df3$consurf_outcome)
|
#levels(df3$consurf_outcome)
|
||||||
|
|
||||||
# SNAP2 labels
|
# SNAP2 labels
|
||||||
snap2_colname = "snap2_outcome"
|
#snap2_colname = "snap2_outcome"
|
||||||
df3[[snap2_colname]] <- str_replace(df3[[snap2_colname]], "effect", "Effect")
|
#df3[[snap2_colname]] <- str_replace(df3[[snap2_colname]], "effect", "Effect")
|
||||||
df3[[snap2_colname]] <- str_replace(df3[[snap2_colname]], "neutral", "Neutral")
|
#df3[[snap2_colname]] <- str_replace(df3[[snap2_colname]], "neutral", "Neutral")
|
||||||
|
|
||||||
# for ref: not needed per se, as the function already does this and assigns labels for barplots
|
# for ref: not needed per se, as the function already does this and assigns labels for barplots
|
||||||
# labels_duet = levels(as.factor(df3$duet_outcome))
|
# labels_duet = levels(as.factor(df3$duet_outcome))
|
||||||
|
@ -138,14 +142,16 @@ df2 = merged_df2
|
||||||
#=================
|
#=================
|
||||||
# PREFORMATTING: for consistency
|
# PREFORMATTING: for consistency
|
||||||
#=================
|
#=================
|
||||||
df2$sensitivity = ifelse(df2$dst_mode == 1, "R", "S")
|
# DONE: combining_dfs.R
|
||||||
table(df2$sensitivity)
|
# df2$sensitivity = ifelse(df2$dst_mode == 1, "R", "S")
|
||||||
|
# table(df2$sensitivity)
|
||||||
|
|
||||||
#----------------------------------------------------
|
#----------------------------------------------------
|
||||||
# Create dst2: fill na in dst with value of dst_mode
|
# Create dst2: fill na in dst with value of dst_mode
|
||||||
# for epistasis
|
# for epistasis
|
||||||
#----------------------------------------------------
|
#----------------------------------------------------
|
||||||
df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2f$dst)
|
# DONE: combining_dfs.R
|
||||||
|
# df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2f$dst)
|
||||||
|
|
||||||
#----------------------------------------------------
|
#----------------------------------------------------
|
||||||
# reverse signs for foldx scaled values for
|
# reverse signs for foldx scaled values for
|
||||||
|
@ -168,10 +174,11 @@ scaled_cols_stab_revised = c(scaled_cols_stab_revised, "foldx_scaled_signC")
|
||||||
|
|
||||||
######################################################
|
######################################################
|
||||||
# Affinity related variables
|
# Affinity related variables
|
||||||
DistCutOff = 10
|
# DONE: in plotting_globals.R
|
||||||
LigDist_colname # = "ligand_distance" # from globals
|
# DistCutOff = 10
|
||||||
ppi2Dist_colname = "interface_dist"
|
# LigDist_colname # = "ligand_distance" # from globals
|
||||||
naDist_colname = "TBC"
|
# ppi2Dist_colname = "interface_dist"
|
||||||
|
# naDist_colname = "TBC"
|
||||||
|
|
||||||
######################################################
|
######################################################
|
||||||
# corr colnames
|
# corr colnames
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue