renamed 2 to _v2

This commit is contained in:
Tanushree Tunstall 2022-08-22 11:41:42 +01:00
parent c9d7ea9fad
commit 802d6f8495
18 changed files with 761 additions and 976 deletions

View file

@ -6,7 +6,7 @@
# working dir and loading libraries
getwd()
source("~/git/LSHTM_analysis/scripts/Header_TT.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
# cmd args passed
# in from other scripts
# to call this
@ -43,7 +43,7 @@ import_dirs(drug, gene)
# call: plotting_data()
#---------------------------
if (!exists("infile_params") && exists("gene")){
#if (!is.character(infile_params) && exists("gene")){ # when running as cmd
#if (!is.character(infile_params) && exists("gene")){ # when running as cmd
in_filename_params = paste0(tolower(gene), "_all_params.csv")
infile_params = paste0(outdir, "/", in_filename_params)
cat("\nInput file for mcsm comb data not specified, assuming filename: ", infile_params, "\n")
@ -70,7 +70,7 @@ cat("\nLigand distance colname:", LigDist_colname
# call: combining_dfs_plotting()
#--------------------------------
if (!exists("infile_metadata") && exists("gene")){
#if (!is.character(infile_metadata) && exists("gene")){ # when running as cmd
#if (!is.character(infile_metadata) && exists("gene")){ # when running as cmd
in_filename_metadata = paste0(tolower(gene), "_metadata.csv") # part combined for gid
infile_metadata = paste0(outdir, "/", in_filename_metadata)
cat("\nInput file for gene metadata not specified, assuming filename: ", infile_metadata, "\n")
@ -109,137 +109,137 @@ merged_df3 = all_plot_dfs[[2]]
####################################################################
# Data for logoplots
####################################################################
#
#
# source(paste0(plot_script_path, "logo_data_msa.R"))
# s1 = c("\nSuccessfully sourced logo_data_msa.R")
# cat(s1)
#
#
# ####################################################################
# # Data for DM OM Plots: WF and LF dfs
# # My function: dm_om_wf_lf_data()
# # location: scripts/functions/dm_om_data.R
# #source("other_plots_data.R")
# ####################################################################
#
#
# #source(paste0(plot_script_path, "dm_om_data.R")) # calling the function directly instead
# geneL_normal = c("pnca")
# geneL_na = c("gid", "rpob")
# geneL_ppi2 = c("alr", "embb", "katg", "rpob")
#
# all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)
#
# wf_duet = all_dm_om_df[['wf_duet']]
# lf_duet = all_dm_om_df[['lf_duet']]
#
# wf_mcsm_lig = all_dm_om_df[['wf_mcsm_lig']]
# lf_mcsm_lig = all_dm_om_df[['lf_mcsm_lig']]
#
# wf_foldx = all_dm_om_df[['wf_foldx']]
# lf_foldx = all_dm_om_df[['lf_foldx']]
#
# wf_deepddg = all_dm_om_df[['wf_deepddg']]
# lf_deepddg = all_dm_om_df[['lf_deepddg']]
#
# wf_dynamut2 = all_dm_om_df[['wf_dynamut2']]
# lf_dynamut2 = all_dm_om_df[['lf_dynamut2']]
#
# wf_consurf = all_dm_om_df[['wf_consurf']]
# lf_consurf = all_dm_om_df[['lf_consurf']]
#
# wf_snap2 = all_dm_om_df[['wf_snap2']]
# lf_snap2 = all_dm_om_df[['lf_snap2']]
#
# wf_provean = all_dm_om_df[['wf_provean']]
# lf_provean = all_dm_om_df[['lf_provean']]
#
# # NEW
# wf_dist_gen = all_dm_om_df[['wf_dist_gen']]
# lf_dist_gen = all_dm_om_df[['lf_dist_gen']]
#
# if (tolower(gene)%in%geneL_na){
# wf_mcsm_na = all_dm_om_df[['wf_mcsm_na']]
# lf_mcsm_na = all_dm_om_df[['lf_mcsm_na']]
# }
#
# if (tolower(gene)%in%geneL_ppi2){
# wf_mcsm_ppi2 = all_dm_om_df[['wf_mcsm_ppi2']]
# lf_mcsm_ppi2 = all_dm_om_df[['lf_mcsm_ppi2']]
# }
#
# s2 = c("\nSuccessfully sourced other_plots_data.R")
# cat(s2)
#
# Gene groups. geneL_na and geneL_ppi2 decide further down whether the
# mcsm_na / mcsm_ppi2 data frames are unpacked for the current gene.
geneL_normal = c("pnca")
geneL_na     = c("gid", "rpob")
geneL_ppi2   = c("alr", "embb", "katg", "rpob")

# One call to dm_om_wf_lf_data() returns a list of data frames that is
# accessed by name; each wf_* / lf_* entry is unpacked into its own object.
all_dm_om_df = dm_om_wf_lf_data(df = merged_df3, gene = gene)

# duet
wf_duet = all_dm_om_df[["wf_duet"]]
lf_duet = all_dm_om_df[["lf_duet"]]

# mcsm ligand
wf_mcsm_lig = all_dm_om_df[["wf_mcsm_lig"]]
lf_mcsm_lig = all_dm_om_df[["lf_mcsm_lig"]]

# foldx
wf_foldx = all_dm_om_df[["wf_foldx"]]
lf_foldx = all_dm_om_df[["lf_foldx"]]

# deepddg
wf_deepddg = all_dm_om_df[["wf_deepddg"]]
lf_deepddg = all_dm_om_df[["lf_deepddg"]]

# dynamut2
wf_dynamut2 = all_dm_om_df[["wf_dynamut2"]]
lf_dynamut2 = all_dm_om_df[["lf_dynamut2"]]

# consurf
wf_consurf = all_dm_om_df[["wf_consurf"]]
lf_consurf = all_dm_om_df[["lf_consurf"]]

# snap2
wf_snap2 = all_dm_om_df[["wf_snap2"]]
lf_snap2 = all_dm_om_df[["lf_snap2"]]

# provean
wf_provean = all_dm_om_df[["wf_provean"]]
lf_provean = all_dm_om_df[["lf_provean"]]

# NEW: dist_gen
wf_dist_gen = all_dm_om_df[["wf_dist_gen"]]
lf_dist_gen = all_dm_om_df[["lf_dist_gen"]]

# mcsm_na entries are only unpacked for genes listed in geneL_na
if (tolower(gene) %in% geneL_na) {
  wf_mcsm_na = all_dm_om_df[["wf_mcsm_na"]]
  lf_mcsm_na = all_dm_om_df[["lf_mcsm_na"]]
}

# mcsm_ppi2 entries are only unpacked for genes listed in geneL_ppi2
if (tolower(gene) %in% geneL_ppi2) {
  wf_mcsm_ppi2 = all_dm_om_df[["wf_mcsm_ppi2"]]
  lf_mcsm_ppi2 = all_dm_om_df[["lf_mcsm_ppi2"]]
}

# Status message: kept in s2 and echoed to the console.
s2 = "\nSuccessfully sourced other_plots_data.R"
cat(s2)
#
# ####################################################################
# # Data for Lineage barplots: WF and LF dfs
# # My function: lineage_plot_data()
# # location: scripts/functions/lineage_plot_data.R
# ####################################################################
#
# #source(paste0(plot_script_path, "lineage_data.R"))
#
# NOTE(review): a nearby comment says lineage_data.R was converted to a function
# and moved to redundant/ -- confirm this path still resolves, or drop this call.
source(paste0(plot_script_path, "lineage_data.R"))
# # converted to a function. Moved lineage_data.R to redundant/
# lineage_dfL = lineage_plot_data(merged_df2
# , lineage_column_name = "lineage"
# , remove_empty_lineage = F
# , lineage_label_col_name = "lineage_labels"
# , id_colname = "id"
# , snp_colname = "mutationinformation"
# )
#
# lin_wf = lineage_dfL[['lin_wf']]
# lin_lf = lineage_dfL[['lin_lf']]
#
# s3 = c("\nSuccessfully sourced lineage_data.R")
# cat(s3)
#
# ####################################################################
# # Data for corr plots:
# # My function: corr_data_extract()
# # location: scripts/functions/corr_plot_data.R
# ####################################################################
# # make sure the above script works because merged_df2_combined is needed
# merged_df3 = as.data.frame(merged_df3)
#
# corr_df_m3_f = corr_data_extract(merged_df3
# Build the lineage plotting data from merged_df2 with lineage_plot_data()
# and unpack the two returned data frames (lin_wf, lin_lf) by name.
lineage_dfL = lineage_plot_data(merged_df2
                                , lineage_column_name = "lineage"
                                # FALSE spelled out: F is an ordinary variable
                                # that can be reassigned, FALSE cannot
                                , remove_empty_lineage = FALSE
                                , lineage_label_col_name = "lineage_labels"
                                , id_colname = "id"
                                , snp_colname = "mutationinformation"
)

lin_wf = lineage_dfL[['lin_wf']]
lin_lf = lineage_dfL[['lin_lf']]

# Status message: kept in s3 and echoed to the console.
s3 = c("\nSuccessfully sourced lineage_data.R")
cat(s3)
####################################################################
# Data for corr plots:
# My function: corr_data_extract()
# location: scripts/functions/corr_plot_data.R
####################################################################
# make sure the above script works because merged_df2_combined is needed
# Coerce merged_df3 to a plain data.frame, then extract the data for the
# corr plots with corr_data_extract().
merged_df3 = as.data.frame(merged_df3)

corr_df_m3_f = corr_data_extract(merged_df3
                                 , gene = gene
                                 , drug = drug
                                 # FALSE spelled out: F is an ordinary variable
                                 # that can be reassigned, FALSE cannot
                                 , extract_scaled_cols = FALSE)

# Quick look at the first rows of the result.
head(corr_df_m3_f)
# corr_df_m2_f = corr_data_extract(merged_df2
# , gene = gene
# , drug = drug
# , extract_scaled_cols = F)
# head(corr_df_m3_f)
#
# # corr_df_m2_f = corr_data_extract(merged_df2
# # , gene = gene
# # , drug = drug
# # , extract_scaled_cols = F)
# # head(corr_df_m2_f)
#
# s4 = c("\nSuccessfully sourced Corr_data.R")
# cat(s4)
#
# ########################################################################
# # End of script
# ########################################################################
# head(corr_df_m2_f)
# Status message: kept in s4 and echoed to the console.
s4 = "\nSuccessfully sourced Corr_data.R"
cat(s4)
########################################################################
# End of script
########################################################################
# if ( all( length(s1), length(s2), length(s3), length(s4) ) > 0 ){
# cat(
# "\n##################################################"
# , "\nSuccessful: get_plotting_dfs.R worked!"
# , "\n###################################################\n")
# } else {
# cat(
# "\n#################################################"
# , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
# , "\n###################################################\n" )
# cat(
# "\n##################################################"
# , "\nSuccessful: get_plotting_dfs.R worked!"
# , "\n###################################################\n")
# } else {
# cat(
# "\n#################################################"
# , "\nFAIL: get_plotting_dfs.R didn't complete fully!Please check"
# , "\n###################################################\n" )
# }
#
# ########################################################################
# # clear excess variables: from the global environment
#
# vars0 = ls(envir = .GlobalEnv)[grepl("curr_*", ls(envir = .GlobalEnv))]
# vars1 = ls(envir = .GlobalEnv)[grepl("^cols_to*", ls(envir = .GlobalEnv))]
# vars2 = ls(envir = .GlobalEnv)[grepl("pivot_cols_*", ls(envir = .GlobalEnv))]
# vars3 = ls(envir = .GlobalEnv)[grepl("expected_*", ls(envir = .GlobalEnv))]
#
# rm( infile_metadata
# , infile_params
# , vars0
# , vars1
# , vars2
# , vars3)
########################################################################
# clear excess variables: from the global environment
#
# Collect the names of temporary objects by prefix. The patterns are regular
# expressions, not globs: the former "curr_*" meant "curr" followed by zero or
# more underscores and so matched any name containing "curr". They are now
# anchored prefixes so only the intended temporaries are selected.
vars0 = ls(envir = .GlobalEnv, pattern = "^curr_")
vars1 = ls(envir = .GlobalEnv, pattern = "^cols_to")
vars2 = ls(envir = .GlobalEnv, pattern = "^pivot_cols_")
vars3 = ls(envir = .GlobalEnv, pattern = "^expected_")

# Remove the objects those names refer to. rm(vars0) on its own would only
# delete the character vector holding the names, leaving the temporaries in
# place; rm(list = ...) removes the named objects themselves.
rm(list = c(vars0, vars1, vars2, vars3), envir = .GlobalEnv)

# Finally drop the input-file paths and the helper name vectors.
rm( infile_metadata
    , infile_params
    , vars0
    , vars1
    , vars2
    , vars3)