From 344a74a9e18a3a8b6f1b800a5d23daa6925bdca3 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 13 Jan 2022 18:53:47 +0000 Subject: [PATCH] saving work for logo plots --- scripts/functions/logo_plots_func.R | 25 ------------------------- scripts/plotting/get_plotting_dfs.R | 6 +++--- scripts/plotting/logo_data.R | 1 + scripts/plotting/logo_plots.R | 2 +- scripts/run_mutate.sh | 19 +++++++++++++++++++ 5 files changed, 24 insertions(+), 29 deletions(-) delete mode 100644 scripts/functions/logo_plots_func.R diff --git a/scripts/functions/logo_plots_func.R b/scripts/functions/logo_plots_func.R deleted file mode 100644 index 428f6cb..0000000 --- a/scripts/functions/logo_plots_func.R +++ /dev/null @@ -1,25 +0,0 @@ -#logo plots - -# create functions - -# one with OR - # --> select/drop down option to remove empty positions - # --> select/drop down option for colour - # --> if clustalx and taylor, set variable to black bg + white font - # --> if chemistry and hydrophobicity, then grey bg + black font - - # --> select/drop down option for log scale - # --> should include WT - -# one for multiple muts - # --> select/drop down option to filter count of nsSNPs - # --> select/drop down option for colour - # --> should include WT - -# Data used -wide_df_or # or logo plot -wide_df_or_mult # > 1 sites -wide_df_logor_m #make it as a scale option -tab_mt # mutant logo plot -tab_wt # wt logo plot - diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R index 56e8a67..4e70fb1 100644 --- a/scripts/plotting/get_plotting_dfs.R +++ b/scripts/plotting/get_plotting_dfs.R @@ -109,10 +109,10 @@ merged_df3_comp = all_plot_dfs[[4]] # Data for logoplots #################################################################### -source(paste0(plot_script_path, "logo_data.R")) +#source(paste0(plot_script_path, "logo_data.R")) -s1 = c("\nSuccessfully sourced logo_data.R") -cat(s1) +#s1 = c("\nSuccessfully sourced logo_data.R") +#cat(s1) #################################################################### # Data for DM OM Plots: Long format dfs diff --git a/scripts/plotting/logo_data.R b/scripts/plotting/logo_data.R index 87d503b..e8341d3 100644 --- a/scripts/plotting/logo_data.R +++ b/scripts/plotting/logo_data.R @@ -7,6 +7,7 @@ #------------------------- logo_data = merged_df3 #logo_data = merged_df3_comp +#logo_data = merged_df2 # can't be used because of multiple snps # quick checks colnames(logo_data) diff --git a/scripts/plotting/logo_plots.R b/scripts/plotting/logo_plots.R index 95ccfa4..db67254 100755 --- a/scripts/plotting/logo_plots.R +++ b/scripts/plotting/logo_plots.R @@ -99,7 +99,7 @@ print(logo_or) cat("Logo plot with log10 OR as y axis:", plot_logo_logOR) svg(plot_logo_logOR, width = 30 , height = 6) -logo_logOR = ggseqlogo(wide_df_logor_m +logo_logOR = ggseqlogo(wide_df_logor , method = "custom" , seq_type="aa") + ylab("my custom height") + theme(legend.position = "bottom" diff --git a/scripts/run_mutate.sh b/scripts/run_mutate.sh index 3de2210..43263c5 100644 --- a/scripts/run_mutate.sh +++ b/scripts/run_mutate.sh @@ -15,3 +15,22 @@ python3 mutate.py -v -o /home/tanu/git/Data/pyrazinamide/output/pnca_msa.txt /ho sed -i '/^>.*$/d' /home/tanu/git/Data/pyrazinamide/output/pnca_msa.txt printf 'No. of lines after cleaning: ' cat /home/tanu/git/Data/pyrazinamide/output/pnca_msa.txt | wc -l + +#%% +# Date: 13/01/22 +# pre processing +sed 's/"//g' gene_msa_snp.csv > gid_mut_map.csv + +# mut prefix for mutation map file MUST match fasta file header +python3 mutate.py -v -o /home/tanu/git/LSHTM_analysis/scripts/plotting/scratch_plots/TEST2.csv /home/tanu/git/LSHTM_analysis/scripts/plotting/scratch_plots/gid_mut_map.csv /home/tanu/git/Data/streptomycin/input/gid2.fasta + +wc -l TEST2.csv + +# post processing +sed -E 's/>.*//g' TEST2.csv | sed '/^$/d' > TEST3.csv +wc -l TEST3.csv + + +# doubles the no as it adds the mut info +python3 mutate.py -v -o /home/tanu/git/LSHTM_analysis/scripts/plotting/scratch_plots/TEST.csv /home/tanu/git/LSHTM_analysis/scripts/plotting/scratch_plots/pnca_mut_map.csv /home/tanu/git/Data/pyrazinamide/input/pnca.fasta +