saving work for logo plots

2022-01-13 18:53:47 +00:00 · 2022-01-13 18:53:47 +00:00 · 344a74a9e1
commit 344a74a9e1
parent 7cbd9b4996
5 changed files with 24 additions and 29 deletions
--- a/scripts/functions/logo_plots_func.R
+++ b/scripts/functions/logo_plots_func.R
@@ -1,25 +0,0 @@
-#logo plots
-
-# create functions
-
-# one with OR
-  # --> select/drop down option to remove empty positions
-  # --> select/drop down option for colour
-      # --> if clustalx and taylor,  set variable to black bg + white font
-      # --> if chemistry and hydrophobicity, then grey bg +  black font 
-
-  # --> select/drop down option for log scale
-  # --> should include WT
-
-# one for multiple muts
-  # --> select/drop down option to filter count of nsSNPs
-  # --> select/drop down  option for colour
-  # --> should include WT
-
-# Data used
-wide_df_or # or logo plot
-wide_df_or_mult # > 1 sites
-wide_df_logor_m #make it as a scale option
-tab_mt # mutant logo plot
-tab_wt # wt logo plot
-
--- a/scripts/plotting/get_plotting_dfs.R
+++ b/scripts/plotting/get_plotting_dfs.R
@@ -109,10 +109,10 @@ merged_df3_comp = all_plot_dfs[[4]]
 #                        Data for logoplots
 ####################################################################

-source(paste0(plot_script_path, "logo_data.R"))
+#source(paste0(plot_script_path, "logo_data.R"))

-s1 = c("\nSuccessfully sourced logo_data.R")
-cat(s1)
+#s1 = c("\nSuccessfully sourced logo_data.R")
+#cat(s1)

 ####################################################################
 #                        Data for DM OM Plots: Long format dfs
--- a/scripts/plotting/logo_data.R
+++ b/scripts/plotting/logo_data.R
@@ -7,6 +7,7 @@
 #-------------------------
 logo_data = merged_df3
 #logo_data = merged_df3_comp
+#logo_data = merged_df2 # can't be used because of multiple snps

 # quick checks
 colnames(logo_data)
--- a/scripts/plotting/logo_plots.R
+++ b/scripts/plotting/logo_plots.R
@@ -99,7 +99,7 @@ print(logo_or)
 cat("Logo plot with log10 OR as y axis:", plot_logo_logOR)
 svg(plot_logo_logOR, width = 30 , height = 6)

-logo_logOR = ggseqlogo(wide_df_logor_m
+logo_logOR = ggseqlogo(wide_df_logor
                       , method = "custom"
                       , seq_type="aa") + ylab("my custom height") +
  theme(legend.position = "bottom"
--- a/scripts/run_mutate.sh
+++ b/scripts/run_mutate.sh
@@ -15,3 +15,22 @@ python3 mutate.py -v -o /home/tanu/git/Data/pyrazinamide/output/pnca_msa.txt /ho
 sed -i '/^>.*$/d' /home/tanu/git/Data/pyrazinamide/output/pnca_msa.txt
 printf 'No. of lines after cleaning: '
 cat /home/tanu/git/Data/pyrazinamide/output/pnca_msa.txt | wc -l
+
+#%%
+# Date: 13/01/22
+# pre processing
+sed 's/"//g' gene_msa_snp.csv > gid_mut_map.csv
+
+# mut prefix in the mutation map file MUST match the fasta file header
+python3 mutate.py -v -o /home/tanu/git/LSHTM_analysis/scripts/plotting/scratch_plots/TEST2.csv /home/tanu/git/LSHTM_analysis/scripts/plotting/scratch_plots/gid_mut_map.csv /home/tanu/git/Data/streptomycin/input/gid2.fasta
+
+wc -l TEST2.csv
+
+# post processing
+sed -E 's/>.*//g' TEST2.csv | sed '/^$/d' > TEST3.csv
+wc -l TEST3.csv
+
+
+# doubles the no. of lines as it adds the mut info
+python3 mutate.py -v -o /home/tanu/git/LSHTM_analysis/scripts/plotting/scratch_plots/TEST.csv /home/tanu/git/LSHTM_analysis/scripts/plotting/scratch_plots/pnca_mut_map.csv /home/tanu/git/Data/pyrazinamide/input/pnca.fasta
+