Finding sequence discrepancy in MSA for embb

2022-01-17 19:11:10 +00:00 · 2022-01-17 19:11:10 +00:00 · 68a092037b
commit 68a092037b
parent af04c69d66
4 changed files with 36 additions and 9 deletions
--- a/scripts/functions/logoP_snp.R
+++ b/scripts/functions/logoP_snp.R
@ -195,10 +195,10 @@ LogoPlotSnps <- function(plot_df
  cat('\nDone: p0')
  # further customisation
-  mut_logo_p <<- p0 + theme(legend.position = leg_pos
+  mut_logo_p = p0 + theme(legend.position = leg_pos
                          , legend.direction = leg_dir
                          #, legend.title = element_blank()
-                          , legend.title = element_text(size = y_tts
+                          , legend.title = element_text(size = leg_tts
                                                        , colour = ytt_col)
                          , legend.text = element_text(size = leg_ts)
@ -246,7 +246,7 @@ LogoPlotSnps <- function(plot_df
  cat('\nDone: p1')
  # further customisation
-  wt_logo_p <<- p1 + 
+  wt_logo_p = p1 + 
    theme(legend.position = "none"
          , legend.direction = leg_dir
--- a/scripts/functions/tests/test_logo_plots.R
+++ b/scripts/functions/tests/test_logo_plots.R
@ -1,4 +1,6 @@
-source("~/git/LSHTM_analysis/config/gid.R")
+#source("~/git/LSHTM_analysis/config/gid.R")
 source("~/git/LSHTM_analysis/config/alr.R")
 source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
 ################################
@ -56,3 +58,24 @@ LogoPlotSnps(plot_df = merged_df3
             , leg_tts = 16 # leg title size
 )
 ########################################
 # Logo plot MSA
 # Mutant and wild-type
 # wild-type and mutant aa
 # script: logoP_msa.R
 ########################################
 # msa1 = read.csv("/home/tanu/git/Data/cycloserine/output/alr_msa.csv", header = F)
 # head(msa1)
 # msa_seq= msa1$V1
 # head(msa_seq)
 # 
 # msa2 = read.csv("/home/tanu/git/Data/cycloserine/input/alr.1fasta", header = F)
 # head(msa2)
 # wt_seq = msa2$V1
 # head(wt_seq)
 # 
 # # BOTH WORK
 # LogoPlotMSA(msa_seq, wt_seq)
 # LogoPlotMSA(msa1, msa2)
--- a/scripts/gene_targets_names.txt
+++ b/scripts/gene_targets_names.txt
@ -1,7 +1,6 @@
 embb ethambutol
 rpob rifampicin
 alr cycloserine
 katg isoniazid
 pnca pyrazinamide
 gid streptomycin
-
+embb ethambutol
--- a/scripts/run_mutate2.sh
+++ b/scripts/run_mutate2.sh
@ -30,6 +30,8 @@ while read -r gene drug; do
  echo "Running mutate.py on data file $MSA_MAP"
  python3 mutate.py -v -o ${DATA_DIR}/${drug}/output/${gene}_msa_interim.csv $MSA_MAP $DATA_DIR/${drug}/input/${gene}_f2.fasta
  echo "mutate.py completed"
  sed -E 's/>.*//g;/^$/d' ${DATA_DIR}/${drug}/output/${gene}_msa_interim.csv > ${DATA_DIR}/${drug}/output/${gene}_msa.csv
  wc -l ${DATA_DIR}/${drug}/output/${gene}_msa.csv
  echo
 done < gene_targets_names.txt
@ -37,6 +39,9 @@ done < gene_targets_names.txt
 # Stop here so we don't run the examples below :)
 exit
 ########################################################################
 #
 ########################################################################
 # make sure there is no new line at the end of the mutation file (snps.csv)
 # check
 cat  output/gid_metadata.csv | rev | cut -d, -f1 |rev | tail -n +2 |sort | head