updated logo plot data to source from combining_dfs_plotting.R

2020-09-10 19:58:33 +01:00 · 2020-09-10 19:58:33 +01:00 · c1041ad273
commit c1041ad273
parent e690f5beba
1 changed files with 15 additions and 55 deletions
--- a/scripts/plotting/logo_plot.R
+++ b/scripts/plotting/logo_plot.R
@ -1,71 +1,30 @@
-#=======================================================================
+#!/usr/bin/env Rscript  
-# Task: To generate a logo plot  or bar plot but coloured 
+#########################################################
-# aa properties.
+# TASK: producing a logo plot (or bar plot) of mutations coloured by aa properties
 # step1: read mcsm file and OR file
 # step2: plot wild type positions
 # step3: plot mutants per position coloured by aa properties
 # step4: make the size of the letters/bars proportional to OR if you can!
-# useful links
+#########################################################
 # https://stackoverflow.com/questions/5438474/plotting-a-sequence-logo-using-ggplot2
 # https://omarwagih.github.io/ggseqlogo/
 # https://kkdey.github.io/Logolas-pages/workflow.html
 # A new sequence logo plot to highlight enrichment and depletion.
 #    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6288878/
 #very good: http://www.cbs.dtu.dk/biotools/Seq2Logo-2.0/
 #=======================================================================
-#%% specify curr dir
+# working dir and loading libraries
 getwd()
-setwd("~/git/LSHTM_analysis/plotting_test/")
+setwd("~/git/LSHTM_analysis/scripts/plotting")
 getwd()
 #=======================================================================
 #%% load packages
-# header file
+source("Header_TT.R")
-header_dir = "~/git/LSHTM_analysis/"
+#library(ggplot2)
-source(paste0(header_dir, "/", "my_header.R"))
+#library(data.table)
-#=======================================================================
+#library(dplyr)
 #%% variable assignment: input and output paths & filenames
 drug = "pyrazinamide"
 gene = "pncA"
 gene_match = paste0(gene,"_p.")
 cat(gene_match)
 #===========
 # data dir
 #===========
 datadir = paste0("~/git/Data")
 #===========
 # input
 #===========
-# source R script "combining_two_df.R"
+source("combining_dfs_plotting.R")
 #indir = paste0(datadir, "/", drug, "/", "output") # reading files
 indir = "../meta_data_analysis" # sourcing R script
 in_filename = "combining_df_ps.R"
 infile = paste0(indir, "/", in_filename)
 cat(paste0("Input is a R script: ", "\"", infile, "\"")
    , "\n========================================================")
 #===========
 # output
 #===========
 # 1) lineage dist of all muts
 outdir = paste0("~/git/Data", "/", drug, "/", "output/plots") #same as indir
 #cat("Output dir: ", outdir, "\n")
 #file_type = ".svg"
 #out_filename1 = paste0(tolower(gene), "_lineage_dist_ps", file_type) 
 #outfile1 = paste0(outdir, "/", out_filename1)
 #cat(paste0("Output plot1 :", outfile1)
 #    , "\n========================================================")
-#%% end of variable assignment for input and output files
+logo_plot = "logo_plot.svg"
-#=======================================================================
+plot_logo_plot = paste0(plotdir,"/", logo_plot)
 ##%% read input file
 cat("Reading input file(sourcing R script):", in_filename)
 source(infile)
 #==========================
 # This will return:
@ -205,6 +164,7 @@ ggseqlogo(wide_df_logor, method="custom", seq_type="aa") + ylab("my custom heigh
 library(Logolas)
 # data was pnca_msa.txt
 #FIXME: generate this file 
 seqs = read.csv("~/git//Data/pyrazinamide/snp_seqsfile.txt"
                , header = FALSE