diff --git a/scripts/plotting/logo_plot.R b/scripts/plotting/logo_plot.R index faf70a3..b4435b7 100644 --- a/scripts/plotting/logo_plot.R +++ b/scripts/plotting/logo_plot.R @@ -1,71 +1,30 @@ -#======================================================================= -# Task: To generate a logo plot or bar plot but coloured -# aa properties. -# step1: read mcsm file and OR file -# step2: plot wild type positions -# step3: plot mutants per position coloured by aa properties -# step4: make the size of the letters/bars prop to OR if you can! +#!/usr/bin/env Rscript +######################################################### +# TASK: producing logo plots (ggseqlogo/Logolas) for mutations -# useful links -# https://stackoverflow.com/questions/5438474/plotting-a-sequence-logo-using-ggplot2 -# https://omarwagih.github.io/ggseqlogo/ -# https://kkdey.github.io/Logolas-pages/workflow.html -# A new sequence logo plot to highlight enrichment and depletion. -# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6288878/ - -#very good: http://www.cbs.dtu.dk/biotools/Seq2Logo-2.0/ +######################################################### #======================================================================= -#%% specify curr dir +# working dir and loading libraries getwd() -setwd("~/git/LSHTM_analysis/plotting_test/") +setwd("~/git/LSHTM_analysis/scripts/plotting") getwd() -#======================================================================= -#%% load packages -# header file -header_dir = "~/git/LSHTM_analysis/" -source(paste0(header_dir, "/", "my_header.R")) -#======================================================================= -#%% variable assignment: input and output paths & filenames -drug = "pyrazinamide" -gene = "pncA" -gene_match = paste0(gene,"_p.") -cat(gene_match) - -#=========== -# data dir -#=========== -datadir = paste0("~/git/Data") +source("Header_TT.R") +#library(ggplot2) +#library(data.table) +#library(dplyr) #=========== # input #=========== -# source R script 
"combining_two_df.R" -#indir = paste0(datadir, "/", drug, "/", "output") # reading files -indir = "../meta_data_analysis" # sourcing R script -in_filename = "combining_df_ps.R" -infile = paste0(indir, "/", in_filename) -cat(paste0("Input is a R script: ", "\"", infile, "\"") - , "\n========================================================") +source("combining_dfs_plotting.R") #=========== # output #=========== -# 1) lineage dist of all muts -outdir = paste0("~/git/Data", "/", drug, "/", "output/plots") #same as indir -#cat("Output dir: ", outdir, "\n") -#file_type = ".svg" -#out_filename1 = paste0(tolower(gene), "_lineage_dist_ps", file_type) -#outfile1 = paste0(outdir, "/", out_filename1) -#cat(paste0("Output plot1 :", outfile1) -# , "\n========================================================") -#%% end of variable assignment for input and output files -#======================================================================= -##%% read input file -cat("Reading input file(sourcing R script):", in_filename) - -source(infile) +logo_plot = "logo_plot.svg" +plot_logo_plot = paste0(plotdir,"/", logo_plot) #========================== # This will return: @@ -205,6 +164,7 @@ ggseqlogo(wide_df_logor, method="custom", seq_type="aa") + ylab("my custom heigh library(Logolas) # data was pnca_msa.txt +#FIXME: generate this file seqs = read.csv("~/git//Data/pyrazinamide/snp_seqsfile.txt" , header = FALSE @@ -217,4 +177,4 @@ logomaker(seqs, type = "EDLogo", color_type = "per_symbol" logomaker(seqs, type = "Logo", color_type = "per_symbol") #%% end of script -#======================================================================= \ No newline at end of file +#=======================================================================