tidied plotting_data.R as a function returning a lits of dfs

This commit is contained in:
Tanushree Tunstall 2021-06-08 16:00:28 +01:00
parent b8d0bc416a
commit b25511a239
3 changed files with 134 additions and 81 deletions

View file

@ -3,40 +3,27 @@
# TASK: producing barplots
# basic barplots with count of mutations
# basic barplots with frequency of count of mutations
# Depends on
## plotting_globals.R (previously dir.R)
## plotting_data.R
#########################################################
#=======================================================================
# working dir and loading libraries
# working dir
getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting")
getwd()
# load libraries
#source("Header_TT.R")
library(ggplot2)
library(data.table)
library(dplyr)
require("getopt", quietly = TRUE) # cmd parse arguments
# Set globals:
# load functions
source("plotting_globals.R")
# pretent cli
drug = "streptomycin"
gene = "gid"
infile = "merged_df3_short.csv"
import_dirs(drug, gene)
source("plotting_data.R")
plotting_data("merged_df3_short.csv")
if (!exists("infile") && exists("gene")){
#in_filename_params = paste0(tolower(gene), "_all_params.csv")
in_filename_params = paste0(tolower(gene), "_comb_stab_struc_params.csv") # part combined for gid
infile = paste0(outdir, "/", in_filename_params)
}
plotting_data(infile)
#=======================================================================
#########################################################
# command line args
spec = matrix(c(
"drug" , "d", 1, "character",
@ -56,9 +43,29 @@ gene = opt$gene
if(is.null(drug)|is.null(gene)) {
stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
}
#=======================================================================
#########################################################
# call functions with relevant args
drug = "streptomycin"
gene = "gid"
import_dirs(drug, gene)
# should return the following dfs, directories and variables
if (!exists("infile") && exists("gene")){
#in_filename_params = paste0(tolower(gene), "_all_params.csv")
in_filename_params = paste0(tolower(gene), "_comb_stab_struc_params.csv") # part combined for gid
infile = paste0(outdir, "/", in_filename_params)
}
#infile = "/home/tanu/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv"
#infile = ""
# Get the DFs out of plotting_data()
pd_df = plotting_data(infile)
my_df = pd_df[[1]]
my_df_u = pd_df[[2]]
my_df_u_lig = pd_df[[3]]
dup_muts = pd_df[[4]]
#########################################################
# This script: should return the following dfs, directories and variables
# my_df
# my_df_u
# my_df_u_lig
@ -85,11 +92,11 @@ rm(my_df, upos, dup_muts, my_df_u_lig)
# output
#=======
# plot 1
basic_bp_duet = "basic_barplot_PS.svg"
basic_bp_duet = paste0(tolower(gene), "_basic_barplot_PS.svg")
plot_basic_bp_duet = paste0(plotdir,"/", basic_bp_duet)
# plot 2
pos_count_duet = "position_count_PS.svg"
pos_count_duet = paste0(tolower(gene), "_position_count_PS.svg")
plot_pos_count_duet = paste0(plotdir, "/", pos_count_duet)
#=======================================================================
@ -194,12 +201,38 @@ foo = select(df, mutationinformation
#--------------
# start plot 2
#--------------
svg(plot_pos_count_duet)
print(paste0("plot filename:", plot_pos_count_duet))
my_ats = 25 # axis text size
my_als = 22 # axis label size
# to make x axis display all positions
# not sure if to use with sort or directly
my_x = sort(unique(snpsBYpos_df$snpsBYpos))
g = ggplot(snpsBYpos_df, aes(x = snpsBYpos))
OutPlot_pos_count = g + geom_bar(aes (alpha = 0.5)
, show.legend = FALSE) +
scale_x_continuous(breaks = unique(snpsBYpos_df$snpsBYpos)) +
#scale_x_continuous(breaks = my_x) +
geom_label(stat = "count", aes(label = ..count..)
, color = "black"
, size = 10) +
theme(axis.text.x = element_text(size = my_ats
, angle = 0)
, axis.text.y = element_text(size = my_ats
, angle = 0
, hjust = 1)
, axis.title.x = element_text(size = my_als)
, axis.title.y = element_text(size = my_als)
, plot.title = element_blank()) +
labs(x = "Number of nsSNPs"
, y = "Number of Sites")
print(OutPlot_pos_count)
source("dirs.R")
dev.off()
########################################################################
# end of PS barplots
########################################################################
########################################################################