repurposed basic_barplots_foldx.R
This commit is contained in:
parent
6f24fc1fac
commit
d45a9499a2
3 changed files with 120 additions and 30 deletions
|
@ -53,9 +53,29 @@ if(is.null(drug)|is.null(gene)) {
|
||||||
}
|
}
|
||||||
#########################################################
|
#########################################################
|
||||||
# call functions with relevant args
|
# call functions with relevant args
|
||||||
#drug = "streptomycin"
|
|
||||||
#gene = "gid"
|
#------------------------------------------
|
||||||
|
# import_dirs()
|
||||||
|
# should return the following variables:
|
||||||
|
# datadir
|
||||||
|
# indir
|
||||||
|
# outdir
|
||||||
|
# plotdir
|
||||||
|
# dr_muts_col
|
||||||
|
# other_muts_col
|
||||||
|
# resistance_col
|
||||||
|
#--------------------------------------------
|
||||||
import_dirs(drug, gene)
|
import_dirs(drug, gene)
|
||||||
|
#---------------------------------------------
|
||||||
|
# plotting_data()
|
||||||
|
# should return the following dfs:
|
||||||
|
# my_df
|
||||||
|
# my_df_u
|
||||||
|
# my_df_u_lig
|
||||||
|
# dup_muts
|
||||||
|
#----------------------------------------------
|
||||||
|
#infile = "/home/tanu/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv"
|
||||||
|
#infile = ""
|
||||||
|
|
||||||
#if (!exists("infile") && exists("gene")){
|
#if (!exists("infile") && exists("gene")){
|
||||||
if (!is.character(infile) && exists("gene")){
|
if (!is.character(infile) && exists("gene")){
|
||||||
|
@ -65,9 +85,6 @@ if (!is.character(infile) && exists("gene")){
|
||||||
cat("\nInput file not specified, assuming filename: ", infile, "\n")
|
cat("\nInput file not specified, assuming filename: ", infile, "\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
#infile = "/home/tanu/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv"
|
|
||||||
#infile = ""
|
|
||||||
|
|
||||||
# Get the DFs out of plotting_data()
|
# Get the DFs out of plotting_data()
|
||||||
pd_df = plotting_data(infile)
|
pd_df = plotting_data(infile)
|
||||||
my_df = pd_df[[1]]
|
my_df = pd_df[[1]]
|
||||||
|
@ -76,12 +93,6 @@ my_df_u_lig = pd_df[[3]]
|
||||||
dup_muts = pd_df[[4]]
|
dup_muts = pd_df[[4]]
|
||||||
|
|
||||||
#########################################################
|
#########################################################
|
||||||
# This script: should return the following dfs, directories and variables
|
|
||||||
# my_df
|
|
||||||
# my_df_u
|
|
||||||
# my_df_u_lig
|
|
||||||
# dup_muts
|
|
||||||
|
|
||||||
cat(paste0("Directories imported:"
|
cat(paste0("Directories imported:"
|
||||||
, "\ndatadir:", datadir
|
, "\ndatadir:", datadir
|
||||||
, "\nindir:", indir
|
, "\nindir:", indir
|
||||||
|
@ -94,7 +105,7 @@ cat(paste0("Directories imported:"
|
||||||
#, "\ngene_match:", gene_match
|
#, "\ngene_match:", gene_match
|
||||||
#, "\nLength of upos:", length(upos)
|
#, "\nLength of upos:", length(upos)
|
||||||
#, "\nAngstrom symbol:", angstroms_symbol))
|
#, "\nAngstrom symbol:", angstroms_symbol))
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
#=======
|
#=======
|
||||||
# output
|
# output
|
||||||
#=======
|
#=======
|
||||||
|
@ -117,7 +128,7 @@ df = my_df_u
|
||||||
str(df)
|
str(df)
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
#****************
|
#****************
|
||||||
# Plot 1:Count of stabilising and destabilsing muts
|
# Plot 1: Count of stabilising and destabilising muts
|
||||||
#****************
|
#****************
|
||||||
|
|
||||||
svg(plot_basic_bp_duet)
|
svg(plot_basic_bp_duet)
|
||||||
|
|
|
@ -1,27 +1,98 @@
|
||||||
#!/usr/bin/env Rscript
|
#!/usr/bin/env Rscript
|
||||||
#########################################################
|
#########################################################
|
||||||
# TASK: producing barplots for foldx
|
# TASK: producing barplots
|
||||||
# basic barplots with count of mutations
|
# basic barplots with count of mutations
|
||||||
# basic barplots with frequency of count of mutations
|
# basic barplots with frequency of count of mutations
|
||||||
|
|
||||||
|
# Depends on
|
||||||
|
## plotting_globals.R (previously dirs.R)
|
||||||
|
## plotting_data.R
|
||||||
#########################################################
|
#########################################################
|
||||||
#=======================================================================
|
# working dir
|
||||||
# working dir and loading libraries
|
|
||||||
getwd()
|
getwd()
|
||||||
setwd("~/git/LSHTM_analysis/scripts/plotting")
|
setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||||
getwd()
|
getwd()
|
||||||
|
|
||||||
|
# load libraries
|
||||||
#source("Header_TT.R")
|
#source("Header_TT.R")
|
||||||
library(ggplot2)
|
library(ggplot2)
|
||||||
library(data.table)
|
library(data.table)
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
source("plotting_data.R")
|
require("getopt", quietly = TRUE) # cmd parse arguments
|
||||||
|
|
||||||
# should return the following dfs, directories and variables
|
# load functions
|
||||||
|
source("plotting_globals.R")
|
||||||
|
source("plotting_data.R")
|
||||||
|
#########################################################
|
||||||
|
# command line args
|
||||||
|
#********************
|
||||||
|
# !!!FUTURE TODO!!!
|
||||||
|
# Can pass additional params of output/plot dir by user.
|
||||||
|
# Not strictly required for my workflow since it is optimised
|
||||||
|
# to have a streamlined input/output flow without filename worries.
|
||||||
|
#********************
|
||||||
|
spec = matrix(c(
|
||||||
|
"drug" ,"d", 1, "character",
|
||||||
|
"gene" ,"g", 1, "character",
|
||||||
|
"data" ,"f", 2, "character"
|
||||||
|
), byrow = TRUE, ncol = 4)
|
||||||
|
|
||||||
|
opt = getopt(spec)
|
||||||
|
|
||||||
|
#FIXME: detect if script running from cmd, then set these
|
||||||
|
drug = opt$drug
|
||||||
|
gene = opt$gene
|
||||||
|
infile = opt$data
|
||||||
|
|
||||||
|
# hardcoding when not using cmd
|
||||||
|
#drug = "streptomycin"
|
||||||
|
#gene = "gid"
|
||||||
|
|
||||||
|
if(is.null(drug)|is.null(gene)) {
|
||||||
|
stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
|
||||||
|
}
|
||||||
|
#########################################################
|
||||||
|
# call functions with relevant args
|
||||||
|
|
||||||
|
#------------------------------------------
|
||||||
|
# import_dirs()
|
||||||
|
# should return the following variables:
|
||||||
|
# datadir
|
||||||
|
# indir
|
||||||
|
# outdir
|
||||||
|
# plotdir
|
||||||
|
# dr_muts_col
|
||||||
|
# other_muts_col
|
||||||
|
# resistance_col
|
||||||
|
#--------------------------------------------
|
||||||
|
import_dirs(drug, gene)
|
||||||
|
#---------------------------------------------
|
||||||
|
# plotting_data()
|
||||||
|
# should return the following dfs:
|
||||||
# my_df
|
# my_df
|
||||||
# my_df_u
|
# my_df_u
|
||||||
# my_df_u_lig
|
# my_df_u_lig
|
||||||
# dup_muts
|
# dup_muts
|
||||||
|
#----------------------------------------------
|
||||||
|
#infile = "/home/tanu/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv"
|
||||||
|
#infile = ""
|
||||||
|
|
||||||
|
#if (!exists("infile") && exists("gene")){
|
||||||
|
if (!is.character(infile) && exists("gene")){
|
||||||
|
#in_filename_params = paste0(tolower(gene), "_all_params.csv")
|
||||||
|
in_filename_params = paste0(tolower(gene), "_comb_stab_struc_params.csv") # part combined for gid
|
||||||
|
infile = paste0(outdir, "/", in_filename_params)
|
||||||
|
cat("\nInput file not specified, assuming filename: ", infile, "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Get the DFs out of plotting_data()
|
||||||
|
pd_df = plotting_data(infile)
|
||||||
|
my_df = pd_df[[1]]
|
||||||
|
my_df_u = pd_df[[2]]
|
||||||
|
my_df_u_lig = pd_df[[3]]
|
||||||
|
dup_muts = pd_df[[4]]
|
||||||
|
|
||||||
|
#########################################################
|
||||||
cat(paste0("Directories imported:"
|
cat(paste0("Directories imported:"
|
||||||
, "\ndatadir:", datadir
|
, "\ndatadir:", datadir
|
||||||
, "\nindir:", indir
|
, "\nindir:", indir
|
||||||
|
@ -30,15 +101,11 @@ cat(paste0("Directories imported:"
|
||||||
|
|
||||||
cat(paste0("Variables imported:"
|
cat(paste0("Variables imported:"
|
||||||
, "\ndrug:", drug
|
, "\ndrug:", drug
|
||||||
, "\ngene:", gene
|
, "\ngene:", gene))
|
||||||
, "\ngene_match:", gene_match
|
#, "\ngene_match:", gene_match
|
||||||
, "\nLength of upos:", length(upos)
|
#, "\nLength of upos:", length(upos)
|
||||||
, "\nAngstrom symbol:", angstroms_symbol))
|
#, "\nAngstrom symbol:", angstroms_symbol))
|
||||||
|
#======================================================================
|
||||||
# clear excess variable
|
|
||||||
rm(my_df, upos, dup_muts, my_df_u_lig)
|
|
||||||
|
|
||||||
#=======================================================================
|
|
||||||
#=======
|
#=======
|
||||||
# output
|
# output
|
||||||
#=======
|
#=======
|
||||||
|
@ -56,7 +123,7 @@ df = my_df_u
|
||||||
str(df)
|
str(df)
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
#****************
|
#****************
|
||||||
# Plot 1:Count of stabilising and destabilsing muts
|
# Plot 1: Count of stabilising and destabilising muts
|
||||||
#****************
|
#****************
|
||||||
|
|
||||||
svg(plot_basic_bp_foldx)
|
svg(plot_basic_bp_foldx)
|
||||||
|
@ -97,5 +164,6 @@ print(foldx_outcome_count)
|
||||||
dev.off()
|
dev.off()
|
||||||
|
|
||||||
table(df$foldx_outcome)
|
table(df$foldx_outcome)
|
||||||
#=======================================================================
|
########################################################################
|
||||||
|
# end of foldx barplot
|
||||||
|
########################################################################
|
|
@ -16,3 +16,14 @@ basic_barplots_PS.R:source("dirs.R")
|
||||||
resolving_ambiguous_muts.R:source("dirs.R")
|
resolving_ambiguous_muts.R:source("dirs.R")
|
||||||
|
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
|
|
||||||
|
#========
|
||||||
|
# basic_barplots_foldx.R:
|
||||||
|
#========
|
||||||
|
./basic_barplots_foldx.R -d streptomycin -g gid
|
||||||
|
# uses the default input file name unless one is given with the -f flag
|
||||||
|
|
||||||
|
sources:
|
||||||
|
## plotting_globals.R (previously dirs.R)
|
||||||
|
## plotting_data.R
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue