LSHTM_analysis/scripts/af_or_calcs.R

140 lines
4.6 KiB
R
Executable file

#!/usr/bin/env Rscript
#########################################################
# TASK: To calculate Allele Frequency and
# Odds Ratio from master data
#########################################################
# working dir
# The relative source() calls below are resolved against this directory,
# so it has to be set before any helper file is loaded.
setwd("~/git/LSHTM_analysis/scripts")
getwd()
# load libraries
#source("~/git/LSHTM_analysis/scripts/Header_TT.R")
# library() stops with an error if getopt is not installed; require()
# would only return FALSE and the script would fail later at getopt(spec).
library("getopt", quietly = TRUE) # cmd parse arguments
# load functions
# Helper files, loaded relative to the working directory set above.
source("functions/plotting_globals.R")
source("functions/mychisq_or.R")
source("functions/myaf_or_calcs.R")
#############################################################
# command line args
#********************
# getopt specification, one row per option:
#   long name, short flag, argument mask, argument type
# Argument mask (getopt convention): 1 = required argument,
# 2 = optional argument.
spec <- matrix(c(
  "drug"          , "d", 1, "character",
  "gene"          , "g", 1, "character",
  "master_data"   , "m", 2, "character",
  "gene_data"     , "G", 2, "character",
  "outfile"       , "o", 2, "character",
  "idcol"         , "I", 2, "character",
  "drmuts_col"    , "D", 2, "character",
  "othermuts_col" , "O", 2, "character"
), byrow = TRUE, ncol = 4)

opt <- getopt(spec)

# Copy the parsed options into the names used by the rest of the script.
# Options that were not supplied on the command line come back as NULL.
drug            <- opt$drug
gene            <- opt$gene
infile_master   <- opt$master_data
infile_metadata <- opt$gene_data
outfile         <- opt$outfile
idcol           <- opt$idcol
dr_muts_col     <- opt$drmuts_col
other_muts_col  <- opt$othermuts_col

# drug and gene are mandatory. Use the scalar, short-circuiting `||`
# here: an `if` condition must be a single TRUE/FALSE value.
if (is.null(drug) || is.null(gene)) {
  stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
}

# import_dirs()
# Called with the drug and gene names. `datadir` and `outdir`, which are
# used further down, are not defined anywhere in this script --
# presumably this call defines them (verify against the sourced helpers).
import_dirs(drug, gene)
# ---- Fallback values for optional arguments ------------------------

# Input file 1: master data.
# Used only when --master_data was not given. The file name is appended
# directly to `datadir` (no separator is inserted here).
if (is.null(infile_master)) {
  # Current default is the 35k dataset; an earlier, now commented-out
  # choice was 'original_tanushree_data_v2.csv' (19K).
  in_filename_master <- "mtb_gwas_meta_v6.csv"
  infile_master <- paste0(datadir, in_filename_master)
  cat("\nInput file 1 not specified, assuming filename: ", infile_master)
  cat(paste0("\nReading infile 1 i.e raw data: ", infile_master))
}

# Input file 2: gene associated metadata, the file from which valid
# snps are extracted and to which the calcs are added.
# Per the original note, this is outfile_metadata from data_extraction.py.
if (is.null(infile_metadata)) {
  in_filename_metadata <- paste0(tolower(gene), "_metadata.csv")
  infile_metadata <- paste0(outdir, "/", in_filename_metadata)
  cat("\nInput file 2 not specified, assuming filename: ", infile_metadata)
  cat(paste0("\nReading infile 2 i.e gene associated metadata:", infile_metadata))
}

# Outfile: csv file containing AF and OR, named <gene>_af_or.csv
# (lower-cased gene) under `outdir` when --outfile was not given.
if (is.null(outfile)) {
  out_filename_af_or <- paste0(tolower(gene), "_af_or.csv")
  outfile <- paste0(outdir, "/", out_filename_af_or)
  cat("\nOutfile not specified, assuming filename: ", outfile)
  cat(paste0("\nOutput file with full path:", outfile))
}

# idcol: falls back to the column name "id".
if (is.null(idcol)) {
  idcol <- "id"
}
#-------------------------------------------
# dr-and-others muts cols
# Default colnames are constructed from the drug name:
# (dr_mutations_<drug>), (other_mutations_<drug>)
#
# A value given on the command line always wins. It is re-applied here
# because import_dirs() ran after the options were first copied out of
# `opt`. NOTE(review): if import_dirs() (plotting_globals.R) defines
# dr_muts_col / other_muts_col itself, it would otherwise overwrite the
# user's choice -- confirm against that file.
#
# Previously the NULL branches only evaluated the variable without
# assigning anything, so NULL could be passed on to my_afor().
#-------------------------------------------
if (!is.null(opt$drmuts_col)) {
  dr_muts_col <- opt$drmuts_col
} else if (is.null(dr_muts_col)) {
  dr_muts_col <- paste0("dr_mutations_", drug)
  cat("\ndr mutation colname not specified, using default: "
      , dr_muts_col, "\n")
}

if (!is.null(opt$othermuts_col)) {
  other_muts_col <- opt$othermuts_col
} else if (is.null(other_muts_col)) {
  other_muts_col <- paste0("other_mutations_", drug)
  cat("\nother mutation colname not specified, using default: "
      , other_muts_col, "\n")
}
# Echo the effective parameters (given on the command line or filled in
# with defaults above) so the run is traceable from its log.
cat(
  "======================",
  "\nParameters passed:",
  "\n======================",
  "\nDRUG name: ", drug, "\n",
  "\nGENE name: ", gene, "\n",
  "\nReading infile 1 i.e raw data: ", infile_master, "\n",
  "\nReading infile 2 i.e gene associated metadata:", infile_metadata, "\n",
  "\nOutput file with full path:", outfile, "\n",
  "\nColumn name of id:", idcol, "\n",
  "\ndr mutation colname:", dr_muts_col, "\n",
  "\nother mutation colname:", other_muts_col, "\n"
)

#=======================================================================
# Run the calculation: my_afor()
# Arguments are passed by position, in this order: drug, gene, master
# data file, gene metadata file, output file, id column, dr-mutations
# column, other-mutations column.
#=======================================================================
my_afor(
  drug,
  gene,
  infile_master,
  infile_metadata,
  outfile,
  idcol,
  dr_muts_col,
  other_muts_col
)
#=======================================================================